diff --git a/README.md b/README.md index ac941ba3d..7b8b847da 100644 --- a/README.md +++ b/README.md @@ -19,9 +19,10 @@ The following example can view in the [langchain-example](langchain-examples/src - [SQL Chains](langchain-examples/src/main/java/com/hw/langchain/examples/chains/SqlChainExample.java) - [API Chains](langchain-examples/src/main/java/com/hw/langchain/examples/chains/ApiChainExample.java) -- [QA-Milvus](langchain-examples/src/main/java/com/hw/langchain/examples/chains/MilvusExample.java) -- [QA-Pinecone](langchain-examples/src/main/java/com/hw/langchain/examples/chains/RetrievalQaExample.java) +- [QA-Milvus-Text](langchain-examples/src/main/java/com/hw/langchain/examples/chains/MilvusExample.java) +- [QA-Pinecone-Text](langchain-examples/src/main/java/com/hw/langchain/examples/chains/RetrievalQaExample.java) - [QA-Pinecone-Markdown](langchain-examples/src/main/java/com/hw/langchain/examples/chains/RetrievalMarkdownExample.java) +- [Summarization](langchain-examples/src/main/java/com/hw/langchain/examples/chains/SummarizationExample.java) - [Agent with Google Search](langchain-examples/src/main/java/com/hw/langchain/examples/agents/LlmAgentExample.java) - [Spark SQL AI](langchain-bigdata/langchain-spark/src/test/java/com/hw/langchain/agents/toolkits/spark/sql/toolkit/SparkSqlToolkitTest.java) - [Flink SQL AI](langchain-bigdata/langchain-flink/src/test/java/com/hw/langchain/agents/toolkits/flink/sql/toolkit/FlinkSqlToolkitTest.java) diff --git a/langchain-core/pom.xml b/langchain-core/pom.xml index bce4cb80f..cf5fb14cf 100644 --- a/langchain-core/pom.xml +++ b/langchain-core/pom.xml @@ -75,6 +75,11 @@ jtokkit + + org.jsoup + jsoup + + org.slf4j slf4j-api diff --git a/langchain-core/src/main/java/com/hw/langchain/chains/base/Chain.java b/langchain-core/src/main/java/com/hw/langchain/chains/base/Chain.java index 13733e954..aab019a82 100644 --- a/langchain-core/src/main/java/com/hw/langchain/chains/base/Chain.java +++ b/langchain-core/src/main/java/com/hw/langchain/chains/base/Chain.java @@ -84,7 +84,7 @@ private void validateOutputs(Map outputs) { * If False, both input keys and new keys generated by this chain will be returned. * Defaults to False. */ - public Map call(String input, boolean returnOnlyOutputs) { + public Map call(Object input, boolean returnOnlyOutputs) { Map inputs = prepInputs(input); return call(inputs, returnOnlyOutputs); } @@ -126,7 +126,7 @@ private Map prepOutputs(Map inputs, Map prepInputs(String input) { + private Map prepInputs(Object input) { Set inputKeys = new HashSet<>(inputKeys()); if (memory != null) { // If there are multiple input keys, but some get set by memory so that only one is not set, @@ -162,7 +162,7 @@ public Map prepInputs(Map inputs) { /** * Run the chain as text in, text out */ - public String run(String args) { + public String run(Object args) { if (outputKeys().size() != 1) { throw new IllegalArgumentException( "The `run` method is not supported when there is not exactly one output key. Got " + outputKeys() diff --git a/langchain-core/src/main/java/com/hw/langchain/chains/summarize/SummarizeUtils.java b/langchain-core/src/main/java/com/hw/langchain/chains/summarize/SummarizeUtils.java index 5028ef220..6197cd379 100644 --- a/langchain-core/src/main/java/com/hw/langchain/chains/summarize/SummarizeUtils.java +++ b/langchain-core/src/main/java/com/hw/langchain/chains/summarize/SummarizeUtils.java @@ -18,6 +18,12 @@ package com.hw.langchain.chains.summarize; +import com.hw.langchain.base.language.BaseLanguageModel; +import com.hw.langchain.chains.combine.documents.stuff.StuffDocumentsChain; +import com.hw.langchain.chains.combine.documents.stuff.StuffUtils; +import com.hw.langchain.chains.llm.LLMChain; +import com.hw.langchain.prompts.base.BasePromptTemplate; + /** * @author HamaWhite */ @@ -26,4 +32,15 @@ public class SummarizeUtils { private SummarizeUtils() { throw new IllegalStateException("Utility class"); } + + public static StuffDocumentsChain loadStuffChain(BaseLanguageModel llm) { + return loadStuffChain(llm, StuffPrompt.PROMPT, "text", "\n\n"); + } + + public static StuffDocumentsChain loadStuffChain(BaseLanguageModel llm, BasePromptTemplate prompt, + String documentVariableName, String documentSeparator) { + LLMChain llmChain = new LLMChain(llm, prompt); + return new StuffDocumentsChain(llmChain, StuffUtils.getDefaultDocumentPrompt(), documentVariableName, + documentSeparator); + } } diff --git a/langchain-core/src/main/java/com/hw/langchain/document/loaders/WebBaseLoader.java b/langchain-core/src/main/java/com/hw/langchain/document/loaders/WebBaseLoader.java new file mode 100644 index 000000000..1bc68bc47 --- /dev/null +++ b/langchain-core/src/main/java/com/hw/langchain/document/loaders/WebBaseLoader.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.hw.langchain.document.loaders; + +import com.google.common.collect.Maps; +import com.hw.langchain.document.loaders.base.BaseLoader; +import com.hw.langchain.exception.LangChainException; +import com.hw.langchain.schema.Document; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Element; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * @author HamaWhite + */ +public class WebBaseLoader extends BaseLoader { + + private final List webUrls; + + public WebBaseLoader(List webUrls) { + this.webUrls = webUrls; + } + + @Override + public List load() { + List documents = new ArrayList<>(webUrls.size()); + for (String url : webUrls) { + try { + org.jsoup.nodes.Document doc = Jsoup.connect(url).get(); + Map metadata = buildMetadata(doc, url); + + documents.add(new Document(doc.wholeText(), metadata)); + } catch (IOException e) { + throw new LangChainException(errorMessage(url), e); + } + } + return documents; + } + + private Map buildMetadata(org.jsoup.nodes.Document doc, String url) { + Map metadata = Maps.newHashMap(); + metadata.put("source", url); + + Element title = doc.select("title").first(); + if (title != null) { + metadata.put("title", title.text()); + } + Element description = doc.select("meta[name=description]").first(); + metadata.put("description", description != null ? description.attr("content") : "No description found."); + + Element html = doc.select("html").first(); + metadata.put("language", html != null ? html.attr("lang") : "No language found."); + return metadata; + } +} diff --git a/langchain-core/src/test/java/com/hw/langchain/chains/summarize/SummarizeUtilsTest.java b/langchain-core/src/test/java/com/hw/langchain/chains/summarize/SummarizeUtilsTest.java new file mode 100644 index 000000000..d945012b0 --- /dev/null +++ b/langchain-core/src/test/java/com/hw/langchain/chains/summarize/SummarizeUtilsTest.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.hw.langchain.chains.summarize; + +import com.hw.langchain.base.language.BaseLanguageModel; +import com.hw.langchain.document.loaders.WebBaseLoader; +import com.hw.langchain.llms.openai.OpenAIChat; +import com.hw.langchain.schema.Document; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * Summarization use cases + * + * @author HamaWhite + */ +@Disabled("Test requires costly OpenAI calls, can be run manually.") +class SummarizeUtilsTest { + + private static BaseLanguageModel llm; + + private static List docs; + + @BeforeAll + static void setUp() { + llm = OpenAIChat.builder() + .temperature(0) + .model("gpt-3.5-turbo-16k") + .build() + .init(); + + var loader = new WebBaseLoader(List.of("https://lilianweng.github.io/posts/2023-06-23-agent/")); + docs = loader.load(); + } + + @Test + void testLoadStuffChain() { + var chain = SummarizeUtils.loadStuffChain(llm); + var actual = chain.run(docs); + + var expected = + "The article discusses the concept of building autonomous agents powered by large language models " + + "(LLMs). It explores the components of such agents, including planning, memory, and tool " + + "use. The article provides case studies and proof-of-concept examples of LLM-powered agents, " + + "as well as challenges and limitations associated with their development."; + assertEquals(expected, actual); + } +} \ No newline at end of file diff --git a/langchain-examples/src/main/java/com/hw/langchain/examples/chains/SummarizationExample.java b/langchain-examples/src/main/java/com/hw/langchain/examples/chains/SummarizationExample.java new file mode 100644 index 000000000..b2468b75f --- /dev/null +++ b/langchain-examples/src/main/java/com/hw/langchain/examples/chains/SummarizationExample.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.hw.langchain.examples.chains; + +import com.hw.langchain.chains.summarize.SummarizeUtils; +import com.hw.langchain.document.loaders.WebBaseLoader; +import com.hw.langchain.llms.openai.OpenAIChat; + +import java.util.List; + +import static com.hw.langchain.examples.utils.PrintUtils.println; + +/** + * Summarization use cases + * + * @author HamaWhite + */ +public class SummarizationExample { + + public static void main(String[] args) { + var llm = OpenAIChat.builder() + .temperature(0) + .model("gpt-3.5-turbo-16k") + .build() + .init(); + + var loader = new WebBaseLoader(List.of("https://lilianweng.github.io/posts/2023-06-23-agent/")); + var docs = loader.load(); + + var chain = SummarizeUtils.loadStuffChain(llm); + var result = chain.run(docs); + + println(result); + } +} diff --git a/langchain-server/pom.xml b/langchain-server/pom.xml deleted file mode 100644 index 1571a0814..000000000 --- a/langchain-server/pom.xml +++ /dev/null @@ -1,36 +0,0 @@ - - - 4.0.0 - - io.github.hamawhitegg - langchain-java - 0.1.11 - - - langchain-server - - - true - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - com.diffplug.spotless - spotless-maven-plugin - - - org.apache.maven.plugins - maven-deploy-plugin - - true - - - - - diff --git a/langchain-web/pom.xml b/langchain-web/pom.xml deleted file mode 100644 index 113f55563..000000000 --- a/langchain-web/pom.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - 4.0.0 - - io.github.hamawhitegg - langchain-java - 0.1.11 - - - langchain-web - - - true - - - - - - org.apache.maven.plugins - maven-deploy-plugin - - true - - - - - diff --git a/pom.xml b/pom.xml index 535999113..8aea52979 100644 --- a/pom.xml +++ b/pom.xml @@ -13,8 +13,6 @@ openai-client langchain-core - langchain-server - langchain-web serpapi-client pinecone-client langchain-examples @@ -28,6 +26,7 @@ 8.0.32 2.2.9 2.7.3 + 1.16.1 1.0.0-M2.1 1.18.28 0.5.0 @@ -157,6 +156,12 @@ ${reflections.version} + + org.jsoup + jsoup + ${jsoup.version} + + org.slf4j slf4j-api