diff --git a/README.md b/README.md
index ac941ba3d..7b8b847da 100644
--- a/README.md
+++ b/README.md
@@ -19,9 +19,10 @@ The following example can view in the [langchain-example](langchain-examples/src
- [SQL Chains](langchain-examples/src/main/java/com/hw/langchain/examples/chains/SqlChainExample.java)
- [API Chains](langchain-examples/src/main/java/com/hw/langchain/examples/chains/ApiChainExample.java)
-- [QA-Milvus](langchain-examples/src/main/java/com/hw/langchain/examples/chains/MilvusExample.java)
-- [QA-Pinecone](langchain-examples/src/main/java/com/hw/langchain/examples/chains/RetrievalQaExample.java)
+- [QA-Milvus-Text](langchain-examples/src/main/java/com/hw/langchain/examples/chains/MilvusExample.java)
+- [QA-Pinecone-Text](langchain-examples/src/main/java/com/hw/langchain/examples/chains/RetrievalQaExample.java)
- [QA-Pinecone-Markdown](langchain-examples/src/main/java/com/hw/langchain/examples/chains/RetrievalMarkdownExample.java)
+- [Summarization](langchain-examples/src/main/java/com/hw/langchain/examples/chains/SummarizationExample.java)
- [Agent with Google Search](langchain-examples/src/main/java/com/hw/langchain/examples/agents/LlmAgentExample.java)
- [Spark SQL AI](langchain-bigdata/langchain-spark/src/test/java/com/hw/langchain/agents/toolkits/spark/sql/toolkit/SparkSqlToolkitTest.java)
- [Flink SQL AI](langchain-bigdata/langchain-flink/src/test/java/com/hw/langchain/agents/toolkits/flink/sql/toolkit/FlinkSqlToolkitTest.java)
diff --git a/langchain-core/pom.xml b/langchain-core/pom.xml
index bce4cb80f..cf5fb14cf 100644
--- a/langchain-core/pom.xml
+++ b/langchain-core/pom.xml
@@ -75,6 +75,11 @@
jtokkit
+
+ org.jsoup
+ jsoup
+
+
org.slf4j
slf4j-api
diff --git a/langchain-core/src/main/java/com/hw/langchain/chains/base/Chain.java b/langchain-core/src/main/java/com/hw/langchain/chains/base/Chain.java
index 13733e954..aab019a82 100644
--- a/langchain-core/src/main/java/com/hw/langchain/chains/base/Chain.java
+++ b/langchain-core/src/main/java/com/hw/langchain/chains/base/Chain.java
@@ -84,7 +84,7 @@ private void validateOutputs(Map outputs) {
* If False, both input keys and new keys generated by this chain will be returned.
* Defaults to False.
*/
- public Map call(String input, boolean returnOnlyOutputs) {
+ public Map call(Object input, boolean returnOnlyOutputs) {
Map inputs = prepInputs(input);
return call(inputs, returnOnlyOutputs);
}
@@ -126,7 +126,7 @@ private Map prepOutputs(Map inputs, Map prepInputs(String input) {
+ private Map prepInputs(Object input) {
Set inputKeys = new HashSet<>(inputKeys());
if (memory != null) {
// If there are multiple input keys, but some get set by memory so that only one is not set,
@@ -162,7 +162,7 @@ public Map prepInputs(Map inputs) {
/**
* Run the chain as text in, text out
*/
- public String run(String args) {
+ public String run(Object args) {
if (outputKeys().size() != 1) {
throw new IllegalArgumentException(
"The `run` method is not supported when there is not exactly one output key. Got " + outputKeys()
diff --git a/langchain-core/src/main/java/com/hw/langchain/chains/summarize/SummarizeUtils.java b/langchain-core/src/main/java/com/hw/langchain/chains/summarize/SummarizeUtils.java
index 5028ef220..6197cd379 100644
--- a/langchain-core/src/main/java/com/hw/langchain/chains/summarize/SummarizeUtils.java
+++ b/langchain-core/src/main/java/com/hw/langchain/chains/summarize/SummarizeUtils.java
@@ -18,6 +18,12 @@
package com.hw.langchain.chains.summarize;
+import com.hw.langchain.base.language.BaseLanguageModel;
+import com.hw.langchain.chains.combine.documents.stuff.StuffDocumentsChain;
+import com.hw.langchain.chains.combine.documents.stuff.StuffUtils;
+import com.hw.langchain.chains.llm.LLMChain;
+import com.hw.langchain.prompts.base.BasePromptTemplate;
+
/**
* @author HamaWhite
*/
@@ -26,4 +32,35 @@ public class SummarizeUtils {
private SummarizeUtils() {
throw new IllegalStateException("Utility class");
}
+
+    /**
+     * Creates a {@link StuffDocumentsChain} for summarization with the defaults:
+     * {@code StuffPrompt.PROMPT} as the prompt, "text" as the document variable name and
+     * "\n\n" as the document separator.
+     *
+     * @param llm language model handed to the four-argument overload
+     * @return the chain built by the four-argument overload
+     */
+    public static StuffDocumentsChain loadStuffChain(BaseLanguageModel llm) {
+        return loadStuffChain(llm, StuffPrompt.PROMPT, "text", "\n\n");
+    }
+
+    /**
+     * Creates a {@link StuffDocumentsChain} that wraps a new {@link LLMChain} built from the
+     * given model and prompt, together with the default document prompt from {@link StuffUtils}.
+     *
+     * @param llm language model for the inner {@link LLMChain}
+     * @param prompt prompt template for the inner {@link LLMChain}
+     * @param documentVariableName passed through to the {@link StuffDocumentsChain} constructor
+     * @param documentSeparator passed through to the {@link StuffDocumentsChain} constructor
+     * @return the newly created chain
+     */
+    public static StuffDocumentsChain loadStuffChain(BaseLanguageModel llm, BasePromptTemplate prompt,
+            String documentVariableName, String documentSeparator) {
+        return new StuffDocumentsChain(
+                new LLMChain(llm, prompt),
+                StuffUtils.getDefaultDocumentPrompt(),
+                documentVariableName,
+                documentSeparator);
+    }
}
diff --git a/langchain-core/src/main/java/com/hw/langchain/document/loaders/WebBaseLoader.java b/langchain-core/src/main/java/com/hw/langchain/document/loaders/WebBaseLoader.java
new file mode 100644
index 000000000..1bc68bc47
--- /dev/null
+++ b/langchain-core/src/main/java/com/hw/langchain/document/loaders/WebBaseLoader.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.hw.langchain.document.loaders;
+
+import com.google.common.collect.Maps;
+import com.hw.langchain.document.loaders.base.BaseLoader;
+import com.hw.langchain.exception.LangChainException;
+import com.hw.langchain.schema.Document;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Element;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Document loader that fetches web pages with jsoup and converts each page into a
+ * {@link Document} holding the page text plus basic page metadata.
+ *
+ * @author HamaWhite
+ */
+public class WebBaseLoader extends BaseLoader {
+
+    private final List webUrls;
+
+    /**
+     * Creates a loader for the given page URLs.
+     *
+     * @param webUrls URLs of the pages to fetch when {@link #load()} is called
+     */
+    public WebBaseLoader(List webUrls) {
+        this.webUrls = webUrls;
+    }
+
+    /**
+     * Fetches every configured URL and returns one document per page, in URL order.
+     * Each document holds the page's whole text and the metadata built for that page.
+     *
+     * @return the loaded documents
+     * @throws LangChainException if fetching a URL fails with an {@link IOException}
+     */
+    @Override
+    public List load() {
+        List documents = new ArrayList<>(webUrls.size());
+        for (String url : webUrls) {
+            try {
+                org.jsoup.nodes.Document doc = Jsoup.connect(url).get();
+                Map metadata = buildMetadata(doc, url);
+
+                documents.add(new Document(doc.wholeText(), metadata));
+            } catch (IOException e) {
+                throw new LangChainException(errorMessage(url), e);
+            }
+        }
+        return documents;
+    }
+
+    /**
+     * Collects the metadata for a fetched page: "source" (the URL), "title" (only when the
+     * page has a title element), "description" and "language".
+     */
+    private Map buildMetadata(org.jsoup.nodes.Document doc, String url) {
+        Map metadata = Maps.newHashMap();
+        metadata.put("source", url);
+
+        Element title = doc.select("title").first();
+        if (title != null) {
+            metadata.put("title", title.text());
+        }
+        Element description = doc.select("meta[name=description]").first();
+        metadata.put("description", attrOrDefault(description, "content", "No description found."));
+
+        Element html = doc.select("html").first();
+        metadata.put("language", attrOrDefault(html, "lang", "No language found."));
+        return metadata;
+    }
+
+    /**
+     * Returns the named attribute of the element, or the fallback when the element is
+     * missing or does not carry that attribute. Element.attr alone yields an empty string
+     * for an absent attribute, so the fallback text would never be used in that case.
+     */
+    private static String attrOrDefault(Element element, String attributeKey, String fallback) {
+        if (element == null || !element.hasAttr(attributeKey)) {
+            return fallback;
+        }
+        return element.attr(attributeKey);
+    }
+}
diff --git a/langchain-core/src/test/java/com/hw/langchain/chains/summarize/SummarizeUtilsTest.java b/langchain-core/src/test/java/com/hw/langchain/chains/summarize/SummarizeUtilsTest.java
new file mode 100644
index 000000000..d945012b0
--- /dev/null
+++ b/langchain-core/src/test/java/com/hw/langchain/chains/summarize/SummarizeUtilsTest.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.hw.langchain.chains.summarize;
+
+import com.hw.langchain.base.language.BaseLanguageModel;
+import com.hw.langchain.document.loaders.WebBaseLoader;
+import com.hw.langchain.llms.openai.OpenAIChat;
+import com.hw.langchain.schema.Document;
+
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * Manual end-to-end check of the summarization chain built by {@link SummarizeUtils}: loads a
+ * web page, runs the chain against OpenAI and compares the output with a hard-coded summary.
+ *
+ * @author HamaWhite
+ */
+@Disabled("Test requires costly OpenAI calls, can be run manually.")
+class SummarizeUtilsTest {
+
+    /** Page that is loaded once and summarized by the test. */
+    private static final String ARTICLE_URL = "https://lilianweng.github.io/posts/2023-06-23-agent/";
+
+    private static BaseLanguageModel chatModel;
+
+    private static List articleDocs;
+
+    /** Creates the chat model and loads the article once for the whole class. */
+    @BeforeAll
+    static void setUp() {
+        chatModel = OpenAIChat.builder()
+                .temperature(0)
+                .model("gpt-3.5-turbo-16k")
+                .build()
+                .init();
+        articleDocs = new WebBaseLoader(List.of(ARTICLE_URL)).load();
+    }
+
+    /** The default stuff chain must return exactly the hard-coded summary. */
+    @Test
+    void testLoadStuffChain() {
+        String summary = SummarizeUtils.loadStuffChain(chatModel).run(articleDocs);
+
+        assertEquals(
+                "The article discusses the concept of building autonomous agents powered by large language models " +
+                        "(LLMs). It explores the components of such agents, including planning, memory, and tool " +
+                        "use. The article provides case studies and proof-of-concept examples of LLM-powered agents, " +
+                        "as well as challenges and limitations associated with their development.",
+                summary);
+    }
+}
\ No newline at end of file
diff --git a/langchain-examples/src/main/java/com/hw/langchain/examples/chains/SummarizationExample.java b/langchain-examples/src/main/java/com/hw/langchain/examples/chains/SummarizationExample.java
new file mode 100644
index 000000000..b2468b75f
--- /dev/null
+++ b/langchain-examples/src/main/java/com/hw/langchain/examples/chains/SummarizationExample.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.hw.langchain.examples.chains;
+
+import com.hw.langchain.chains.summarize.SummarizeUtils;
+import com.hw.langchain.document.loaders.WebBaseLoader;
+import com.hw.langchain.llms.openai.OpenAIChat;
+
+import java.util.List;
+
+import static com.hw.langchain.examples.utils.PrintUtils.println;
+
+/**
+ * Runnable example that summarizes a web page with the default stuff chain from
+ * {@link SummarizeUtils} and prints the summary.
+ *
+ * @author HamaWhite
+ */
+public class SummarizationExample {
+
+    /**
+     * Builds the chat model, loads the article, runs the summarization chain and prints the result.
+     *
+     * @param args command-line arguments, not used
+     */
+    public static void main(String[] args) {
+        var chatModel = OpenAIChat.builder()
+                .temperature(0)
+                .model("gpt-3.5-turbo-16k")
+                .build()
+                .init();
+
+        var articleUrls = List.of("https://lilianweng.github.io/posts/2023-06-23-agent/");
+        var articleDocs = new WebBaseLoader(articleUrls).load();
+
+        String summary = SummarizeUtils.loadStuffChain(chatModel).run(articleDocs);
+        println(summary);
+    }
+}
diff --git a/langchain-server/pom.xml b/langchain-server/pom.xml
deleted file mode 100644
index 1571a0814..000000000
--- a/langchain-server/pom.xml
+++ /dev/null
@@ -1,36 +0,0 @@
-
-
- 4.0.0
-
- io.github.hamawhitegg
- langchain-java
- 0.1.11
-
-
- langchain-server
-
-
- true
-
-
-
-
-
- org.apache.maven.plugins
- maven-compiler-plugin
-
-
- com.diffplug.spotless
- spotless-maven-plugin
-
-
- org.apache.maven.plugins
- maven-deploy-plugin
-
- true
-
-
-
-
-
diff --git a/langchain-web/pom.xml b/langchain-web/pom.xml
deleted file mode 100644
index 113f55563..000000000
--- a/langchain-web/pom.xml
+++ /dev/null
@@ -1,28 +0,0 @@
-
-
- 4.0.0
-
- io.github.hamawhitegg
- langchain-java
- 0.1.11
-
-
- langchain-web
-
-
- true
-
-
-
-
-
- org.apache.maven.plugins
- maven-deploy-plugin
-
- true
-
-
-
-
-
diff --git a/pom.xml b/pom.xml
index 535999113..8aea52979 100644
--- a/pom.xml
+++ b/pom.xml
@@ -13,8 +13,6 @@
openai-client
langchain-core
- langchain-server
- langchain-web
serpapi-client
pinecone-client
langchain-examples
@@ -28,6 +26,7 @@
8.0.32
2.2.9
2.7.3
+ 1.16.1
1.0.0-M2.1
1.18.28
0.5.0
@@ -157,6 +156,12 @@
${reflections.version}
+
+ org.jsoup
+ jsoup
+ ${jsoup.version}
+
+
org.slf4j
slf4j-api