Skip to content

Commit

Permalink
Merge pull request HamaWhiteGG#97 from HamaWhiteGG/dev
Browse files Browse the repository at this point in the history
Support Summarization(Stuff)
  • Loading branch information
HamaWhiteGG authored Sep 9, 2023
2 parents 390140c + ad62e7a commit 1963421
Show file tree
Hide file tree
Showing 10 changed files with 232 additions and 71 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ The following example can view in the [langchain-example](langchain-examples/src

- [SQL Chains](langchain-examples/src/main/java/com/hw/langchain/examples/chains/SqlChainExample.java)
- [API Chains](langchain-examples/src/main/java/com/hw/langchain/examples/chains/ApiChainExample.java)
- [QA-Milvus](langchain-examples/src/main/java/com/hw/langchain/examples/chains/MilvusExample.java)
- [QA-Pinecone](langchain-examples/src/main/java/com/hw/langchain/examples/chains/RetrievalQaExample.java)
- [QA-Milvus-Text](langchain-examples/src/main/java/com/hw/langchain/examples/chains/MilvusExample.java)
- [QA-Pinecone-Text](langchain-examples/src/main/java/com/hw/langchain/examples/chains/RetrievalQaExample.java)
- [QA-Pinecone-Markdown](langchain-examples/src/main/java/com/hw/langchain/examples/chains/RetrievalMarkdownExample.java)
- [Summarization](langchain-examples/src/main/java/com/hw/langchain/examples/chains/SummarizationExample.java)
- [Agent with Google Search](langchain-examples/src/main/java/com/hw/langchain/examples/agents/LlmAgentExample.java)
- [Spark SQL AI](langchain-bigdata/langchain-spark/src/test/java/com/hw/langchain/agents/toolkits/spark/sql/toolkit/SparkSqlToolkitTest.java)
- [Flink SQL AI](langchain-bigdata/langchain-flink/src/test/java/com/hw/langchain/agents/toolkits/flink/sql/toolkit/FlinkSqlToolkitTest.java)
Expand Down
5 changes: 5 additions & 0 deletions langchain-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@
<artifactId>jtokkit</artifactId>
</dependency>

<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
</dependency>

<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ private void validateOutputs(Map<String, String> outputs) {
* If False, both input keys and new keys generated by this chain will be returned.
* Defaults to False.
*/
public Map<String, String> call(String input, boolean returnOnlyOutputs) {
public Map<String, String> call(Object input, boolean returnOnlyOutputs) {
    // Prepare the single raw input via prepInputs, then hand the resulting map
    // to the call overload that accepts it.
    return call(prepInputs(input), returnOnlyOutputs);
}
Expand Down Expand Up @@ -126,7 +126,7 @@ private Map<String, String> prepOutputs(Map<String, Object> inputs, Map<String,
/**
* Validate and prep inputs.
*/
private Map<String, Object> prepInputs(String input) {
private Map<String, Object> prepInputs(Object input) {
Set<String> inputKeys = new HashSet<>(inputKeys());
if (memory != null) {
// If there are multiple input keys, but some get set by memory so that only one is not set,
Expand Down Expand Up @@ -162,7 +162,7 @@ public Map<String, Object> prepInputs(Map<String, Object> inputs) {
/**
* Run the chain as text in, text out
*/
public String run(String args) {
public String run(Object args) {
if (outputKeys().size() != 1) {
throw new IllegalArgumentException(
"The `run` method is not supported when there is not exactly one output key. Got " + outputKeys()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@

package com.hw.langchain.chains.summarize;

import com.hw.langchain.base.language.BaseLanguageModel;
import com.hw.langchain.chains.combine.documents.stuff.StuffDocumentsChain;
import com.hw.langchain.chains.combine.documents.stuff.StuffUtils;
import com.hw.langchain.chains.llm.LLMChain;
import com.hw.langchain.prompts.base.BasePromptTemplate;

/**
* @author HamaWhite
*/
Expand All @@ -26,4 +32,15 @@ public class SummarizeUtils {
// Private constructor: this class only exposes static helpers, so any attempt
// to instantiate it fails with an IllegalStateException.
private SummarizeUtils() {
throw new IllegalStateException("Utility class");
}

/**
 * Builds a summarization "stuff" chain using the default settings:
 * {@code StuffPrompt.PROMPT} as the prompt, {@code "text"} as the document
 * variable name and a blank line as the document separator.
 *
 * @param llm the language model the chain will call
 * @return the configured {@link StuffDocumentsChain}
 */
public static StuffDocumentsChain loadStuffChain(BaseLanguageModel llm) {
    BasePromptTemplate defaultPrompt = StuffPrompt.PROMPT;
    String defaultVariableName = "text";
    String defaultSeparator = "\n\n";
    return loadStuffChain(llm, defaultPrompt, defaultVariableName, defaultSeparator);
}

/**
 * Builds a summarization "stuff" chain from explicit settings.
 *
 * @param llm                  the language model wrapped in the inner {@link LLMChain}
 * @param prompt               the prompt template given to the inner {@link LLMChain}
 * @param documentVariableName passed through to the {@link StuffDocumentsChain} constructor
 * @param documentSeparator    passed through to the {@link StuffDocumentsChain} constructor
 * @return the configured {@link StuffDocumentsChain}, using the default document prompt
 *         from {@link StuffUtils#getDefaultDocumentPrompt()}
 */
public static StuffDocumentsChain loadStuffChain(BaseLanguageModel llm, BasePromptTemplate prompt,
        String documentVariableName, String documentSeparator) {
    return new StuffDocumentsChain(
            new LLMChain(llm, prompt),
            StuffUtils.getDefaultDocumentPrompt(),
            documentVariableName,
            documentSeparator);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.hw.langchain.document.loaders;

import com.google.common.collect.Maps;
import com.hw.langchain.document.loaders.base.BaseLoader;
import com.hw.langchain.exception.LangChainException;
import com.hw.langchain.schema.Document;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * Document loader that fetches web pages with jsoup and turns each page into a
 * {@link Document} whose content is the page text and whose metadata describes
 * the page (source URL, title, description, language).
 *
 * @author HamaWhite
 */
public class WebBaseLoader extends BaseLoader {

    /** Metadata value stored when the page has no usable description. */
    private static final String NO_DESCRIPTION = "No description found.";

    /** Metadata value stored when the page declares no language. */
    private static final String NO_LANGUAGE = "No language found.";

    private final List<String> webUrls;

    /**
     * Creates a loader for the given URLs.
     *
     * @param webUrls the URLs to fetch, in the order their documents should be returned;
     *                must not be {@code null}
     */
    public WebBaseLoader(List<String> webUrls) {
        // Defensive copy: later changes to the caller's list must not alter what load() fetches.
        this.webUrls = new ArrayList<>(webUrls);
    }

    /**
     * Fetches every configured URL and converts each page into a {@link Document}.
     *
     * @return one document per URL, in the same order as the configured URLs
     * @throws LangChainException if fetching a page fails with an {@link IOException};
     *                            the original exception is kept as the cause
     */
    @Override
    public List<Document> load() {
        List<Document> documents = new ArrayList<>(webUrls.size());
        for (String url : webUrls) {
            try {
                org.jsoup.nodes.Document doc = Jsoup.connect(url).get();
                Map<String, Object> metadata = buildMetadata(doc, url);

                documents.add(new Document(doc.wholeText(), metadata));
            } catch (IOException e) {
                // NOTE(review): errorMessage is not defined in this class; presumably inherited
                // from BaseLoader - confirm.
                throw new LangChainException(errorMessage(url), e);
            }
        }
        return documents;
    }

    /**
     * Collects the metadata stored alongside a page's text.
     *
     * @param doc the parsed page
     * @param url the URL the page was fetched from, stored under {@code "source"}
     * @return a map with {@code "source"}, {@code "description"} and {@code "language"},
     *         plus {@code "title"} when the page has a title element
     */
    private Map<String, Object> buildMetadata(org.jsoup.nodes.Document doc, String url) {
        Map<String, Object> metadata = Maps.newHashMap();
        metadata.put("source", url);

        Element title = doc.select("title").first();
        if (title != null) {
            metadata.put("title", title.text());
        }

        Element description = doc.select("meta[name=description]").first();
        metadata.put("description", attrOrDefault(description, "content", NO_DESCRIPTION));

        Element html = doc.select("html").first();
        metadata.put("language", attrOrDefault(html, "lang", NO_LANGUAGE));
        return metadata;
    }

    /**
     * Returns the attribute value, or the fallback when the element or the attribute is absent.
     * Element.attr alone returns an empty string for a missing attribute, which would
     * otherwise hide the fallback.
     */
    private static String attrOrDefault(Element element, String attributeKey, String fallback) {
        if (element == null || !element.hasAttr(attributeKey)) {
            return fallback;
        }
        return element.attr(attributeKey);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.hw.langchain.chains.summarize;

import com.hw.langchain.base.language.BaseLanguageModel;
import com.hw.langchain.document.loaders.WebBaseLoader;
import com.hw.langchain.llms.openai.OpenAIChat;
import com.hw.langchain.schema.Document;

import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

import java.util.List;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Manually-run test for {@link SummarizeUtils}; disabled by default because it calls OpenAI.
 * See <a href="https://python.langchain.com/docs/use_cases/summarization">Summarization use cases</a>.
 *
 * @author HamaWhite
 */
@Disabled("Test requires costly OpenAI calls, can be run manually.")
class SummarizeUtilsTest {

    private static BaseLanguageModel llm;

    private static List<Document> docs;

    /**
     * Builds the shared chat model and downloads the article to summarize once for all tests.
     */
    @BeforeAll
    static void setUp() {
        llm = OpenAIChat.builder()
                .temperature(0)
                .model("gpt-3.5-turbo-16k")
                .build()
                .init();

        docs = new WebBaseLoader(List.of("https://lilianweng.github.io/posts/2023-06-23-agent/")).load();
    }

    /**
     * Runs the default stuff chain over the loaded article and compares the summary
     * with the recorded one.
     */
    @Test
    void testLoadStuffChain() {
        String actual = SummarizeUtils.loadStuffChain(llm).run(docs);

        String expected =
                "The article discusses the concept of building autonomous agents powered by large language models "
                        + "(LLMs). It explores the components of such agents, including planning, memory, and tool "
                        + "use. The article provides case studies and proof-of-concept examples of LLM-powered agents, "
                        + "as well as challenges and limitations associated with their development.";
        assertEquals(expected, actual);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.hw.langchain.examples.chains;

import com.hw.langchain.chains.summarize.SummarizeUtils;
import com.hw.langchain.document.loaders.WebBaseLoader;
import com.hw.langchain.llms.openai.OpenAIChat;

import java.util.List;

import static com.hw.langchain.examples.utils.PrintUtils.println;

/**
 * Example that downloads an article and prints a summary produced by the stuff chain.
 * See <a href="https://python.langchain.com/docs/use_cases/summarization">Summarization use cases</a>.
 *
 * @author HamaWhite
 */
public class SummarizationExample {

    /** Article that the example summarizes. */
    private static final String ARTICLE_URL = "https://lilianweng.github.io/posts/2023-06-23-agent/";

    public static void main(String[] args) {
        // Build the chat model first, exactly as before, so a model setup failure
        // happens before any page is downloaded.
        var llm = OpenAIChat.builder()
                .temperature(0)
                .model("gpt-3.5-turbo-16k")
                .build()
                .init();

        var docs = new WebBaseLoader(List.of(ARTICLE_URL)).load();

        String summary = SummarizeUtils.loadStuffChain(llm).run(docs);
        println(summary);
    }
}
36 changes: 0 additions & 36 deletions langchain-server/pom.xml

This file was deleted.

28 changes: 0 additions & 28 deletions langchain-web/pom.xml

This file was deleted.

9 changes: 7 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
<modules>
<module>openai-client</module>
<module>langchain-core</module>
<module>langchain-server</module>
<module>langchain-web</module>
<module>serpapi-client</module>
<module>pinecone-client</module>
<module>langchain-examples</module>
Expand All @@ -28,6 +26,7 @@
<mysql.version>8.0.32</mysql.version>
<milvus.version>2.2.9</milvus.version>
<jython.version>2.7.3</jython.version>
<jsoup.version>1.16.1</jsoup.version>
<nd4j.version>1.0.0-M2.1</nd4j.version>
<lombok.version>1.18.28</lombok.version>
<jtokkit.version>0.5.0</jtokkit.version>
Expand Down Expand Up @@ -157,6 +156,12 @@
<version>${reflections.version}</version>
</dependency>

<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>${jsoup.version}</version>
</dependency>

<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
Expand Down

0 comments on commit 1963421

Please sign in to comment.