Skip to content

Commit

Permalink
Merge pull request HamaWhiteGG#23 from HamaWhiteGG/dev
Browse files Browse the repository at this point in the history
Pinecone+OpenAIEmbeddings+pinecone-client
  • Loading branch information
HamaWhiteGG authored Jun 29, 2023
2 parents 6daf274 + 26bfbcf commit 1e5b48d
Show file tree
Hide file tree
Showing 47 changed files with 3,846 additions and 6 deletions.
723 changes: 723 additions & 0 deletions docs/extras/modules/state_of_the_union.txt

Large diffs are not rendered by default.

21 changes: 21 additions & 0 deletions langchain-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>io.github.hamawhitegg</groupId>
<artifactId>pinecone-client</artifactId>
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
Expand All @@ -43,11 +49,21 @@
<artifactId>commons-collections4</artifactId>
</dependency>

<dependency>
<groupId>org.nd4j</groupId>
<artifactId>nd4j-native-platform</artifactId>
</dependency>

<dependency>
<groupId>org.python</groupId>
<artifactId>jython-standalone</artifactId>
</dependency>

<dependency>
<groupId>com.knuddels</groupId>
<artifactId>jtokkit</artifactId>
</dependency>

<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
Expand Down Expand Up @@ -87,6 +103,11 @@
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
</dependency>

<dependency>
<groupId>org.awaitility</groupId>
<artifactId>awaitility</artifactId>
</dependency>
</dependencies>

<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,9 @@ public Map<String, String> _call(Map<String, Object> inputs) {
String result = database.run(sqlCmd, false);
LOG.info("SQLResult: \n{}", result);

/**
* If return direct, we just set the final result equal to the result of the sql query result,
* otherwise try to get a human readable final answer
/*
* If return direct, we just set the final result equal to the result of the sql query result, otherwise try to
* get a human readable final answer
*/
String finalResult;
if (returnDirect) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ public class ChatOpenAI extends BaseChatModel {
protected Integer maxTokens;

/**
* Validate that api key exists in environment.
* Validate parameters and init client
*/
public ChatOpenAI init() {
openaiApiKey = getOrEnvOrDefault(openaiApiKey, "OPENAI_API_KEY");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.hw.langchain.document.loaders.base;

import com.hw.langchain.schema.Document;

import java.util.List;

/**
 * Contract for components that load content and expose it as {@link Document} objects.
 * <p>
 * NOTE(review): earlier wording asked implementations to provide a lazy-loading method
 * "using generators". This interface declares only the eager {@link #load()} method, so no
 * lazy variant is part of the contract visible here.
 *
 * @author HamaWhite
 */
public interface BaseLoader {

/**
 * Load data into document objects.
 *
 * @return the loaded documents as a {@link List}, i.e. fully materialized in memory
 *         rather than streamed.
 */
List<Document> load();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.hw.langchain.document.loaders.helpers;

import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.Getter;

import java.nio.charset.Charset;

/**
 * Immutable value object describing the outcome of a charset-detection attempt on a file.
 * <p>
 * Instances are created through the Lombok-generated all-args constructor
 * {@code FileEncoding(Charset, int, String)}; there are no setters, so the fields are
 * declared {@code final}.
 *
 * @author HamaWhite
 */
@Getter
@AllArgsConstructor
@EqualsAndHashCode
public class FileEncoding {

    /** Charset the file content is believed to be encoded in. */
    private final Charset encoding;

    /**
     * Confidence score the detector reported for this guess.
     * NOTE(review): presumably the ICU 0-100 scale, higher meaning more certain - confirm.
     */
    private final int confidence;

    /**
     * Language identifier the detector reported for the content.
     * NOTE(review): presumably {@code null} when the detector could not determine one - confirm.
     */
    private final String language;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.hw.langchain.document.loaders.helpers;

import org.python.icu.text.CharsetDetector;
import org.python.icu.text.CharsetMatch;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * Static helper methods for document loaders.
 *
 * @author HamaWhite
 */
public class Helpers {

    private Helpers() {
        // Utility class: not meant to be instantiated.
    }

    /**
     * Try to detect the encoding of a file by inspecting its raw bytes.
     * <p>
     * The whole file is read into memory and handed to a {@link CharsetDetector}; the best
     * match is converted into a {@link FileEncoding}.
     *
     * @param filePath path of the file to inspect
     * @return the detected charset together with the detector's confidence and language
     * @throws IOException if the file cannot be read, if the detector finds no matching
     *                     charset, or if the detected charset is not available in this JVM
     */
    public static FileEncoding detectFileEncodings(String filePath) throws IOException {
        Path path = Paths.get(filePath);
        byte[] data = Files.readAllBytes(path);

        CharsetDetector detector = new CharsetDetector();
        detector.setText(data);
        CharsetMatch match = detector.detect();
        // detect() returns null when no charset plausibly matches the data; report that as a
        // checked failure instead of letting a NullPointerException escape below.
        if (match == null) {
            throw new IOException("Unable to detect the encoding of " + filePath);
        }

        String charsetName = match.getName();
        Charset charset;
        try {
            charset = Charset.forName(charsetName);
        } catch (IllegalArgumentException e) {
            // Covers IllegalCharsetNameException and UnsupportedCharsetException: the detector
            // may name a charset this JVM does not provide.
            throw new IOException(
                    "Detected encoding '" + charsetName + "' of " + filePath + " is not supported", e);
        }
        return new FileEncoding(charset, match.getConfidence(), match.getLanguage());
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.hw.langchain.document.loaders.text;

import com.hw.langchain.document.loaders.base.BaseLoader;
import com.hw.langchain.document.loaders.helpers.FileEncoding;
import com.hw.langchain.exception.LangChainException;
import com.hw.langchain.schema.Document;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Map;

import static com.hw.langchain.document.loaders.helpers.Helpers.detectFileEncodings;

/**
 * Loads a text file into a single {@link Document} whose metadata records the source path.
 *
 * @author HamaWhite
 */
public class TextLoader implements BaseLoader {

    private static final Logger LOG = LoggerFactory.getLogger(TextLoader.class);

    private final String filePath;

    private final Charset encoding;

    private final boolean autodetectEncoding;

    /**
     * Create a loader that reads the file with the default system encoding and without
     * encoding autodetection.
     *
     * @param filePath Path to the file to load.
     */
    public TextLoader(String filePath) {
        this(filePath, Charset.defaultCharset(), false);
    }

    /**
     * Create a loader for a text file.
     *
     * @param filePath           Path to the file to load.
     * @param encoding           File encoding to use. If {@code null}, the file will be loaded with the
     *                           default system encoding.
     * @param autodetectEncoding Whether to try to autodetect the file encoding if the specified encoding fails.
     */
    public TextLoader(String filePath, Charset encoding, boolean autodetectEncoding) {
        this.filePath = filePath;
        // Honour the documented contract: a null encoding means "use the system default".
        // Passing null through to Files.readString would fail instead.
        this.encoding = encoding == null ? Charset.defaultCharset() : encoding;
        this.autodetectEncoding = autodetectEncoding;
    }

    /**
     * Load from file path.
     * <p>
     * The file is first read with the configured encoding. If that read fails with an
     * {@link IOException} and autodetection is enabled, the encoding is detected from the
     * file content and the read is retried once.
     *
     * @return a single-element list holding the file content, with the file path stored
     *         under the {@code "source"} metadata key
     * @throws LangChainException if the file cannot be loaded
     */
    @Override
    public List<Document> load() {
        String text;
        try {
            text = Files.readString(Path.of(filePath), encoding);
        } catch (IOException e) {
            if (!autodetectEncoding) {
                throw new LangChainException(errorMessage(filePath), e);
            }
            text = loadWithDetectedEncoding(filePath);
        } catch (Exception e) {
            throw new LangChainException(errorMessage(filePath), e);
        }
        Map<String, Object> metadata = Map.of("source", filePath);
        return List.of(new Document(text, metadata));
    }

    /**
     * Detect the file's encoding and read the file with it.
     * <p>
     * This runs from inside a catch clause of {@link #load()}, so nothing thrown here is
     * seen by that method's other handlers. Unchecked failures are therefore wrapped here
     * as well, so callers always receive a {@link LangChainException}.
     */
    private String loadWithDetectedEncoding(String filePath) {
        try {
            FileEncoding detected = detectFileEncodings(filePath);
            LOG.debug("Trying encoding: {}", detected.getEncoding());
            return Files.readString(Path.of(filePath), detected.getEncoding());
        } catch (IOException | RuntimeException e) {
            throw new LangChainException(errorMessage(filePath), e);
        }
    }

    /** Build the message used for every load failure of the given path. */
    private String errorMessage(String filePath) {
        return "Error loading " + filePath;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.hw.langchain.embeddings.base;

import java.util.List;

/**
 * Interface for embedding models: turns text into vectors of floats, either for a batch of
 * documents or for a single query.
 *
 * @author HamaWhite
 */
public interface Embeddings {

/**
 * Embed search docs.
 *
 * @param texts the document texts to embed
 * @return a list of embedding vectors, each represented as a list of floats.
 *         NOTE(review): presumably one vector per input text, in input order - confirm
 *         against the implementations.
 */
List<List<Float>> embedDocuments(List<String> texts);

/**
 * Embed query text.
 *
 * @param text the query text to embed
 * @return the embedding vector for the query, as a list of floats
 */
List<Float> embedQuery(String text);
}
Loading

0 comments on commit 1e5b48d

Please sign in to comment.