From fa34faa8f3e80813eb9c6d4833196abc5dc27686 Mon Sep 17 00:00:00 2001 From: Thanarat Ruangrat Date: Tue, 12 Nov 2024 00:37:58 +0700 Subject: [PATCH] feat: add google vertexai embedding options --- README.md | 5 +++-- config.py | 7 +++++++ requirements.lite.txt | 1 + requirements.txt | 1 + 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 37bdcdf..51d4f4d 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,7 @@ The following environment variables are required to run the application: - `PDF_EXTRACT_IMAGES`: (Optional) A boolean value indicating whether to extract images from PDF files. Default value is "False". - `DEBUG_RAG_API`: (Optional) Set to "True" to show more verbose logging output in the server console, and to enable postgresql database routes - `CONSOLE_JSON`: (Optional) Set to "True" to log as json for Cloud Logging aggregations -- `EMBEDDINGS_PROVIDER`: (Optional) either "openai", "bedrock", "azure", "huggingface", "huggingfacetei" or "ollama", where "huggingface" uses sentence_transformers; defaults to "openai" +- `EMBEDDINGS_PROVIDER`: (Optional) either "openai", "bedrock", "azure", "huggingface", "huggingfacetei", "vertexai" or "ollama", where "huggingface" uses sentence_transformers; defaults to "openai" - `EMBEDDINGS_MODEL`: (Optional) Set a valid embeddings model to use from the configured provider. - **Defaults** - openai: "text-embedding-3-small" @@ -70,6 +70,7 @@ The following environment variables are required to run the application: - huggingfacetei: "http://huggingfacetei:3000". Hugging Face TEI uses model defined on TEI service launch. - ollama: "nomic-embed-text" - bedrock: "amazon.titan-embed-text-v1" + - vertexai: "text-embedding-004" - `RAG_AZURE_OPENAI_API_VERSION`: (Optional) Default is `2023-05-15`. The version of the Azure OpenAI API. - `RAG_AZURE_OPENAI_API_KEY`: (Optional) The API key for Azure OpenAI service. 
- Note: `AZURE_OPENAI_API_KEY` will work but `RAG_AZURE_OPENAI_API_KEY` will override it in order to not conflict with LibreChat setting. @@ -83,7 +84,7 @@ The following environment variables are required to run the application: - `AWS_DEFAULT_REGION`: (Optional) defaults to `us-east-1` - `AWS_ACCESS_KEY_ID`: (Optional) needed for bedrock embeddings - `AWS_SECRET_ACCESS_KEY`: (Optional) needed for bedrock embeddings - +- `GOOGLE_APPLICATION_CREDENTIALS`: (Optional) needed for Google Vertex AI embeddings (e.g. `GOOGLE_APPLICATION_CREDENTIALS=serviceAccount.json`) Make sure to set these environment variables before running the application. You can set them in a `.env` file or as system environment variables. ### Use Atlas MongoDB as Vector Database diff --git a/config.py b/config.py index 7b94a97..75c9184 100644 --- a/config.py +++ b/config.py @@ -24,6 +24,7 @@ class EmbeddingsProvider(Enum): HUGGINGFACETEI = "huggingfacetei" OLLAMA = "ollama" BEDROCK = "bedrock" + VERTEXAI = "vertexai" def get_env_variable( @@ -206,6 +207,10 @@ def init_embeddings(provider, model): from langchain_ollama import OllamaEmbeddings return OllamaEmbeddings(model=model, base_url=OLLAMA_BASE_URL) + elif provider == EmbeddingsProvider.VERTEXAI: + from langchain_google_vertexai import VertexAIEmbeddings + + return VertexAIEmbeddings(model_name=model) elif provider == EmbeddingsProvider.BEDROCK: from langchain_aws import BedrockEmbeddings @@ -241,6 +246,8 @@ def init_embeddings(provider, model): ) elif EMBEDDINGS_PROVIDER == EmbeddingsProvider.OLLAMA: EMBEDDINGS_MODEL = get_env_variable("EMBEDDINGS_MODEL", "nomic-embed-text") +elif EMBEDDINGS_PROVIDER == EmbeddingsProvider.VERTEXAI: + EMBEDDINGS_MODEL = get_env_variable("EMBEDDINGS_MODEL", "text-embedding-004") elif EMBEDDINGS_PROVIDER == EmbeddingsProvider.BEDROCK: EMBEDDINGS_MODEL = get_env_variable( "EMBEDDINGS_MODEL", "amazon.titan-embed-text-v1" diff --git a/requirements.lite.txt b/requirements.lite.txt index e149a04..7b026f8 100644 --- 
a/requirements.lite.txt +++ b/requirements.lite.txt @@ -29,4 +29,5 @@ python-magic==0.4.27 python-pptx==0.6.23 xlrd==2.0.1 langchain-aws==0.2.1 +langchain-google-vertexai==2.0.5 boto3==1.34.144 diff --git a/requirements.txt b/requirements.txt index a1fd5fe..73b0f29 100644 --- a/requirements.txt +++ b/requirements.txt @@ -30,6 +30,7 @@ langchain-mongodb==0.2.0 langchain-ollama==0.2.0 langchain-openai==0.2.0 langchain-huggingface==0.1.0 +langchain-google-vertexai==2.0.5 cryptography==43.0.1 python-magic==0.4.27 python-pptx==0.6.23