From 7f5efddb9975e14ea1d1ade41c441c9826068ac7 Mon Sep 17 00:00:00 2001 From: Ashvin Date: Mon, 24 Jun 2024 23:06:24 +0530 Subject: [PATCH] added new environment variable for taking the embedding model in engine. (#516) Co-authored-by: = <=> --- services/engine/.env.example | 3 +++ services/engine/dataherald/finetuning/openai_finetuning.py | 2 +- .../dataherald/sql_generator/dataherald_finetuning_agent.py | 2 +- .../engine/dataherald/sql_generator/dataherald_sqlagent.py | 4 ++-- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/services/engine/.env.example b/services/engine/.env.example index 4d1d2027..e977f74d 100644 --- a/services/engine/.env.example +++ b/services/engine/.env.example @@ -48,3 +48,6 @@ MINIO_ROOT_USER= MINIO_ROOT_PASSWORD= CORE_PORT = 80 # This env var defines the port that will be exposed by the container. It serves as the configuration for both the internal and external container ports. + +# When using Azure, set this to your embedding model. When using OpenAI, use "text-embedding-3-large". +EMBEDDING_MODEL = "text-embedding-3-large" \ No newline at end of file diff --git a/services/engine/dataherald/finetuning/openai_finetuning.py b/services/engine/dataherald/finetuning/openai_finetuning.py index 148c64bd..95738448 100644 --- a/services/engine/dataherald/finetuning/openai_finetuning.py +++ b/services/engine/dataherald/finetuning/openai_finetuning.py @@ -25,7 +25,7 @@ from dataherald.utils.models_context_window import OPENAI_FINETUNING_MODELS_WINDOW_SIZES FILE_PROCESSING_ATTEMPTS = 20 -EMBEDDING_MODEL = "text-embedding-3-large" +EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL","text-embedding-3-large") CATEGORICAL_COLUMNS_THRESHOLD = 60 logger = logging.getLogger(__name__) diff --git a/services/engine/dataherald/sql_generator/dataherald_finetuning_agent.py b/services/engine/dataherald/sql_generator/dataherald_finetuning_agent.py index 29d95794..40750086 100644 --- 
a/services/engine/dataherald/sql_generator/dataherald_finetuning_agent.py +++ b/services/engine/dataherald/sql_generator/dataherald_finetuning_agent.py @@ -58,7 +58,7 @@ TOP_K = SQLGenerator.get_upper_bound_limit() -EMBEDDING_MODEL = "text-embedding-3-large" +EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL","text-embedding-3-large") TOP_TABLES = 20 diff --git a/services/engine/dataherald/sql_generator/dataherald_sqlagent.py b/services/engine/dataherald/sql_generator/dataherald_sqlagent.py index d49e2589..c615c072 100644 --- a/services/engine/dataherald/sql_generator/dataherald_sqlagent.py +++ b/services/engine/dataherald/sql_generator/dataherald_sqlagent.py @@ -58,7 +58,7 @@ TOP_K = SQLGenerator.get_upper_bound_limit() -EMBEDDING_MODEL = "text-embedding-3-large" +EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL","text-embedding-3-large") TOP_TABLES = 20 @@ -147,7 +147,7 @@ class QuerySQLDataBaseTool(BaseSQLDatabaseTool, BaseTool): name = "SqlDbQuery" description = """ - Input: A well-formed multi-line SQL query between ```sql and ``` tags. + Input: -- A well-formed multi-line SQL query between ```sql and ``` tags. Output: Result from the database or an error message if the query is incorrect. If an error occurs, rewrite the query and retry. Use this tool to execute SQL queries.