Merge branch 'STAGING' of https://github.com/neo4j-labs/llm-graph-bui…

…lder
neo4j-labs · Jul 12, 2024 · ccf8f60 · ccf8f60
2 parents 083f487 + 616eb08
commit ccf8f60
Show file tree

Hide file tree

Showing 80 changed files with 5,534 additions and 3,556 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
+.DS_Store
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@@ -167,4 +168,4 @@ google-cloud-cli-469.0.0-linux-x86_64.tar.gz
 /backend/src/chunks
 /backend/merged_files
 /backend/chunks
-google-cloud-cli-479.0.0-linux-x86_64.tar.gz
+google-cloud-cli-479.0.0-linux-x86_64.tar.gz
diff --git a/README.md b/README.md
@@ -40,7 +40,7 @@ DIFFBOT_API_KEY="your-diffbot-key"
 
 if you only want OpenAI:
 ```env
-LLM_MODELS="gpt-3.5,gpt-4o"
+LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o"
 OPENAI_API_KEY="your-openai-key"
 ```
 
@@ -70,6 +70,18 @@ GOOGLE_CLIENT_ID="xxxx"
 
 You can of course combine all (local, youtube, wikipedia, s3 and gcs) or remove any you don't want/need.
 
+### Chat Modes
+
+By default, all of the chat modes will be available: vector, graph+vector and graph.
+If no mode is specified in the `CHAT_MODES` variable, all modes will be available:
+```env
+CHAT_MODES=""
+```
+
+If, however, you want to enable only specific modes (for example, only vector mode or only graph mode), you can do that by specifying the modes in the env:
+```env
+CHAT_MODES="vector,graph+vector"
+```
 
 #### Running Backend and Frontend separately (dev environment)
 Alternatively, you can run the backend and frontend separately:
@@ -134,12 +146,21 @@ Allow unauthenticated request : Yes
 | BACKEND_API_URL         | Optional           | http://localhost:8000 | URL for backend API                                                                       |
 | BLOOM_URL               | Optional           | https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true | URL for Bloom visualization |
 | REACT_APP_SOURCES       | Optional           | local,youtube,wiki,s3 | List of input sources that will be available                                               |
-| LLM_MODELS              | Optional           | diffbot,gpt-3.5,gpt-4o | Models available for selection on the frontend, used for entities extraction and Q&A Chatbot                          |
+| LLM_MODELS              | Optional           | diffbot,openai-gpt-3.5,openai-gpt-4o | Models available for selection on the frontend, used for entities extraction and Q&A Chatbot |
+| CHAT_MODES              | Optional           | vector,graph+vector,graph | Chat modes available for Q&A |
 | ENV                     | Optional           | DEV           | Environment variable for the app                                                                 |
 | TIME_PER_CHUNK          | Optional           | 4             | Time per chunk for processing                                                                    |
 | CHUNK_SIZE              | Optional           | 5242880       | Size of each chunk of file for upload                                                                |
 | GOOGLE_CLIENT_ID        | Optional           |               | Client ID for Google authentication                                                              |
 | GCS_FILE_CACHE        | Optional           | False              | If set to True, will save the files to process into GCS. If set to False, will save the files locally   |
+| ENTITY_EMBEDDING        | Optional           | False              | If set to True, it will add embeddings for each entity in the database |
+| LLM_MODEL_CONFIG_azure_ai_<azure_deployment_name>        | Optional           |              | Set azure config as - azure_deployment_name,azure_endpoint or base_url,azure_api_key,api_version|
+| LLM_MODEL_CONFIG_groq_<model_name>        | Optional           |               | Set groq config as - model_name,base_url,groq_api_key |
+| LLM_MODEL_CONFIG_anthropic_<model_name>        | Optional           |               | Set anthropic config as - model_name,anthropic_api_key |
+| LLM_MODEL_CONFIG_fireworks_<model_name>        | Optional           |               | Set fireworks config as - model_name,fireworks_api_key |
+| LLM_MODEL_CONFIG_bedrock_<model_name>        | Optional           |               | Set bedrock config as - model_name,aws_access_key_id,aws_secret_access_key,region_name |
+| LLM_MODEL_CONFIG_ollama_<model_name>        | Optional           |               | Set ollama config as - model_name,model_local_url |
+
 
 
 

diff --git a/backend/Dockerfile b/backend/Dockerfile
@@ -16,7 +16,8 @@ RUN apt-get update && \
 ENV LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
 # Copy requirements file and install Python dependencies
 COPY requirements.txt /code/
-RUN pip install --no-cache-dir --upgrade -r requirements.txt
+# --no-cache-dir --upgrade 
+RUN pip install -r requirements.txt 
 # Copy application code
 COPY . /code
 # Set command

diff --git a/backend/example.env b/backend/example.env
@@ -21,5 +21,16 @@ LANGCHAIN_PROJECT = ""
 LANGCHAIN_TRACING_V2 = ""
 LANGCHAIN_ENDPOINT = ""
 GCS_FILE_CACHE = "" # save the file into GCS or locally; should be True or False
-NEO4J_USER_AGENT = ""
-ENABLE_USER_AGENT = ""
+NEO4J_USER_AGENT=""
+ENABLE_USER_AGENT = ""
+LLM_MODEL_CONFIG_model_version=""
+ENTITY_EMBEDDING="" # True or False
+#examples
+LLM_MODEL_CONFIG_azure_ai_gpt_35="azure_deployment_name,azure_endpoint or base_url,azure_api_key,api_version"
+LLM_MODEL_CONFIG_azure_ai_gpt_4o="gpt-4o,https://YOUR-ENDPOINT.openai.azure.com/,azure_api_key,api_version"
+LLM_MODEL_CONFIG_groq_llama3_70b="model_name,base_url,groq_api_key"
+LLM_MODEL_CONFIG_anthropic_claude_3_5_sonnet="model_name,anthropic_api_key"
+LLM_MODEL_CONFIG_fireworks_llama_v3_70b="model_name,fireworks_api_key"
+LLM_MODEL_CONFIG_bedrock_claude_3_5_sonnet="model_name,aws_access_key_id,aws_secret_access_key,region_name"
+LLM_MODEL_CONFIG_ollama_llama3="model_name,model_local_url"
+
diff --git a/backend/requirements.txt b/backend/requirements.txt
@@ -8,8 +8,8 @@ asyncio==3.4.3
 attrs==23.2.0
 backoff==2.2.1
 beautifulsoup4==4.12.3
-boto3
-botocore
+boto3==1.34.140
+botocore==1.34.140
 cachetools==5.3.3
 certifi==2024.2.2
 cffi==1.16.0
@@ -28,8 +28,8 @@ docstring_parser==0.16
 effdet==0.4.1
 emoji==2.10.1
 exceptiongroup==1.2.0
-fastapi
-fastapi-health
+fastapi==0.111.0
+fastapi-health==0.4.0
 filelock==3.13.1
 filetype==1.2.0
 flatbuffers==23.5.26
@@ -38,24 +38,24 @@ frozenlist==1.4.1
 fsspec==2024.2.0
 google-api-core==2.18.0
 google-auth==2.29.0
-google_auth_oauthlib
-google-cloud-aiplatform
+google_auth_oauthlib==1.2.0
+google-cloud-aiplatform==1.58.0
 google-cloud-bigquery==3.19.0
 google-cloud-core==2.4.1
 google-cloud-resource-manager==1.12.3
-google-cloud-storage
+google-cloud-storage==2.17.0
 google-crc32c==1.5.0
 google-resumable-media==2.7.0
 googleapis-common-protos==1.63.0
 greenlet==3.0.3
 grpc-google-iam-v1==0.13.0
 grpcio==1.62.1
-google-ai-generativelanguage
+google-ai-generativelanguage==0.6.6
 grpcio-status==1.62.1
 h11==0.14.0
 httpcore==1.0.4
 httpx==0.27.0
-huggingface-hub==0.20.3
+huggingface-hub
 humanfriendly==10.0
 idna==3.6
 importlib-resources==6.1.1
@@ -67,21 +67,25 @@ joblib==1.3.2
 jsonpatch==1.33
 jsonpath-python==1.0.6
 jsonpointer==2.4
+json-repair==0.25.2
 kiwisolver==1.4.5
-langchain
-langchain-google-genai
-langchain-community
-langchain-core
-langchain-experimental
-langchain-google-vertexai
-langchain-groq
-langchain-openai
-langchain-text-splitters==0.0.1
+langchain==0.2.6
+langchain-aws==0.1.9
+langchain-anthropic==0.1.19
+langchain-fireworks==0.1.4
+langchain-google-genai==1.0.7
+langchain-community==0.2.6
+langchain-core==0.2.10
+langchain-experimental==0.0.62
+langchain-google-vertexai==1.0.6
+langchain-groq==0.1.6
+langchain-openai==0.1.14
+langchain-text-splitters==0.2.2
 langdetect==1.0.9
-langsmith==0.1.31
+langsmith==0.1.83
 layoutparser==0.3.4
-langserve
-langchain-cli
+langserve==0.2.2
+#langchain-cli==0.0.25
 lxml==5.1.0
 MarkupSafe==2.1.5
 marshmallow==3.20.2
@@ -94,9 +98,9 @@ networkx==3.2.1
 nltk==3.8.1
 numpy==1.26.4
 omegaconf==2.3.0
-onnx==1.15.0
-onnxruntime==1.15.1
-openai==1.14.2
+onnx==1.16.1
+onnxruntime==1.18.1
+openai==1.35.10
 opencv-python==4.8.0.76
 orjson==3.9.15
 packaging==23.2
@@ -110,15 +114,16 @@ pillow_heif==0.15.0
 portalocker==2.8.2
 proto-plus==1.23.0
 protobuf==4.23.4
+psutil==6.0.0
 pyasn1==0.6.0
 pyasn1_modules==0.4.0
 pycocotools==2.0.7
 pycparser==2.21
-pydantic==2.6.4
-pydantic_core==2.16.3
+pydantic==2.8.2
+pydantic_core==2.20.1
 pyparsing==3.0.9
 pypdf==4.0.1
-PyPDF2
+PyPDF2==3.0.1
 pypdfium2==4.27.0
 pytesseract==0.3.10
 python-dateutil==2.8.2
@@ -131,44 +136,45 @@ pytz==2024.1
 PyYAML==6.0.1
 rapidfuzz==3.6.1
 regex==2023.12.25
-requests
+requests==2.32.3
 rsa==4.9
 s3transfer==0.10.1
-safetensors==0.3.2
+safetensors==0.4.1
 scipy==1.10.1
 shapely==2.0.3
 six==1.16.0
 sniffio==1.3.1
 soupsieve==2.5
 SQLAlchemy==2.0.28
-starlette==0.36.3
-starlette-session
+starlette==0.37.2
+sse-starlette==2.1.2
+starlette-session==0.4.3
 sympy==1.12
 tabulate==0.9.0
 tenacity==8.2.3
-tiktoken==0.6.0
+tiktoken==0.7.0
 timm==0.9.12
-tokenizers==0.15.2
+tokenizers==0.19
 tqdm==4.66.2
-transformers==4.37.1
+transformers==4.42.3
 types-protobuf
 types-requests
 typing-inspect==0.9.0
 typing_extensions==4.9.0
 tzdata==2024.1
-unstructured
-unstructured-client
-unstructured-inference
-unstructured.pytesseract
-unstructured[all-docs]
-urllib3
-uvicorn
-gunicorn
+unstructured==0.14.9
+unstructured-client==0.23.8
+unstructured-inference==0.7.36
+unstructured.pytesseract==0.3.12
+unstructured[all-docs]==0.14.9
+urllib3==2.2.2
+uvicorn==0.30.1
+gunicorn==22.0.0
 wikipedia==1.4.0
 wrapt==1.16.0
 yarl==1.9.4
 youtube-transcript-api==0.6.2
 zipp==3.17.0
-sentence-transformers
+sentence-transformers==2.7.0
 google-cloud-logging==3.10.0
 PyMuPDF==1.24.5