
Commit

cr
hwchase17 committed Feb 6, 2023
1 parent e0c0e80 commit 1ea7ce7
Showing 5 changed files with 733 additions and 31 deletions.
30 changes: 5 additions & 25 deletions README.md
@@ -1,36 +1,16 @@
# Chat-LangChain-Notion
# Chat-Your-Data

Create a ChatGPT-like experience over your Notion database using [LangChain](https://github.com/hwchase17/langchain).
A template repo to serve as an example of how to set up a ChatGPT-like experience over your own data.


## 📊 Example Data
This repo uses the [Blendle Employee Handbook](https://www.notion.so/Blendle-s-Employee-Handbook-7692ffe24f07450785f093b94bbe1a09) as an example.
It was downloaded October 18th, so it may have changed slightly since then!

## 🧑 Instructions for ingesting your own dataset

Export your dataset from Notion. You can do this by clicking on the three dots in the upper right-hand corner and then clicking `Export`.

<img src="export_notion.png" alt="export" width="200"/>

When exporting, make sure to select the `Markdown & CSV` format option.

<img src="export_format.png" alt="export-format" width="200"/>

This will produce a `.zip` file in your Downloads folder. Move the `.zip` file into this repository.

Run the following command to unzip the zip file (replace the `Export...` with your own file name as needed).

```shell
unzip Export-d3adfe0f-3131-4bf3-8987-a52017fc1bae.zip -d Notion_DB
```
See [this blog post](TODO) for a more detailed explanation.

## Ingest data

Ingestion of data is done over the `state_of_the_union.txt` file.
Therefore, the only thing needed to ingest the data is to run `python ingest_data.py`.
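For example, from the root of this repository:

```shell
python ingest_data.py
```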

## Query data
Custom prompts are used to ground the answers in the Blendle Employee Handbook files.
Custom prompts are used to ground the answers in the state of the union text file.

## Running the Application

4 changes: 2 additions & 2 deletions ingest_data.py
@@ -1,11 +1,11 @@
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import NotionDirectoryLoader
from langchain.document_loaders.unstructured import UnstructuredFileLoader
from langchain.vectorstores.faiss import FAISS
from langchain.embeddings import OpenAIEmbeddings
import pickle

# Load Data
loader = NotionDirectoryLoader("Notion_DB")
loader = UnstructuredFileLoader("state_of_the_union.txt")
raw_documents = loader.load()

# Split text
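The rest of `ingest_data.py` is collapsed in this view. Based on the imports shown above, a minimal sketch of the full ingestion flow might look like the following; the chunk sizes and the `vectorstore.pkl` output name are illustrative assumptions, not taken from the diff.

```python
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders.unstructured import UnstructuredFileLoader
from langchain.vectorstores.faiss import FAISS
from langchain.embeddings import OpenAIEmbeddings
import pickle

# Load the raw text of the speech
loader = UnstructuredFileLoader("state_of_the_union.txt")
raw_documents = loader.load()

# Split the document into overlapping chunks (sizes are illustrative)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(raw_documents)

# Embed each chunk and index the embeddings with FAISS
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(documents, embeddings)

# Persist the index so query_data.py can load it later (file name is an assumption)
with open("vectorstore.pkl", "wb") as f:
    pickle.dump(vectorstore, f)
```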
7 changes: 3 additions & 4 deletions query_data.py
@@ -3,19 +3,18 @@
from langchain.chains import ChatVectorDBChain

_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
You can assume the question is about the Blendle Employee Handbook.
You can assume the question is about the most recent state of the union address.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

template = """You are an AI assistant for answering questions about the Blendle Employee Handbook.
template = """You are an AI assistant for answering questions about the most recent state of the union address.
You are given the following extracted parts of a long document and a question. Provide a conversational answer.
If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
If the question is not about the Blendle Employee Handbook, politely inform them that you are tuned to only answer questions about the Blendle Employee Handbook.
If the question is not about the most recent state of the union, politely inform them that you are tuned to only answer questions about the most recent state of the union.
Question: {question}
=========
{context}
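The remainder of `query_data.py` is also collapsed in this view. As a rough sketch (not taken from the diff), the two prompts above would typically be wired into a `ChatVectorDBChain` along these lines; the `QA_PROMPT` name, the `get_chain` helper, and the zero-temperature `OpenAI` LLM are assumptions.

```python
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.chains import ChatVectorDBChain

# `template` and CONDENSE_QUESTION_PROMPT refer to the objects defined above in
# query_data.py; QA_PROMPT and get_chain below are illustrative assumptions.
QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])


def get_chain(vectorstore):
    """Build a conversational retrieval chain over the ingested vector store."""
    llm = OpenAI(temperature=0)  # temperature choice is an assumption
    return ChatVectorDBChain.from_llm(
        llm,
        vectorstore,
        qa_prompt=QA_PROMPT,
        condense_question_prompt=CONDENSE_QUESTION_PROMPT,
    )


# Hypothetical usage:
# chain = get_chain(vectorstore)
# result = chain({"question": "What did the president say about inflation?", "chat_history": []})
# print(result["answer"])
```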
