Merge branch 'master' into ddg-search

Significant-Gravitas · Apr 4, 2023 · e1cf66b · e1cf66b
2 parents 6306a03 + 8747ff6
commit e1cf66b
Show file tree

Hide file tree

Showing 11 changed files with 210 additions and 121 deletions.
diff --git a/.env.template b/.env.template
@@ -1,4 +1,6 @@
 OPENAI_API_KEY=your-openai-api-key
 ELEVENLABS_API_KEY=your-elevenlabs-api-key
 SMART_LLM_MODEL="gpt-4"
-FAST_LLM_MODEL="gpt-3.5-turbo"
+FAST_LLM_MODEL="gpt-3.5-turbo"
+GOOGLE_API_KEY=
+CUSTOM_SEARCH_ENGINE_ID=
diff --git a/README.md b/README.md
@@ -29,13 +29,22 @@ Your support is greatly appreciated
 
 ## Table of Contents
 
-- [Features](#-features)
-- [Requirements](#-requirements)
-- [Installation](#-installation)
-- [Usage](#-usage)
-- [Limitations](#-limitations)
-- [Disclaimer](#-disclaimer)
-- [Connect with Us on Twitter ](#-connect-with-us-on-twitter)
+- [Auto-GPT: An Autonomous GPT-4 Experiment](#auto-gpt-an-autonomous-gpt-4-experiment)
+    - [Demo (30/03/2023):](#demo-30032023)
+  - [💖 Help Fund Auto-GPT's Development](#-help-fund-auto-gpts-development)
+  - [Table of Contents](#table-of-contents)
+  - [🚀 Features](#-features)
+  - [📋 Requirements](#-requirements)
+  - [💾 Installation](#-installation)
+  - [🔧 Usage](#-usage)
+  - [🗣️ Speech Mode](#️-speech-mode)
+  - [🔍 Google API Keys Configuration](#-google-api-keys-configuration)
+    - [Setting up environment variables](#setting-up-environment-variables)
+  - [💀 Continuous Mode ⚠️](#-continuous-mode-️)
+  - [GPT3.5 ONLY Mode](#gpt35-only-mode)
+  - [⚠️ Limitations](#️-limitations)
+  - [🛡 Disclaimer](#-disclaimer)
+  - [🐦 Connect with Us on Twitter](#-connect-with-us-on-twitter)
 
 
 ## 🚀 Features
@@ -100,6 +109,35 @@ python scripts/main.py --speak
 
 ```
 
+## 🔍 Google API Keys Configuration
+
+This section is optional. Use the official Google API if you are having issues with error 429 when running Google search.
+To use the `google_official_search` command, you need to set up your Google API keys in your environment variables.
+
+1. Go to the [Google Cloud Console](https://console.cloud.google.com/).
+2. If you don't already have an account, create one and log in.
+3. Create a new project by clicking on the "Select a Project" dropdown at the top of the page and clicking "New Project". Give it a name and click "Create".
+4. Go to the [APIs & Services Dashboard](https://console.cloud.google.com/apis/dashboard) and click "Enable APIs and Services". Search for "Custom Search API" and click on it, then click "Enable".
+5. Go to the [Credentials](https://console.cloud.google.com/apis/credentials) page and click "Create Credentials". Choose "API Key".
+6. Copy the API key and set it as an environment variable named `GOOGLE_API_KEY` on your machine. See setting up environment variables below.
+7. Go to the [Custom Search Engine](https://cse.google.com/cse/all) page and click "Add".
+8. Set up your search engine by following the prompts. You can choose to search the entire web or specific sites.
+9. Once you've created your search engine, click on "Control Panel" and then "Basics". Copy the "Search engine ID" and set it as an environment variable named `CUSTOM_SEARCH_ENGINE_ID` on your machine. See setting up environment variables below.
+
+### Setting up environment variables
+For Windows users:
+```
+setx GOOGLE_API_KEY "YOUR_GOOGLE_API_KEY"
+setx CUSTOM_SEARCH_ENGINE_ID "YOUR_CUSTOM_SEARCH_ENGINE_ID"
+
+```
+For macOS and Linux users:
+```
+export GOOGLE_API_KEY="YOUR_GOOGLE_API_KEY"
+export CUSTOM_SEARCH_ENGINE_ID="YOUR_CUSTOM_SEARCH_ENGINE_ID"
+
+```
+
 ## 💀 Continuous Mode ⚠️
 Run the AI **without** user authorisation, 100% automated.
 Continuous mode is not recommended. 

diff --git a/requirements.txt b/requirements.txt
@@ -7,6 +7,7 @@ pyyaml==6.0
 readability-lxml==0.8.1
 requests
 tiktoken==0.3.3
+gTTS==2.3.1
 docker
 duckduckgo-search
-# Googlesearch python seems to be a bit cursed, anyone good at fixing thigns like this?
+google-api-python-client #(https://developers.google.com/custom-search/v1/overview) 
diff --git a/scripts/browse.py b/scripts/browse.py
@@ -74,61 +74,42 @@ def split_text(text, max_length=8192):
         yield "\n".join(current_chunk)
 
 
-def summarize_text(text, is_website=True):
-    if text == "":
+def create_message(chunk, question):
+    return {
+        "role": "user",
+        "content": f"\"\"\"{chunk}\"\"\" Using the above text, please answer the following question: \"{question}\" -- if the question cannot be answered using the text, please summarize the text."
+    }
+
+def summarize_text(text, question):
+    if not text:
         return "Error: No text to summarize"
 
-    print("Text length: " + str(len(text)) + " characters")
+    text_length = len(text)
+    print(f"Text length: {text_length} characters")
+
     summaries = []
     chunks = list(split_text(text))
 
     for i, chunk in enumerate(chunks):
-        print("Summarizing chunk " + str(i + 1) + " / " + str(len(chunks)))
-        if is_website:
-            messages = [
-                {
-                    "role": "user",
-                    "content": "Please summarize the following website text, do not describe the general website, but instead concisely extract the specific information this subpage contains.: " +
-                    chunk},
-            ]
-        else:
-            messages = [
-                {
-                    "role": "user",
-                    "content": "Please summarize the following text, focusing on extracting concise and specific information: " +
-                    chunk},
-            ]
+        print(f"Summarizing chunk {i + 1} / {len(chunks)}")
+        messages = [create_message(chunk, question)]
 
         summary = create_chat_completion(
             model=cfg.fast_llm_model,
             messages=messages,
             max_tokens=300,
         )
         summaries.append(summary)
-    print("Summarized " + str(len(chunks)) + " chunks.")
 
-    combined_summary = "\n".join(summaries)
+    print(f"Summarized {len(chunks)} chunks.")
 
-    # Summarize the combined summary
-    if is_website:
-        messages = [
-            {
-                "role": "user",
-                "content": "Please summarize the following website text, do not describe the general website, but instead concisely extract the specific information this subpage contains.: " +
-                combined_summary},
-        ]
-    else:
-        messages = [
-            {
-                "role": "user",
-                "content": "Please summarize the following text, focusing on extracting concise and specific infomation: " +
-                combined_summary},
-        ]
+    combined_summary = "\n".join(summaries)
+    messages = [create_message(combined_summary, question)]
 
     final_summary = create_chat_completion(
         model=cfg.fast_llm_model,
         messages=messages,
         max_tokens=300,
     )
 
-    return final_summary
+    return final_summary
diff --git a/scripts/commands.py b/scripts/commands.py
@@ -10,6 +10,9 @@
 from execute_code import execute_python_file
 from json_parser import fix_and_parse_json
 from duckduckgo_search import ddg
+from googleapiclient.discovery import build
+from googleapiclient.errors import HttpError
+
 cfg = Config()
 
 
@@ -44,7 +47,13 @@ def get_command(response):
 def execute_command(command_name, arguments):
     try:
         if command_name == "google":
-            return google_search(arguments["input"])
+
+            # Check if the Google API key is set and use the official search method
+            # If the API key is not set or has only whitespaces, use the unofficial search method
+            if cfg.google_api_key and (cfg.google_api_key.strip() if cfg.google_api_key else None):
+                return google_official_search(arguments["input"])
+            else:
+                return google_search(arguments["input"])
         elif command_name == "memory_add":
             return commit_memory(arguments["string"])
         elif command_name == "memory_del":
@@ -63,7 +72,7 @@ def execute_command(command_name, arguments):
         elif command_name == "delete_agent":
             return delete_agent(arguments["key"])
         elif command_name == "get_text_summary":
-            return get_text_summary(arguments["url"])
+            return get_text_summary(arguments["url"], arguments["question"])
         elif command_name == "get_hyperlinks":
             return get_hyperlinks(arguments["url"])
         elif command_name == "read_file":
@@ -75,7 +84,7 @@ def execute_command(command_name, arguments):
         elif command_name == "delete_file":
             return delete_file(arguments["file"])
         elif command_name == "browse_website":
-            return browse_website(arguments["url"])
+            return browse_website(arguments["url"], arguments["question"])
         # TODO: Change these to take in a file rather than pasted code, if
         # non-file is given, return instructions "Input should be a python
         # filepath, write your code to file and try again"
@@ -108,9 +117,43 @@ def google_search(query, num_results=8):
 
     return json.dumps(search_results, ensure_ascii=False, indent=4)
 
+def google_official_search(query, num_results=8):
+    from googleapiclient.discovery import build
+    from googleapiclient.errors import HttpError
+    import json
+
+    try:
+        # Get the Google API key and Custom Search Engine ID from the config file
+        api_key = cfg.google_api_key
+        custom_search_engine_id = cfg.custom_search_engine_id
+
+        # Initialize the Custom Search API service
+        service = build("customsearch", "v1", developerKey=api_key)
+
+        # Send the search query and retrieve the results
+        result = service.cse().list(q=query, cx=custom_search_engine_id, num=num_results).execute()
+
+        # Extract the search result items from the response
+        search_results = result.get("items", [])
+
+        # Create a list of only the URLs from the search results
+        search_results_links = [item["link"] for item in search_results]
+
+    except HttpError as e:
+        # Handle errors in the API call
+        error_details = json.loads(e.content.decode())
+
+        # Check if the error is related to an invalid or missing API key
+        if error_details.get("error", {}).get("code") == 403 and "invalid API key" in error_details.get("error", {}).get("message", ""):
+            return "Error: The provided Google API key is invalid or missing."
+        else:
+            return f"Error: {e}"
+
+    # Return the list of search result URLs
+    return search_results_links
 
-def browse_website(url):
-    summary = get_text_summary(url)
+def browse_website(url, question):
+    summary = get_text_summary(url, question)
     links = get_hyperlinks(url)
 
     # Limit links to 5
@@ -122,9 +165,9 @@ def browse_website(url):
     return result
 
 
-def get_text_summary(url):
+def get_text_summary(url, question):
     text = browse.scrape_text(url)
-    summary = browse.summarize_text(text)
+    summary = browse.summarize_text(text, question)
     return """ "Result" : """ + summary
 
 

diff --git a/scripts/config.py b/scripts/config.py
@@ -35,11 +35,13 @@ def __init__(self):
 
         self.openai_api_key = os.getenv("OPENAI_API_KEY")
         self.elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
+
+        self.google_api_key = os.getenv("GOOGLE_API_KEY")
+        self.custom_search_engine_id = os.getenv("CUSTOM_SEARCH_ENGINE_ID")
 
         # Initialize the OpenAI API client
         openai.api_key = self.openai_api_key
 
-
     def set_continuous_mode(self, value: bool):
         self.continuous_mode = value
 
@@ -59,10 +61,13 @@ def set_smart_token_limit(self, value: int):
         self.smart_token_limit = value
 
     def set_openai_api_key(self, value: str):
-        self.apiopenai_api_key_key = value
+        self.openai_api_key = value
 
     def set_elevenlabs_api_key(self, value: str):
         self.elevenlabs_api_key = value
-
-
 
+    def set_google_api_key(self, value: str):
+        self.google_api_key = value
+
+    def set_custom_search_engine_id(self, value: str):
+        self.custom_search_engine_id = value
diff --git a/scripts/data.py b/scripts/data.py
@@ -1,6 +1,6 @@
 import os
 from pathlib import Path
-
+SRC_DIR = Path(__file__).parent
 
 def load_prompt():
     try:
@@ -9,7 +9,7 @@ def load_prompt():
         data_dir = file_dir / "data"
         prompt_file = data_dir / "prompt.txt"
         # Load the promt from data/prompt.txt
-        with open(prompt_file, "r") as prompt_file:
+        with open(SRC_DIR/ "data/prompt.txt", "r") as prompt_file:
             prompt = prompt_file.read()
 
         return prompt

diff --git a/scripts/data/prompt.txt b/scripts/data/prompt.txt
@@ -1,15 +1,16 @@
 CONSTRAINTS:
 
-1. ~4000 word limit for memory. Your memory is short, so immidiately save important information to long term memory and code to files.
+1. ~4000 word limit for memory. Your memory is short, so immediately save important information to long term memory and code to files.
 2. No user assistance
+3. Exclusively use the commands listed in double quotes e.g. "command name"
 
 COMMANDS:
 
 1. Google Search: "google", args: "input": "<search>"
 2. Memory Add: "memory_add", args: "string": "<string>"
 3. Memory Delete: "memory_del", args: "key": "<key>"
 4. Memory Overwrite: "memory_ovr", args: "key": "<key>", "string": "<string>"
-5. Browse Website: "browse_website", args: "url": "<url>"
+5. Browse Website: "browse_website", args: "url": "<url>", "question": "<what_you_want_to_find_on_website>"
 6. Start GPT Agent: "start_agent",  args: "name": <name>, "task": "<short_task_desc>", "prompt": "<prompt>"
 7. Message GPT Agent: "message_agent", args: "key": "<key>", "message": "<message>"
 8. List GPT Agents: "list_agents", args: ""
@@ -34,9 +35,9 @@ RESOURCES:
 PERFORMANCE EVALUATION:
 
 1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities. 
-2. Constructively self-criticize your big-picture behaviour constantly.
+2. Constructively self-criticize your big-picture behavior constantly.
 3. Reflect on past decisions and strategies to refine your approach.
-4. Every command has a cost, so be smart and efficent. Aim to complete tasks in the least number of steps.
+4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.
 
 You should only respond in JSON format as described below
 
@@ -58,4 +59,4 @@ RESPONSE FORMAT:
     }
 }
 
-Ensure the response can be parsed by Python json.loads
+Ensure the response can be parsed by Python json.loads
diff --git a/scripts/file_operations.py b/scripts/file_operations.py
@@ -31,6 +31,9 @@ def read_file(filename):
 def write_to_file(filename, text):
     try:
         filepath = safe_join(working_directory, filename)
+        directory = os.path.dirname(filepath)
+        if not os.path.exists(directory):
+            os.makedirs(directory)
         with open(filepath, "w") as f:
             f.write(text)
         return "File written to successfully."