Skip to content

Commit

Permalink
feat: limit request rate
Browse files Browse the repository at this point in the history
  • Loading branch information
hthai2201 committed Jan 19, 2025
1 parent 405ae34 commit 7ed6b6d
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 8 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ __pycache__
venv
.env
newsletter_bot.log
.DS_Store
3 changes: 3 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,6 @@ output_settings:

cron_settings:
frequency: 1440 # Frequency in minutes
# config.yaml
api_settings:
gemini_rate_limit: 10 # Maximum number of calls per minute
5 changes: 5 additions & 0 deletions config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,8 @@ def get_cron_frequency():
# Retrieve the frequency for the cron job in minutes
config = load_config()
return config['cron_settings']['frequency']

def get_gemini_rate_limit():
    # Retrieve the Gemini API rate limit (maximum calls per minute) from config.
    # Raises:
    #   KeyError   - if api_settings.gemini_rate_limit is missing from config.yaml
    #   ValueError - if the configured value is not a positive number; callers
    #                divide by this value (60 / limit) to compute the interval
    #                between API calls, so 0 or a negative/bogus value would
    #                otherwise surface as a cryptic ZeroDivisionError/TypeError
    #                at import time in email_parser.
    config = load_config()
    limit = config['api_settings']['gemini_rate_limit']
    if not isinstance(limit, (int, float)) or isinstance(limit, bool) or limit <= 0:
        raise ValueError(
            f"api_settings.gemini_rate_limit must be a positive number, got {limit!r}"
        )
    return limit
30 changes: 23 additions & 7 deletions email_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,44 @@
import os
import json
import re
from config_manager import get_search_criteria, get_min_relevancy_score
import time
from config_manager import get_search_criteria, get_min_relevancy_score, get_gemini_rate_limit
from datetime import datetime
from email.utils import parsedate_to_datetime
from article_summarize import crawl_and_summarize
from promts import get_extract_articles_prompt

logger = logging.getLogger(__name__)

# Configure the Gemini API
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
model = genai.GenerativeModel('gemini-2.0-flash-exp')

# Rate limiter variables
last_api_call_time = 0
rate_limit_interval = 60 / get_gemini_rate_limit()

def extract_articles(email):
"""
    Extracts individual articles from an email using the configured Gemini model.
Returns a list of dictionaries, each containing article title, description, and URL.
"""
global last_api_call_time
articles = []

logger.info(f"Extracting articles from email: {email.subject}")

# Get grouped criteria
grouped_criteria = get_search_criteria()


# Ensure rate limit is respected
current_time = time.time()
time_since_last_call = current_time - last_api_call_time
if time_since_last_call < rate_limit_interval:
sleep_time = rate_limit_interval - time_since_last_call
logger.info(f"Rate limit exceeded, sleeping for {sleep_time:.2f} seconds")
time.sleep(sleep_time)

logger.info("Requesting Gemini to extract articles from email")
prompt = get_extract_articles_prompt(email.text or email.html, grouped_criteria, get_min_relevancy_score(),True)
response = model.generate_content(prompt)
last_api_call_time = time.time() # Update the last API call time

try:
json_match = re.search(r'\[.*\]', response.text, re.DOTALL)
Expand All @@ -50,6 +62,7 @@ def extract_articles(email):
logger.info("Requesting Gemini to extract other articles from email")

enrichment_response = model.generate_content(enrichment_prompt)
last_api_call_time = time.time() # Update the last API call time

try:
enrichment_json_match = re.search(r'\[.*\]', enrichment_response.text, re.DOTALL)
Expand All @@ -63,7 +76,10 @@ def extract_articles(email):
except Exception as e:
logger.error(f"Error parsing Gemini response: {str(e)}")

articles = [article for article in articles if not article.get('need_enrichment', True)]
articles = [
article for article in articles
if not article.get('need_enrichment', False) and isinstance(article.get('criteria'), list)
]
except json.JSONDecodeError as e:
logger.error(f"Error decoding JSON from Gemini response: {str(e)}")
articles = []
Expand Down
2 changes: 1 addition & 1 deletion promts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ def get_extract_articles_prompt(content=None, grouped_criteria=None, min_relevan
return f"""
Analyze the following email content and extract information about articles mentioned.
For each article:
1. Extract the original title, description (if available), and URL (look for [LINK: url] in the text)
1. Extract the original title, description (if available), and URL (look for [LINK: url] in the text or <title>,<link>,<description> in content)
2. Rewrite the title and description in a friendlier, lighter tone with a touch of personal feel
3. Keep the rewritten content concise and engaging
4. Restrict the description to be less than 160 characters, and be more to the point
Expand Down

0 comments on commit 7ed6b6d

Please sign in to comment.