Skip to content

Commit

Permalink
feat: limit request rate
Browse files Browse the repository at this point in the history
  • Loading branch information
hthai2201 committed Jan 19, 2025
1 parent 405ae34 commit 7ed6b6d
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 8 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ __pycache__
venv
.env
newsletter_bot.log
.DS_Store
3 changes: 3 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,6 @@ output_settings:

cron_settings:
frequency: 1440 # Frequency in minutes
# config.yaml
api_settings:
gemini_rate_limit: 10 # Maximum number of calls per minute
5 changes: 5 additions & 0 deletions config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,8 @@ def get_cron_frequency():
# Retrieve the frequency for the cron job in minutes
config = load_config()
return config['cron_settings']['frequency']

def get_gemini_rate_limit():
    # Retrieve the Gemini API rate limit (maximum calls per minute) from config.
    # Raises:
    #   KeyError   - if api_settings.gemini_rate_limit is missing from config.yaml
    #   ValueError - if the configured value is not a positive number; callers
    #                divide by this value (60 / limit) to compute the interval
    #                between API calls, so 0 or a negative/bogus value would
    #                otherwise surface as a cryptic ZeroDivisionError/TypeError
    #                at import time in email_parser.
    config = load_config()
    limit = config['api_settings']['gemini_rate_limit']
    if not isinstance(limit, (int, float)) or isinstance(limit, bool) or limit <= 0:
        raise ValueError(
            f"api_settings.gemini_rate_limit must be a positive number, got {limit!r}"
        )
    return limit
30 changes: 23 additions & 7 deletions email_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,44 @@
import os
import json
import re
from config_manager import get_search_criteria, get_min_relevancy_score
import time
from config_manager import get_search_criteria, get_min_relevancy_score, get_gemini_rate_limit
from datetime import datetime
from email.utils import parsedate_to_datetime
from article_summarize import crawl_and_summarize
from promts import get_extract_articles_prompt

logger = logging.getLogger(__name__)

# Configure the Gemini API
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
model = genai.GenerativeModel('gemini-2.0-flash-exp')

# Rate limiter variables
last_api_call_time = 0
rate_limit_interval = 60 / get_gemini_rate_limit()

def extract_articles(email):
"""
    Extracts individual articles from an email using the configured Gemini model.
Returns a list of dictionaries, each containing article title, description, and URL.
"""
global last_api_call_time
articles = []

logger.info(f"Extracting articles from email: {email.subject}")

# Get grouped criteria
grouped_criteria = get_search_criteria()


# Ensure rate limit is respected
current_time = time.time()
time_since_last_call = current_time - last_api_call_time
if time_since_last_call < rate_limit_interval:
sleep_time = rate_limit_interval - time_since_last_call
logger.info(f"Rate limit exceeded, sleeping for {sleep_time:.2f} seconds")
time.sleep(sleep_time)

logger.info("Requesting Gemini to extract articles from email")
prompt = get_extract_articles_prompt(email.text or email.html, grouped_criteria, get_min_relevancy_score(),True)
response = model.generate_content(prompt)
last_api_call_time = time.time() # Update the last API call time

try:
json_match = re.search(r'\[.*\]', response.text, re.DOTALL)
Expand All @@ -50,6 +62,7 @@ def extract_articles(email):
logger.info("Requesting Gemini to extract other articles from email")

enrichment_response = model.generate_content(enrichment_prompt)
last_api_call_time = time.time() # Update the last API call time

try:
enrichment_json_match = re.search(r'\[.*\]', enrichment_response.text, re.DOTALL)
Expand All @@ -63,7 +76,10 @@ def extract_articles(email):
except Exception as e:
logger.error(f"Error parsing Gemini response: {str(e)}")

articles = [article for article in articles if not article.get('need_enrichment', True)]
articles = [
article for article in articles
if not article.get('need_enrichment', False) and isinstance(article.get('criteria'), list)
]
except json.JSONDecodeError as e:
logger.error(f"Error decoding JSON from Gemini response: {str(e)}")
articles = []
Expand Down
2 changes: 1 addition & 1 deletion promts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ def get_extract_articles_prompt(content=None, grouped_criteria=None, min_relevan
return f"""
Analyze the following email content and extract information about articles mentioned.
For each article:
1. Extract the original title, description (if available), and URL (look for [LINK: url] in the text)
1. Extract the original title, description (if available), and URL (look for [LINK: url] in the text or <title>,<link>,<description> in content)
2. Rewrite the title and description in a friendlier, lighter tone with a touch of personal feel
3. Keep the rewritten content concise and engaging
4. Restrict the description to be less than 160 characters, and be more to the point
Expand Down

0 comments on commit 7ed6b6d

Please sign in to comment.