Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,6 @@
|
|
1 |
# Hey, stranger! This code is meant for the free tier of the Gemini LLM,
|
2 |
-
# which is limited by RPM (15/30).
|
3 |
-
#
|
4 |
-
# Nevertheless, it scored 35%, which is good for me, while two questions
|
5 |
-
# were dropped due to exceeding RPM. So, it is still possible to improve,
|
6 |
-
# e.g. deploying gemini 2.0 flash lite which has double RPM limit.
|
7 |
# Try it out!
|
8 |
|
9 |
import os
|
@@ -23,7 +20,7 @@ from token_bucket import Limiter, MemoryStorage
|
|
23 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
24 |
|
25 |
# Rate limiting configuration
|
26 |
-
MAX_MODEL_CALLS_PER_MINUTE =
|
27 |
RATE_LIMIT = MAX_MODEL_CALLS_PER_MINUTE
|
28 |
TOKEN_BUCKET_CAPACITY = RATE_LIMIT
|
29 |
TOKEN_BUCKET_REFILL_RATE = RATE_LIMIT / 60.0 # Tokens per second
|
@@ -73,7 +70,7 @@ async def process_question(agent, question_text: str, task_id: str, results_log:
|
|
73 |
"""Process a single question with global rate limiting."""
|
74 |
submitted_answer = None
|
75 |
max_retries = 3
|
76 |
-
retry_delay =
|
77 |
|
78 |
for attempt in range(max_retries):
|
79 |
try:
|
@@ -84,7 +81,7 @@ async def process_question(agent, question_text: str, task_id: str, results_log:
|
|
84 |
print(f"Processing task {task_id} (attempt {attempt + 1})...")
|
85 |
submitted_answer = await asyncio.wait_for(
|
86 |
agent(question_text, task_id),
|
87 |
-
timeout=
|
88 |
)
|
89 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
90 |
print(f"Completed task {task_id} with answer: {submitted_answer[:50]}...")
|
|
|
1 |
# Hey, stranger! This code is meant for the free tier of the Gemini LLM,
|
2 |
+
# which is limited by RPM (15/30).
|
3 |
+
# Nevertheless, it scored 35%, which is good for me...
|
|
|
|
|
|
|
4 |
# Try it out!
|
5 |
|
6 |
import os
|
|
|
20 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
21 |
|
22 |
# Rate limiting configuration
|
23 |
+
MAX_MODEL_CALLS_PER_MINUTE = 13 # Conservative buffer below 15 RPM
|
24 |
RATE_LIMIT = MAX_MODEL_CALLS_PER_MINUTE
|
25 |
TOKEN_BUCKET_CAPACITY = RATE_LIMIT
|
26 |
TOKEN_BUCKET_REFILL_RATE = RATE_LIMIT / 60.0 # Tokens per second
|
|
|
70 |
"""Process a single question with global rate limiting."""
|
71 |
submitted_answer = None
|
72 |
max_retries = 3
|
73 |
+
retry_delay = 5 # 5 seconds between retries (60 s / 13 RPM ≈ 4.6 s per call)
|
74 |
|
75 |
for attempt in range(max_retries):
|
76 |
try:
|
|
|
81 |
print(f"Processing task {task_id} (attempt {attempt + 1})...")
|
82 |
submitted_answer = await asyncio.wait_for(
|
83 |
agent(question_text, task_id),
|
84 |
+
timeout=35 # 35-second timeout per question
|
85 |
)
|
86 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
87 |
print(f"Completed task {task_id} with answer: {submitted_answer[:50]}...")
|