Update utils.py
utils.py
CHANGED
@@ -45,8 +45,7 @@ def call_llm_with_retry(groq_client, **payload):
     Wraps groq_client.chat.completions.create(**payload) in a retry loop
     to catch rate-limit errors or service unavailable (503) errors.
     If we see “try again in XXs,” or detect a 503 error, we parse the wait time,
-    sleep, then retry. We also do a short sleep (0.3s) after each successful call
-    to spread usage.
+    sleep, then retry. We also do a short sleep (0.3s) after each successful call.
     """
     max_retries = 3
     for attempt in range(max_retries):
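For reference, the wrapper is invoked elsewhere in this file as call_llm_with_retry(groq_client, **payload), where payload is a plain dict of chat-completion arguments. A minimal usage sketch; the model id and prompt below are placeholders, not values from this commit:

    payload = {
        "model": "some-model-id",  # placeholder; the file uses constants such as MODEL_COMBINATION
        "messages": [{"role": "user", "content": "Summarize this text ..."}],
        "temperature": 0.3,
        "max_tokens": 1024,
    }
    response = call_llm_with_retry(groq_client, **payload)
    text = response.choices[0].message.content.strip()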
@@ -60,16 +59,13 @@ def call_llm_with_retry(groq_client, **payload):
         except Exception as e:
             err_str = str(e).lower()
             print(f"[WARN] call_llm_with_retry attempt {attempt+1} failed: {e}")
-            # Check for rate limit or service unavailable errors.
             if ("rate_limit_exceeded" in err_str or "try again in" in err_str or "503" in err_str):
                 wait_time = 60.0
                 match = re.search(r'try again in (\d+(?:\.\d+)?)s', str(e), re.IGNORECASE)
                 if match:
                     wait_time = float(match.group(1)) + 1.0
-
-
-                if "503" in err_str:
-                    wait_time = 60.0
+                elif "503" in err_str:
+                    wait_time = 60.0
                 print(f"[WARN] Detected error (rate limit or 503). Sleeping for {wait_time:.1f}s, then retrying.")
                 time.sleep(wait_time)
             else:
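Since only the except branch changes in this hunk, a self-contained sketch of how the whole wrapper plausibly reads after this commit may help. The success path (the create() call, the 0.3s pause, the return) is inferred from the docstring rather than shown in the diff, and the final raise is an assumption:

    import re
    import time

    def call_llm_with_retry(groq_client, **payload):
        max_retries = 3
        for attempt in range(max_retries):
            try:
                # Success path inferred from the docstring, not shown in the diff.
                response = groq_client.chat.completions.create(**payload)
                time.sleep(0.3)  # short pause after each successful call to spread usage
                return response
            except Exception as e:
                err_str = str(e).lower()
                print(f"[WARN] call_llm_with_retry attempt {attempt+1} failed: {e}")
                if ("rate_limit_exceeded" in err_str or "try again in" in err_str or "503" in err_str):
                    wait_time = 60.0
                    match = re.search(r'try again in (\d+(?:\.\d+)?)s', str(e), re.IGNORECASE)
                    if match:
                        wait_time = float(match.group(1)) + 1.0
                    elif "503" in err_str:
                        wait_time = 60.0  # redundant with the default; kept to mirror the commit
                    print(f"[WARN] Detected error (rate limit or 503). Sleeping for {wait_time:.1f}s, then retrying.")
                    time.sleep(wait_time)
                else:
                    raise  # assumed: non-retryable errors propagate immediately
        raise RuntimeError("call_llm_with_retry: exhausted all retries")  # assumed fallback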
@@ -502,10 +498,12 @@ def run_research_agent(
     """
     Low-Call approach:
       1) Tavily search (up to 20 URLs).
-      2) Firecrawl scrape => combined text
+      2) Firecrawl scrape => combined text.
       3) Use the full combined text without truncation.
       4) Split into chunks (each 4500 tokens) => Summarize each chunk individually => summaries.
       5) Single final merge => final PDF.
+    If the report output is incomplete, the model will output "CONTINUE" so that additional calls
+    can be made to retrieve the rest of the report.
     => 2 or more total LLM calls (but no more than 10) to reduce the chance of rate limit errors.
     """
     print(f"[LOG] Starting LOW-CALL research agent for topic: {topic}")
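Step 4 of the docstring splits the combined text into 4500-token chunks before summarizing each one. The chunking helper itself is outside this diff; a rough sketch under the common approximation of about four characters per token (both the function name and the heuristic are assumptions, not from this repo):

    def split_into_chunks(text, max_tokens=4500, chars_per_token=4):
        # Hypothetical helper: cut text into roughly max_tokens-sized pieces
        # using a crude character-count estimate of token length.
        size = max_tokens * chars_per_token
        return [text[i:i + size] for i in range(0, len(text), size)]

Each chunk then costs one summarization call through call_llm_with_retry, which is consistent with the docstring's bound of no more than ten total calls.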
@@ -596,6 +594,7 @@ include key data points and context:
     truncated_summaries = [truncate_text_for_llm(s, max_tokens=1000) for s in summaries]
     merged_input = "\n\n".join(truncated_summaries)
 
+    # Final merge prompt now instructs the model to output END_OF_REPORT when complete.
     final_prompt = f"""
 IMPORTANT: Do NOT include any chain-of-thought, internal planning, or hidden reasoning in the final output.
 Produce a long, academic-style research report with the following structure:
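truncate_text_for_llm is referenced here but not defined in this diff; a plausible sketch under the same characters-per-token approximation (only the name and keyword argument appear in the commit, the body is an assumption):

    def truncate_text_for_llm(text, max_tokens=1000, chars_per_token=4):
        # Hypothetical implementation: cap each partial summary at roughly
        # max_tokens tokens so the merged input stays within the context window.
        limit = max_tokens * chars_per_token
        return text if len(text) <= limit else text[:limit]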
@@ -613,7 +612,7 @@ Requirements:
 - Minimal bullet points, prefer multi-paragraph
 - Each section at least 2-3 paragraphs
 - Aim for 1500+ words if possible
--
+- Please output 'END_OF_REPORT' at the end if the report is complete. If it is too long, output 'CONTINUE' at the end.
 - Professional, academic tone
 Partial Summaries:
 {merged_input}
@@ -625,15 +624,29 @@ Now, merge these partial summaries into one thoroughly expanded research report:
         "model": MODEL_COMBINATION,
         "messages": [{"role": "user", "content": final_prompt}],
         "temperature": 0.3,
-        "max_tokens":
+        "max_tokens": 4096
     }
     final_response = call_llm_with_retry(groq_client, **final_data)
     final_text = final_response.choices[0].message.content.strip()
 
+    # Continuation loop: if the report does not include END_OF_REPORT, ask for continuation.
+    while "END_OF_REPORT" not in final_text:
+        print("[LOG] Final output incomplete. Requesting continuation...")
+        continuation_prompt = "The previous report ended with 'CONTINUE'. Please continue the report from where it left off, and when finished, output 'END_OF_REPORT'."
+        cont_data = {
+            "model": MODEL_COMBINATION,
+            "messages": [{"role": "user", "content": continuation_prompt}],
+            "temperature": 0.3,
+            "max_tokens": 4096
+        }
+        cont_response = call_llm_with_retry(groq_client, **cont_data)
+        cont_text = cont_response.choices[0].message.content.strip()
+        final_text += "\n" + cont_text
+
     # --- NEW POST-PROCESSING STEP ---
-    # Remove any lingering chain-of-thought
+    # Remove any lingering chain-of-thought markers and the END/CONTINUE tokens.
     final_text = re.sub(r"<think>.*?</think>", "", final_text, flags=re.DOTALL)
-    final_text = final_text.strip()
+    final_text = final_text.replace("END_OF_REPORT", "").replace("CONTINUE", "").strip()
     # ------------------------------
 
     # Step 6: PDF generation
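One design note on the continuation loop above: as committed, each follow-up request carries only the instruction, not the draft produced so far, and the while loop has no iteration cap even though the docstring promises at most ten calls. A hedged variant that feeds back the tail of the draft and bounds the retries could look like this (the five-round cap and the 2000-character tail are arbitrary illustration choices, not from the commit):

    max_continuations = 5  # assumed cap so the loop cannot spin forever
    for _ in range(max_continuations):
        if "END_OF_REPORT" in final_text:
            break
        cont_data = {
            "model": MODEL_COMBINATION,
            "messages": [
                {"role": "user", "content": final_prompt},
                {"role": "assistant", "content": final_text[-2000:]},  # tail of the draft as context
                {"role": "user", "content": "Continue the report exactly where it left off. "
                                            "Output 'END_OF_REPORT' when finished."},
            ],
            "temperature": 0.3,
            "max_tokens": 4096,
        }
        cont_response = call_llm_with_retry(groq_client, **cont_data)
        final_text += "\n" + cont_response.choices[0].message.content.strip()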