siddhartharyaai committed
Commit 49ff1ff · verified · 1 Parent(s): 3290a02

Update utils.py

Files changed (1):
  utils.py +25 -12
utils.py CHANGED
@@ -45,8 +45,7 @@ def call_llm_with_retry(groq_client, **payload):
     Wraps groq_client.chat.completions.create(**payload) in a retry loop
     to catch rate-limit errors or service unavailable (503) errors.
     If we see "try again in XXs," or detect a 503 error, we parse the wait time,
-    sleep, then retry. We also do a short sleep (0.3s) after each successful call
-    to spread usage.
+    sleep, then retry. We also do a short sleep (0.3s) after each successful call.
     """
     max_retries = 3
     for attempt in range(max_retries):
@@ -60,16 +59,13 @@ def call_llm_with_retry(groq_client, **payload):
         except Exception as e:
             err_str = str(e).lower()
             print(f"[WARN] call_llm_with_retry attempt {attempt+1} failed: {e}")
-            # Check for rate limit or service unavailable errors.
             if ("rate_limit_exceeded" in err_str or "try again in" in err_str or "503" in err_str):
                 wait_time = 60.0
                 match = re.search(r'try again in (\d+(?:\.\d+)?)s', str(e), re.IGNORECASE)
                 if match:
                     wait_time = float(match.group(1)) + 1.0
-                else:
-                    # If a 503 error, use default wait time.
-                    if "503" in err_str:
-                        wait_time = 60.0
+                elif "503" in err_str:
+                    wait_time = 60.0
                 print(f"[WARN] Detected error (rate limit or 503). Sleeping for {wait_time:.1f}s, then retrying.")
                 time.sleep(wait_time)
             else:
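For reference, a minimal usage sketch of this retry wrapper (the client construction and model name below are illustrative assumptions, not part of this commit):

    # Hypothetical caller; assumes the Groq SDK's Groq() client, whose
    # chat.completions.create(**payload) call the wrapper forwards to.
    from groq import Groq

    groq_client = Groq()  # reads GROQ_API_KEY from the environment

    payload = {
        "model": "llama-3.3-70b-versatile",  # placeholder model name
        "messages": [{"role": "user", "content": "Summarize this chunk."}],
        "temperature": 0.3,
        "max_tokens": 1024,
    }
    response = call_llm_with_retry(groq_client, **payload)
    print(response.choices[0].message.content)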
@@ -502,10 +498,12 @@ def run_research_agent(
     """
     Low-Call approach:
      1) Tavily search (up to 20 URLs).
-     2) Firecrawl scrape => combined text
+     2) Firecrawl scrape => combined text.
      3) Use the full combined text without truncation.
      4) Split into chunks (each 4500 tokens) => Summarize each chunk individually => summaries.
      5) Single final merge => final PDF.
+    If the report output is incomplete, the model will output "CONTINUE" so that additional calls
+    can be made to retrieve the rest of the report.
     => 2 or more total LLM calls (but no more than 10) to reduce the chance of rate limit errors.
     """
     print(f"[LOG] Starting LOW-CALL research agent for topic: {topic}")
@@ -596,6 +594,7 @@ include key data points and context:
     truncated_summaries = [truncate_text_for_llm(s, max_tokens=1000) for s in summaries]
     merged_input = "\n\n".join(truncated_summaries)
 
+    # Final merge prompt now instructs the model to output END_OF_REPORT when complete.
     final_prompt = f"""
 IMPORTANT: Do NOT include any chain-of-thought, internal planning, or hidden reasoning in the final output.
 Produce a long, academic-style research report with the following structure:
@@ -613,7 +612,7 @@ Requirements:
 - Minimal bullet points, prefer multi-paragraph
 - Each section at least 2-3 paragraphs
 - Aim for 1500+ words if possible
-- Under 6000 tokens total
+- Please output 'END_OF_REPORT' at the end if the report is complete. If it is too long, output 'CONTINUE' at the end.
 - Professional, academic tone
 Partial Summaries:
 {merged_input}
@@ -625,15 +624,29 @@ Now, merge these partial summaries into one thoroughly expanded research report:
         "model": MODEL_COMBINATION,
         "messages": [{"role": "user", "content": final_prompt}],
         "temperature": 0.3,
-        "max_tokens": 2048
+        "max_tokens": 4096
     }
     final_response = call_llm_with_retry(groq_client, **final_data)
     final_text = final_response.choices[0].message.content.strip()
 
+    # Continuation loop: if the report does not include END_OF_REPORT, ask for continuation.
+    while "END_OF_REPORT" not in final_text:
+        print("[LOG] Final output incomplete. Requesting continuation...")
+        continuation_prompt = "The previous report ended with 'CONTINUE'. Please continue the report from where it left off, and when finished, output 'END_OF_REPORT'."
+        cont_data = {
+            "model": MODEL_COMBINATION,
+            "messages": [{"role": "user", "content": continuation_prompt}],
+            "temperature": 0.3,
+            "max_tokens": 4096
+        }
+        cont_response = call_llm_with_retry(groq_client, **cont_data)
+        cont_text = cont_response.choices[0].message.content.strip()
+        final_text += "\n" + cont_text
+
     # --- NEW POST-PROCESSING STEP ---
-    # Remove any lingering chain-of-thought content (anything between <think> and </think> tags)
+    # Remove any lingering chain-of-thought markers and the END/CONTINUE tokens.
     final_text = re.sub(r"<think>.*?</think>", "", final_text, flags=re.DOTALL)
-    final_text = final_text.strip()
+    final_text = final_text.replace("END_OF_REPORT", "").replace("CONTINUE", "").strip()
     # ------------------------------
 
     # Step 6: PDF generation
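One design note on the new continuation loop: as committed, each follow-up call sends only continuation_prompt, so the model never sees the partial report it is asked to extend, and the while loop has no upper bound if the model never emits END_OF_REPORT. A sketch of a variant that feeds the partial output back and caps the calls (the loop cap and follow-up wording are illustrative, not from this commit):

    # Sketch only, not the committed code: carry the assistant's partial
    # report back into the conversation and bound the number of calls.
    messages = [{"role": "user", "content": final_prompt}]
    final_text = ""
    for _ in range(10):  # mirrors the "no more than 10" intent in the docstring
        response = call_llm_with_retry(
            groq_client,
            model=MODEL_COMBINATION,
            messages=messages,
            temperature=0.3,
            max_tokens=4096,
        )
        chunk = response.choices[0].message.content.strip()
        final_text += "\n" + chunk
        if "END_OF_REPORT" in chunk:
            break
        messages.append({"role": "assistant", "content": chunk})
        messages.append({"role": "user", "content": "Continue the report from "
                         "where it left off; output 'END_OF_REPORT' when finished."})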
 