Update utils.py
utils.py
CHANGED
@@ -45,8 +45,7 @@ def call_llm_with_retry(groq_client, **payload):
     Wraps groq_client.chat.completions.create(**payload) in a retry loop
     to catch rate-limit errors or service unavailable (503) errors.
     If we see “try again in XXs,” or detect a 503 error, we parse the wait time,
-    sleep, then retry. We also do a short sleep (0.3s) after each successful call
-    to spread usage.
+    sleep, then retry. We also do a short sleep (0.3s) after each successful call.
     """
     max_retries = 3
     for attempt in range(max_retries):
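For reference, the wrapper is invoked elsewhere in this file as call_llm_with_retry(groq_client, **payload), where payload is a plain dict of chat-completion arguments. A minimal usage sketch; the model id and prompt below are placeholders, not values from this commit:

    payload = {
        "model": "some-model-id",  # placeholder; the file uses constants such as MODEL_COMBINATION
        "messages": [{"role": "user", "content": "Summarize this text ..."}],
        "temperature": 0.3,
        "max_tokens": 1024,
    }
    response = call_llm_with_retry(groq_client, **payload)
    text = response.choices[0].message.content.strip()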
@@ -60,16 +59,13 @@ def call_llm_with_retry(groq_client, **payload):
         except Exception as e:
             err_str = str(e).lower()
             print(f"[WARN] call_llm_with_retry attempt {attempt+1} failed: {e}")
-            # Check for rate limit or service unavailable errors.
             if ("rate_limit_exceeded" in err_str or "try again in" in err_str or "503" in err_str):
                 wait_time = 60.0
                 match = re.search(r'try again in (\d+(?:\.\d+)?)s', str(e), re.IGNORECASE)
                 if match:
                     wait_time = float(match.group(1)) + 1.0
-
-
-                if "503" in err_str:
-                    wait_time = 60.0
+                elif "503" in err_str:
+                    wait_time = 60.0
                 print(f"[WARN] Detected error (rate limit or 503). Sleeping for {wait_time:.1f}s, then retrying.")
                 time.sleep(wait_time)
             else:
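Since only the except branch changes in this hunk, a self-contained sketch of how the whole wrapper plausibly reads after this commit may help. The success path (the create() call, the 0.3s pause, the return) is inferred from the docstring rather than shown in the diff, and the final raise is an assumption:

    import re
    import time

    def call_llm_with_retry(groq_client, **payload):
        max_retries = 3
        for attempt in range(max_retries):
            try:
                # Success path inferred from the docstring, not shown in the diff.
                response = groq_client.chat.completions.create(**payload)
                time.sleep(0.3)  # short pause after each successful call to spread usage
                return response
            except Exception as e:
                err_str = str(e).lower()
                print(f"[WARN] call_llm_with_retry attempt {attempt+1} failed: {e}")
                if ("rate_limit_exceeded" in err_str or "try again in" in err_str or "503" in err_str):
                    wait_time = 60.0
                    match = re.search(r'try again in (\d+(?:\.\d+)?)s', str(e), re.IGNORECASE)
                    if match:
                        wait_time = float(match.group(1)) + 1.0
                    elif "503" in err_str:
                        wait_time = 60.0  # redundant with the default; kept to mirror the commit
                    print(f"[WARN] Detected error (rate limit or 503). Sleeping for {wait_time:.1f}s, then retrying.")
                    time.sleep(wait_time)
                else:
                    raise  # assumed: non-retryable errors propagate immediately
        raise RuntimeError("call_llm_with_retry: exhausted all retries")  # assumed fallback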
@@ -502,10 +498,12 @@ def run_research_agent(
     """
     Low-Call approach:
       1) Tavily search (up to 20 URLs).
-      2) Firecrawl scrape => combined text
+      2) Firecrawl scrape => combined text.
       3) Use the full combined text without truncation.
       4) Split into chunks (each 4500 tokens) => Summarize each chunk individually => summaries.
       5) Single final merge => final PDF.
+    If the report output is incomplete, the model will output "CONTINUE" so that additional calls
+    can be made to retrieve the rest of the report.
     => 2 or more total LLM calls (but no more than 10) to reduce the chance of rate limit errors.
     """
     print(f"[LOG] Starting LOW-CALL research agent for topic: {topic}")
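Step 4 of the docstring splits the combined text into 4500-token chunks before summarizing each one. The chunking helper itself is outside this diff; a rough sketch under the common approximation of about four characters per token (both the function name and the heuristic are assumptions, not from this repo):

    def split_into_chunks(text, max_tokens=4500, chars_per_token=4):
        # Hypothetical helper: cut text into roughly max_tokens-sized pieces
        # using a crude character-count estimate of token length.
        size = max_tokens * chars_per_token
        return [text[i:i + size] for i in range(0, len(text), size)]

Each chunk then costs one summarization call through call_llm_with_retry, which is consistent with the docstring's bound of no more than ten total calls.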
@@ -596,6 +594,7 @@ include key data points and context:
     truncated_summaries = [truncate_text_for_llm(s, max_tokens=1000) for s in summaries]
     merged_input = "\n\n".join(truncated_summaries)
 
+    # Final merge prompt now instructs the model to output END_OF_REPORT when complete.
     final_prompt = f"""
 IMPORTANT: Do NOT include any chain-of-thought, internal planning, or hidden reasoning in the final output.
 Produce a long, academic-style research report with the following structure:
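truncate_text_for_llm is referenced here but not defined in this diff; a plausible sketch under the same characters-per-token approximation (only the name and keyword argument appear in the commit, the body is an assumption):

    def truncate_text_for_llm(text, max_tokens=1000, chars_per_token=4):
        # Hypothetical implementation: cap each partial summary at roughly
        # max_tokens tokens so the merged input stays within the context window.
        limit = max_tokens * chars_per_token
        return text if len(text) <= limit else text[:limit]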
@@ -613,7 +612,7 @@ Requirements:
 - Minimal bullet points, prefer multi-paragraph
 - Each section at least 2-3 paragraphs
 - Aim for 1500+ words if possible
--
+- Please output 'END_OF_REPORT' at the end if the report is complete. If it is too long, output 'CONTINUE' at the end.
 - Professional, academic tone
 Partial Summaries:
 {merged_input}
@@ -625,15 +624,29 @@ Now, merge these partial summaries into one thoroughly expanded research report:
         "model": MODEL_COMBINATION,
         "messages": [{"role": "user", "content": final_prompt}],
         "temperature": 0.3,
-        "max_tokens":
+        "max_tokens": 4096
     }
     final_response = call_llm_with_retry(groq_client, **final_data)
     final_text = final_response.choices[0].message.content.strip()
 
+    # Continuation loop: if the report does not include END_OF_REPORT, ask for continuation.
+    while "END_OF_REPORT" not in final_text:
+        print("[LOG] Final output incomplete. Requesting continuation...")
+        continuation_prompt = "The previous report ended with 'CONTINUE'. Please continue the report from where it left off, and when finished, output 'END_OF_REPORT'."
+        cont_data = {
+            "model": MODEL_COMBINATION,
+            "messages": [{"role": "user", "content": continuation_prompt}],
+            "temperature": 0.3,
+            "max_tokens": 4096
+        }
+        cont_response = call_llm_with_retry(groq_client, **cont_data)
+        cont_text = cont_response.choices[0].message.content.strip()
+        final_text += "\n" + cont_text
+
     # --- NEW POST-PROCESSING STEP ---
-    # Remove any lingering chain-of-thought
+    # Remove any lingering chain-of-thought markers and the END/CONTINUE tokens.
     final_text = re.sub(r"<think>.*?</think>", "", final_text, flags=re.DOTALL)
-    final_text = final_text.strip()
+    final_text = final_text.replace("END_OF_REPORT", "").replace("CONTINUE", "").strip()
     # ------------------------------
 
     # Step 6: PDF generation
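One design note on the continuation loop above: as committed, each follow-up request carries only the instruction, not the draft produced so far, and the while loop has no iteration cap even though the docstring promises at most ten calls. A hedged variant that feeds back the tail of the draft and bounds the retries could look like this (the five-round cap and the 2000-character tail are arbitrary illustration choices, not from the commit):

    max_continuations = 5  # assumed cap so the loop cannot spin forever
    for _ in range(max_continuations):
        if "END_OF_REPORT" in final_text:
            break
        cont_data = {
            "model": MODEL_COMBINATION,
            "messages": [
                {"role": "user", "content": final_prompt},
                {"role": "assistant", "content": final_text[-2000:]},  # tail of the draft as context
                {"role": "user", "content": "Continue the report exactly where it left off. "
                                            "Output 'END_OF_REPORT' when finished."},
            ],
            "temperature": 0.3,
            "max_tokens": 4096,
        }
        cont_response = call_llm_with_retry(groq_client, **cont_data)
        final_text += "\n" + cont_response.choices[0].message.content.strip()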