Update utils.py
utils.py CHANGED

@@ -497,7 +497,7 @@ def run_research_agent(
     Low-Call approach:
     1) Tavily search (up to 20 URLs).
     2) Firecrawl scrape => combined text
-    3)
+    3) Truncate to 12k tokens total
     4) Split into chunks (each 4500 tokens) => Summarize each chunk individually => summaries
     5) Single final merge => final PDF
     => 2 or more total LLM calls (but no more than 10) to reduce the chance of rate limit errors.

@@ -544,8 +544,9 @@ def run_research_agent(
     # Step 2.5: Input Sanitization - Remove any chain-of-thought markers from the scraped content.
     combined_content = re.sub(r"<think>.*?</think>", "", combined_content, flags=re.DOTALL)

-    # Step 3:
-    print("[LOG] Step 3:
+    # Step 3: Truncate to 12k tokens total
+    print("[LOG] Step 3: Truncating combined text to 12,000 tokens if needed.")
+    combined_content = truncate_text_tokens(combined_content, max_tokens=12000)

     # Step 4: Splitting text into chunks (4500 tokens each) and summarizing each chunk.
     print("[LOG] Step 4: Splitting text into chunks (4500 tokens each). Summarizing each chunk.")
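
The new Step 3 calls truncate_text_tokens, whose definition sits outside this hunk. A minimal sketch of what such a helper could look like, assuming a tiktoken-based token count (the cl100k_base encoding and the function body are assumptions, not the file's actual implementation):

# Hypothetical sketch of the truncate_text_tokens helper used in Step 3.
# Assumes tiktoken is available; "cl100k_base" is an assumed encoding choice.
import tiktoken

def truncate_text_tokens(text: str, max_tokens: int = 12000) -> str:
    """Return text truncated to at most max_tokens tokens."""
    enc = tiktoken.get_encoding("cl100k_base")
    tokens = enc.encode(text)
    if len(tokens) <= max_tokens:
        return text
    # Decode only the first max_tokens tokens back into a string.
    return enc.decode(tokens[:max_tokens])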
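
Step 4 then splits the capped text into 4500-token chunks for per-chunk summarization. A hypothetical sketch of that split under the same tiktoken assumption (chunk_text_tokens is an invented name; the real helper in utils.py is not shown in this diff):

# Hypothetical sketch of the Step 4 chunking; name and encoding are assumptions.
import tiktoken

def chunk_text_tokens(text: str, chunk_tokens: int = 4500) -> list[str]:
    """Split text into consecutive chunks of at most chunk_tokens tokens."""
    enc = tiktoken.get_encoding("cl100k_base")
    tokens = enc.encode(text)
    return [
        enc.decode(tokens[i:i + chunk_tokens])
        for i in range(0, len(tokens), chunk_tokens)
    ]

# With the 12,000-token cap from Step 3, this yields at most 3 chunks, so one
# summary call per chunk plus one final merge stays well under the docstring's
# "no more than 10" LLM-call budget.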