siddhartharyaai committed on
Commit
f09db25
·
verified ·
1 Parent(s): 59cf3b2

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +4 -3
utils.py CHANGED
@@ -497,7 +497,7 @@ def run_research_agent(
497
  Low-Call approach:
498
  1) Tavily search (up to 20 URLs).
499
  2) Firecrawl scrape => combined text
500
- 3) Use the full combined text (no truncation)
501
  4) Split into chunks (each 4500 tokens) => Summarize each chunk individually => summaries
502
  5) Single final merge => final PDF
503
  => 2 or more total LLM calls (but no more than 10) to reduce the chance of rate limit errors.
@@ -544,8 +544,9 @@ def run_research_agent(
544
  # Step 2.5: Input Sanitization - Remove any chain-of-thought markers from the scraped content.
545
  combined_content = re.sub(r"<think>.*?</think>", "", combined_content, flags=re.DOTALL)
546
 
547
- # Step 3: Use the full combined text without truncation.
548
- print("[LOG] Step 3: Using the full combined text without truncation.")
 
549
 
550
  # Step 4: Splitting text into chunks (4500 tokens each) and summarizing each chunk.
551
  print("[LOG] Step 4: Splitting text into chunks (4500 tokens each). Summarizing each chunk.")
 
497
  Low-Call approach:
498
  1) Tavily search (up to 20 URLs).
499
  2) Firecrawl scrape => combined text
500
+ 3) Truncate to 12k tokens total
501
  4) Split into chunks (each 4500 tokens) => Summarize each chunk individually => summaries
502
  5) Single final merge => final PDF
503
  => 2 or more total LLM calls (but no more than 10) to reduce the chance of rate limit errors.
 
544
  # Step 2.5: Input Sanitization - Remove any chain-of-thought markers from the scraped content.
545
  combined_content = re.sub(r"<think>.*?</think>", "", combined_content, flags=re.DOTALL)
546
 
547
+ # Step 3: Truncate to 12k tokens total
548
+ print("[LOG] Step 3: Truncating combined text to 12,000 tokens if needed.")
549
+ combined_content = truncate_text_tokens(combined_content, max_tokens=12000)
550
 
551
  # Step 4: Splitting text into chunks (4500 tokens each) and summarizing each chunk.
552
  print("[LOG] Step 4: Splitting text into chunks (4500 tokens each). Summarizing each chunk.")