siddhartharyaai committed (verified)
Commit 257796c · 1 Parent(s): 15181c0

Update utils.py

Files changed (1):
  1. utils.py (+245, -131)
utils.py CHANGED
@@ -52,6 +52,7 @@ def call_llm_with_retry(groq_client, **payload):
     try:
         print(f"[DEBUG] call_llm_with_retry attempt {attempt+1}")
         response = groq_client.chat.completions.create(**payload)
+        # Short sleep to avoid bursting usage
        time.sleep(0.3)
         print("[DEBUG] LLM call succeeded, returning response.")
         return response
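The `+` line above documents why the fixed `time.sleep(0.3)` exists: it spaces out successive calls so retries do not burst against the provider's rate limit. A fixed pause only smooths pacing, though; if rate-limit errors persist, exponential backoff with jitter is the usual complement. A minimal sketch, assuming a hypothetical `call_with_backoff` wrapper that is not part of this commit:

```python
import random
import time

def call_with_backoff(fn, max_attempts=5, base_delay=0.5):
    """Retry fn() with exponential backoff plus jitter (hypothetical helper)."""
    for attempt in range(max_attempts):
        try:
            return fn()
        except Exception:
            if attempt == max_attempts - 1:
                raise  # out of attempts; surface the last error
            # Sleep 0.5 s, 1 s, 2 s, ... plus a little jitter before retrying.
            time.sleep(base_delay * (2 ** attempt) + random.uniform(0, 0.1))
```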
@@ -114,61 +115,178 @@ def pitch_shift(audio: AudioSegment, semitones: int) -> AudioSegment:
 
 
 ###############################################################################
-# ITERATIVE MERGING HELPER FUNCTION (BATCH PROCESSING STRATEGY)
+# PODCAST SCRIPT GENERATION (Single Call)
 ###############################################################################
 
-def iterative_merge_summaries(summaries: List[str], groq_client, references_text: str) -> str:
-    """
-    Iteratively merge a list of summaries into one final consolidated summary.
-    Summaries are grouped into batches (default batch size 3) whose combined token count is kept below a threshold.
-    """
-    tokenizer = tiktoken.get_encoding("cl100k_base")
-    max_merge_input_tokens = 2000  # Safe threshold for each merge call
-    round_index = 1
-    current_summaries = summaries
-    while len(current_summaries) > 1:
-        print(f"[LOG] Iterative merging round {round_index}: {len(current_summaries)} summaries to merge.")
-        new_summaries = []
-        i = 0
-        while i < len(current_summaries):
-            batch = []
-            batch_tokens = 0
-            while i < len(current_summaries):
-                summary = current_summaries[i]
-                summary_tokens = len(tokenizer.encode(summary))
-                if batch_tokens + summary_tokens <= max_merge_input_tokens or not batch:
-                    batch.append(summary)
-                    batch_tokens += summary_tokens
-                    i += 1
-                else:
-                    break
-            batch_text = "\n\n".join(batch)
-            merge_prompt = f"""
-You are a specialized summarization engine. Merge the following summaries into one comprehensive summary.
-Summaries:
-{batch_text}
-References (if any):
-{references_text}
-Please output the merged summary.
-"""
-            data = {
-                "model": MODEL_COMBINATION,
-                "messages": [{"role": "user", "content": merge_prompt}],
-                "temperature": 0.3,
-                "max_tokens": 4096
-            }
-            merge_response = call_llm_with_retry(groq_client, **data)
-            merged_batch = merge_response.choices[0].message.content.strip()
-            merged_batch = re.sub(r"<think>.*?</think>", "", merged_batch, flags=re.DOTALL).strip()
-            new_summaries.append(merged_batch)
-        current_summaries = new_summaries
-        print(f"[LOG] Iterative merge round {round_index} produced {len(current_summaries)} consolidated summaries.")
-        round_index += 1
-    return current_summaries[0] if current_summaries else ""
+def generate_script(
+    system_prompt: str,
+    input_text: str,
+    tone: str,
+    target_length: str,
+    host_name: str = "Jane",
+    guest_name: str = "John",
+    sponsor_style: str = "Separate Break",
+    sponsor_provided=None
+):
+    """
+    Generate the entire script in a single LLM call.
+    Uses DEEPSEEK_R1; just ensure you parse the JSON.
+    """
+    print("[LOG] Generating script with tone:", tone, "and length:", target_length)
+
+    language_selection = st.session_state.get("language_selection", "English (American)")
+    if (host_name == "Jane" or not host_name) and language_selection in ["English (Indian)", "Hinglish", "Hindi"]:
+        host_name = "Isha"
+    if (guest_name == "John" or not guest_name) and language_selection in ["English (Indian)", "Hinglish", "Hindi"]:
+        guest_name = "Aarav"
+
+    words_per_minute = 150
+    numeric_minutes = 3
+    match = re.search(r"(\d+)", target_length)
+    if match:
+        numeric_minutes = int(match.group(1))
+
+    min_words = max(50, numeric_minutes * 100)
+    max_words = numeric_minutes * words_per_minute
+
+    tone_map = {
+        "Humorous": "funny and exciting, makes people chuckle",
+        "Formal": "business-like, well-structured, professional",
+        "Casual": "like a conversation between close friends, relaxed and informal",
+        "Youthful": "like how teenagers might chat, energetic and lively"
+    }
+    chosen_tone = tone_map.get(tone, "casual")
+
+    if sponsor_provided:
+        if sponsor_style == "Separate Break":
+            sponsor_instructions = (
+                "If sponsor content is provided, include it in a separate ad break (~30 seconds). "
+                "Use 'Now a word from our sponsor...' and end with 'Back to the show', etc."
+            )
+        else:
+            sponsor_instructions = (
+                "If sponsor content is provided, blend it naturally (~30 seconds) into conversation. "
+                "Avoid abrupt transitions."
+            )
+    else:
+        sponsor_instructions = ""
+
+    prompt = (
+        f"{system_prompt}\n"
+        f"TONE: {chosen_tone}\n"
+        f"TARGET LENGTH: {target_length} (~{min_words}-{max_words} words)\n"
+        f"INPUT TEXT: {input_text}\n\n"
+        f"# Sponsor Style Instruction:\n{sponsor_instructions}\n\n"
+        "Please provide the output in the following JSON format without any extra text:\n"
+        "{\n"
+        '  "dialogue": [\n'
+        '    { "speaker": "Jane", "text": "..." },\n'
+        '    { "speaker": "John", "text": "..." }\n'
+        "  ]\n"
+        "}"
+    )
+    if language_selection == "Hinglish":
+        prompt += "\n\nPlease generate the script in Romanized Hindi.\n"
+    elif language_selection == "Hindi":
+        prompt += "\n\nPlease generate the script exclusively in Hindi.\n"
+
+    print("[LOG] Sending script generation prompt to LLM.")
+    try:
+        headers = {
+            "Authorization": f"Bearer {os.environ.get('DEEPSEEK_API_KEY')}",
+            "Content-Type": "application/json"
+        }
+        data = {
+            "model": "deepseek/deepseek-r1",
+            "messages": [{"role": "user", "content": prompt}],
+            "max_tokens": 2048,
+            "temperature": 0.7
+        }
+        resp = requests.post("https://openrouter.ai/api/v1/chat/completions",
+                             headers=headers, data=json.dumps(data))
+        resp.raise_for_status()
+        raw_content = resp.json()["choices"][0]["message"]["content"].strip()
+    except Exception as e:
+        print("[ERROR] LLM error generating script:", e)
+        raise ValueError(f"Error generating script: {str(e)}")
+
+    start_idx = raw_content.find("{")
+    end_idx = raw_content.rfind("}")
+    if start_idx == -1 or end_idx == -1:
+        raise ValueError("No JSON found in LLM response for script generation.")
+
+    json_str = raw_content[start_idx:end_idx+1]
+    try:
+        data_js = json.loads(json_str)
+        dialogue_list = data_js.get("dialogue", [])
+
+        # Map generated speaker names onto the two canonical TTS voices
+        for d in dialogue_list:
+            raw_speaker = d.get("speaker", "Jane")
+            if raw_speaker.lower() == host_name.lower():
+                d["speaker"] = "Jane"
+                d["display_speaker"] = host_name
+            elif raw_speaker.lower() == guest_name.lower():
+                d["speaker"] = "John"
+                d["display_speaker"] = guest_name
+            else:
+                d["speaker"] = "Jane"
+                d["display_speaker"] = raw_speaker
+
+        new_dialogue_items = []
+        for d in dialogue_list:
+            if "display_speaker" not in d:
+                d["display_speaker"] = d["speaker"]
+            new_dialogue_items.append(DialogueItem(**d))
+
+        return Dialogue(dialogue=new_dialogue_items)
+
+    except json.JSONDecodeError as e:
+        print("[ERROR] JSON decoding failed for script generation:", e)
+        raise ValueError(f"Script parse error: {str(e)}")
+    except Exception as e:
+        print("[ERROR] Unknown error parsing script JSON:", e)
+        raise ValueError(f"Script parse error: {str(e)}")
+
+
+###############################################################################
-# AUDIO GENERATION (TTS) AND BG MUSIC MIX
+# YOUTUBE TRANSCRIPTION (RAPIDAPI)
+###############################################################################
+
+def transcribe_youtube_video(video_url: str) -> str:
+    print("[LOG] Transcribing YouTube video:", video_url)
+    match = re.search(r"(?:v=|/)([0-9A-Za-z_-]{11})", video_url)
+    if not match:
+        raise ValueError(f"Invalid YouTube URL: {video_url}, cannot extract video ID.")
+    video_id = match.group(1)
+    print("[LOG] Extracted video ID:", video_id)
+
+    base_url = "https://youtube-transcriptor.p.rapidapi.com/transcript"
+    params = {"video_id": video_id, "lang": "en"}
+    headers = {
+        "x-rapidapi-host": "youtube-transcriptor.p.rapidapi.com",
+        "x-rapidapi-key": os.environ.get("RAPIDAPI_KEY")
+    }
+    try:
+        resp = requests.get(base_url, headers=headers, params=params, timeout=30)
+        resp.raise_for_status()
+        data = resp.json()
+        if not isinstance(data, list) or not data:
+            raise ValueError(f"Unexpected transcript format or empty transcript: {data}")
+
+        transcript_as_text = data[0].get("transcriptionAsText", "").strip()
+        if not transcript_as_text:
+            raise ValueError("transcriptionAsText missing or empty in RapidAPI response.")
+
+        print("[LOG] Transcript retrieval successful. Sample:", transcript_as_text[:200], "...")
+        return transcript_as_text
+    except Exception as e:
+        print("[ERROR] YouTube transcription error:", e)
+        raise ValueError(f"Error transcribing YouTube video: {str(e)}")
+
+
+###############################################################################
+# AUDIO GENERATION (TTS) AND BG MUSIC MIX
 ###############################################################################
 
 def _preprocess_text_for_tts(text: str, speaker: str) -> str:
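The new generate_script relies on a brace scan to pull JSON out of a chatty completion: everything between the first `{` and the last `}` goes to json.loads. A standalone check of that strategy (the sample reply is made up); note it assumes the reply contains exactly one top-level JSON object, so a preamble that itself contains `{` would shift start_idx and break the parse:

```python
import json

# Made-up LLM reply: prose wrapping the JSON payload generate_script asks for.
raw_content = 'Sure, here is your script!\n{"dialogue": [{"speaker": "Jane", "text": "Welcome back!"}]}\nEnjoy.'

start_idx = raw_content.find("{")
end_idx = raw_content.rfind("}")
if start_idx == -1 or end_idx == -1:
    raise ValueError("No JSON found in LLM response.")

data_js = json.loads(raw_content[start_idx:end_idx + 1])
print(data_js["dialogue"][0]["text"])  # -> Welcome back!
```

Similarly, the `(?:v=|/)` alternation in transcribe_youtube_video's regex accepts both `watch?v=<id>` URLs and `youtu.be/<id>` short links.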
@@ -181,11 +299,13 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
         if abbr in abbreviations_as_words:
             return abbr
         return ".".join(list(abbr)) + "."
+
     text = re.sub(r"\b([A-Z]{2,})\b", insert_periods_for_abbrev, text)
     text = re.sub(r"\.\.", ".", text)
 
     def remove_periods_for_tts(m):
         return m.group().replace(".", " ").strip()
+
     text = re.sub(r"[A-Z]\.[A-Z](?:\.[A-Z])*\.", remove_periods_for_tts, text)
     text = re.sub(r"-", " ", text)
     text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
@@ -204,9 +324,12 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
     text = re.sub(keywords_pattern, insert_thinking_pause, text, flags=re.IGNORECASE)
     conj_pattern = r"\b(and|but|so|because|however)\b"
     text = re.sub(conj_pattern, lambda m: f"{m.group()}...", text, flags=re.IGNORECASE)
+
     text = re.sub(r"\b(uh|um|ah)\b", "", text, flags=re.IGNORECASE)
+
     def capitalize_after_sentence(m):
         return m.group().upper()
+
     text = re.sub(r'(^\s*\w)|([.!?]\s*\w)', capitalize_after_sentence, text)
     return text.strip()
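A quick standalone check of the pause and filler rewrites in this hunk (same patterns as above). Note the filler removal leaves the surrounding whitespace behind, which is presumably harmless for TTS but visible in text:

```python
import re

text = "It was fine because um the backup worked."
# Ellipses after conjunctions create a spoken pause...
text = re.sub(r"\b(and|but|so|because|however)\b", lambda m: f"{m.group()}...", text, flags=re.IGNORECASE)
# ...and filler words are dropped outright.
text = re.sub(r"\b(uh|um|ah)\b", "", text, flags=re.IGNORECASE)
print(text)  # -> "It was fine because...  the backup worked." (leftover double space)
```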
 
@@ -230,6 +353,7 @@ def generate_audio_mp3(text: str, speaker: str) -> str:
         body = {"text": processed_text}
         r = requests.post(deepgram_api_url, params=params, headers=headers, json=body, stream=True)
         r.raise_for_status()
+
         content_type = r.headers.get("Content-Type", "")
         if "audio/mpeg" not in content_type:
             raise ValueError("Unexpected content-type from Deepgram TTS.")
@@ -238,6 +362,7 @@ def generate_audio_mp3(text: str, speaker: str) -> str:
             if chunk:
                 mp3_file.write(chunk)
             mp3_path = mp3_file.name
+
         audio_seg = AudioSegment.from_file(mp3_path, format="mp3")
         audio_seg = effects.normalize(audio_seg)
         final_mp3_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
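The chunk loop above is the tail of a streamed download: `stream=True` on the request plus `iter_content` keeps large audio responses out of memory. The general pattern, shown standalone with a placeholder endpoint and payload (the real call and params are in the hunk above and elsewhere in the file):

```python
import tempfile

import requests

def stream_tts_to_file(url: str, payload: dict) -> str:
    """Stream an HTTP audio response to a temp file in 8 KiB chunks
    (same pattern as the Deepgram branch; url/payload are placeholders)."""
    r = requests.post(url, json=payload, stream=True)
    r.raise_for_status()
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
        for chunk in r.iter_content(chunk_size=8192):
            if chunk:
                mp3_file.write(chunk)
        return mp3_file.name
```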
@@ -245,6 +370,7 @@ def generate_audio_mp3(text: str, speaker: str) -> str:
         if os.path.exists(mp3_path):
             os.remove(mp3_path)
         return final_mp3_path
+
     else:
         print("[LOG] Using Murf API for TTS. Language=", language_selection)
         from indic_transliteration.sanscript import transliterate, DEVANAGARI, IAST
@@ -263,6 +389,7 @@ def generate_audio_mp3(text: str, speaker: str) -> str:
             voice_id = "hi-IN-kabir" if speaker == "John" else "hi-IN-shweta"
         else:
             voice_id = "en-IN-aarav" if speaker == "John" else "en-IN-isha"
+
         payload = {
             "audioDuration": 0,
             "channelType": "MONO",
@@ -287,9 +414,11 @@ def generate_audio_mp3(text: str, speaker: str) -> str:
             raise ValueError("No audioFile URL from Murf API.")
         audio_resp = requests.get(audio_url)
         audio_resp.raise_for_status()
+
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
             wav_file.write(audio_resp.content)
             wav_path = wav_file.name
+
         audio_seg = AudioSegment.from_file(wav_path, format="wav")
         audio_seg = effects.normalize(audio_seg)
         final_mp3_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
@@ -308,11 +437,13 @@ def mix_with_bg_music(spoken: AudioSegment, custom_music_path=None) -> AudioSegment:
         music_path = custom_music_path
     else:
         music_path = "bg_music.mp3"
+
     try:
         bg_music = AudioSegment.from_file(music_path, format="mp3")
     except Exception as e:
         print("[ERROR] Failed to load background music:", e)
         return spoken
+
     bg_music = bg_music - 18.0
     total_length_ms = len(spoken) + 2000
     looped_music = AudioSegment.empty()
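The hunk shows the setup (music ducked 18 dB, a 2 s tail, an empty accumulator) but the diff cuts off before the loop that builds looped_music. A minimal sketch of how that mixing step typically completes under pydub semantics; this is an inferred continuation consistent with the visible lines, not the committed code, and it assumes the music file is non-empty:

```python
from pydub import AudioSegment

def mix_speech_over_music(spoken: AudioSegment, bg_music: AudioSegment) -> AudioSegment:
    """Sketch of the mixing step that follows the lines shown above."""
    bg_music = bg_music - 18.0               # duck the music ~18 dB under the voice
    total_length_ms = len(spoken) + 2000     # leave a 2 s musical tail
    looped = AudioSegment.empty()
    while len(looped) < total_length_ms:     # loop the track until it covers the speech
        looped += bg_music
    looped = looped[:total_length_ms]
    return looped.overlay(spoken)            # voice on top of the quieted bed
```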
@@ -352,6 +483,61 @@ def call_groq_api_for_qa(system_prompt: str) -> str:
     return json.dumps(fallback)
 
 
+###############################################################################
+# ITERATIVE MERGING HELPER FUNCTION (BATCH PROCESSING STRATEGY)
+###############################################################################
+
+def iterative_merge_summaries(summaries: List[str], groq_client, references_text: str) -> str:
+    """
+    Iteratively merge a list of summaries into one final report summary.
+    This function groups summaries into batches whose total token count is below a set threshold,
+    merges each batch, and then recursively merges the batch outputs until only one final summary remains.
+    """
+    tokenizer = tiktoken.get_encoding("cl100k_base")
+    max_merge_input_tokens = 2000  # Set a safe threshold for each merge call
+
+    round_index = 1
+    while len(summaries) > 1:
+        print(f"[LOG] Iterative merging round {round_index}: {len(summaries)} summaries to merge.")
+        new_summaries = []
+        i = 0
+        while i < len(summaries):
+            batch = []
+            batch_tokens = 0
+            # Group summaries until the token count exceeds the threshold
+            while i < len(summaries):
+                summary = summaries[i]
+                summary_tokens = len(tokenizer.encode(summary))
+                if batch_tokens + summary_tokens <= max_merge_input_tokens or not batch:
+                    batch.append(summary)
+                    batch_tokens += summary_tokens
+                    i += 1
+                else:
+                    break
+            batch_text = "\n\n".join(batch)
+            merge_prompt = f"""
+You are a specialized summarization engine. Merge the following summaries into one comprehensive summary.
+Summaries:
+{batch_text}
+References (if any):
+{references_text}
+Please output the merged summary.
+"""
+            data = {
+                "model": MODEL_COMBINATION,
+                "messages": [{"role": "user", "content": merge_prompt}],
+                "temperature": 0.3,
+                "max_tokens": 4096
+            }
+            merge_response = call_llm_with_retry(groq_client, **data)
+            merged_batch = merge_response.choices[0].message.content.strip()
+            merged_batch = re.sub(r"<think>.*?</think>", "", merged_batch, flags=re.DOTALL).strip()
+            new_summaries.append(merged_batch)
+        summaries = new_summaries
+        round_index += 1
+    return summaries[0]
+
+
 ###############################################################################
 # LOW-CALL RESEARCH AGENT (Minimizing LLM Calls)
 ###############################################################################
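Two properties of the relocated helper are worth noting. The `or not batch` clause guarantees every batch takes at least one summary, so each round strictly shrinks the list and the outer loop terminates. But unlike the deleted version, which returned `current_summaries[0] if current_summaries else ""`, the new `return summaries[0]` raises IndexError on an empty input list. The greedy packing rule itself can be exercised offline (the sample strings and printed batch sizes are illustrative):

```python
import tiktoken

# Offline illustration of the packing rule used above: take summaries in order
# until the 2000-token cap would be exceeded, but always take at least one.
tokenizer = tiktoken.get_encoding("cl100k_base")
summaries = ["alpha " * 900, "beta " * 900, "gamma " * 120]
max_merge_input_tokens = 2000

batches, batch, batch_tokens = [], [], 0
for summary in summaries:
    n = len(tokenizer.encode(summary))
    if batch and batch_tokens + n > max_merge_input_tokens:
        batches.append(batch)
        batch, batch_tokens = [], 0
    batch.append(summary)
    batch_tokens += n
batches.append(batch)
print([len(b) for b in batches])  # e.g. [2, 1]: each batch becomes one merge call
```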
@@ -370,11 +556,10 @@ def run_research_agent(
     2) Firecrawl scrape => combined text.
     3) Use the full combined text without truncation.
     4) Split into chunks (each 4500 tokens) => Summarize each chunk individually => summaries.
-    5) Iteratively merge the summaries into a consolidated summary.
-    6) Final merge using an enhanced prompt with detailed guidelines to produce the full research report.
-    If the report output is incomplete, the model will output "CONTINUE" so that additional calls
-    can be made to retrieve the rest of the report.
-    (Total LLM calls: 2 or more, but no more than 10.)
+    5) Iteratively merge the summaries into a final report.
+    If the report output is incomplete, the model will output "CONTINUE" so that additional calls
+    can be made to retrieve the rest of the report.
+    => 2 or more total LLM calls (but no more than 10) to reduce the chance of rate limit errors.
     """
     print(f"[LOG] Starting LOW-CALL research agent for topic: {topic}")
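Note that the rewritten docstring still describes the CONTINUE convention even though the continuation loop itself is deleted in the final hunk below; only the iterative-merge path remains. Step 4's chunking code is also outside this diff; a minimal sketch of what "split into 4500-token chunks" usually looks like with tiktoken (the helper name is hypothetical, not the file's actual code):

```python
import tiktoken

def split_into_token_chunks(text: str, chunk_size: int = 4500) -> list:
    """Hypothetical helper for step 4: fixed-size token windows over the text."""
    enc = tiktoken.get_encoding("cl100k_base")
    tokens = enc.encode(text)
    return [enc.decode(tokens[i:i + chunk_size]) for i in range(0, len(tokens), chunk_size)]
```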
 
@@ -388,6 +573,7 @@ def run_research_agent(
     if not search_results:
         print("[LOG] No relevant search results found by Tavily.")
         return "No relevant search results found."
+
     references_list = [r["url"] for r in search_results if "url" in r]
 
     # Step 2: Firecrawl scraping
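The comprehension keeps whatever Tavily returns, duplicates included, and those URLs flow into references_text in step 5. If duplicate hits are possible, an order-preserving dedupe is a one-liner (a suggestion only, not part of this commit):

```python
# dict.fromkeys preserves first-seen order, so duplicate URLs collapse cleanly.
search_results = [{"url": "https://a.example"}, {"title": "no url"}, {"url": "https://a.example"}]
references_list = list(dict.fromkeys(r["url"] for r in search_results if "url" in r))
print(references_list)  # ['https://a.example']
```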
@@ -460,87 +646,15 @@ include key data points and context:
     # Step 5: Iteratively merge the chunk summaries.
     print("[LOG] Step 5: Iteratively merging chunk summaries.")
     references_text = "\n".join(f"- {url}" for url in references_list) if references_list else "None"
-    consolidated_summary = iterative_merge_summaries(summaries, groq_client, references_text)
-    print("[LOG] Iterative merge produced a consolidated summary.")
-    consolidated_summary = re.sub(r"<think>.*?</think>", "", consolidated_summary, flags=re.DOTALL).strip()
-
-    # Step 6: Final merge to generate the full research report.
-    final_prompt = f"""
-IMPORTANT: Do NOT include any chain-of-thought, internal planning, or hidden reasoning in the final output.
-Draft a professional, world-class research report that adheres to the following tenets:
-
-I. Essential Principles and Qualities:
-   - Accuracy: Present accurate facts with no spelling or grammatical errors.
-   - Clarity: Use clear, straightforward language.
-   - Brevity: Be concise yet complete.
-   - Objectivity: Avoid personal bias.
-   - Simplicity: Use simple language, and explain any necessary technical jargon briefly.
-   - Logical Sequence: Arrange points in a logical order with proper planning.
-   - Proper Form and Presentation: Follow required formats with an attractive presentation.
-   - Selectiveness: Include only necessary content.
-   - Comprehensiveness: Provide complete and detailed coverage.
-   - Reliability, Coherence, and Relevance: Ensure a logical flow and relevance to the research questions.
-
-II. Structure the Report as Follows:
-   - Title Page (with a concise descriptive title)
-   - Table of Contents
-   - Executive Summary
-   - Introduction (clearly outlining the research purpose and objectives)
-   - Historical or Contextual Background
-   - Detailed Findings organized into coherent thematic sections
-   - Conclusion (with recommendations and insights)
-   - References/Bibliography (listing the provided URLs)
-
-III. Content and Writing Style:
-   - Use consistent and clear language.
-   - Support arguments with reliable evidence.
-   - Write in active voice with clear headings and a logical flow.
-   - Develop each section in multiple detailed paragraphs.
-
-IV. Steps for Writing the Report:
-   - Write a clear thesis statement.
-   - Prepare an outline and develop content sequentially.
-
-Consolidated Summary:
-{consolidated_summary}
-
-References (URLs):
-{references_text}
-
-Now, merge the above into one thoroughly expanded, detailed, and exhaustive research report.
-If the report is incomplete, please output "CONTINUE" at the end; otherwise, end with "END_OF_REPORT".
-"""
-    final_data = {
-        "model": MODEL_COMBINATION,
-        "messages": [{"role": "user", "content": final_prompt}],
-        "temperature": 0.3,
-        "max_tokens": 4096
-    }
-    final_response = call_llm_with_retry(groq_client, **final_data)
-    final_text = final_response.choices[0].message.content.strip()
-
-    # Continuation loop: if the report does not include END_OF_REPORT, ask for continuation.
-    while "END_OF_REPORT" not in final_text:
-        print("[LOG] Final output incomplete. Requesting continuation...")
-        continuation_prompt = "The previous report ended with 'CONTINUE'. Please continue the report from where it left off, and when finished, output 'END_OF_REPORT'."
-        cont_data = {
-            "model": MODEL_COMBINATION,
-            "messages": [{"role": "user", "content": continuation_prompt}],
-            "temperature": 0.3,
-            "max_tokens": 4096
-        }
-        cont_response = call_llm_with_retry(groq_client, **cont_data)
-        cont_text = cont_response.choices[0].message.content.strip()
-        final_text += "\n" + cont_text
+    final_text = iterative_merge_summaries(summaries, groq_client, references_text)
 
     # --- NEW POST-PROCESSING STEP ---
-    # Remove any lingering chain-of-thought markers and the END/CONTINUE tokens.
-    final_text = re.sub(r"<think>.*?</think>", "", final_text, flags=re.DOTALL)
-    final_text = final_text.replace("END_OF_REPORT", "").replace("CONTINUE", "").strip()
+    # Remove any lingering chain-of-thought markers.
+    final_text = re.sub(r"<think>.*?</think>", "", final_text, flags=re.DOTALL).strip()
     # ------------------------------
 
-    # Step 7: PDF generation
-    print("[LOG] Step 7: Generating final PDF from the merged text.")
+    # Step 6: PDF generation
+    print("[LOG] Step 6: Generating final PDF from the merged text.")
     final_report = generate_report(final_text)
 
     print("[LOG] Done! Returning PDF from run_research_agent (low-call).")
@@ -548,4 +662,4 @@ If the report is incomplete, please output "CONTINUE" at the end; otherwise, end
 
     except Exception as e:
         print(f"[ERROR] Error in run_research_agent: {e}")
-        return f"Sorry, encountered an error: {str(e)}"
+        return f"Sorry, encountered an error: {str(e)}"