siddhartharyaai committed
Commit 8482488 · verified · 1 Parent(s): d4a3223

Update utils.py

Files changed (1): utils.py +248 -260
utils.py CHANGED
@@ -18,29 +18,17 @@ import sys
 
 # --- Add the cloned repository to the Python path ---
 repo_path = os.path.join('/home', 'user', 'open_deep_research')
-print(f"DEBUG: repo_path = {repo_path}")
-
-# Remove /home/user/app and app.py from sys.path if they are present
-# This is crucial to avoid import conflicts.
-if '/home/user/app' in sys.path:
-    sys.path.remove('/home/user/app')
-    print("DEBUG: Removed /home/user/app from sys.path")
-if 'app.py' in sys.path:
-    sys.path.remove('app.py')
-    print("DEBUG: Removed app.py from sys.path")
-
-
 if repo_path not in sys.path:
-    print("DEBUG: Adding repo_path to sys.path")
-    sys.path.insert(0, repo_path)  # Add to the *beginning*
 else:
-    print("DEBUG: repo_path already in sys.path")
-    print(f"DEBUG: sys.path = {sys.path}")
-
 
 # --- CORRECT IMPORT (for local cloned repo) ---
 try:
-    from open_deep_research.agent import OpenDeepResearchAgent  # Corrected import
     print("DEBUG: Import successful!")
 except ImportError as e:
     print(f"DEBUG: Import failed: {e}")
@@ -255,7 +243,7 @@ def generate_script(
             d["display_speaker"] = d["speaker"]
             new_dialogue_items.append(DialogueItem(**d))
 
-        return Dialogue(dialogue=new_dialogue_items)
     except json.JSONDecodeError as e:
         print("[ERROR] JSON decoding (format) failed:", e)
         raise ValueError(f"Failed to parse dialogue: {str(e)}")
@@ -263,254 +251,254 @@ def generate_script(
         print("[ERROR] JSON decoding failed:", e)
         raise ValueError(f"Failed to parse dialogue: {str(e)}")
 
-def transcribe_youtube_video(video_url: str) -> str:
-    print("[LOG] Transcribing YouTube video via RapidAPI:", video_url)
-    video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", video_url)
-    if not video_id_match:
-        raise ValueError(f"Invalid YouTube URL: {video_url}, cannot extract video ID.")
 
-    video_id = video_id_match.group(1)
-    print("[LOG] Extracted video ID:", video_id)
 
-    base_url = "https://youtube-transcriptor.p.rapidapi.com/transcript"
-    params = {"video_id": video_id, "lang": "en"}
-    headers = {
-        "x-rapidapi-host": "youtube-transcriptor.p.rapidapi.com",
-        "x-rapidapi-key": os.environ.get("RAPIDAPI_KEY")
-    }
 
-    try:
-        response = requests.get(base_url, headers=headers, params=params, timeout=30)
-        print("[LOG] RapidAPI Response Status Code:", response.status_code)
-        print("[LOG] RapidAPI Response Body:", response.text)
 
-        if response.status_code != 200:
-            raise ValueError(f"RapidAPI transcription error: {response.status_code}, {response.text}")
-
-        data = response.json()
-        if not isinstance(data, list) or not data:
-            raise ValueError(f"Unexpected transcript format or empty transcript: {data}")
-
-        transcript_as_text = data[0].get('transcriptionAsText', '').strip()
-        if not transcript_as_text:
-            raise ValueError("transcriptionAsText field is missing or empty.")
-
-        print("[LOG] Transcript retrieval successful.")
-        print(f"[DEBUG] Transcript Length: {len(transcript_as_text)} characters.")
-        snippet = transcript_as_text[:200] + "..." if len(transcript_as_text) > 200 else transcript_as_text
-        print(f"[DEBUG] Transcript Snippet: {snippet}")
-
-        return transcript_as_text
-    except Exception as e:
-        print("[ERROR] RapidAPI transcription error:", e)
-        raise ValueError(f"Error transcribing YouTube video via RapidAPI: {str(e)}")
-
-def generate_audio_mp3(text: str, speaker: str) -> str:
-    try:
-        import streamlit as st
-        print(f"[LOG] Generating audio for speaker: {speaker}")
-        language_selection = st.session_state.get("language_selection", "English (American)")
-        if language_selection == "English (American)":
-            print(f"[LOG] Using Deepgram for English (American)")
-            if speaker in ["John", "Jane"]:
-                processed_text = text
-            else:
-                processed_text = _preprocess_text_for_tts(text, speaker)
-            deepgram_api_url = "https://api.deepgram.com/v1/speak"
-            params = {"model": "aura-asteria-en"}
-            if speaker == "John":
-                params["model"] = "aura-zeus-en"
-            headers = {
-                "Accept": "audio/mpeg",
-                "Content-Type": "application/json",
-                "Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
-            }
-            body = {"text": processed_text}
-            response = requests.post(deepgram_api_url, params=params, headers=headers, json=body, stream=True)
-            if response.status_code != 200:
-                raise ValueError(f"Deepgram TTS error: {response.status_code}, {response.text}")
-            content_type = response.headers.get('Content-Type', '')
-            if 'audio/mpeg' not in content_type:
-                raise ValueError("Unexpected Content-Type from Deepgram.")
-            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
-                for chunk in response.iter_content(chunk_size=8192):
-                    if chunk:
-                        mp3_file.write(chunk)
-                mp3_path = mp3_file.name
-            audio_seg = AudioSegment.from_file(mp3_path, format="mp3")
-            audio_seg = effects.normalize(audio_seg)
-            final_mp3_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
-            audio_seg.export(final_mp3_path, format="mp3")
-            if os.path.exists(mp3_path):
-                os.remove(mp3_path)
-            return final_mp3_path
-        else:
-            print(f"[LOG] Using Murf API for language: {language_selection}")
-            if language_selection == "Hinglish":
-                from indic_transliteration.sanscript import transliterate, DEVANAGARI, IAST
-                text = transliterate(text, DEVANAGARI, IAST)
-            api_key = os.environ.get("MURF_API_KEY")
-            headers = {
-                "Content-Type": "application/json",
-                "Accept": "application/json",
-                "api-key": api_key
-            }
-            multi_native_locale = "hi-IN" if language_selection in ["Hinglish", "Hindi"] else "en-IN"
-            if language_selection == "English (Indian)":
-                voice_id = "en-IN-aarav" if speaker == "John" else "en-IN-isha"
-            elif language_selection == "Hindi":
-                voice_id = "hi-IN-kabir" if speaker == "John" else "hi-IN-shweta"
-            elif language_selection == "Hinglish":
-                voice_id = "hi-IN-kabir" if speaker == "John" else "hi-IN-shweta"
-            else:
-                voice_id = "en-IN-aarav" if speaker == "John" else "en-IN-isha"
-            payload = {
-                "audioDuration": 0,
-                "channelType": "MONO",
-                "encodeAsBase64": False,
-                "format": "WAV",
-                "modelVersion": "GEN2",
-                "multiNativeLocale": multi_native_locale,
-                "pitch": 0,
-                "pronunciationDictionary": {},
-                "rate": 0,
-                "sampleRate": 48000,
-                "style": "Conversational",
-                "text": text,
-                "variation": 1,
-                "voiceId": voice_id
-            }
-            response = requests.post("https://api.murf.ai/v1/speech/generate", headers=headers, json=payload)
-            if response.status_code != 200:
-                raise ValueError(f"Murf API error: {response.status_code}, {response.text}")
-            json_resp = response.json()
-            audio_url = json_resp.get("audioFile")
-            if not audio_url:
-                raise ValueError("No audio file URL returned by Murf API")
-            audio_response = requests.get(audio_url)
-            if audio_response.status_code != 200:
-                raise ValueError(f"Error fetching audio from {audio_url}")
-            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
-                wav_file.write(audio_response.content)
-                wav_path = wav_file.name
-            audio_seg = AudioSegment.from_file(wav_path, format="wav")
-            audio_seg = effects.normalize(audio_seg)
-            final_mp3_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
-            audio_seg.export(final_mp3_path, format="mp3")
-            os.remove(wav_path)
-            return final_mp3_path
-    except Exception as e:
-        print("[ERROR] Error generating audio:", e)
-        raise ValueError(f"Error generating audio: {str(e)}")
-
-def transcribe_youtube_video_OLD_YTDLP(video_url: str) -> str:
-    pass
-
-def _preprocess_text_for_tts(text: str, speaker: str) -> str:
-    text = re.sub(r"\bNo\.\b", "Number", text)
-    text = re.sub(r"\b(?i)SaaS\b", "sass", text)
-    abbreviations_as_words = {"NASA", "NATO", "UNESCO"}
-    def insert_periods_for_abbrev(m):
-        abbr = m.group(0)
-        if abbr in abbreviations_as_words:
-            return abbr
-        return ".".join(list(abbr)) + "."
-    text = re.sub(r"\b([A-Z]{2,})\b", insert_periods_for_abbrev, text)
-    text = re.sub(r"\.\.", ".", text)
-    def remove_periods_for_tts(m):
-        return m.group().replace(".", " ").strip()
-    text = re.sub(r"[A-Z]\.[A-Z](?:\.[A-Z])*\.", remove_periods_for_tts, text)
-    text = re.sub(r"-", " ", text)
-    text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
-    text = re.sub(r"\bsigh\b", "(* sighs *)", text, flags=re.IGNORECASE)
-    text = re.sub(r"\b(groan|moan)\b", "(* groans *)", text, flags=re.IGNORECASE)
-    if speaker != "Jane":
-        def insert_thinking_pause(m):
-            word = m.group(1)
-            if random.random() < 0.3:
-                filler = random.choice(['hmm,', 'well,', 'let me see,'])
-                return f"{word}..., {filler}"
-            else:
-                return f"{word}...,"
-        keywords_pattern = r"\b(important|significant|crucial|point|topic)\b"
-        text = re.sub(keywords_pattern, insert_thinking_pause, text, flags=re.IGNORECASE)
-        conj_pattern = r"\b(and|but|so|because|however)\b"
-        text = re.sub(conj_pattern, lambda m: f"{m.group()}...", text, flags=re.IGNORECASE)
-    text = re.sub(r"\b(uh|um|ah)\b", "", text, flags=re.IGNORECASE)
-    def capitalize_match(m):
-        return m.group().upper()
-    text = re.sub(r'(^\s*\w)|([.!?]\s*\w)', capitalize_match, text)
-    return text.strip()
-
-def _spell_digits(d: str) -> str:
-    digit_map = {
-        '0': 'zero', '1': 'one', '2': 'two', '3': 'three',
-        '4': 'four', '5': 'five', '6': 'six', '7': 'seven',
-        '8': 'eight', '9': 'nine'
-    }
-    return " ".join(digit_map[ch] for ch in d if ch in digit_map)
 
-def mix_with_bg_music(spoken: AudioSegment, custom_music_path=None) -> AudioSegment:
-    if custom_music_path:
-        music_path = custom_music_path
     else:
-        music_path = "bg_music.mp3"
-
-    try:
-        bg_music = AudioSegment.from_file(music_path, format="mp3")
-    except Exception as e:
-        print("[ERROR] Failed to load background music:", e)
-        return spoken
-
-    bg_music = bg_music - 18.0
-    total_length_ms = len(spoken) + 2000
-    looped_music = AudioSegment.empty()
-    while len(looped_music) < total_length_ms:
-        looped_music += bg_music
-    looped_music = looped_music[:total_length_ms]
-    final_mix = looped_music.overlay(spoken, position=2000)
-    return final_mix
-
-def call_groq_api_for_qa(system_prompt: str) -> str:
-    #Kept for use, Changed model
-    try:
         headers = {
-            "Authorization": f"Bearer {os.environ.get('GROQ_API_KEY')}", # Use GROQ API KEY
             "Content-Type": "application/json",
-            "Accept": "application/json"
         }
-        data = {
-            "model": "deepseek-r1-distill-llama-70b", #Using Deepseek
-            "messages": [{"role": "user", "content": system_prompt}],
-            "max_tokens": 512,
-            "temperature": 0.7
         }
-        response = requests.post("https://api.groq.com/openai/v1/chat/completions", #Using groq endpoint
-                                 headers=headers, data=json.dumps(data))
-        response.raise_for_status()
-        return response.json()["choices"][0]["message"]["content"].strip()
-    except Exception as e:
-        print("[ERROR] Groq API error:", e)
-        fallback = {"speaker": "John", "text": "I'm sorry, I'm having trouble answering right now."}
-        return json.dumps(fallback)
-
-# --- Agent and Tavily Integration ---
-def run_research_agent(topic: str, report_type: str = "research_report", max_results: int = 20) -> str:
-    """
-    Runs the new research agent to generate a research report.
-    """
-    print(f"[LOG] Starting research agent for topic: {topic}")
-    try:
-        # Use the Groq API key here
-        agent = OpenDeepResearchAgent(query=topic, max_results=max_results, api_key=os.environ.get("TAVILY_API_KEY"))
-        report_content = agent.run()
-        print("[LOG] Research agent completed successfully.")
-
-        # Now, use the report_structure module to generate the structured report.
-        structured_report = generate_report(report_content)
-        return structured_report
-
-
-    except Exception as e:
-        print(f"[ERROR] Error in research agent: {e}")
-        return f"Sorry, I encountered an error during research: {e}"
@@ -18,29 +18,17 @@ import sys
 
 # --- Add the cloned repository to the Python path ---
 repo_path = os.path.join('/home', 'user', 'open_deep_research')
+print(f"DEBUG: repo_path = {repo_path}")  # Debug print - keep this for now
 if repo_path not in sys.path:
+    print("DEBUG: Adding repo_path to sys.path")  # Debug print - keep this
+    sys.path.insert(0, repo_path)
 else:
+    print("DEBUG: repo_path already in sys.path")  # Debug print - keep this for now
+    print(f"DEBUG: sys.path = {sys.path}")  # Debug print - keep this for now
 
 # --- CORRECT IMPORT (for local cloned repo) ---
 try:
+    from open_deep_research.agent import OpenDeepResearchAgent
     print("DEBUG: Import successful!")
 except ImportError as e:
     print(f"DEBUG: Import failed: {e}")
@@ -255,7 +243,7 @@ def generate_script(
             d["display_speaker"] = d["speaker"]
             new_dialogue_items.append(DialogueItem(**d))
 
+        return Dialogue(dialogue=new_dialogue_items)
     except json.JSONDecodeError as e:
         print("[ERROR] JSON decoding (format) failed:", e)
         raise ValueError(f"Failed to parse dialogue: {str(e)}")
@@ -263,254 +251,254 @@ def generate_script(
         print("[ERROR] JSON decoding failed:", e)
         raise ValueError(f"Failed to parse dialogue: {str(e)}")
 
+def transcribe_youtube_video(video_url: str) -> str:
+    print("[LOG] Transcribing YouTube video via RapidAPI:", video_url)
+    video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", video_url)
+    if not video_id_match:
+        raise ValueError(f"Invalid YouTube URL: {video_url}, cannot extract video ID.")
 
+    video_id = video_id_match.group(1)
+    print("[LOG] Extracted video ID:", video_id)
 
+    base_url = "https://youtube-transcriptor.p.rapidapi.com/transcript"
+    params = {"video_id": video_id, "lang": "en"}
+    headers = {
+        "x-rapidapi-host": "youtube-transcriptor.p.rapidapi.com",
+        "x-rapidapi-key": os.environ.get("RAPIDAPI_KEY")
+    }
 
+    try:
+        response = requests.get(base_url, headers=headers, params=params, timeout=30)
+        print("[LOG] RapidAPI Response Status Code:", response.status_code)
+        print("[LOG] RapidAPI Response Body:", response.text)
 
+        if response.status_code != 200:
+            raise ValueError(f"RapidAPI transcription error: {response.status_code}, {response.text}")
+
+        data = response.json()
+        if not isinstance(data, list) or not data:
+            raise ValueError(f"Unexpected transcript format or empty transcript: {data}")
 
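+        # The transcript API returns a JSON list; the first element's 'transcriptionAsText' holds the full text.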
+        transcript_as_text = data[0].get('transcriptionAsText', '').strip()
+        if not transcript_as_text:
+            raise ValueError("transcriptionAsText field is missing or empty.")
+
+        print("[LOG] Transcript retrieval successful.")
+        print(f"[DEBUG] Transcript Length: {len(transcript_as_text)} characters.")
+        snippet = transcript_as_text[:200] + "..." if len(transcript_as_text) > 200 else transcript_as_text
+        print(f"[DEBUG] Transcript Snippet: {snippet}")
+
+        return transcript_as_text
+    except Exception as e:
+        print("[ERROR] RapidAPI transcription error:", e)
+        raise ValueError(f"Error transcribing YouTube video via RapidAPI: {str(e)}")
+
+def generate_audio_mp3(text: str, speaker: str) -> str:
+    try:
+        import streamlit as st
+        print(f"[LOG] Generating audio for speaker: {speaker}")
+        language_selection = st.session_state.get("language_selection", "English (American)")
+        if language_selection == "English (American)":
+            print(f"[LOG] Using Deepgram for English (American)")
+            if speaker in ["John", "Jane"]:
+                processed_text = text
+            else:
+                processed_text = _preprocess_text_for_tts(text, speaker)
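+            # Deepgram /v1/speak: 'aura-asteria-en' is the default voice; John is switched to 'aura-zeus-en' below.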
+            deepgram_api_url = "https://api.deepgram.com/v1/speak"
+            params = {"model": "aura-asteria-en"}
+            if speaker == "John":
+                params["model"] = "aura-zeus-en"
+            headers = {
+                "Accept": "audio/mpeg",
+                "Content-Type": "application/json",
+                "Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
+            }
+            body = {"text": processed_text}
+            response = requests.post(deepgram_api_url, params=params, headers=headers, json=body, stream=True)
+            if response.status_code != 200:
+                raise ValueError(f"Deepgram TTS error: {response.status_code}, {response.text}")
+            content_type = response.headers.get('Content-Type', '')
+            if 'audio/mpeg' not in content_type:
+                raise ValueError("Unexpected Content-Type from Deepgram.")
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:
+                        mp3_file.write(chunk)
+                mp3_path = mp3_file.name
+            audio_seg = AudioSegment.from_file(mp3_path, format="mp3")
+            audio_seg = effects.normalize(audio_seg)
+            final_mp3_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
+            audio_seg.export(final_mp3_path, format="mp3")
+            if os.path.exists(mp3_path):
+                os.remove(mp3_path)
+            return final_mp3_path
         else:
+            print(f"[LOG] Using Murf API for language: {language_selection}")
+            if language_selection == "Hinglish":
+                from indic_transliteration.sanscript import transliterate, DEVANAGARI, IAST
+                text = transliterate(text, DEVANAGARI, IAST)
+            api_key = os.environ.get("MURF_API_KEY")
             headers = {
                 "Content-Type": "application/json",
+                "Accept": "application/json",
+                "api-key": api_key
             }
+            multi_native_locale = "hi-IN" if language_selection in ["Hinglish", "Hindi"] else "en-IN"
+            if language_selection == "English (Indian)":
+                voice_id = "en-IN-aarav" if speaker == "John" else "en-IN-isha"
+            elif language_selection == "Hindi":
+                voice_id = "hi-IN-kabir" if speaker == "John" else "hi-IN-shweta"
+            elif language_selection == "Hinglish":
+                voice_id = "hi-IN-kabir" if speaker == "John" else "hi-IN-shweta"
+            else:
+                voice_id = "en-IN-aarav" if speaker == "John" else "en-IN-isha"
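+            # Murf GEN2 request: mono WAV at 48 kHz, 'Conversational' style, neutral pitch/rate/duration.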
+            payload = {
+                "audioDuration": 0,
+                "channelType": "MONO",
+                "encodeAsBase64": False,
+                "format": "WAV",
+                "modelVersion": "GEN2",
+                "multiNativeLocale": multi_native_locale,
+                "pitch": 0,
+                "pronunciationDictionary": {},
+                "rate": 0,
+                "sampleRate": 48000,
+                "style": "Conversational",
+                "text": text,
+                "variation": 1,
+                "voiceId": voice_id
             }
+            response = requests.post("https://api.murf.ai/v1/speech/generate", headers=headers, json=payload)
+            if response.status_code != 200:
+                raise ValueError(f"Murf API error: {response.status_code}, {response.text}")
+            json_resp = response.json()
+            audio_url = json_resp.get("audioFile")
+            if not audio_url:
+                raise ValueError("No audio file URL returned by Murf API")
+            audio_response = requests.get(audio_url)
+            if audio_response.status_code != 200:
+                raise ValueError(f"Error fetching audio from {audio_url}")
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
+                wav_file.write(audio_response.content)
+                wav_path = wav_file.name
+            audio_seg = AudioSegment.from_file(wav_path, format="wav")
+            audio_seg = effects.normalize(audio_seg)
+            final_mp3_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
+            audio_seg.export(final_mp3_path, format="mp3")
+            os.remove(wav_path)
+            return final_mp3_path
+    except Exception as e:
+        print("[ERROR] Error generating audio:", e)
+        raise ValueError(f"Error generating audio: {str(e)}")
+
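+# Stub kept after the switch to the RapidAPI-based transcribe_youtube_video above.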
+def transcribe_youtube_video_OLD_YTDLP(video_url: str) -> str:
+    pass
+
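+# Heuristic TTS text cleanup: expands 'No.' and acronyms, maps laughter/sighs to vocal cues, adds thinking pauses.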
+def _preprocess_text_for_tts(text: str, speaker: str) -> str:
+    text = re.sub(r"\bNo\.\b", "Number", text)
+    text = re.sub(r"(?i)\bSaaS\b", "sass", text)  # inline flag moved to the start; mid-pattern (?i) is an error on Python 3.11+
+    abbreviations_as_words = {"NASA", "NATO", "UNESCO"}
+    def insert_periods_for_abbrev(m):
+        abbr = m.group(0)
+        if abbr in abbreviations_as_words:
+            return abbr
+        return ".".join(list(abbr)) + "."
+    text = re.sub(r"\b([A-Z]{2,})\b", insert_periods_for_abbrev, text)
+    text = re.sub(r"\.\.", ".", text)
+    def remove_periods_for_tts(m):
+        return m.group().replace(".", " ").strip()
+    text = re.sub(r"[A-Z]\.[A-Z](?:\.[A-Z])*\.", remove_periods_for_tts, text)
+    text = re.sub(r"-", " ", text)
+    text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
+    text = re.sub(r"\bsigh\b", "(* sighs *)", text, flags=re.IGNORECASE)
+    text = re.sub(r"\b(groan|moan)\b", "(* groans *)", text, flags=re.IGNORECASE)
+    if speaker != "Jane":
+        def insert_thinking_pause(m):
+            word = m.group(1)
+            if random.random() < 0.3:
+                filler = random.choice(['hmm,', 'well,', 'let me see,'])
+                return f"{word}..., {filler}"
+            else:
+                return f"{word}...,"
+        keywords_pattern = r"\b(important|significant|crucial|point|topic)\b"
+        text = re.sub(keywords_pattern, insert_thinking_pause, text, flags=re.IGNORECASE)
+        conj_pattern = r"\b(and|but|so|because|however)\b"
+        text = re.sub(conj_pattern, lambda m: f"{m.group()}...", text, flags=re.IGNORECASE)
+    text = re.sub(r"\b(uh|um|ah)\b", "", text, flags=re.IGNORECASE)
+    def capitalize_match(m):
+        return m.group().upper()
+    text = re.sub(r'(^\s*\w)|([.!?]\s*\w)', capitalize_match, text)
+    return text.strip()
+
+def _spell_digits(d: str) -> str:
+    digit_map = {
+        '0': 'zero', '1': 'one', '2': 'two', '3': 'three',
+        '4': 'four', '5': 'five', '6': 'six', '7': 'seven',
+        '8': 'eight', '9': 'nine'
+    }
+    return " ".join(digit_map[ch] for ch in d if ch in digit_map)
+
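+# Beds the spoken track over background music: music attenuated by 18 dB, looped to length, 2-second lead-in.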
+def mix_with_bg_music(spoken: AudioSegment, custom_music_path=None) -> AudioSegment:
+    if custom_music_path:
+        music_path = custom_music_path
+    else:
+        music_path = "bg_music.mp3"
+
+    try:
+        bg_music = AudioSegment.from_file(music_path, format="mp3")
+    except Exception as e:
+        print("[ERROR] Failed to load background music:", e)
+        return spoken
+
+    bg_music = bg_music - 18.0
+    total_length_ms = len(spoken) + 2000
+    looped_music = AudioSegment.empty()
+    while len(looped_music) < total_length_ms:
+        looped_music += bg_music
+    looped_music = looped_music[:total_length_ms]
+    final_mix = looped_music.overlay(spoken, position=2000)
+    return final_mix
+
+def call_groq_api_for_qa(system_prompt: str) -> str:
+    # Kept for Q&A use; model changed to DeepSeek served via Groq
+    try:
+        headers = {
+            "Authorization": f"Bearer {os.environ.get('GROQ_API_KEY')}",  # Groq API key
+            "Content-Type": "application/json",
+            "Accept": "application/json"
+        }
+        data = {
+            "model": "deepseek-r1-distill-llama-70b",
+            "messages": [{"role": "user", "content": system_prompt}],
+            "max_tokens": 512,
+            "temperature": 0.7
+        }
+        response = requests.post("https://api.groq.com/openai/v1/chat/completions",  # Groq's OpenAI-compatible endpoint
+                                 headers=headers, data=json.dumps(data))
+        response.raise_for_status()
+        return response.json()["choices"][0]["message"]["content"].strip()
+    except Exception as e:
+        print("[ERROR] Groq API error:", e)
+        fallback = {"speaker": "John", "text": "I'm sorry, I'm having trouble answering right now."}
+        return json.dumps(fallback)
+
+# --- Agent and Tavily Integration ---
+def run_research_agent(topic: str, report_type: str = "research_report", max_results: int = 20) -> str:
+    """
+    Runs the open_deep_research agent to generate a research report.
+    """
+    print(f"[LOG] Starting research agent for topic: {topic}")
+    try:
+        # The agent searches with the Tavily API key (not the Groq key).
+        agent = OpenDeepResearchAgent(query=topic, max_results=max_results, api_key=os.environ.get("TAVILY_API_KEY"))
+        report_content = agent.run()
+        print("[LOG] Research agent completed successfully.")
+
+        # Now, use the report_structure module to generate the structured report.
+        structured_report = generate_report(report_content)
+        return structured_report
+
+    except Exception as e:
+        print(f"[ERROR] Error in research agent: {e}")
+        return f"Sorry, I encountered an error during research: {e}"
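
For reference, a minimal sketch of how the TTS and mixing helpers above compose (hypothetical usage, not part of the commit; assumes the Streamlit session state and the DEEPGRAM_API_KEY environment variable are available, and that these functions live in utils.py):

    from pydub import AudioSegment
    from utils import generate_audio_mp3, mix_with_bg_music

    # Synthesize one line of dialogue to an MP3 file, then bed it over looping background music.
    spoken_path = generate_audio_mp3("Hello and welcome to the show!", "John")
    spoken = AudioSegment.from_file(spoken_path, format="mp3")
    final_mix = mix_with_bg_music(spoken)  # falls back to bg_music.mp3 when no custom path is given
    final_mix.export("segment.mp3", format="mp3")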