siddhartharyaai committed on
Commit
5857c7b
·
verified ·
1 Parent(s): 3c54914

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +305 -252
utils.py CHANGED
@@ -14,8 +14,9 @@ from groq import Groq # Retained for LLM interaction
14
  import numpy as np
15
  import torch
16
  import random
17
- from tavily import TavilyClient #For Tavily Search
18
- from report_structure import generate_report #For Report Structure
 
19
 
20
 
21
  class DialogueItem(BaseModel):
@@ -49,8 +50,8 @@ def extract_text_from_url(url):
49
  try:
50
  headers = {
51
  "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
52
- "AppleWebKit/537.36 (KHTML, like Gecko) "
53
- "Chrome/115.0.0.0 Safari/537.36")
54
  }
55
  response = requests.get(url, headers=headers)
56
  if response.status_code != 200:
@@ -73,7 +74,7 @@ def pitch_shift(audio: AudioSegment, semitones: int) -> AudioSegment:
73
  return shifted_audio.set_frame_rate(audio.frame_rate)
74
 
75
  def is_sufficient(text: str, min_word_count: int = 500) -> bool:
76
- #This function may need adjustment. The agent decides sufficiency now.
77
  word_count = len(text.split())
78
  print(f"[DEBUG] Aggregated word count: {word_count}")
79
  return word_count >= min_word_count
@@ -82,24 +83,24 @@ def query_llm_for_additional_info(topic: str, existing_text: str) -> str:
82
  # No longer needed
83
  pass
84
  def research_topic(topic: str) -> str:
85
- # No longer needed
86
- pass
87
 
88
  def fetch_wikipedia_summary(topic: str) -> str:
89
- #No longer needed as its replaced by TAVILY AND OPEN DEEP RESEARCH AGENT
90
- pass
91
 
92
  def fetch_rss_feed(feed_url: str) -> list:
93
- #No longer needed
94
- pass
95
 
96
  def find_relevant_article(items, topic: str, min_match=2) -> tuple:
97
- #No longer needed
98
- pass
99
 
100
  def fetch_article_text(link: str) -> str:
101
- #THIS FUNCTION IS NOW REPLACED by TAVILY EXTRACT
102
- pass
103
 
104
  def generate_script(
105
  system_prompt: str,
@@ -190,7 +191,7 @@ def generate_script(
190
  "temperature": 0.7
191
  }
192
  response = requests.post("https://openrouter.ai/api/v1/chat/completions",
193
- headers=headers, data=json.dumps(data))
194
  response.raise_for_status()
195
  raw_content = response.json()["choices"][0]["message"]["content"].strip()
196
  except Exception as e:
@@ -234,251 +235,303 @@ def generate_script(
234
  print("[ERROR] JSON decoding failed:", e)
235
  raise ValueError(f"Failed to parse dialogue: {str(e)}")
236
 
237
- def transcribe_youtube_video(video_url: str) -> str:
238
- print("[LOG] Transcribing YouTube video via RapidAPI:", video_url)
239
- video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", video_url)
240
- if not video_id_match:
241
- raise ValueError(f"Invalid YouTube URL: {video_url}, cannot extract video ID.")
242
-
243
- video_id = video_id_match.group(1)
244
- print("[LOG] Extracted video ID:", video_id)
245
-
246
- base_url = "https://youtube-transcriptor.p.rapidapi.com/transcript"
247
- params = {"video_id": video_id, "lang": "en"}
248
- headers = {
249
- "x-rapidapi-host": "youtube-transcriptor.p.rapidapi.com",
250
- "x-rapidapi-key": os.environ.get("RAPIDAPI_KEY")
251
- }
252
-
253
- try:
254
- response = requests.get(base_url, headers=headers, params=params, timeout=30)
255
- print("[LOG] RapidAPI Response Status Code:", response.status_code)
256
- print("[LOG] RapidAPI Response Body:", response.text)
257
-
258
- if response.status_code != 200:
259
- raise ValueError(f"RapidAPI transcription error: {response.status_code}, {response.text}")
260
-
261
- data = response.json()
262
- if not isinstance(data, list) or not data:
263
- raise ValueError(f"Unexpected transcript format or empty transcript: {data}")
264
 
265
- transcript_as_text = data[0].get('transcriptionAsText', '').strip()
266
- if not transcript_as_text:
267
- raise ValueError("transcriptionAsText field is missing or empty.")
268
 
269
- print("[LOG] Transcript retrieval successful.")
270
- print(f"[DEBUG] Transcript Length: {len(transcript_as_text)} characters.")
271
- snippet = transcript_as_text[:200] + "..." if len(transcript_as_text) > 200 else transcript_as_text
272
- print(f"[DEBUG] Transcript Snippet: {snippet}")
 
 
273
 
274
- return transcript_as_text
275
- except Exception as e:
276
- print("[ERROR] RapidAPI transcription error:", e)
277
- raise ValueError(f"Error transcribing YouTube video via RapidAPI: {str(e)}")
278
 
279
- def generate_audio_mp3(text: str, speaker: str) -> str:
280
- try:
281
- import streamlit as st
282
- print(f"[LOG] Generating audio for speaker: {speaker}")
283
- language_selection = st.session_state.get("language_selection", "English (American)")
284
- if language_selection == "English (American)":
285
- print(f"[LOG] Using Deepgram for English (American)")
286
- if speaker in ["John", "Jane"]:
287
- processed_text = text
288
- else:
289
- processed_text = _preprocess_text_for_tts(text, speaker)
290
- deepgram_api_url = "https://api.deepgram.com/v1/speak"
291
- params = {"model": "aura-asteria-en"}
292
- if speaker == "John":
293
- params["model"] = "aura-zeus-en"
294
- headers = {
295
- "Accept": "audio/mpeg",
296
- "Content-Type": "application/json",
297
- "Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
298
- }
299
- body = {"text": processed_text}
300
- response = requests.post(deepgram_api_url, params=params, headers=headers, json=body, stream=True)
301
  if response.status_code != 200:
302
- raise ValueError(f"Deepgram TTS error: {response.status_code}, {response.text}")
303
- content_type = response.headers.get('Content-Type', '')
304
- if 'audio/mpeg' not in content_type:
305
- raise ValueError("Unexpected Content-Type from Deepgram.")
306
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
307
- for chunk in response.iter_content(chunk_size=8192):
308
- if chunk:
309
- mp3_file.write(chunk)
310
- mp3_path = mp3_file.name
311
- audio_seg = AudioSegment.from_file(mp3_path, format="mp3")
312
- audio_seg = effects.normalize(audio_seg)
313
- final_mp3_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
314
- audio_seg.export(final_mp3_path, format="mp3")
315
- if os.path.exists(mp3_path):
316
- os.remove(mp3_path)
317
- return final_mp3_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  else:
319
- print(f"[LOG] Using Murf API for language: {language_selection}")
320
- if language_selection == "Hinglish":
321
- from indic_transliteration.sanscript import transliterate, DEVANAGARI, IAST
322
- text = transliterate(text, DEVANAGARI, IAST)
323
- api_key = os.environ.get("MURF_API_KEY")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  headers = {
 
325
  "Content-Type": "application/json",
326
- "Accept": "application/json",
327
- "api-key": api_key
328
  }
329
- multi_native_locale = "hi-IN" if language_selection in ["Hinglish", "Hindi"] else "en-IN"
330
- if language_selection == "English (Indian)":
331
- voice_id = "en-IN-aarav" if speaker == "John" else "en-IN-isha"
332
- elif language_selection == "Hindi":
333
- voice_id = "hi-IN-kabir" if speaker == "John" else "hi-IN-shweta"
334
- elif language_selection == "Hinglish":
335
- voice_id = "hi-IN-kabir" if speaker == "John" else "hi-IN-shweta"
336
- else:
337
- voice_id = "en-IN-aarav" if speaker == "John" else "en-IN-isha"
338
- payload = {
339
- "audioDuration": 0,
340
- "channelType": "MONO",
341
- "encodeAsBase64": False,
342
- "format": "WAV",
343
- "modelVersion": "GEN2",
344
- "multiNativeLocale": multi_native_locale,
345
- "pitch": 0,
346
- "pronunciationDictionary": {},
347
- "rate": 0,
348
- "sampleRate": 48000,
349
- "style": "Conversational",
350
- "text": text,
351
- "variation": 1,
352
- "voiceId": voice_id
353
  }
354
- response = requests.post("https://api.murf.ai/v1/speech/generate", headers=headers, json=payload)
355
- if response.status_code != 200:
356
- raise ValueError(f"Murf API error: {response.status_code}, {response.text}")
357
- json_resp = response.json()
358
- audio_url = json_resp.get("audioFile")
359
- if not audio_url:
360
- raise ValueError("No audio file URL returned by Murf API")
361
- audio_response = requests.get(audio_url)
362
- if audio_response.status_code != 200:
363
- raise ValueError(f"Error fetching audio from {audio_url}")
364
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
365
- wav_file.write(audio_response.content)
366
- wav_path = wav_file.name
367
- audio_seg = AudioSegment.from_file(wav_path, format="wav")
368
- audio_seg = effects.normalize(audio_seg)
369
- final_mp3_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
370
- audio_seg.export(final_mp3_path, format="mp3")
371
- os.remove(wav_path)
372
- return final_mp3_path
373
- except Exception as e:
374
- print("[ERROR] Error generating audio:", e)
375
- raise ValueError(f"Error generating audio: {str(e)}")
376
-
377
- def transcribe_youtube_video_OLD_YTDLP(video_url: str) -> str:
378
- pass
379
-
380
- def _preprocess_text_for_tts(text: str, speaker: str) -> str:
381
- text = re.sub(r"\bNo\.\b", "Number", text)
382
- text = re.sub(r"\b(?i)SaaS\b", "sass", text)
383
- abbreviations_as_words = {"NASA", "NATO", "UNESCO"}
384
- def insert_periods_for_abbrev(m):
385
- abbr = m.group(0)
386
- if abbr in abbreviations_as_words:
387
- return abbr
388
- return ".".join(list(abbr)) + "."
389
- text = re.sub(r"\b([A-Z]{2,})\b", insert_periods_for_abbrev, text)
390
- text = re.sub(r"\.\.", ".", text)
391
- def remove_periods_for_tts(m):
392
- return m.group().replace(".", " ").strip()
393
- text = re.sub(r"[A-Z]\.[A-Z](?:\.[A-Z])*\.", remove_periods_for_tts, text)
394
- text = re.sub(r"-", " ", text)
395
- text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
396
- text = re.sub(r"\bsigh\b", "(* sighs *)", text, flags=re.IGNORECASE)
397
- text = re.sub(r"\b(groan|moan)\b", "(* groans *)", text, flags=re.IGNORECASE)
398
- if speaker != "Jane":
399
- def insert_thinking_pause(m):
400
- word = m.group(1)
401
- if random.random() < 0.3:
402
- filler = random.choice(['hmm,', 'well,', 'let me see,'])
403
- return f"{word}..., {filler}"
404
- else:
405
- return f"{word}...,"
406
- keywords_pattern = r"\b(important|significant|crucial|point|topic)\b"
407
- text = re.sub(keywords_pattern, insert_thinking_pause, text, flags=re.IGNORECASE)
408
- conj_pattern = r"\b(and|but|so|because|however)\b"
409
- text = re.sub(conj_pattern, lambda m: f"{m.group()}...", text, flags=re.IGNORECASE)
410
- text = re.sub(r"\b(uh|um|ah)\b", "", text, flags=re.IGNORECASE)
411
- def capitalize_match(m):
412
- return m.group().upper()
413
- text = re.sub(r'(^\s*\w)|([.!?]\s*\w)', capitalize_match, text)
414
- return text.strip()
415
-
416
- def _spell_digits(d: str) -> str:
417
- digit_map = {
418
- '0': 'zero', '1': 'one', '2': 'two', '3': 'three',
419
- '4': 'four', '5': 'five', '6': 'six', '7': 'seven',
420
- '8': 'eight', '9': 'nine'
421
- }
422
- return " ".join(digit_map[ch] for ch in d if ch in digit_map)
423
-
424
- def mix_with_bg_music(spoken: AudioSegment, custom_music_path=None) -> AudioSegment:
425
- if custom_music_path:
426
- music_path = custom_music_path
427
- else:
428
- music_path = "bg_music.mp3"
429
-
430
- try:
431
- bg_music = AudioSegment.from_file(music_path, format="mp3")
432
- except Exception as e:
433
- print("[ERROR] Failed to load background music:", e)
434
- return spoken
435
-
436
- bg_music = bg_music - 18.0
437
- total_length_ms = len(spoken) + 2000
438
- looped_music = AudioSegment.empty()
439
- while len(looped_music) < total_length_ms:
440
- looped_music += bg_music
441
- looped_music = looped_music[:total_length_ms]
442
- final_mix = looped_music.overlay(spoken, position=2000)
443
- return final_mix
444
-
445
- def call_groq_api_for_qa(system_prompt: str) -> str:
446
- #Kept for use, Changed model
447
- try:
448
- headers = {
449
- "Authorization": f"Bearer {os.environ.get('GROQ_API_KEY')}", # Use GROQ API KEY
450
- "Content-Type": "application/json",
451
- "Accept": "application/json"
452
- }
453
- data = {
454
- "model": "deepseek-r1-distill-llama-70b", #Using Deepseek
455
- "messages": [{"role": "user", "content": system_prompt}],
456
- "max_tokens": 512,
457
- "temperature": 0.7
458
- }
459
- response = requests.post("https://api.groq.com/openai/v1/chat/completions", #Using groq endpoint
460
- headers=headers, data=json.dumps(data))
461
- response.raise_for_status()
462
- return response.json()["choices"][0]["message"]["content"].strip()
463
- except Exception as e:
464
- print("[ERROR] Groq API error:", e)
465
- fallback = {"speaker": "John", "text": "I'm sorry, I'm having trouble answering right now."}
466
- return json.dumps(fallback)
467
-
468
- # --- Agent and Tavily Integration ---
469
- def run_research_agent(topic: str, report_type: str = "research_report", max_results: int = 20) -> str:
470
- """
471
- Runs the new research agent to generate a research report.
472
- """
473
- print(f"[LOG] Starting research agent for topic: {topic}")
474
- try:
475
 
476
- agent = OpenDeepResearchAgent(query=topic, max_results=max_results, api_key=os.environ.get("TAVILY_API_KEY"))
477
- report_content = agent.run()
478
- print("[LOG] Research agent completed successfully.")
479
- structured_report = generate_report(report_content)
480
- return structured_report
481
 
482
- except Exception as e:
483
- print(f"[ERROR] Error in research agent: {e}")
484
- return f"Sorry, I encountered an error during research: {e}"
 
14
  import numpy as np
15
  import torch
16
  import random
17
+ #New Imports
18
+ from tavily import TavilyClient
19
+ from report_structure import generate_report
20
 
21
 
22
  class DialogueItem(BaseModel):
 
50
  try:
51
  headers = {
52
  "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
53
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
54
+ "Chrome/115.0.0.0 Safari/537.36")
55
  }
56
  response = requests.get(url, headers=headers)
57
  if response.status_code != 200:
 
74
  return shifted_audio.set_frame_rate(audio.frame_rate)
75
 
76
def is_sufficient(text: str, min_word_count: int = 500) -> bool:
    """Tell whether *text* contains at least *min_word_count* words.

    Words are counted by splitting on whitespace, and the count is printed
    as a debug line. The original comment notes this check now plays a
    reduced role because the agent decides sufficiency.
    """
    words = text.split()
    total = len(words)
    print(f"[DEBUG] Aggregated word count: {total}")
    return not total < min_word_count
 
83
  # No longer needed
84
  pass
85
def research_topic(topic: str) -> str:
    """Retired stub, marked as no longer needed.

    Does nothing with *topic* and returns ``None`` despite the ``str``
    annotation.
    """
    return None
88
 
89
def fetch_wikipedia_summary(topic: str) -> str:
    """Retired stub, marked as no longer needed.

    Does nothing with *topic* and returns ``None`` despite the ``str``
    annotation.
    """
    return None
92
 
93
def fetch_rss_feed(feed_url: str) -> list:
    """Retired stub, marked as no longer needed.

    Does nothing with *feed_url* and returns ``None`` despite the ``list``
    annotation.
    """
    return None
96
 
97
def find_relevant_article(items, topic: str, min_match=2) -> tuple:
    """Retired stub, marked as no longer needed.

    Ignores all arguments and returns ``None`` despite the ``tuple``
    annotation.
    """
    return None
100
 
101
def fetch_article_text(link: str) -> str:
    """Retired stub, marked as no longer needed.

    Does nothing with *link* and returns ``None`` despite the ``str``
    annotation.
    """
    return None
104
 
105
  def generate_script(
106
  system_prompt: str,
 
191
  "temperature": 0.7
192
  }
193
  response = requests.post("https://openrouter.ai/api/v1/chat/completions",
194
+ headers=headers, data=json.dumps(data))
195
  response.raise_for_status()
196
  raw_content = response.json()["choices"][0]["message"]["content"].strip()
197
  except Exception as e:
 
235
  print("[ERROR] JSON decoding failed:", e)
236
  raise ValueError(f"Failed to parse dialogue: {str(e)}")
237
 
238
def transcribe_youtube_video(video_url: str) -> str:
    """Fetch the English transcript of a YouTube video through RapidAPI.

    The 11-character video ID is extracted from *video_url* and sent to the
    ``youtube-transcriptor`` RapidAPI endpoint with ``lang="en"``. The
    ``transcriptionAsText`` field of the first element of the JSON list
    response is returned, stripped of surrounding whitespace.

    Args:
        video_url: A YouTube URL containing ``v=<id>`` or ``/<id>``.

    Returns:
        The transcript text.

    Raises:
        ValueError: If no video ID can be extracted, if ``RAPIDAPI_KEY`` is
            not set, if the HTTP call fails or returns a non-200 status, or
            if the response is not a non-empty list with a non-empty
            ``transcriptionAsText`` field.
    """
    print("[LOG] Transcribing YouTube video via RapidAPI:", video_url)
    video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", video_url)
    if not video_id_match:
        raise ValueError(f"Invalid YouTube URL: {video_url}, cannot extract video ID.")

    video_id = video_id_match.group(1)
    print("[LOG] Extracted video ID:", video_id)

    # Fail early with a readable message: passing None as a header value makes
    # the HTTP layer raise an opaque "invalid header" error instead.
    api_key = os.environ.get("RAPIDAPI_KEY")
    if not api_key:
        raise ValueError(
            "Error transcribing YouTube video via RapidAPI: RAPIDAPI_KEY is not set."
        )

    base_url = "https://youtube-transcriptor.p.rapidapi.com/transcript"
    params = {"video_id": video_id, "lang": "en"}
    headers = {
        "x-rapidapi-host": "youtube-transcriptor.p.rapidapi.com",
        "x-rapidapi-key": api_key,
    }

    try:
        response = requests.get(base_url, headers=headers, params=params, timeout=30)
        print("[LOG] RapidAPI Response Status Code:", response.status_code)
        # Only log a preview: the body holds the entire transcript.
        body_preview = response.text[:500]
        if len(response.text) > 500:
            body_preview += "..."
        print("[LOG] RapidAPI Response Body:", body_preview)

        if response.status_code != 200:
            raise ValueError(f"RapidAPI transcription error: {response.status_code}, {response.text}")

        data = response.json()
        if not isinstance(data, list) or not data:
            raise ValueError(f"Unexpected transcript format or empty transcript: {data}")

        transcript_as_text = data[0].get('transcriptionAsText', '').strip()
        if not transcript_as_text:
            raise ValueError("transcriptionAsText field is missing or empty.")

        print("[LOG] Transcript retrieval successful.")
        print(f"[DEBUG] Transcript Length: {len(transcript_as_text)} characters.")
        snippet = transcript_as_text[:200] + "..." if len(transcript_as_text) > 200 else transcript_as_text
        print(f"[DEBUG] Transcript Snippet: {snippet}")

        return transcript_as_text
    except Exception as e:
        print("[ERROR] RapidAPI transcription error:", e)
        # Chain the cause so the original traceback is preserved for debugging.
        raise ValueError(f"Error transcribing YouTube video via RapidAPI: {str(e)}") from e
279
+
280
def _normalize_to_mp3(source_path: str, source_format: str) -> str:
    """Normalize the audio file at *source_path* and export it as a temp MP3.

    The source file is always deleted, including when decoding or exporting
    fails. Returns the path of the exported MP3, which the caller owns.
    """
    try:
        audio_seg = AudioSegment.from_file(source_path, format=source_format)
        audio_seg = effects.normalize(audio_seg)
        # mkstemp + close instead of NamedTemporaryFile(...).name, which left
        # an open file handle behind on every call.
        fd, final_mp3_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        try:
            audio_seg.export(final_mp3_path, format="mp3")
        except Exception:
            # Do not leave a half-written output file behind.
            if os.path.exists(final_mp3_path):
                os.remove(final_mp3_path)
            raise
        return final_mp3_path
    finally:
        if os.path.exists(source_path):
            os.remove(source_path)


def generate_audio_mp3(text: str, speaker: str) -> str:
    """Synthesize *text* as speech and return the path of a normalized MP3.

    The language is read from ``st.session_state["language_selection"]``
    (default ``"English (American)"``):

    * ``"English (American)"`` uses the Deepgram ``/v1/speak`` endpoint, with
      model ``aura-zeus-en`` for speaker ``"John"`` and ``aura-asteria-en``
      otherwise. Text for speakers other than ``"John"``/``"Jane"`` is first
      passed through ``_preprocess_text_for_tts``.
    * Any other selection uses the Murf ``speech/generate`` endpoint. For
      ``"Hinglish"`` the text is transliterated from Devanagari to IAST
      first. Hindi/Hinglish use the ``hi-IN`` voices, everything else the
      ``en-IN`` voices.

    Args:
        text: The line to speak.
        speaker: Speaker name; ``"John"`` selects the male voice.

    Returns:
        Path to a temporary MP3 file that the caller is responsible for
        deleting.

    Raises:
        ValueError: On any failure (HTTP error, unexpected content type,
            missing audio URL, decode/export error); the original exception
            is chained as the cause.
    """
    # Seconds. The calls previously had no timeout and could hang forever.
    request_timeout = 120
    try:
        import streamlit as st
        print(f"[LOG] Generating audio for speaker: {speaker}")
        language_selection = st.session_state.get("language_selection", "English (American)")
        if language_selection == "English (American)":
            print(f"[LOG] Using Deepgram for English (American)")
            if speaker in ["John", "Jane"]:
                processed_text = text
            else:
                processed_text = _preprocess_text_for_tts(text, speaker)
            deepgram_api_url = "https://api.deepgram.com/v1/speak"
            params = {"model": "aura-zeus-en" if speaker == "John" else "aura-asteria-en"}
            headers = {
                "Accept": "audio/mpeg",
                "Content-Type": "application/json",
                "Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
            }
            body = {"text": processed_text}
            response = requests.post(
                deepgram_api_url,
                params=params,
                headers=headers,
                json=body,
                stream=True,
                timeout=request_timeout,
            )
            try:
                if response.status_code != 200:
                    raise ValueError(f"Deepgram TTS error: {response.status_code}, {response.text}")
                content_type = response.headers.get('Content-Type', '')
                if 'audio/mpeg' not in content_type:
                    raise ValueError("Unexpected Content-Type from Deepgram.")
                with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
                    mp3_path = mp3_file.name
                    try:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:
                                mp3_file.write(chunk)
                    except Exception:
                        # Streaming failed midway: drop the partial download.
                        mp3_file.close()
                        os.remove(mp3_path)
                        raise
            finally:
                # A streamed response holds its connection until closed.
                response.close()
            return _normalize_to_mp3(mp3_path, "mp3")

        print(f"[LOG] Using Murf API for language: {language_selection}")
        if language_selection == "Hinglish":
            from indic_transliteration.sanscript import transliterate, DEVANAGARI, IAST
            text = transliterate(text, DEVANAGARI, IAST)
        api_key = os.environ.get("MURF_API_KEY")
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "api-key": api_key
        }
        # Hindi and Hinglish share locale and voices; every other selection
        # (including "English (Indian)") uses the Indian-English voices.
        uses_hindi_voice = language_selection in ["Hinglish", "Hindi"]
        multi_native_locale = "hi-IN" if uses_hindi_voice else "en-IN"
        if uses_hindi_voice:
            voice_id = "hi-IN-kabir" if speaker == "John" else "hi-IN-shweta"
        else:
            voice_id = "en-IN-aarav" if speaker == "John" else "en-IN-isha"
        payload = {
            "audioDuration": 0,
            "channelType": "MONO",
            "encodeAsBase64": False,
            "format": "WAV",
            "modelVersion": "GEN2",
            "multiNativeLocale": multi_native_locale,
            "pitch": 0,
            "pronunciationDictionary": {},
            "rate": 0,
            "sampleRate": 48000,
            "style": "Conversational",
            "text": text,
            "variation": 1,
            "voiceId": voice_id
        }
        response = requests.post(
            "https://api.murf.ai/v1/speech/generate",
            headers=headers,
            json=payload,
            timeout=request_timeout,
        )
        if response.status_code != 200:
            raise ValueError(f"Murf API error: {response.status_code}, {response.text}")
        json_resp = response.json()
        audio_url = json_resp.get("audioFile")
        if not audio_url:
            raise ValueError("No audio file URL returned by Murf API")
        audio_response = requests.get(audio_url, timeout=request_timeout)
        if audio_response.status_code != 200:
            raise ValueError(f"Error fetching audio from {audio_url}")
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
            wav_file.write(audio_response.content)
            wav_path = wav_file.name
        return _normalize_to_mp3(wav_path, "wav")
    except Exception as e:
        print("[ERROR] Error generating audio:", e)
        raise ValueError(f"Error generating audio: {str(e)}") from e
377
+
378
def transcribe_youtube_video_OLD_YTDLP(video_url: str) -> str:
    """Empty placeholder: ignores *video_url* and returns ``None``."""
    return None
380
+
381
def _preprocess_text_for_tts(text: str, speaker: str) -> str:
    """Rewrite *text* so a text-to-speech engine reads it more naturally.

    Steps, in order:

    1. Expand ``No.`` to ``Number`` and any casing of ``SaaS`` to ``sass``.
    2. Spell all-caps abbreviations letter by letter (``FBI`` -> ``F B I``),
       except NASA, NATO and UNESCO, which are left as words.
    3. Replace hyphens with spaces.
    4. Turn laugh/sigh/groan words into ``(* ... *)`` markers.
    5. For every speaker except ``"Jane"``: add ``...`` pauses (and sometimes
       a random filler) after emphasis words and conjunctions, and remove
       ``uh``/``um``/``ah``. This part uses ``random``, so its output is not
       deterministic.
    6. Upper-case the first letter of the text and of each sentence.

    Args:
        text: Raw dialogue text.
        speaker: Speaker name; ``"Jane"`` skips the pause/filler step.

    Returns:
        The processed text, stripped of surrounding whitespace.
    """
    # The original pattern required a word character right after the period,
    # so it matched "No.5" but never "No. 5". The look-ahead also accepts
    # whitespace followed by a digit, without touching a sentence-final "No.".
    text = re.sub(r"\bNo\.(?:\b|(?=\s*\d))", "Number", text)
    # An inline "(?i)" in the middle of a pattern raises re.error on
    # Python 3.11+; pass the flag explicitly instead.
    text = re.sub(r"\bSaaS\b", "sass", text, flags=re.IGNORECASE)
    abbreviations_as_words = {"NASA", "NATO", "UNESCO"}

    def insert_periods_for_abbrev(m):
        abbr = m.group(0)
        if abbr in abbreviations_as_words:
            return abbr
        return ".".join(list(abbr)) + "."

    text = re.sub(r"\b([A-Z]{2,})\b", insert_periods_for_abbrev, text)
    # An abbreviation that ended a sentence now has a doubled period.
    text = re.sub(r"\.\.", ".", text)

    def remove_periods_for_tts(m):
        return m.group().replace(".", " ").strip()

    text = re.sub(r"[A-Z]\.[A-Z](?:\.[A-Z])*\.", remove_periods_for_tts, text)
    text = re.sub(r"-", " ", text)
    text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
    text = re.sub(r"\bsigh\b", "(* sighs *)", text, flags=re.IGNORECASE)
    text = re.sub(r"\b(groan|moan)\b", "(* groans *)", text, flags=re.IGNORECASE)
    if speaker != "Jane":
        def insert_thinking_pause(m):
            word = m.group(1)
            if random.random() < 0.3:
                filler = random.choice(['hmm,', 'well,', 'let me see,'])
                return f"{word}..., {filler}"
            else:
                return f"{word}...,"

        keywords_pattern = r"\b(important|significant|crucial|point|topic)\b"
        text = re.sub(keywords_pattern, insert_thinking_pause, text, flags=re.IGNORECASE)
        conj_pattern = r"\b(and|but|so|because|however)\b"
        text = re.sub(conj_pattern, lambda m: f"{m.group()}...", text, flags=re.IGNORECASE)
        text = re.sub(r"\b(uh|um|ah)\b", "", text, flags=re.IGNORECASE)

    def capitalize_match(m):
        return m.group().upper()

    text = re.sub(r'(^\s*\w)|([.!?]\s*\w)', capitalize_match, text)
    return text.strip()
416
+
417
def _spell_digits(d: str) -> str:
    """Spell each ASCII digit of *d* as an English word, space-separated.

    Characters that are not one of ``0``-``9`` are skipped, so an input with
    no digits yields an empty string.
    """
    names = (
        'zero', 'one', 'two', 'three', 'four',
        'five', 'six', 'seven', 'eight', 'nine',
    )
    lookup = dict(zip("0123456789", names))
    # lookup.get yields None for non-digits; filter(None, ...) drops those.
    return " ".join(filter(None, map(lookup.get, d)))
424
+
425
def mix_with_bg_music(spoken: AudioSegment, custom_music_path=None) -> AudioSegment:
    """Overlay *spoken* on looped, quieter background music.

    The music is loaded as MP3 from *custom_music_path* (or ``bg_music.mp3``
    when none is given), attenuated by 18 dB, looped to cover the speech plus
    a 2-second lead-in, and the speech is overlaid starting at 2000 ms.

    Args:
        spoken: The speech track.
        custom_music_path: Optional path to an MP3 used instead of the
            default background track.

    Returns:
        The mixed audio, or *spoken* unchanged when the music cannot be
        loaded or is empty.
    """
    music_path = custom_music_path or "bg_music.mp3"

    try:
        bg_music = AudioSegment.from_file(music_path, format="mp3")
    except Exception as e:
        print("[ERROR] Failed to load background music:", e)
        return spoken

    # A zero-length track would never advance the loop below.
    if len(bg_music) == 0:
        print("[ERROR] Failed to load background music:", "background track is empty")
        return spoken

    bg_music = bg_music - 18.0
    total_length_ms = len(spoken) + 2000
    looped_music = AudioSegment.empty()
    while len(looped_music) < total_length_ms:
        looped_music += bg_music
    looped_music = looped_music[:total_length_ms]
    final_mix = looped_music.overlay(spoken, position=2000)
    return final_mix
445
+
446
def call_groq_api_for_qa(system_prompt: str) -> str:
    """Send *system_prompt* to Groq's chat-completions endpoint and return the reply.

    The prompt is sent as a single ``user`` message to the
    ``deepseek-r1-distill-llama-70b`` model, with ``max_tokens=512`` and
    ``temperature=0.7``, authenticated with ``GROQ_API_KEY``.

    Args:
        system_prompt: Full prompt text.

    Returns:
        The stripped content of the first choice. On any failure, a JSON
        string encoding a fallback line for speaker ``"John"`` is returned
        instead; this function does not raise.
    """
    try:
        headers = {
            "Authorization": f"Bearer {os.environ.get('GROQ_API_KEY')}",
            "Content-Type": "application/json",
            "Accept": "application/json"
        }
        data = {
            "model": "deepseek-r1-distill-llama-70b",
            "messages": [{"role": "user", "content": system_prompt}],
            "max_tokens": 512,
            "temperature": 0.7
        }
        # Without a timeout a stalled connection blocks forever and never
        # reaches the fallback below.
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers=headers,
            data=json.dumps(data),
            timeout=60,
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"].strip()
    except Exception as e:
        print("[ERROR] Groq API error:", e)
        fallback = {"speaker": "John", "text": "I'm sorry, I'm having trouble answering right now."}
        return json.dumps(fallback)
468
+ # --- Agent and Tavily Integration, Using Firecrawl ---
469
+
470
def _scrape_markdown_with_firecrawl(url: str) -> str:
    """Return the main-content markdown of *url* via Firecrawl, or "" on failure."""
    headers = {'Authorization': f'Bearer {os.environ.get("FIRECRAWL_API_KEY")}'}
    payload = {"url": url, "formats": ["markdown"], "onlyMainContent": True}
    try:
        response = requests.post(
            "https://api.firecrawl.dev/v1/scrape",
            headers=headers,
            json=payload,
            timeout=60,
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body; either way, skip just this URL.
        print(f"[ERROR] Error during Firecrawl request for {url}: {e}")
        return ""

    print(f"[DEBUG] Firecrawl response: {data}")
    if not isinstance(data, dict):
        print(f"[WARNING] Firecrawl scrape failed or no markdown content for {url}: {None}")
        return ""
    # "data" may be missing or null; treat both as "no content".
    markdown = (data.get('data') or {}).get('markdown') if data.get('success') else None
    if not markdown:
        print(f"[WARNING] Firecrawl scrape failed or no markdown content for {url}: {data.get('error')}")
        return ""
    return markdown


def run_research_agent(topic: str, report_type: str = "research_report", max_results: int = 10) -> str:
    """Research *topic* on the web and return a structured report.

    Pipeline: Tavily search -> Firecrawl scrape of each result URL -> Groq
    LLM (``deepseek-r1-distill-llama-70b``, temperature 0.2) synthesis ->
    ``generate_report`` formatting.

    Args:
        topic: The subject to research.
        report_type: Accepted for interface compatibility; currently unused.
        max_results: Maximum number of Tavily search results to scrape.

    Returns:
        The value produced by ``generate_report``. When nothing is found,
        nothing can be scraped, or any step raises, a plain-text message
        describing the problem is returned instead; this function does not
        raise.
    """
    print(f"[LOG] Starting research agent for topic: {topic}")
    try:
        tavily_client = TavilyClient(api_key=os.environ.get("TAVILY_API_KEY"))
        search_response = tavily_client.search(query=topic, max_results=max_results)
        # The items are read as dicts below, so the response is presumably a
        # dict as well; `.results` attribute access would then raise
        # AttributeError. Accept both shapes.
        if isinstance(search_response, dict):
            search_results = search_response.get("results") or []
        else:
            search_results = getattr(search_response, "results", None) or []

        if not search_results:
            return "No relevant search results found."

        print(f"[DEBUG] Tavily results: {search_results}")

        # Use Firecrawl to scrape the content of each URL.
        pages = []
        for result in search_results:
            url = result.get('url')
            if not url:
                continue
            print(f"[LOG] Scraping URL with Firecrawl: {url}")
            markdown = _scrape_markdown_with_firecrawl(url)
            if markdown:
                pages.append(markdown + "\n\n")
        combined_content = "".join(pages)

        if not combined_content:
            return "Could not retrieve content from any of the search results."

        # Use Groq LLM to generate the report.
        prompt = (
            "You are a world-class researcher, and you are tasked to write a research report on the following topic:\n"
            f"{topic}\n"
            "Use the following pieces of information, gathered from various web sources, to construct your report:\n"
            "\n"
            f"{combined_content}\n"
            "\n"
            "Compile and synthesize the information to create a well-structured and informative research report.\n"
            "Cite sources appropriately in the context itself. Do not produce the report in JSON format, only return standard\n"
            "text output\n"
        )

        groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
        response = groq_client.chat.completions.create(
            messages=[
                {"role": "user", "content": prompt}
            ],
            model="deepseek-r1-distill-llama-70b",
            temperature=0.2
        )
        report_text = response.choices[0].message.content
        print(f"[DEBUG] Raw report from LLM:\n{report_text}")

        structured_report = generate_report(report_text)
        return structured_report

    except Exception as e:
        print(f"[ERROR] Error in research agent: {e}")
        return f"Sorry, I encountered an error during research: {e}"