Update utils.py
utils.py (CHANGED)
@@ -14,8 +14,9 @@ from groq import Groq # Retained for LLM interaction
 import numpy as np
 import torch
 import random
-
-from
+#New Imports
+from tavily import TavilyClient
+from report_structure import generate_report
 
 
 class DialogueItem(BaseModel):
@@ -49,8 +50,8 @@ def extract_text_from_url(url):
     try:
         headers = {
             "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
-
-
+                           "AppleWebKit/537.36 (KHTML, like Gecko) "
+                           "Chrome/115.0.0.0 Safari/537.36")
         }
         response = requests.get(url, headers=headers)
         if response.status_code != 200:
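For context, the restored User-Agent string is the usual trick for avoiding naive bot blocks when scraping. A minimal self-contained sketch of a fetch using the same header follows; the BeautifulSoup text extraction is an illustrative assumption, not necessarily what extract_text_from_url does with the response:

import requests
from bs4 import BeautifulSoup  # assumed parser, for illustration only

def fetch_page_text(url: str) -> str:
    # A browser-like UA makes the request look like an ordinary Chrome visit.
    headers = {
        "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                       "AppleWebKit/537.36 (KHTML, like Gecko) "
                       "Chrome/115.0.0.0 Safari/537.36")
    }
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    return BeautifulSoup(response.text, "html.parser").get_text(separator=" ", strip=True)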
@@ -73,7 +74,7 @@ def pitch_shift(audio: AudioSegment, semitones: int) -> AudioSegment:
     return shifted_audio.set_frame_rate(audio.frame_rate)
 
 def is_sufficient(text: str, min_word_count: int = 500) -> bool:
-    #This function
+    # This function's role is reduced; the agent decides.
     word_count = len(text.split())
     print(f"[DEBUG] Aggregated word count: {word_count}")
     return word_count >= min_word_count
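The comment change above signals that is_sufficient is now only a cheap pre-check. A hedged sketch of how a caller might use it to choose between scraped text and the new agent pipeline (the call site is assumed, not part of this diff):

if not is_sufficient(aggregated_text):
    # Below the 500-word threshold: fall back to the agent-based research path.
    aggregated_text = run_research_agent(topic)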
@@ -82,24 +83,24 @@ def query_llm_for_additional_info(topic: str, existing_text: str) -> str:
     # No longer needed
     pass
 def research_topic(topic: str) -> str:
-
-
+    # No longer needed
+    pass
 
 def fetch_wikipedia_summary(topic: str) -> str:
-
-
+    # No longer needed
+    pass
 
 def fetch_rss_feed(feed_url: str) -> list:
-
-
+    # No longer needed
+    pass
 
 def find_relevant_article(items, topic: str, min_match=2) -> tuple:
-
-
+    # No longer needed
+    pass
 
 def fetch_article_text(link: str) -> str:
-
-
+    # No longer needed
+    pass
 
 def generate_script(
     system_prompt: str,
@@ -190,7 +191,7 @@ def generate_script(
             "temperature": 0.7
         }
         response = requests.post("https://openrouter.ai/api/v1/chat/completions",
-
+                                 headers=headers, data=json.dumps(data))
        response.raise_for_status()
        raw_content = response.json()["choices"][0]["message"]["content"].strip()
    except Exception as e:
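The restored second line of the requests.post call carries the headers and JSON body. For reference, a minimal standalone sketch of the same OpenAI-compatible OpenRouter request; the OPENROUTER_API_KEY environment variable name and the model slug are assumptions for illustration, not values taken from this file:

import json
import os
import requests

headers = {
    "Authorization": f"Bearer {os.environ.get('OPENROUTER_API_KEY')}",  # assumed env var name
    "Content-Type": "application/json",
}
data = {
    "model": "openrouter/auto",  # placeholder model slug
    "messages": [{"role": "user", "content": "Say hello in one line."}],
    "max_tokens": 512,
    "temperature": 0.7,
}
response = requests.post("https://openrouter.ai/api/v1/chat/completions",
                         headers=headers, data=json.dumps(data))
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"].strip())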
@@ -234,251 +235,303 @@ def generate_script(
     print("[ERROR] JSON decoding failed:", e)
     raise ValueError(f"Failed to parse dialogue: {str(e)}")
 
-def transcribe_youtube_video(video_url: str) -> str:
-    video_id = video_id_match.group(1)
-    print("[LOG] Extracted video ID:", video_id)
-    base_url = "https://youtube-transcriptor.p.rapidapi.com/transcript"
-    params = {"video_id": video_id, "lang": "en"}
-    headers = {
-        "x-rapidapi-host": "youtube-transcriptor.p.rapidapi.com",
-        "x-rapidapi-key": os.environ.get("RAPIDAPI_KEY")
-    }
-    try:
-        response = requests.get(base_url, headers=headers, params=params, timeout=30)
-        print("[LOG] RapidAPI Response Status Code:", response.status_code)
-        print("[LOG] RapidAPI Response Body:", response.text)
-        if response.status_code != 200:
-            raise ValueError(f"RapidAPI transcription error: {response.status_code}, {response.text}")
-        data = response.json()
-        if not isinstance(data, list) or not data:
-            raise ValueError(f"Unexpected transcript format or empty transcript: {data}")
-            raise ValueError("transcriptionAsText field is missing or empty.")
-def generate_audio_mp3(text: str, speaker: str) -> str:
-    try:
-        import streamlit as st
-        print(f"[LOG] Generating audio for speaker: {speaker}")
-        language_selection = st.session_state.get("language_selection", "English (American)")
-        if language_selection == "English (American)":
-            print(f"[LOG] Using Deepgram for English (American)")
-            if speaker in ["John", "Jane"]:
-                processed_text = text
-            else:
-                processed_text = _preprocess_text_for_tts(text, speaker)
-            deepgram_api_url = "https://api.deepgram.com/v1/speak"
-            params = {"model": "aura-asteria-en"}
-            if speaker == "John":
-                params["model"] = "aura-zeus-en"
-            headers = {
-                "Accept": "audio/mpeg",
-                "Content-Type": "application/json",
-                "Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
-            }
-            body = {"text": processed_text}
-            response = requests.post(deepgram_api_url, params=params, headers=headers, json=body, stream=True)
-            if response.status_code != 200:
-                raise ValueError(f"
-            return
-        else:
-            headers = {
-                "Content-Type": "application/json",
-                "Accept": "application/json"
-                "api-key": api_key
-            }
-            elif language_selection == "Hinglish":
-                voice_id = "hi-IN-kabir" if speaker == "John" else "hi-IN-shweta"
-            else:
-                voice_id = "en-IN-aarav" if speaker == "John" else "en-IN-isha"
-            payload = {
-                "audioDuration": 0,
-                "channelType": "MONO",
-                "encodeAsBase64": False,
-                "format": "WAV",
-                "modelVersion": "GEN2",
-                "multiNativeLocale": multi_native_locale,
-                "pitch": 0,
-                "pronunciationDictionary": {},
-                "rate": 0,
-                "sampleRate": 48000,
-                "style": "Conversational",
-                "text": text,
-                "variation": 1,
-                "voiceId": voice_id
-            }
-            response = requests.post("https://api.
-    else:
-        music_path = "bg_music.mp3"
-    try:
-        bg_music = AudioSegment.from_file(music_path, format="mp3")
-    except Exception as e:
-        print("[ERROR] Failed to load background music:", e)
-        return spoken
-    bg_music = bg_music - 18.0
-    total_length_ms = len(spoken) + 2000
-    looped_music = AudioSegment.empty()
-    while len(looped_music) < total_length_ms:
-        looped_music += bg_music
-    looped_music = looped_music[:total_length_ms]
-    final_mix = looped_music.overlay(spoken, position=2000)
-    return final_mix
-def call_groq_api_for_qa(system_prompt: str) -> str:
-    #Kept for use, Changed model
-    try:
-        headers = {
-            "Authorization": f"Bearer {os.environ.get('GROQ_API_KEY')}", # Use GROQ API KEY
-            "Content-Type": "application/json",
-            "Accept": "application/json"
-        }
-        data = {
-            "model": "deepseek-r1-distill-llama-70b", #Using Deepseek
-            "messages": [{"role": "user", "content": system_prompt}],
-            "max_tokens": 512,
-            "temperature": 0.7
-        }
-        response = requests.post("https://api.groq.com/openai/v1/chat/completions", #Using groq endpoint
-                                 headers=headers, data=json.dumps(data))
-        response.raise_for_status()
-        return response.json()["choices"][0]["message"]["content"].strip()
-    except Exception as e:
-        print("[ERROR] Groq API error:", e)
-        fallback = {"speaker": "John", "text": "I'm sorry, I'm having trouble answering right now."}
-        return json.dumps(fallback)
-# --- Agent and Tavily Integration ---
-def run_research_agent(topic: str, report_type: str = "research_report", max_results: int = 20) -> str:
-    """
-    Runs the new research agent to generate a research report.
-    """
-    print(f"[LOG] Starting research agent for topic: {topic}")
-    try:
-        agent = OpenDeepResearchAgent(query=topic, max_results=max_results, api_key=os.environ.get("TAVILY_API_KEY"))
-        report_content = agent.run()
-        print("[LOG] Research agent completed successfully.")
-        structured_report = generate_report(report_content)
-        return structured_report
+def transcribe_youtube_video(video_url: str) -> str:
+    print("[LOG] Transcribing YouTube video via RapidAPI:", video_url)
+    video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", video_url)
+    if not video_id_match:
+        raise ValueError(f"Invalid YouTube URL: {video_url}, cannot extract video ID.")
+
+    video_id = video_id_match.group(1)
+    print("[LOG] Extracted video ID:", video_id)
+
+    base_url = "https://youtube-transcriptor.p.rapidapi.com/transcript"
+    params = {"video_id": video_id, "lang": "en"}
+    headers = {
+        "x-rapidapi-host": "youtube-transcriptor.p.rapidapi.com",
+        "x-rapidapi-key": os.environ.get("RAPIDAPI_KEY")
+    }
+
+    try:
+        response = requests.get(base_url, headers=headers, params=params, timeout=30)
+        print("[LOG] RapidAPI Response Status Code:", response.status_code)
+        print("[LOG] RapidAPI Response Body:", response.text)
+
+        if response.status_code != 200:
+            raise ValueError(f"RapidAPI transcription error: {response.status_code}, {response.text}")
+
+        data = response.json()
+        if not isinstance(data, list) or not data:
+            raise ValueError(f"Unexpected transcript format or empty transcript: {data}")
+
+        transcript_as_text = data[0].get('transcriptionAsText', '').strip()
+        if not transcript_as_text:
+            raise ValueError("transcriptionAsText field is missing or empty.")
+
+        print("[LOG] Transcript retrieval successful.")
+        print(f"[DEBUG] Transcript Length: {len(transcript_as_text)} characters.")
+        snippet = transcript_as_text[:200] + "..." if len(transcript_as_text) > 200 else transcript_as_text
+        print(f"[DEBUG] Transcript Snippet: {snippet}")
+
+        return transcript_as_text
+    except Exception as e:
+        print("[ERROR] RapidAPI transcription error:", e)
+        raise ValueError(f"Error transcribing YouTube video via RapidAPI: {str(e)}")
+
+def generate_audio_mp3(text: str, speaker: str) -> str:
+    try:
+        import streamlit as st
+        print(f"[LOG] Generating audio for speaker: {speaker}")
+        language_selection = st.session_state.get("language_selection", "English (American)")
+        if language_selection == "English (American)":
+            print(f"[LOG] Using Deepgram for English (American)")
+            if speaker in ["John", "Jane"]:
+                processed_text = text
+            else:
+                processed_text = _preprocess_text_for_tts(text, speaker)
+            deepgram_api_url = "https://api.deepgram.com/v1/speak"
+            params = {"model": "aura-asteria-en"}
+            if speaker == "John":
+                params["model"] = "aura-zeus-en"
+            headers = {
+                "Accept": "audio/mpeg",
+                "Content-Type": "application/json",
+                "Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
+            }
+            body = {"text": processed_text}
+            response = requests.post(deepgram_api_url, params=params, headers=headers, json=body, stream=True)
+            if response.status_code != 200:
+                raise ValueError(f"Deepgram TTS error: {response.status_code}, {response.text}")
+            content_type = response.headers.get('Content-Type', '')
+            if 'audio/mpeg' not in content_type:
+                raise ValueError("Unexpected Content-Type from Deepgram.")
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:
+                        mp3_file.write(chunk)
+                mp3_path = mp3_file.name
+            audio_seg = AudioSegment.from_file(mp3_path, format="mp3")
+            audio_seg = effects.normalize(audio_seg)
+            final_mp3_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
+            audio_seg.export(final_mp3_path, format="mp3")
+            if os.path.exists(mp3_path):
+                os.remove(mp3_path)
+            return final_mp3_path
+        else:
+            print(f"[LOG] Using Murf API for language: {language_selection}")
+            if language_selection == "Hinglish":
+                from indic_transliteration.sanscript import transliterate, DEVANAGARI, IAST
+                text = transliterate(text, DEVANAGARI, IAST)
+            api_key = os.environ.get("MURF_API_KEY")
+            headers = {
+                "Content-Type": "application/json",
+                "Accept": "application/json",
+                "api-key": api_key
+            }
+            multi_native_locale = "hi-IN" if language_selection in ["Hinglish", "Hindi"] else "en-IN"
+            if language_selection == "English (Indian)":
+                voice_id = "en-IN-aarav" if speaker == "John" else "en-IN-isha"
+            elif language_selection == "Hindi":
+                voice_id = "hi-IN-kabir" if speaker == "John" else "hi-IN-shweta"
+            elif language_selection == "Hinglish":
+                voice_id = "hi-IN-kabir" if speaker == "John" else "hi-IN-shweta"
+            else:
+                voice_id = "en-IN-aarav" if speaker == "John" else "en-IN-isha"
+            payload = {
+                "audioDuration": 0,
+                "channelType": "MONO",
+                "encodeAsBase64": False,
+                "format": "WAV",
+                "modelVersion": "GEN2",
+                "multiNativeLocale": multi_native_locale,
+                "pitch": 0,
+                "pronunciationDictionary": {},
+                "rate": 0,
+                "sampleRate": 48000,
+                "style": "Conversational",
+                "text": text,
+                "variation": 1,
+                "voiceId": voice_id
+            }
+            response = requests.post("https://api.murf.ai/v1/speech/generate", headers=headers, json=payload)
+            if response.status_code != 200:
+                raise ValueError(f"Murf API error: {response.status_code}, {response.text}")
+            json_resp = response.json()
+            audio_url = json_resp.get("audioFile")
+            if not audio_url:
+                raise ValueError("No audio file URL returned by Murf API")
+            audio_response = requests.get(audio_url)
+            if audio_response.status_code != 200:
+                raise ValueError(f"Error fetching audio from {audio_url}")
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
+                wav_file.write(audio_response.content)
+                wav_path = wav_file.name
+            audio_seg = AudioSegment.from_file(wav_path, format="wav")
+            audio_seg = effects.normalize(audio_seg)
+            final_mp3_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
+            audio_seg.export(final_mp3_path, format="mp3")
+            os.remove(wav_path)
+            return final_mp3_path
+    except Exception as e:
+        print("[ERROR] Error generating audio:", e)
+        raise ValueError(f"Error generating audio: {str(e)}")
+
+def transcribe_youtube_video_OLD_YTDLP(video_url: str) -> str:
+    pass
+
+def _preprocess_text_for_tts(text: str, speaker: str) -> str:
+    text = re.sub(r"\bNo\.\b", "Number", text)
+    # Inline (?i) mid-pattern is invalid in current Python; use the flags argument.
+    text = re.sub(r"\bSaaS\b", "sass", text, flags=re.IGNORECASE)
+    abbreviations_as_words = {"NASA", "NATO", "UNESCO"}
+    def insert_periods_for_abbrev(m):
+        abbr = m.group(0)
+        if abbr in abbreviations_as_words:
+            return abbr
+        return ".".join(list(abbr)) + "."
+    text = re.sub(r"\b([A-Z]{2,})\b", insert_periods_for_abbrev, text)
+    text = re.sub(r"\.\.", ".", text)
+    def remove_periods_for_tts(m):
+        return m.group().replace(".", " ").strip()
+    text = re.sub(r"[A-Z]\.[A-Z](?:\.[A-Z])*\.", remove_periods_for_tts, text)
+    text = re.sub(r"-", " ", text)
+    text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
+    text = re.sub(r"\bsigh\b", "(* sighs *)", text, flags=re.IGNORECASE)
+    text = re.sub(r"\b(groan|moan)\b", "(* groans *)", text, flags=re.IGNORECASE)
+    if speaker != "Jane":
+        def insert_thinking_pause(m):
+            word = m.group(1)
+            if random.random() < 0.3:
+                filler = random.choice(['hmm,', 'well,', 'let me see,'])
+                return f"{word}..., {filler}"
+            else:
+                return f"{word}...,"
+        keywords_pattern = r"\b(important|significant|crucial|point|topic)\b"
+        text = re.sub(keywords_pattern, insert_thinking_pause, text, flags=re.IGNORECASE)
+        conj_pattern = r"\b(and|but|so|because|however)\b"
+        text = re.sub(conj_pattern, lambda m: f"{m.group()}...", text, flags=re.IGNORECASE)
+    text = re.sub(r"\b(uh|um|ah)\b", "", text, flags=re.IGNORECASE)
+    def capitalize_match(m):
+        return m.group().upper()
+    text = re.sub(r'(^\s*\w)|([.!?]\s*\w)', capitalize_match, text)
+    return text.strip()
+
+def _spell_digits(d: str) -> str:
+    digit_map = {
+        '0': 'zero', '1': 'one', '2': 'two', '3': 'three',
+        '4': 'four', '5': 'five', '6': 'six', '7': 'seven',
+        '8': 'eight', '9': 'nine'
+    }
+    return " ".join(digit_map[ch] for ch in d if ch in digit_map)
+
+def mix_with_bg_music(spoken: AudioSegment, custom_music_path=None) -> AudioSegment:
+    if custom_music_path:
+        music_path = custom_music_path
+    else:
+        music_path = "bg_music.mp3"
+
+    try:
+        bg_music = AudioSegment.from_file(music_path, format="mp3")
+    except Exception as e:
+        print("[ERROR] Failed to load background music:", e)
+        return spoken
+
+    bg_music = bg_music - 18.0
+    total_length_ms = len(spoken) + 2000
+    looped_music = AudioSegment.empty()
+    while len(looped_music) < total_length_ms:
+        looped_music += bg_music
+    looped_music = looped_music[:total_length_ms]
+    final_mix = looped_music.overlay(spoken, position=2000)
+    return final_mix
+
+def call_groq_api_for_qa(system_prompt: str) -> str:
+    # Kept for use; changed model
+    try:
+        headers = {
+            "Authorization": f"Bearer {os.environ.get('GROQ_API_KEY')}",  # Use GROQ API key
+            "Content-Type": "application/json",
+            "Accept": "application/json"
+        }
+        data = {
+            "model": "deepseek-r1-distill-llama-70b",  # Using DeepSeek
+            "messages": [{"role": "user", "content": system_prompt}],
+            "max_tokens": 512,
+            "temperature": 0.7
+        }
+        response = requests.post("https://api.groq.com/openai/v1/chat/completions",  # Groq endpoint
+                                 headers=headers, data=json.dumps(data))
+        response.raise_for_status()
+        return response.json()["choices"][0]["message"]["content"].strip()
+    except Exception as e:
+        print("[ERROR] Groq API error:", e)
+        fallback = {"speaker": "John", "text": "I'm sorry, I'm having trouble answering right now."}
+        return json.dumps(fallback)
+
+# --- Agent and Tavily Integration, Using Firecrawl ---
+
+def run_research_agent(topic: str, report_type: str = "research_report", max_results: int = 10) -> str:
+    print(f"[LOG] Starting research agent for topic: {topic}")
+    try:
+        tavily_client = TavilyClient(api_key=os.environ.get("TAVILY_API_KEY"))
+        # search() returns a dict; take its "results" list.
+        search_results = tavily_client.search(query=topic, max_results=max_results)["results"]
+
+        if not search_results:
+            return "No relevant search results found."
+
+        print(f"[DEBUG] Tavily results: {search_results}")  # Debug print
+
+        # Use Firecrawl to scrape the content of each URL
+        combined_content = ""
+        for result in search_results:
+            url = result.get('url')
+            print(f"[LOG] Scraping URL with Firecrawl: {url}")  # Debug print
+            if url:
+                headers = {'Authorization': f'Bearer {os.environ.get("FIRECRAWL_API_KEY")}'}
+                payload = {"url": url, "formats": ["markdown"], "onlyMainContent": True}
+
+                try:
+                    response = requests.post("https://api.firecrawl.dev/v1/scrape", headers=headers, json=payload)
+                    response.raise_for_status()  # Raise on non-2xx status codes
+                    data = response.json()
+                    print(f"[DEBUG] Firecrawl response: {data}")
+
+                    if data.get('success') and 'markdown' in data.get('data', {}):
+                        combined_content += data['data']['markdown'] + "\n\n"  # Separate documents with blank lines
+                    else:
+                        print(f"[WARNING] Firecrawl scrape failed or no markdown content for {url}: {data.get('error')}")
+
+                except requests.RequestException as e:
+                    print(f"[ERROR] Error during Firecrawl request for {url}: {e}")
+                    continue  # Continue to the next URL
+
+        if not combined_content:
+            return "Could not retrieve content from any of the search results."
+
+        # Use the Groq LLM to generate the report
+        prompt = f"""You are a world-class researcher, and you are tasked to write a research report on the following topic:
+{topic}
+Use the following pieces of information, gathered from various web sources, to construct your report:
+
+{combined_content}
+
+Compile and synthesize the information to create a well-structured and informative research report.
+Cite sources appropriately in the context itself. Do not produce the report in JSON format; only return standard
+text output.
+"""
+
+        groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+        response = groq_client.chat.completions.create(
+            messages=[
+                {"role": "user", "content": prompt}
+            ],
+            model="deepseek-r1-distill-llama-70b",
+            temperature=0.2
+        )
+        report_text = response.choices[0].message.content
+        print(f"[DEBUG] Raw report from LLM:\n{report_text}")
+
+        structured_report = generate_report(report_text)
+        return structured_report
+
+    except Exception as e:
+        print(f"[ERROR] Error in research agent: {e}")
+        return f"Sorry, I encountered an error during research: {e}"
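Taken together, the new helpers imply a rendering pipeline along these lines. This is a sketch under the assumption that dialogue items expose speaker and text attributes (as DialogueItem suggests); it is not code from this commit:

from pydub import AudioSegment

def render_podcast(dialogue_items, music_path=None) -> AudioSegment:
    # Synthesize each line, stitch the clips together, then bed them over looped music.
    spoken = AudioSegment.empty()
    for item in dialogue_items:  # assumed: objects with .speaker and .text
        mp3_path = generate_audio_mp3(item.text, item.speaker)
        spoken += AudioSegment.from_file(mp3_path, format="mp3")
    return mix_with_bg_music(spoken, custom_music_path=music_path)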