siddhartharyaai committed on
Commit
6ff5ac5
·
verified ·
1 Parent(s): b187370

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +45 -76
utils.py CHANGED
@@ -12,6 +12,14 @@ import tiktoken
12
  from groq import Groq
13
  import numpy as np
14
  import torch
 
 
 
 
 
 
 
 
15
 
16
  class DialogueItem(BaseModel):
17
  speaker: Literal["Jane", "John"]
@@ -28,6 +36,7 @@ def generate_script(prompt, text, tone, length, host_name, guest_name, sponsor_s
28
  groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
29
 
30
  try:
 
31
  response = groq_client.chat.completions.create(
32
  messages=[
33
  {"role": "system", "content": prompt},
@@ -39,7 +48,20 @@ def generate_script(prompt, text, tone, length, host_name, guest_name, sponsor_s
39
  )
40
 
41
  if not response.choices or not response.choices[0].message.content:
42
- raise ValueError("LLM returned an empty response.")
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  script_content = response.choices[0].message.content.strip()
45
  dialogue_items = parse_script_to_dialogue(script_content, host_name, guest_name)
@@ -50,7 +72,7 @@ def generate_script(prompt, text, tone, length, host_name, guest_name, sponsor_s
50
  return Dialogue(dialogue=dialogue_items)
51
 
52
  except Exception as e:
53
- print(f"[ERROR] Failed to generate script: {str(e)}")
54
  return Dialogue(dialogue=[DialogueItem(speaker="Jane", display_speaker="Jane", text="I'm sorry, something went wrong.")])
55
 
56
  def parse_script_to_dialogue(script, host_name, guest_name):
@@ -80,67 +102,9 @@ def truncate_text(text, max_tokens=2048):
80
 
81
  return text
82
 
83
def extract_text_from_url(url):
    """
    Extract readable text from a given URL.

    Args:
        url: The web page URL to fetch.

    Returns:
        The page's visible text (scripts and styles removed, pieces
        joined by single spaces), or an empty string on any HTTP or
        network failure.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
    }
    try:
        # requests has no default timeout; without one a stalled server
        # hangs this call indefinitely.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException:
        # Network-level failures (DNS errors, resets, timeouts) are
        # treated the same as a bad status code: no text available.
        return ""
    if response.status_code != 200:
        return ""

    soup = BeautifulSoup(response.text, 'html.parser')
    # Remove non-visible elements so get_text() yields only readable prose.
    for script in soup(["script", "style"]):
        script.decompose()

    return soup.get_text(separator=' ')
99
-
100
def transcribe_youtube_video(video_url):
    """
    Download a YouTube video's audio with yt-dlp and transcribe it.

    Args:
        video_url: URL of the video whose speech should be transcribed.

    Returns:
        The transcript string produced by transcribe_audio().
    """
    # Reserve a unique path, then close the handle immediately: yt-dlp /
    # ffmpeg must be able to open and write the file themselves (keeping
    # the handle open breaks the download on Windows).
    temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    temp_audio_file.close()
    base_path = temp_audio_file.name[:-len(".mp3")]
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        # Use an extension-free output template: FFmpegExtractAudio
        # appends ".mp3" itself, so templating the final ".mp3" name
        # would have produced "<name>.mp3.mp3" (or an ffmpeg error when
        # input and output paths collide).
        'outtmpl': base_path,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([video_url])

    # The postprocessor wrote base_path + ".mp3"; hand that real file on.
    return transcribe_audio(base_path + ".mp3")
119
-
120
def transcribe_audio(file_path):
    """
    Transcribe an MP3 file with the Deepgram speech-to-text API.

    Args:
        file_path: Path to the MP3 file to transcribe.

    Returns:
        The transcript string (possibly empty), or a human-readable
        message when the DEEPGRAM_API_KEY environment variable is unset.

    Raises:
        requests.HTTPError: If Deepgram returns a non-2xx response.
    """
    DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
    if not DEEPGRAM_API_KEY:
        return "Deepgram API key is missing."

    url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
    headers = {
        "Authorization": f"Token {DEEPGRAM_API_KEY}",
        "Content-Type": "audio/mpeg"
    }

    with open(file_path, "rb") as f:
        # Stream the file body; a generous timeout bounds large uploads
        # instead of letting the request hang forever (requests has no
        # default timeout).
        response = requests.post(url, headers=headers, data=f, timeout=300)

    response.raise_for_status()
    data = response.json()
    # First channel, first alternative is Deepgram's best hypothesis;
    # .get() tolerates a response with no transcript field.
    return data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
140
-
141
  def generate_audio_mp3(text, speaker):
142
  """
143
- Uses Groq's LLM to generate realistic text-to-speech (TTS) audio.
144
  """
145
  groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
146
  response = groq_client.chat.completions.create(
@@ -150,25 +114,28 @@ def generate_audio_mp3(text, speaker):
150
  temperature=0.6
151
  )
152
 
153
- speech_text = response.choices[0].message.content
154
  temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
155
- temp_audio.write(speech_text.encode('utf-8'))
156
- return temp_audio.name
157
 
158
def mix_with_bg_music(spoken_audio_file, bg_music_file=None):
    """
    Overlay optional background music underneath a spoken-audio MP3.

    Args:
        spoken_audio_file: Path to the speech MP3.
        bg_music_file: Optional path to a background-music MP3.

    Returns:
        An AudioSegment: the speech unchanged when no music is given,
        otherwise the speech with quieter, looped music underneath.
    """
    speech = AudioSegment.from_file(spoken_audio_file, format="mp3")

    # Nothing to mix in: the speech track is already the final result.
    if not bg_music_file:
        return speech

    music = AudioSegment.from_file(bg_music_file, format="mp3").set_frame_rate(speech.frame_rate)
    music = music - 15  # Lower background music volume
    return speech.overlay(music, loop=True)
 
 
 
 
172
 
173
  def research_topic(topic: str) -> str:
174
  """
@@ -177,7 +144,9 @@ def research_topic(topic: str) -> str:
177
  from run_agents import run_deep_research_agent # Ensure Open Deep Researcher is connected.
178
 
179
  try:
 
180
  research_result = run_deep_research_agent(topic)
181
  return research_result if research_result else f"No new information found for '{topic}'."
182
  except Exception as e:
 
183
  return f"Error during research: {str(e)}"
 
12
  from groq import Groq
13
  import numpy as np
14
  import torch
15
+ import logging
16
+
17
+ # Configure Logging
18
+ logging.basicConfig(
19
+ filename="debug.log",
20
+ level=logging.DEBUG,
21
+ format="%(asctime)s - %(levelname)s - %(message)s"
22
+ )
23
 
24
  class DialogueItem(BaseModel):
25
  speaker: Literal["Jane", "John"]
 
36
  groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
37
 
38
  try:
39
+ logging.info("Calling LLM for script generation.")
40
  response = groq_client.chat.completions.create(
41
  messages=[
42
  {"role": "system", "content": prompt},
 
48
  )
49
 
50
  if not response.choices or not response.choices[0].message.content:
51
+ logging.error("LLM returned an empty response. Retrying with fallback prompt...")
52
+ fallback_prompt = "Summarize the topic in a conversational way between two speakers."
53
+ response = groq_client.chat.completions.create(
54
+ messages=[
55
+ {"role": "system", "content": fallback_prompt},
56
+ {"role": "user", "content": text}
57
+ ],
58
+ model="DeepSeek-R1-Distill-Llama-70B",
59
+ max_tokens=4096,
60
+ temperature=0.6
61
+ )
62
+
63
+ if not response.choices or not response.choices[0].message.content:
64
+ raise ValueError("LLM failed twice. No valid script generated.")
65
 
66
  script_content = response.choices[0].message.content.strip()
67
  dialogue_items = parse_script_to_dialogue(script_content, host_name, guest_name)
 
72
  return Dialogue(dialogue=dialogue_items)
73
 
74
  except Exception as e:
75
+ logging.error(f"Failed to generate script: {str(e)}")
76
  return Dialogue(dialogue=[DialogueItem(speaker="Jane", display_speaker="Jane", text="I'm sorry, something went wrong.")])
77
 
78
  def parse_script_to_dialogue(script, host_name, guest_name):
 
102
 
103
  return text
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  def generate_audio_mp3(text, speaker):
106
  """
107
+ Uses Groq's LLM to generate realistic text-to-speech (TTS) audio and ensures it is valid.
108
  """
109
  groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
110
  response = groq_client.chat.completions.create(
 
114
  temperature=0.6
115
  )
116
 
117
+ speech_text = response.choices[0].message.content.strip()
118
  temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
 
 
119
 
120
+ try:
121
+ temp_audio.write(speech_text.encode('utf-8'))
122
+ temp_audio.close()
123
+
124
+ # Verify MP3 File Integrity Before Returning
125
+ audio_test = AudioSegment.from_file(temp_audio.name, format="mp3")
126
+ if len(audio_test) == 0:
127
+ raise ValueError("Generated MP3 file is empty or corrupted.")
128
 
129
+ return temp_audio.name
130
+
131
+ except Exception as e:
132
+ logging.error(f"Failed to generate MP3 file: {str(e)}")
 
 
133
 
134
+ # ✅ Fallback: Generate a silent MP3 file to prevent ffmpeg crashes
135
+ silent_audio = AudioSegment.silent(duration=2000) # 2 seconds of silence
136
+ fallback_mp3 = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
137
+ silent_audio.export(fallback_mp3.name, format="mp3")
138
+ return fallback_mp3.name
139
 
140
  def research_topic(topic: str) -> str:
141
  """
 
144
  from run_agents import run_deep_research_agent # Ensure Open Deep Researcher is connected.
145
 
146
  try:
147
+ logging.info(f"Running Open Deep Researcher for topic: {topic}")
148
  research_result = run_deep_research_agent(topic)
149
  return research_result if research_result else f"No new information found for '{topic}'."
150
  except Exception as e:
151
+ logging.error(f"Error during research: {str(e)}")
152
  return f"Error during research: {str(e)}"