siddhartharyaai committed
Commit 424917e · verified · 1 Parent(s): 89feeb0

Update utils.py

Files changed (1)
  1. utils.py +138 -184
utils.py CHANGED
@@ -66,10 +66,6 @@ def pitch_shift(audio: AudioSegment, semitones: int) -> AudioSegment:
 def is_sufficient(text: str, min_word_count: int = 500) -> bool:
     """
     Determines if the fetched information meets the sufficiency criteria.
-
-    :param text: Aggregated text from primary sources.
-    :param min_word_count: Minimum number of words required.
-    :return: True if sufficient, False otherwise.
     """
     word_count = len(text.split())
     print(f"[DEBUG] Aggregated word count: {word_count}")
@@ -78,13 +74,8 @@ def is_sufficient(text: str, min_word_count: int = 500) -> bool:
 def query_llm_for_additional_info(topic: str, existing_text: str) -> str:
     """
     Queries the Groq API to retrieve additional relevant information from the LLM's knowledge base.
-
-    :param topic: The research topic.
-    :param existing_text: The text already gathered from primary sources.
-    :return: Additional relevant information as a string.
     """
     print("[LOG] Querying LLM for additional information.")
-    # Define the system prompt for the LLM
     system_prompt = (
         "You are an AI assistant with extensive knowledge up to 2023-10. "
         "Provide additional relevant information on the following topic based on your knowledge base.\n\n"
@@ -94,7 +85,6 @@ def query_llm_for_additional_info(topic: str, existing_text: str) -> str:
     )
 
     groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
-
     try:
         response = groq_client.chat.completions.create(
            messages=[{"role": "system", "content": system_prompt}],
@@ -126,18 +116,15 @@ def research_topic(topic: str) -> str:
 
     summary_parts = []
 
-    # Wikipedia summary
     wiki_summary = fetch_wikipedia_summary(topic)
     if wiki_summary:
         summary_parts.append(f"From Wikipedia: {wiki_summary}")
 
-    # For each news RSS
     for name, url in sources.items():
         try:
             items = fetch_rss_feed(url)
             if not items:
                 continue
-            # Use simple keyword matching
             title, desc, link = find_relevant_article(items, topic, min_match=2)
             if link:
                 article_text = fetch_article_text(link)
@@ -154,15 +141,14 @@ def research_topic(topic: str) -> str:
     print(aggregated_info)
 
     if not is_sufficient(aggregated_info):
-        print("[LOG] Insufficient information from primary sources. Initiating fallback to LLM.")
+        print("[LOG] Insufficient info from primary sources. Fallback to LLM.")
         additional_info = query_llm_for_additional_info(topic, aggregated_info)
         if additional_info:
             aggregated_info += " " + additional_info
         else:
-            print("[ERROR] Failed to retrieve additional information from LLM.")
+            print("[ERROR] Failed to retrieve additional info from LLM.")
 
     if not aggregated_info:
-        print("[LOG] No information found for the topic.")
         return f"Sorry, I couldn't find recent information on '{topic}'."
 
     return aggregated_info
@@ -170,24 +156,21 @@ def research_topic(topic: str) -> str:
 def fetch_wikipedia_summary(topic: str) -> str:
     print("[LOG] Fetching Wikipedia summary for:", topic)
     try:
-        # 1. Search for the topic
         search_url = f"https://en.wikipedia.org/w/api.php?action=opensearch&search={requests.utils.quote(topic)}&limit=1&namespace=0&format=json"
         resp = requests.get(search_url)
         if resp.status_code != 200:
-            print(f"[ERROR] Failed to fetch Wikipedia search results for topic: {topic}")
+            print(f"[ERROR] Failed to fetch Wikipedia search for topic: {topic}")
             return ""
         data = resp.json()
         if len(data) > 1 and data[1]:
             title = data[1][0]
-            # 2. Fetch summary
             summary_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{requests.utils.quote(title)}"
             s_resp = requests.get(summary_url)
             if s_resp.status_code == 200:
                 s_data = s_resp.json()
                 if "extract" in s_data:
-                    print("[LOG] Wikipedia summary fetched successfully.")
+                    print("[LOG] Wikipedia summary fetched.")
                     return s_data["extract"]
-        print("[LOG] No Wikipedia summary found for topic:", topic)
         return ""
     except Exception as e:
         print(f"[ERROR] Exception during Wikipedia summary fetch: {e}")
@@ -198,55 +181,42 @@ def fetch_rss_feed(feed_url: str) -> list:
     try:
         resp = requests.get(feed_url)
         if resp.status_code != 200:
-            print(f"[ERROR] Failed to fetch RSS feed: {feed_url} with status code {resp.status_code}")
+            print(f"[ERROR] Failed to fetch RSS feed {feed_url}")
             return []
         soup = BeautifulSoup(resp.content, "html.parser")
         items = soup.find_all("item")
-        print(f"[LOG] Number of items fetched from {feed_url}: {len(items)}")
+        print(f"[LOG] Number of items: {len(items)} from {feed_url}")
         return items
     except Exception as e:
-        print(f"[ERROR] Exception occurred while fetching RSS feed {feed_url}: {e}")
+        print(f"[ERROR] Exception fetching RSS feed {feed_url}: {e}")
         return []
 
 def find_relevant_article(items, topic: str, min_match=2) -> tuple:
-    """
-    Searches for relevant articles based on topic keywords.
-    :param items: List of RSS feed items
-    :param topic: Topic string
-    :param min_match: Minimum number of keyword matches required
-    :return: (title, description, link) or (None, None, None)
-    """
     print("[LOG] Finding relevant articles...")
     keywords = re.findall(r'\w+', topic.lower())
-    print(f"[LOG] Topic keywords: {keywords}")
-
     for item in items:
         title = item.find("title").get_text().strip() if item.find("title") else ""
         description = item.find("description").get_text().strip() if item.find("description") else ""
         text = f"{title.lower()} {description.lower()}"
         matches = sum(1 for kw in keywords if kw in text)
-        print(f"[DEBUG] Checking article: '{title}' | Matches: {matches}/{len(keywords)}")
         if matches >= min_match:
             link = item.find("link").get_text().strip() if item.find("link") else ""
-            print(f"[LOG] Relevant article found: {title}")
+            print(f"[LOG] Relevant article: {title}")
             return title, description, link
-    print("[LOG] No relevant articles found based on the current matching criteria.")
     return None, None, None
 
 def fetch_article_text(link: str) -> str:
-    print("[LOG] Fetching article text from:", link)
+    print("[LOG] Fetching article text:", link)
     if not link:
-        print("[LOG] No link provided for fetching article text.")
         return ""
     try:
         resp = requests.get(link)
         if resp.status_code != 200:
-            print(f"[ERROR] Failed to fetch article from link: {link} with status code {resp.status_code}")
+            print(f"[ERROR] Failed to fetch article with status {resp.status_code}")
             return ""
         soup = BeautifulSoup(resp.text, 'html.parser')
         paragraphs = soup.find_all("p")
         text = " ".join(p.get_text() for p in paragraphs[:5])
-        print("[LOG] Article text fetched successfully.")
         return text.strip()
     except Exception as e:
         print(f"[ERROR] Error fetching article text: {e}")
@@ -270,7 +240,6 @@ def generate_script(system_prompt: str, input_text: str, tone: str, target_lengt
         "Casual": "like a conversation between close friends, relaxed and informal",
         "Youthful": "like how teenagers might chat, energetic and lively"
     }
-
     chosen_tone = tone_description.get(tone, "casual")
 
     prompt = (
@@ -292,6 +261,7 @@ def generate_script(system_prompt: str, input_text: str, tone: str, target_lengt
         " ]\n"
         "}"
     )
+
     print("[LOG] Sending prompt to Groq:")
     print(prompt)
 
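Because the prompt above demands a bare JSON object, the parsing rewrite in the next hunk simply slices from the first '{' to the last '}' instead of stripping markdown fences first; note it also drops the json.JSONDecodeError handler, so malformed JSON now surfaces as a raw JSONDecodeError rather than a ValueError. A minimal sketch of the extraction on a typical chatty reply:

    import json

    raw = 'Sure! Here is the dialogue:\n```json\n{"dialogue": [{"speaker": "Jane", "text": "Hi!"}]}\n```'
    start, end = raw.find('{'), raw.rfind('}')
    data = json.loads(raw[start:end + 1])  # chatter and fences outside the braces are ignored
    print(data["dialogue"][0]["speaker"])  # Jane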
@@ -303,176 +273,163 @@ def generate_script(system_prompt: str, input_text: str, tone: str, target_lengt
             temperature=0.7
         )
     except Exception as e:
-        print("[ERROR] Groq API error:", e)
         raise ValueError(f"Error communicating with Groq API: {str(e)}")
 
     raw_content = response.choices[0].message.content.strip()
-    print("[DEBUG] Raw API response content:")
-    print(raw_content)
-
-    content = raw_content.replace('```json', '').replace('```', '').strip()
-    start_index = content.find('{')
-    end_index = content.rfind('}')
-
+    start_index = raw_content.find('{')
+    end_index = raw_content.rfind('}')
     if start_index == -1 or end_index == -1:
-        print("[ERROR] Failed to parse dialogue. No JSON found.")
-        print("[ERROR] Entire response content:")
-        print(content)
-        raise ValueError("Failed to parse dialogue: Could not find JSON object in response.")
+        raise ValueError("Failed to parse dialogue: No JSON found.")
 
-    json_str = content[start_index:end_index+1].strip()
-
-    print("[DEBUG] Extracted JSON string:")
-    print(json_str)
-
-    try:
-        data = json.loads(json_str)
-        print("[LOG] Script generated successfully.")
-        return Dialogue(**data)
-    except json.JSONDecodeError as e:
-        print("[ERROR] JSON decoding failed:", e)
-        print("[ERROR] Response content causing failure:")
-        print(content)
-        raise ValueError(f"Failed to parse dialogue: {str(e)}")
-
-# ----------------------------------------------------------------------
-# We ONLY modify the generate_audio_mp3 flow below to insert random filler words
-# and modify punctuation (.,!?) for more natural TTS pauses and intonation.
-# ----------------------------------------------------------------------
+    json_str = raw_content[start_index:end_index+1].strip()
+    data = json.loads(json_str)
+    return Dialogue(**data)
 
+# -------------------------------------------------------------
+# Helper function: Insert random filler words, extra punctuation
+# BUT we'll handle that chunk by chunk (see below).
+# -------------------------------------------------------------
 def _make_text_sound_more_human(text: str) -> str:
     """
-    Inserts small filler words and adds extra punctuation to encourage
-    natural-sounding pauses at commas, periods, exclamations, and question marks.
+    Inserts small filler words and modifies punctuation
+    for more natural-sounding speech.
     """
-
-    # Filler words or short phrases
     fillers = ["uh", "um", "ah", "hmm", "you know", "well", "I mean", "like"]
+    # Insert filler sometimes at start or middle:
+    if text and random.random() < 0.4:
+        filler = random.choice(fillers)
+        if random.random() < 0.5:
+            text = f"{filler}, {text}"
+        else:
+            words = text.split()
+            mid = len(words) // 2
+            text = " ".join(words[:mid] + [f"{filler},"] + words[mid:])
 
-    # 1) Split text by punctuation but keep the punctuation in the result
-    #    We'll handle ".", "?", "!", and commas:
-    pattern = r'([.,?!])'
-    parts = re.split(pattern, text)
-
-    # 2) Process each chunk, occasionally inserting filler words or extra punctuation
-    processed_chunks = []
-    for i in range(len(parts)):
-        chunk = parts[i].strip()
-
-        # If the chunk is punctuation, keep it
-        if chunk in [".", ",", "?", "!"]:
-            # Possibly turn "." into "..." or add "..." after "?"
-            if chunk == "." and random.random() < 0.5:
-                chunk = "..."
-            elif chunk == "?" and random.random() < 0.3:
-                # Sometimes add "?!"
-                chunk = "?!"
-            elif chunk == "!" and random.random() < 0.3:
-                # Sometimes add "!!" for more emphasis
-                chunk = "!!"
-            processed_chunks.append(chunk)
-            continue
-
-        # Sometimes insert a filler at the start or mid-chunk
-        if chunk and random.random() < 0.3:
-            filler = random.choice(fillers)
-            # Insert at the beginning or in the middle
-            if random.random() < 0.5:
-                chunk = f"{filler}, {chunk}"
-            else:
-                # Insert near the middle
-                words = chunk.split()
-                mid = len(words) // 2
-                chunk = " ".join(words[:mid] + [f"{filler},"] + words[mid:])
-
-        processed_chunks.append(chunk)
-
-    # 3) Rejoin them carefully with a space or nothing
-    #    We'll add a small space after punctuation, so TTS sees them as separate tokens
-    out_text = []
-    for i in range(len(processed_chunks)):
-        if i == 0:
-            out_text.append(processed_chunks[i])
-        else:
-            # If the previous chunk was punctuation or the current chunk is punctuation
-            if processed_chunks[i] in [".", "...", "?", "?!", "!", "!!", ","]:
-                out_text.append(processed_chunks[i])
-            else:
-                out_text.append(" " + processed_chunks[i])
-
-    final_text = "".join(out_text)
-    return final_text.strip()
+    # Possibly turn periods into "..." to force a pause
+    text = re.sub(r'\.(\s|$)', lambda m: "..." + m.group(1), text)
+
+    # Possibly turn "?" into "?!" or "!!" for exclamation
+    if random.random() < 0.2:
+        text = text.replace("?", "?!")
+    if random.random() < 0.2:
+        text = text.replace("!", "!!")
+
+    return text.strip()
+
+def _split_into_sentences_and_phrases(text: str):
+    """
+    Splits the text into smaller chunks so each chunk can be TTS-ed
+    individually for better pacing. We'll look for ., !, or ?
+    as sentence boundaries. Also splits by commas for short phrases.
+    """
+    # Split by sentence enders with a capture group to keep delimiters separate.
+    # We can then further split by commas if the sentence is long.
+    # E.g. "Hello there. This is a test?" => ["Hello there.", "This is a test?"]
+    # Then if "Hello there." is too big, we might split by commas as well.
+    boundaries = re.split(r'([.?!])', text)
+
+    # Rebuild into "sentence + punctuation" pairs
+    phrases = []
+    for i in range(0, len(boundaries), 2):
+        if i + 1 < len(boundaries):
+            chunk = (boundaries[i] + boundaries[i+1]).strip()
+        else:
+            chunk = boundaries[i].strip()
+        if chunk:
+            # Now optionally split chunk by commas if it's too big
+            subparts = chunk.split(',')
+            # If there's more than 1 subpart, rejoin them carefully so each subpart can be TTS-ed on its own
+            for idx, sp in enumerate(subparts):
+                part = sp.strip()
+                if part:
+                    # Re-add comma except on the last one
+                    if idx < len(subparts) - 1:
+                        part += ","
+                    phrases.append(part)
+    return phrases
 
 def generate_audio_mp3(text: str, speaker: str) -> str:
     try:
         print(f"[LOG] Generating audio for speaker: {speaker}")
 
-        # Make text more "human-like"
-        text = _make_text_sound_more_human(text)
-
-        # Define Deepgram API endpoint
-        deepgram_api_url = "https://api.deepgram.com/v1/speak"
-
-        # Prepare query parameters
-        params = {
-            "model": "aura-asteria-en",  # Default model; adjust if needed
-        }
-
-        # Override model if needed based on speaker
-        if speaker == "Jane":
-            params["model"] = "aura-asteria-en"
-        elif speaker == "John":
-            params["model"] = "aura-perseus-en"
-        else:
-            raise ValueError(f"Unknown speaker: {speaker}")
-
-        headers = {
-            "Accept": "audio/mpeg",
-            "Content-Type": "application/json",
-            "Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
-        }
-
-        body = {
-            "text": text
-        }
-
-        print("[LOG] Sending TTS request to Deepgram...")
-        response = requests.post(deepgram_api_url, params=params, headers=headers, json=body, stream=True)
-
-        if response.status_code != 200:
-            print(f"[ERROR] Deepgram TTS API returned status code {response.status_code}: {response.text}")
-            raise ValueError(f"Deepgram TTS API error: {response.status_code} - {response.text}")
-
-        content_type = response.headers.get('Content-Type', '')
-        if 'audio/mpeg' not in content_type:
-            print("[ERROR] Unexpected Content-Type received from Deepgram:", content_type)
-            print("[ERROR] Response content:", response.text)
-            raise ValueError("Unexpected Content-Type received from Deepgram.")
-
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
-            for chunk in response.iter_content(chunk_size=8192):
-                if chunk:
-                    mp3_file.write(chunk)
-            mp3_temp_path = mp3_file.name
-        print(f"[LOG] Audio received from Deepgram and saved at: {mp3_temp_path}")
-
-        # Normalize audio volume
-        audio_seg = AudioSegment.from_file(mp3_temp_path, format="mp3")
-        audio_seg = effects.normalize(audio_seg)
+        # Step 1: Split text into small pieces (phrases, sentences)
+        fragments = _split_into_sentences_and_phrases(text)
+
+        # Step 2: For each fragment, transform it to be more human-like, TTS it, then combine
+        all_segments = []
+        for frag in fragments:
+            if not frag.strip():
+                continue
+
+            # Make the chunk more "human"
+            human_chunk = _make_text_sound_more_human(frag)
+
+            # TTS this chunk
+            mp3_path = _tts_chunk(human_chunk, speaker)
+            seg = AudioSegment.from_file(mp3_path, format="mp3")
+            seg = effects.normalize(seg)
+            all_segments.append(seg)
+
+            # Clean up
+            if os.path.exists(mp3_path):
+                os.remove(mp3_path)
+
+        if not all_segments:
+            raise ValueError("No audio segments produced.")
+
+        # Step 3: Combine segments with a short silence between
+        final_audio = all_segments[0]
+        short_silence = AudioSegment.silent(duration=300)  # 300ms silence
+        for seg in all_segments[1:]:
+            final_audio = final_audio + short_silence + seg
 
+        # Step 4: Save combined
         final_mp3_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
-        audio_seg.export(final_mp3_path, format="mp3")
-        print("[LOG] Audio post-processed and saved at:", final_mp3_path)
-
-        if os.path.exists(mp3_temp_path):
-            os.remove(mp3_temp_path)
-            print(f"[LOG] Removed temporary MP3 file: {mp3_temp_path}")
-
+        final_audio.export(final_mp3_path, format="mp3")
+        print("[LOG] Combined audio saved at:", final_mp3_path)
         return final_mp3_path
+
     except Exception as e:
         print("[ERROR] Error generating audio:", e)
         raise ValueError(f"Error generating audio: {str(e)}")
 
+def _tts_chunk(text: str, speaker: str) -> str:
+    """
+    Helper function to do TTS on a single chunk of text
+    (so we can call multiple times).
+    """
+    deepgram_api_url = "https://api.deepgram.com/v1/speak"
+    params = {
+        "model": "aura-asteria-en",  # default female
+    }
+    if speaker == "John":
+        params["model"] = "aura-perseus-en"
+
+    headers = {
+        "Accept": "audio/mpeg",
+        "Content-Type": "application/json",
+        "Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
+    }
+    body = {
+        "text": text
+    }
+
+    response = requests.post(deepgram_api_url, params=params, headers=headers, json=body, stream=True)
+    if response.status_code != 200:
+        raise ValueError(f"Deepgram TTS error: {response.status_code}, {response.text}")
+
+    content_type = response.headers.get('Content-Type', '')
+    if 'audio/mpeg' not in content_type:
+        raise ValueError("Unexpected Content-Type from Deepgram.")
+
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
+        for chunk in response.iter_content(chunk_size=8192):
+            if chunk:
+                mp3_file.write(chunk)
+        mp3_path = mp3_file.name
+
+    return mp3_path
+
 def transcribe_youtube_video(video_url: str) -> str:
     print("[LOG] Transcribing YouTube video:", video_url)
     fd, audio_file = tempfile.mkstemp(suffix=".wav")
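
The reworked flow TTS-es each phrase separately and stitches the clips with pydub. A small offline sketch of the splitting behavior and the silence-joined concatenation, with sine-tone clips standing in for Deepgram output:

    from pydub import AudioSegment
    from pydub.generators import Sine

    # Per the new helper, "Well, hello there. How are you?" splits into
    # ["Well,", "hello there.", "How are you?"] before TTS.

    clips = [Sine(440).to_audio_segment(duration=400) for _ in range(3)]
    silence = AudioSegment.silent(duration=300)  # same 300 ms gap as generate_audio_mp3
    combined = clips[0]
    for seg in clips[1:]:
        combined = combined + silence + seg
    combined.export("combined.mp3", format="mp3")  # needs ffmpeg, as in the real flow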
@@ -494,20 +451,17 @@ def transcribe_youtube_video(video_url: str) -> str:
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             ydl.download([video_url])
     except yt_dlp.utils.DownloadError as e:
-        print("[ERROR] yt-dlp download error:", e)
         raise ValueError(f"Error downloading YouTube video: {str(e)}")
 
     print("[LOG] Audio downloaded at:", audio_file)
     try:
-        # Run ASR on the downloaded audio
         result = asr_pipeline(audio_file)
         transcript = result["text"]
         print("[LOG] Transcription completed.")
         return transcript.strip()
     except Exception as e:
-        print("[ERROR] ASR transcription error:", e)
         raise ValueError(f"Error transcribing YouTube video: {str(e)}")
     finally:
         if os.path.exists(audio_file):
             os.remove(audio_file)
-            print(f"[LOG] Removed temporary audio file: {audio_file}")
+            print(f"[LOG] Removed temp audio file: {audio_file}")
 