Spaces:
Running
Running
Update utils.py
Browse files
utils.py
CHANGED
@@ -165,7 +165,7 @@ def research_topic(topic: str) -> str:
|
|
165 |
print("[DEBUG] Aggregated info from primary sources:")
|
166 |
print(aggregated_info)
|
167 |
|
168 |
-
#
|
169 |
if not is_sufficient(aggregated_info):
|
170 |
print("[LOG] Insufficient info from primary sources. Fallback to LLM.")
|
171 |
additional_info = query_llm_for_additional_info(topic, aggregated_info)
|
@@ -489,7 +489,7 @@ def transcribe_youtube_video_OLD_YTDLP(video_url: str) -> str:
|
|
489 |
def _preprocess_text_for_tts(text: str, speaker: str) -> str:
|
490 |
"""
|
491 |
1) "SaaS" => "sass"
|
492 |
-
2) Insert periods
|
493 |
3) Convert decimals like "3.14" -> "three point one four"
|
494 |
4) Convert pure integer numbers like "20" -> "twenty"
|
495 |
5) Expand leftover all-caps
|
@@ -501,7 +501,7 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
|
|
501 |
# 1) "SaaS" => "sass"
|
502 |
text = re.sub(r"\bSaaS\b", "sass", text, flags=re.IGNORECASE)  # flag passed via flags=: a mid-pattern "(?i)" raises re.error on Python 3.11+
|
503 |
|
504 |
-
# 2) Insert periods
|
505 |
def insert_periods_for_abbrev(m):
|
506 |
abbr = m.group(0)
|
507 |
parted = ".".join(list(abbr)) + "."
|
@@ -516,7 +516,7 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
|
|
516 |
# 3) Hyphens -> spaces
|
517 |
text = re.sub(r"-", " ", text)
|
518 |
|
519 |
-
# 4) Convert decimals
|
520 |
def convert_decimal(m):
|
521 |
number_str = m.group()
|
522 |
parts = number_str.split('.')
|
@@ -673,10 +673,9 @@ def mix_with_bg_music(spoken: AudioSegment, custom_music_path=None) -> AudioSegm
|
|
673 |
final_mix = looped_music.overlay(spoken, position=2000)
|
674 |
return final_mix
|
675 |
|
676 |
-
#
|
677 |
def call_groq_api_for_qa(system_prompt: str) -> str:
|
678 |
"""
|
679 |
-
A minimal placeholder for your short Q&A LLM call.
|
680 |
Must return a JSON string, e.g.:
|
681 |
{"speaker": "John", "text": "Short answer here"}
|
682 |
"""
|
|
|
165 |
print("[DEBUG] Aggregated info from primary sources:")
|
166 |
print(aggregated_info)
|
167 |
|
168 |
+
# Fallback to LLM if insufficient
|
169 |
if not is_sufficient(aggregated_info):
|
170 |
print("[LOG] Insufficient info from primary sources. Fallback to LLM.")
|
171 |
additional_info = query_llm_for_additional_info(topic, aggregated_info)
|
|
|
489 |
def _preprocess_text_for_tts(text: str, speaker: str) -> str:
|
490 |
"""
|
491 |
1) "SaaS" => "sass"
|
492 |
+
2) Insert periods in uppercase abbreviations -> remove for TTS
|
493 |
3) Convert decimals like "3.14" -> "three point one four"
|
494 |
4) Convert pure integer numbers like "20" -> "twenty"
|
495 |
5) Expand leftover all-caps
|
|
|
501 |
# 1) "SaaS" => "sass"
|
502 |
text = re.sub(r"\bSaaS\b", "sass", text, flags=re.IGNORECASE)  # flag passed via flags=: a mid-pattern "(?i)" raises re.error on Python 3.11+
|
503 |
|
504 |
+
# 2) Insert periods for uppercase abbreviations (>=2 chars), then remove them
|
505 |
def insert_periods_for_abbrev(m):
|
506 |
abbr = m.group(0)
|
507 |
parted = ".".join(list(abbr)) + "."
|
|
|
516 |
# 3) Hyphens -> spaces
|
517 |
text = re.sub(r"-", " ", text)
|
518 |
|
519 |
+
# 4) Convert decimals
|
520 |
def convert_decimal(m):
|
521 |
number_str = m.group()
|
522 |
parts = number_str.split('.')
|
|
|
673 |
final_mix = looped_music.overlay(spoken, position=2000)
|
674 |
return final_mix
|
675 |
|
676 |
+
# For short Q&A calls
|
677 |
def call_groq_api_for_qa(system_prompt: str) -> str:
|
678 |
"""
|
|
|
679 |
Must return a JSON string, e.g.:
|
680 |
{"speaker": "John", "text": "Short answer here"}
|
681 |
"""
|