MyPod_10

Running

App Files Files Community

siddhartharyaai committed on Jan 14

Commit

9bb818e

verified ·

1 Parent(s): cc24111

Update utils.py

Browse files

Files changed (1) hide show

utils.py +50 -16

utils.py CHANGED Viewed

@@ -165,7 +165,7 @@ def research_topic(topic: str) -> str:
     print("[DEBUG] Aggregated info from primary sources:")
     print(aggregated_info)
-    # Fallback to LLM if insufficient
     if not is_sufficient(aggregated_info):
         print("[LOG] Insufficient info from primary sources. Fallback to LLM.")
         additional_info = query_llm_for_additional_info(topic, aggregated_info)
@@ -277,9 +277,6 @@ def generate_script(
     """
     Sends the system_prompt plus input_text to the Groq LLM to generate a
     multi-speaker Dialogue in JSON, returning a Dialogue object.
-    sponsor_style can be "Separate Break" or "Blended".
-    We add instructions telling the model how to integrate the sponsor content.
     """
     print("[LOG] Generating script with tone:", tone, "and length:", target_length)
     groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
@@ -306,7 +303,7 @@ def generate_script(
             "If sponsor content is provided, include it in a separate ad break (~30 seconds). "
             "Use phrasing like 'Now a word from our sponsor...' and end with 'Back to the show' or similar."
         )
-    else:  # Blended
         sponsor_instructions = (
             "If sponsor content is provided, blend it naturally (~30 seconds) into the conversation. "
             "Avoid abrupt transitions."
@@ -377,7 +374,6 @@ def generate_script(
             new_dialogue_items.append(DialogueItem(**d))
         return Dialogue(dialogue=new_dialogue_items)
     except json.JSONDecodeError as e:
         print("[ERROR] JSON decoding (format) failed:", e)
         raise ValueError(f"Failed to parse dialogue: {str(e)}")
@@ -472,7 +468,7 @@ def generate_audio_mp3(text: str, speaker: str) -> str:
                     mp3_file.write(chunk)
             mp3_path = mp3_file.name
-        # Normalize
         audio_seg = AudioSegment.from_file(mp3_path, format="mp3")
         audio_seg = effects.normalize(audio_seg)
@@ -493,7 +489,7 @@ def transcribe_youtube_video_OLD_YTDLP(video_url: str) -> str:
 def _preprocess_text_for_tts(text: str, speaker: str) -> str:
     """
     1) "SaaS" => "sass"
-    2) Insert periods in uppercase abbreviations -> remove for TTS
     3) Convert decimals like "3.14" -> "three point one four"
     4) Convert pure integer numbers like "20" -> "twenty"
     5) Expand leftover all-caps
@@ -505,7 +501,7 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
     # 1) "SaaS" => "sass"
     text = re.sub(r"\b(?i)SaaS\b", "sass", text)
-    # 2) Insert periods for uppercase abbreviations (>=2 chars), then remove them
     def insert_periods_for_abbrev(m):
         abbr = m.group(0)
         parted = ".".join(list(abbr)) + "."
@@ -552,14 +548,11 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
     text = re.sub(r"\b[A-Z]{2,}s?\b", expand_abbreviations, text)
     # 7) Emotive placeholders
-    #   "haha", "ha", "heh", "lol" => "(* laughs *)"
     text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
-    #   "sigh" => "(* sighs *)"
     text = re.sub(r"\bsigh\b", "(* sighs *)", text, flags=re.IGNORECASE)
-    #   "groan", "moan" => "(* groans *)"
     text = re.sub(r"\b(groan|moan)\b", "(* groans *)", text, flags=re.IGNORECASE)
-    # 8) Insert filler words if speaker != Jane
     if speaker != "Jane":
         def insert_thinking_pause(m):
             word = m.group(1)
@@ -584,9 +577,27 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
     return text.strip()
 def number_to_words(n: int) -> str:
     """
-    Basic integer-to-words up to ~99999.
     For a robust approach, consider the 'num2words' library.
     """
     if n == 0:
@@ -637,8 +648,8 @@ def mix_with_bg_music(spoken: AudioSegment, custom_music_path=None) -> AudioSegm
     """
     Mixes 'spoken' with a default bg_music.mp3 or user-provided custom music:
     1) Start with 2 seconds of music alone before speech begins.
-    2) Loop music if shorter than final audio length.
-    3) Lower music volume so speech is clear.
     """
     if custom_music_path:
         music_path = custom_music_path
@@ -661,3 +672,26 @@ def mix_with_bg_music(spoken: AudioSegment, custom_music_path=None) -> AudioSegm
     looped_music = looped_music[:total_length_ms]
     final_mix = looped_music.overlay(spoken, position=2000)
     return final_mix

     print("[DEBUG] Aggregated info from primary sources:")
     print(aggregated_info)
+    # If not enough data, fallback to LLM
     if not is_sufficient(aggregated_info):
         print("[LOG] Insufficient info from primary sources. Fallback to LLM.")
         additional_info = query_llm_for_additional_info(topic, aggregated_info)
     """
     Sends the system_prompt plus input_text to the Groq LLM to generate a
     multi-speaker Dialogue in JSON, returning a Dialogue object.
     """
     print("[LOG] Generating script with tone:", tone, "and length:", target_length)
     groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
             "If sponsor content is provided, include it in a separate ad break (~30 seconds). "
             "Use phrasing like 'Now a word from our sponsor...' and end with 'Back to the show' or similar."
         )
+    else:
         sponsor_instructions = (
             "If sponsor content is provided, blend it naturally (~30 seconds) into the conversation. "
             "Avoid abrupt transitions."
             new_dialogue_items.append(DialogueItem(**d))
         return Dialogue(dialogue=new_dialogue_items)
     except json.JSONDecodeError as e:
         print("[ERROR] JSON decoding (format) failed:", e)
         raise ValueError(f"Failed to parse dialogue: {str(e)}")
                     mp3_file.write(chunk)
             mp3_path = mp3_file.name
+        # Normalize volume
         audio_seg = AudioSegment.from_file(mp3_path, format="mp3")
         audio_seg = effects.normalize(audio_seg)
 def _preprocess_text_for_tts(text: str, speaker: str) -> str:
     """
     1) "SaaS" => "sass"
+    2) Insert periods for uppercase abbreviations -> remove for TTS
     3) Convert decimals like "3.14" -> "three point one four"
     4) Convert pure integer numbers like "20" -> "twenty"
     5) Expand leftover all-caps
     # 1) "SaaS" => "sass"
     text = re.sub(r"\b(?i)SaaS\b", "sass", text)
+    # 2) Insert periods in uppercase abbreviations (>=2 chars), then remove them
     def insert_periods_for_abbrev(m):
         abbr = m.group(0)
         parted = ".".join(list(abbr)) + "."
     text = re.sub(r"\b[A-Z]{2,}s?\b", expand_abbreviations, text)
     # 7) Emotive placeholders
     text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
     text = re.sub(r"\bsigh\b", "(* sighs *)", text, flags=re.IGNORECASE)
     text = re.sub(r"\b(groan|moan)\b", "(* groans *)", text, flags=re.IGNORECASE)
+    # 8) Insert filler words if speaker != "Jane"
     if speaker != "Jane":
         def insert_thinking_pause(m):
             word = m.group(1)
     return text.strip()
+def _spell_digits(d: str) -> str:
+    """
+    Spell out each decimal digit of `d` as an English word, space-separated,
+    e.g. '314' -> 'three one four'.
+
+    Characters other than '0'-'9' are skipped; with no digits at all the
+    result is the empty string.
+    """
+    words_by_digit = dict(zip(
+        "0123456789",
+        ("zero", "one", "two", "three", "four",
+         "five", "six", "seven", "eight", "nine"),
+    ))
+    spoken = []
+    for ch in d:
+        word = words_by_digit.get(ch)
+        if word is not None:
+            spoken.append(word)
+    return " ".join(spoken)
 def number_to_words(n: int) -> str:
     """
+    Basic integer-to-words up to ~99999.
     For a robust approach, consider the 'num2words' library.
     """
     if n == 0:
     """
     Mixes 'spoken' with a default bg_music.mp3 or user-provided custom music:
     1) Start with 2 seconds of music alone before speech begins.
+    2) Loop the music if it's shorter than the final audio length.
+    3) Lower music volume so the speech is clear.
     """
     if custom_music_path:
         music_path = custom_music_path
     looped_music = looped_music[:total_length_ms]
     final_mix = looped_music.overlay(spoken, position=2000)
     return final_mix
+# This function is new for short Q&A calls
+def call_groq_api_for_qa(system_prompt: str) -> str:
+    """
+    Send `system_prompt` as a single system message to the Groq
+    chat-completions API and return the reply text for a short Q&A turn.
+
+    The prompt is expected to make the model answer with a JSON string, e.g.:
+    {"speaker": "John", "text": "Short answer here"}
+
+    Returns the stripped content of the first choice. If the client cannot be
+    created, the request raises, or the response carries no text, a JSON
+    apology attributed to "John" is returned instead of raising, so callers
+    always receive a non-empty string.
+    """
+    fallback = {"speaker": "John", "text": "I'm sorry, I'm having trouble answering right now."}
+    try:
+        # Client construction sits inside the try so that a construction
+        # failure degrades to the fallback as well.
+        groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+        response = groq_client.chat.completions.create(
+            messages=[{"role": "system", "content": system_prompt}],
+            model="llama-3.3-70b-versatile",
+            max_tokens=512,
+            temperature=0.7
+        )
+    except Exception as e:
+        # Deliberately broad: this is a best-effort boundary around a remote call.
+        print("[ERROR] Groq API error:", e)
+        return json.dumps(fallback)
+    # Guard against an empty choices list or a missing (None) message content
+    # before calling .strip(); either would otherwise raise here.
+    choices = response.choices
+    content = choices[0].message.content if choices else None
+    raw_content = (content or "").strip()
+    if not raw_content:
+        print("[ERROR] Groq API returned no content.")
+        return json.dumps(fallback)
+    return raw_content