siddhartharyaai commited on
Commit
e62d0b2
·
verified ·
1 Parent(s): e7283ef

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +102 -136
utils.py CHANGED
@@ -12,9 +12,9 @@ from pydub import AudioSegment, effects
12
  from transformers import pipeline
13
  import yt_dlp
14
  import tiktoken
15
- from groq import Groq # Ensure Groq client is imported
16
  import numpy as np
17
- import torch # Added to check CUDA availability
18
  import random
19
 
20
  class DialogueItem(BaseModel):
@@ -56,7 +56,7 @@ def extract_text_from_url(url):
56
  def pitch_shift(audio: AudioSegment, semitones: int) -> AudioSegment:
57
  """
58
  Shifts the pitch of an AudioSegment by a given number of semitones.
59
- Positive semitones shift the pitch up, negative shift it down.
60
  """
61
  print(f"[LOG] Shifting pitch by {semitones} semitones.")
62
  new_sample_rate = int(audio.frame_rate * (2.0 ** (semitones / 12.0)))
@@ -83,7 +83,6 @@ def query_llm_for_additional_info(topic: str, existing_text: str) -> str:
83
  f"Existing Information: {existing_text}\n\n"
84
  "Please add more insightful details, facts, and perspectives to enhance the understanding of the topic."
85
  )
86
-
87
  groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
88
  try:
89
  response = groq_client.chat.completions.create(
@@ -95,14 +94,13 @@ def query_llm_for_additional_info(topic: str, existing_text: str) -> str:
95
  except Exception as e:
96
  print("[ERROR] Groq API error during fallback:", e)
97
  return ""
98
-
99
  additional_info = response.choices[0].message.content.strip()
100
  print("[DEBUG] Additional information from LLM:")
101
  print(additional_info)
102
  return additional_info
103
 
104
  def research_topic(topic: str) -> str:
105
- # Sources:
106
  sources = {
107
  "BBC": "https://feeds.bbci.co.uk/news/rss.xml",
108
  "CNN": "http://rss.cnn.com/rss/edition.rss",
@@ -116,6 +114,7 @@ def research_topic(topic: str) -> str:
116
 
117
  summary_parts = []
118
 
 
119
  wiki_summary = fetch_wikipedia_summary(topic)
120
  if wiki_summary:
121
  summary_parts.append(f"From Wikipedia: {wiki_summary}")
@@ -137,7 +136,7 @@ def research_topic(topic: str) -> str:
137
  continue
138
 
139
  aggregated_info = " ".join(summary_parts)
140
- print("[DEBUG] Aggregated information from primary sources.")
141
  print(aggregated_info)
142
 
143
  if not is_sufficient(aggregated_info):
@@ -159,7 +158,7 @@ def fetch_wikipedia_summary(topic: str) -> str:
159
  search_url = f"https://en.wikipedia.org/w/api.php?action=opensearch&search={requests.utils.quote(topic)}&limit=1&namespace=0&format=json"
160
  resp = requests.get(search_url)
161
  if resp.status_code != 200:
162
- print(f"[ERROR] Failed to fetch Wikipedia search for topic: {topic}")
163
  return ""
164
  data = resp.json()
165
  if len(data) > 1 and data[1]:
@@ -169,7 +168,7 @@ def fetch_wikipedia_summary(topic: str) -> str:
169
  if s_resp.status_code == 200:
170
  s_data = s_resp.json()
171
  if "extract" in s_data:
172
- print("[LOG] Wikipedia summary fetched.")
173
  return s_data["extract"]
174
  return ""
175
  except Exception as e:
@@ -181,17 +180,19 @@ def fetch_rss_feed(feed_url: str) -> list:
181
  try:
182
  resp = requests.get(feed_url)
183
  if resp.status_code != 200:
184
- print(f"[ERROR] Failed to fetch RSS feed {feed_url}")
185
  return []
186
  soup = BeautifulSoup(resp.content, "html.parser")
187
  items = soup.find_all("item")
188
- print(f"[LOG] Number of items: {len(items)} from {feed_url}")
189
  return items
190
  except Exception as e:
191
  print(f"[ERROR] Exception fetching RSS feed {feed_url}: {e}")
192
  return []
193
 
194
  def find_relevant_article(items, topic: str, min_match=2) -> tuple:
 
 
 
195
  print("[LOG] Finding relevant articles...")
196
  keywords = re.findall(r'\w+', topic.lower())
197
  for item in items:
@@ -201,12 +202,12 @@ def find_relevant_article(items, topic: str, min_match=2) -> tuple:
201
  matches = sum(1 for kw in keywords if kw in text)
202
  if matches >= min_match:
203
  link = item.find("link").get_text().strip() if item.find("link") else ""
204
- print(f"[LOG] Relevant article: {title}")
205
  return title, description, link
206
  return None, None, None
207
 
208
  def fetch_article_text(link: str) -> str:
209
- print("[LOG] Fetching article text:", link)
210
  if not link:
211
  return ""
212
  try:
@@ -262,9 +263,6 @@ def generate_script(system_prompt: str, input_text: str, tone: str, target_lengt
262
  "}"
263
  )
264
 
265
- print("[LOG] Sending prompt to Groq:")
266
- print(prompt)
267
-
268
  try:
269
  response = groq_client.chat.completions.create(
270
  messages=[{"role": "system", "content": prompt}],
@@ -285,152 +283,120 @@ def generate_script(system_prompt: str, input_text: str, tone: str, target_lengt
285
  data = json.loads(json_str)
286
  return Dialogue(**data)
287
 
288
- # -------------------------------------------------------------
289
- # Helper function: Insert random filler words, extra punctuation
290
- # BUT we'll handle that chunk by chunk (see below).
291
- # -------------------------------------------------------------
292
- def _make_text_sound_more_human(text: str) -> str:
293
  """
294
- Inserts small filler words and modifies punctuation
295
- for more natural-sounding speech.
 
296
  """
297
- fillers = ["uh", "um", "ah", "hmm", "you know", "well", "I mean", "like"]
298
- # Insert filler sometimes at start or middle:
299
- if text and random.random() < 0.4:
300
- filler = random.choice(fillers)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  if random.random() < 0.5:
302
- text = f"{filler}, {text}"
303
  else:
304
- words = text.split()
305
- mid = len(words) // 2
306
- text = " ".join(words[:mid] + [f"{filler},"] + words[mid:])
307
-
308
- # Possibly turn periods into "..." to force a pause
309
- text = re.sub(r'\.(\s|$)', lambda m: "..." + m.group(1), text)
310
-
311
- # Possibly turn "?" into "?!" or "!!" for exclamation
312
- if random.random() < 0.2:
313
- text = text.replace("?", "?!")
314
- if random.random() < 0.2:
315
- text = text.replace("!", "!!")
316
 
317
  return text.strip()
318
 
319
- def _split_into_sentences_and_phrases(text: str):
320
  """
321
- Splits the text into smaller chunks so each chunk can be TTS-ed
322
- individually for better pacing. We'll look for ., !, or ?
323
- as sentence boundaries. Also splits by commas for short phrases.
324
  """
325
- # Split by sentence enders with a lookbehind to keep delimiters separate.
326
- # We can then further split by commas if the sentence is long.
327
- # E.g. "Hello there. This is a test?" => ["Hello there.", "This is a test?"]
328
- # Then if "Hello there." is too big, we might split by commas as well.
329
- boundaries = re.split(r'([.?!])', text)
330
-
331
- # Rebuild into "sentence + punctuation" pairs
332
- phrases = []
333
- for i in range(0, len(boundaries), 2):
334
- if i + 1 < len(boundaries):
335
- chunk = (boundaries[i] + boundaries[i+1]).strip()
336
- else:
337
- chunk = boundaries[i].strip()
338
- if chunk:
339
- # Now optionally split chunk by commas if it's too big
340
- subparts = chunk.split(',')
341
- # If there's more than 1 subpart, rejoin them carefully so each subpart can be TTS-ed on its own
342
- for idx, sp in enumerate(subparts):
343
- part = sp.strip()
344
- if part:
345
- # Re-add comma except on the last one
346
- if idx < len(subparts) - 1:
347
- part += ","
348
- phrases.append(part)
349
- return phrases
350
 
351
  def generate_audio_mp3(text: str, speaker: str) -> str:
 
 
 
352
  try:
353
  print(f"[LOG] Generating audio for speaker: {speaker}")
354
 
355
- # Step 1: Split text into small pieces (phrases, sentences)
356
- fragments = _split_into_sentences_and_phrases(text)
357
-
358
- # Step 2: For each fragment, transform it to be more human-like, TTS it, then combine
359
- all_segments = []
360
- for frag in fragments:
361
- if not frag.strip():
362
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
 
364
- # Make the chunk more "human"
365
- human_chunk = _make_text_sound_more_human(frag)
 
366
 
367
- # TTS this chunk
368
- mp3_path = _tts_chunk(human_chunk, speaker)
369
- seg = AudioSegment.from_file(mp3_path, format="mp3")
370
- seg = effects.normalize(seg)
371
- all_segments.append(seg)
372
 
373
- # Clean up
374
- if os.path.exists(mp3_path):
375
- os.remove(mp3_path)
376
 
377
- if not all_segments:
378
- raise ValueError("No audio segments produced.")
379
 
380
- # Step 3: Combine segments with a short silence between
381
- final_audio = all_segments[0]
382
- short_silence = AudioSegment.silent(duration=300) # 300ms silence
383
- for seg in all_segments[1:]:
384
- final_audio = final_audio + short_silence + seg
385
 
386
- # Step 4: Save combined
387
- final_mp3_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
388
- final_audio.export(final_mp3_path, format="mp3")
389
- print("[LOG] Combined audio saved at:", final_mp3_path)
390
  return final_mp3_path
391
-
392
  except Exception as e:
393
  print("[ERROR] Error generating audio:", e)
394
  raise ValueError(f"Error generating audio: {str(e)}")
395
 
396
- def _tts_chunk(text: str, speaker: str) -> str:
397
  """
398
- Helper function to do TTS on a single chunk of text
399
- (so we can call multiple times).
400
  """
401
- deepgram_api_url = "https://api.deepgram.com/v1/speak"
402
- params = {
403
- "model": "aura-asteria-en", # default female
404
- }
405
- if speaker == "John":
406
- params["model"] = "aura-perseus-en"
407
-
408
- headers = {
409
- "Accept": "audio/mpeg",
410
- "Content-Type": "application/json",
411
- "Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
412
- }
413
- body = {
414
- "text": text
415
- }
416
-
417
- response = requests.post(deepgram_api_url, params=params, headers=headers, json=body, stream=True)
418
- if response.status_code != 200:
419
- raise ValueError(f"Deepgram TTS error: {response.status_code}, {response.text}")
420
-
421
- content_type = response.headers.get('Content-Type', '')
422
- if 'audio/mpeg' not in content_type:
423
- raise ValueError("Unexpected Content-Type from Deepgram.")
424
-
425
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
426
- for chunk in response.iter_content(chunk_size=8192):
427
- if chunk:
428
- mp3_file.write(chunk)
429
- mp3_path = mp3_file.name
430
-
431
- return mp3_path
432
-
433
- def transcribe_youtube_video(video_url: str) -> str:
434
  print("[LOG] Transcribing YouTube video:", video_url)
435
  fd, audio_file = tempfile.mkstemp(suffix=".wav")
436
  os.close(fd)
@@ -464,4 +430,4 @@ def transcribe_youtube_video(video_url: str) -> str:
464
  finally:
465
  if os.path.exists(audio_file):
466
  os.remove(audio_file)
467
- print(f"[LOG] Removed temp audio file: {audio_file}")
 
12
  from transformers import pipeline
13
  import yt_dlp
14
  import tiktoken
15
+ from groq import Groq
16
  import numpy as np
17
+ import torch
18
  import random
19
 
20
  class DialogueItem(BaseModel):
 
56
  def pitch_shift(audio: AudioSegment, semitones: int) -> AudioSegment:
57
  """
58
  Shifts the pitch of an AudioSegment by a given number of semitones.
59
+ Positive semitones shift the pitch up, negative shifts it down.
60
  """
61
  print(f"[LOG] Shifting pitch by {semitones} semitones.")
62
  new_sample_rate = int(audio.frame_rate * (2.0 ** (semitones / 12.0)))
 
83
  f"Existing Information: {existing_text}\n\n"
84
  "Please add more insightful details, facts, and perspectives to enhance the understanding of the topic."
85
  )
 
86
  groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
87
  try:
88
  response = groq_client.chat.completions.create(
 
94
  except Exception as e:
95
  print("[ERROR] Groq API error during fallback:", e)
96
  return ""
 
97
  additional_info = response.choices[0].message.content.strip()
98
  print("[DEBUG] Additional information from LLM:")
99
  print(additional_info)
100
  return additional_info
101
 
102
  def research_topic(topic: str) -> str:
103
+ # News sources
104
  sources = {
105
  "BBC": "https://feeds.bbci.co.uk/news/rss.xml",
106
  "CNN": "http://rss.cnn.com/rss/edition.rss",
 
114
 
115
  summary_parts = []
116
 
117
+ # Wikipedia summary
118
  wiki_summary = fetch_wikipedia_summary(topic)
119
  if wiki_summary:
120
  summary_parts.append(f"From Wikipedia: {wiki_summary}")
 
136
  continue
137
 
138
  aggregated_info = " ".join(summary_parts)
139
+ print("[DEBUG] Aggregated info from primary sources:")
140
  print(aggregated_info)
141
 
142
  if not is_sufficient(aggregated_info):
 
158
  search_url = f"https://en.wikipedia.org/w/api.php?action=opensearch&search={requests.utils.quote(topic)}&limit=1&namespace=0&format=json"
159
  resp = requests.get(search_url)
160
  if resp.status_code != 200:
161
+ print(f"[ERROR] Failed to fetch Wikipedia search results for topic: {topic}")
162
  return ""
163
  data = resp.json()
164
  if len(data) > 1 and data[1]:
 
168
  if s_resp.status_code == 200:
169
  s_data = s_resp.json()
170
  if "extract" in s_data:
171
+ print("[LOG] Wikipedia summary fetched successfully.")
172
  return s_data["extract"]
173
  return ""
174
  except Exception as e:
 
180
  try:
181
  resp = requests.get(feed_url)
182
  if resp.status_code != 200:
183
+ print(f"[ERROR] Failed to fetch RSS feed: {feed_url}")
184
  return []
185
  soup = BeautifulSoup(resp.content, "html.parser")
186
  items = soup.find_all("item")
 
187
  return items
188
  except Exception as e:
189
  print(f"[ERROR] Exception fetching RSS feed {feed_url}: {e}")
190
  return []
191
 
192
  def find_relevant_article(items, topic: str, min_match=2) -> tuple:
193
+ """
194
+ Searches for relevant articles based on topic keywords.
195
+ """
196
  print("[LOG] Finding relevant articles...")
197
  keywords = re.findall(r'\w+', topic.lower())
198
  for item in items:
 
202
  matches = sum(1 for kw in keywords if kw in text)
203
  if matches >= min_match:
204
  link = item.find("link").get_text().strip() if item.find("link") else ""
205
+ print(f"[LOG] Relevant article found: {title}")
206
  return title, description, link
207
  return None, None, None
208
 
209
  def fetch_article_text(link: str) -> str:
210
+ print("[LOG] Fetching article text from:", link)
211
  if not link:
212
  return ""
213
  try:
 
263
  "}"
264
  )
265
 
 
 
 
266
  try:
267
  response = groq_client.chat.completions.create(
268
  messages=[{"role": "system", "content": prompt}],
 
283
  data = json.loads(json_str)
284
  return Dialogue(**data)
285
 
286
+ # --------------------------------------------------------------
287
+ # TTS Preprocessing to handle decimals, hyphens, and selective fillers
288
+ # --------------------------------------------------------------
289
+ def _preprocess_text_for_tts(text: str) -> str:
 
290
  """
291
+ 1) Convert decimals to spelled-out words ("3.14" -> "three point one four").
292
+ 2) Replace hyphens with spaces.
293
+ 3) Insert filler words only in certain contexts (like "I think", or after '?').
294
  """
295
+ # 1) Convert decimals
296
+ def convert_decimal(m):
297
+ number_str = m.group() # e.g. "3.14"
298
+ parts = number_str.split('.')
299
+ whole_part = _spell_digits(parts[0]) # "three"
300
+ decimal_part = " ".join(_spell_digits(d) for d in parts[1])
301
+ return f"{whole_part} point {decimal_part}"
302
+
303
+ text = re.sub(r"\d+\.\d+", convert_decimal, text)
304
+
305
+ # 2) Hyphens -> spaces
306
+ text = re.sub(r"-", " ", text)
307
+
308
+ # 3) Targeted filler insertion
309
+ # a) Insert "uh" after "I think" or "I'm not sure", etc. (very naive approach)
310
+ text = re.sub(
311
+ r"(I think|I'm not sure|I guess)([,.]?\s)",
312
+ r"\1, uh,\2",
313
+ text,
314
+ flags=re.IGNORECASE
315
+ )
316
+
317
+ # b) If there's a "?" then sometimes insert "um," right after it
318
+ text = text.replace("?", "?<QMARK>")
319
+ def insert_filler_qmark(m):
320
  if random.random() < 0.5:
321
+ return "? um,"
322
  else:
323
+ return "?"
324
+ text = re.sub(r"\?<QMARK>", insert_filler_qmark, text)
 
 
 
 
 
 
 
 
 
 
325
 
326
  return text.strip()
327
 
328
+ def _spell_digits(d: str) -> str:
329
  """
330
+ Convert each digit '3' -> 'three', '5' -> 'five', etc.
 
 
331
  """
332
+ digit_map = {
333
+ '0': 'zero', '1': 'one', '2': 'two', '3': 'three',
334
+ '4': 'four','5': 'five','6': 'six','7': 'seven',
335
+ '8': 'eight','9': 'nine'
336
+ }
337
+ return " ".join(digit_map[ch] for ch in d if ch in digit_map)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
 
339
def generate_audio_mp3(text: str, speaker: str) -> str:
    """
    Main TTS function: synthesize *text* via the Deepgram speak API.

    Args:
        text: Text to speak; first run through _preprocess_text_for_tts
            (decimal spelling, hyphen removal, filler insertion).
        speaker: Speaker name; "John" selects the "aura-perseus-en" voice,
            anything else uses the default "aura-asteria-en" voice.

    Returns:
        Path to a temporary MP3 file with volume-normalized audio.
        The caller is responsible for deleting this file.

    Raises:
        ValueError: On any Deepgram API or audio-processing failure.
    """
    try:
        print(f"[LOG] Generating audio for speaker: {speaker}")

        # Preprocess text (decimal/hyphen/fillers)
        processed_text = _preprocess_text_for_tts(text)

        # Define Deepgram API endpoint and voice selection.
        deepgram_api_url = "https://api.deepgram.com/v1/speak"
        params = {
            "model": "aura-asteria-en",  # default female
        }
        if speaker == "John":
            params["model"] = "aura-perseus-en"

        headers = {
            "Accept": "audio/mpeg",
            "Content-Type": "application/json",
            "Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
        }
        body = {
            "text": processed_text
        }

        print("[LOG] Sending TTS request to Deepgram...")
        # Context manager ensures the streamed connection is released even
        # when an error is raised before the body is fully consumed
        # (stream=True responses must be closed or drained explicitly).
        with requests.post(deepgram_api_url, params=params, headers=headers,
                           json=body, stream=True) as response:
            if response.status_code != 200:
                raise ValueError(f"Deepgram TTS error: {response.status_code}, {response.text}")

            content_type = response.headers.get('Content-Type', '')
            if 'audio/mpeg' not in content_type:
                raise ValueError("Unexpected Content-Type from Deepgram.")

            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        mp3_file.write(chunk)
                mp3_path = mp3_file.name

        # Normalize volume
        audio_seg = AudioSegment.from_file(mp3_path, format="mp3")
        audio_seg = effects.normalize(audio_seg)

        # mkstemp + immediate close avoids the dangling open handle left
        # by NamedTemporaryFile(delete=False, ...).name.
        fd, final_mp3_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        audio_seg.export(final_mp3_path, format="mp3")

        # Remove the intermediate (un-normalized) download.
        if os.path.exists(mp3_path):
            os.remove(mp3_path)

        return final_mp3_path
    except Exception as e:
        print("[ERROR] Error generating audio:", e)
        raise ValueError(f"Error generating audio: {str(e)}")
395
 
396
+ def transcribe_youtube_video(video_url: str) -> str:
397
  """
398
+ Downloads and transcribes the audio from a YouTube video using Whisper (pipeline).
 
399
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
  print("[LOG] Transcribing YouTube video:", video_url)
401
  fd, audio_file = tempfile.mkstemp(suffix=".wav")
402
  os.close(fd)
 
430
  finally:
431
  if os.path.exists(audio_file):
432
  os.remove(audio_file)
433
+ print(f"[LOG] Removed temporary audio file: {audio_file}")