siddhartharyaai committed
Commit 424917e · verified · 1 Parent(s): 89feeb0

Update utils.py

Files changed (1)
  1. utils.py +138 -184
utils.py CHANGED
@@ -66,10 +66,6 @@ def pitch_shift(audio: AudioSegment, semitones: int) -> AudioSegment:
 def is_sufficient(text: str, min_word_count: int = 500) -> bool:
     """
     Determines if the fetched information meets the sufficiency criteria.
-
-    :param text: Aggregated text from primary sources.
-    :param min_word_count: Minimum number of words required.
-    :return: True if sufficient, False otherwise.
     """
     word_count = len(text.split())
     print(f"[DEBUG] Aggregated word count: {word_count}")
@@ -78,13 +74,8 @@ def is_sufficient(text: str, min_word_count: int = 500) -> bool:
 def query_llm_for_additional_info(topic: str, existing_text: str) -> str:
     """
     Queries the Groq API to retrieve additional relevant information from the LLM's knowledge base.
-
-    :param topic: The research topic.
-    :param existing_text: The text already gathered from primary sources.
-    :return: Additional relevant information as a string.
     """
     print("[LOG] Querying LLM for additional information.")
-    # Define the system prompt for the LLM
     system_prompt = (
         "You are an AI assistant with extensive knowledge up to 2023-10. "
         "Provide additional relevant information on the following topic based on your knowledge base.\n\n"
@@ -94,7 +85,6 @@ def query_llm_for_additional_info(topic: str, existing_text: str) -> str:
     )
 
     groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
-
     try:
         response = groq_client.chat.completions.create(
            messages=[{"role": "system", "content": system_prompt}],
@@ -126,18 +116,15 @@ def research_topic(topic: str) -> str:
 
     summary_parts = []
 
-    # Wikipedia summary
     wiki_summary = fetch_wikipedia_summary(topic)
     if wiki_summary:
         summary_parts.append(f"From Wikipedia: {wiki_summary}")
 
-    # For each news RSS
     for name, url in sources.items():
         try:
             items = fetch_rss_feed(url)
             if not items:
                 continue
-            # Use simple keyword matching
             title, desc, link = find_relevant_article(items, topic, min_match=2)
             if link:
                 article_text = fetch_article_text(link)
@@ -154,15 +141,14 @@ def research_topic(topic: str) -> str:
     print(aggregated_info)
 
     if not is_sufficient(aggregated_info):
-        print("[LOG] Insufficient information from primary sources. Initiating fallback to LLM.")
+        print("[LOG] Insufficient info from primary sources. Fallback to LLM.")
         additional_info = query_llm_for_additional_info(topic, aggregated_info)
         if additional_info:
             aggregated_info += " " + additional_info
         else:
-            print("[ERROR] Failed to retrieve additional information from LLM.")
+            print("[ERROR] Failed to retrieve additional info from LLM.")
 
     if not aggregated_info:
-        print("[LOG] No information found for the topic.")
         return f"Sorry, I couldn't find recent information on '{topic}'."
 
     return aggregated_info
@@ -170,24 +156,21 @@ def research_topic(topic: str) -> str:
 def fetch_wikipedia_summary(topic: str) -> str:
     print("[LOG] Fetching Wikipedia summary for:", topic)
     try:
-        # 1. Search for the topic
         search_url = f"https://en.wikipedia.org/w/api.php?action=opensearch&search={requests.utils.quote(topic)}&limit=1&namespace=0&format=json"
         resp = requests.get(search_url)
         if resp.status_code != 200:
-            print(f"[ERROR] Failed to fetch Wikipedia search results for topic: {topic}")
+            print(f"[ERROR] Failed to fetch Wikipedia search for topic: {topic}")
             return ""
         data = resp.json()
         if len(data) > 1 and data[1]:
             title = data[1][0]
-            # 2. Fetch summary
             summary_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{requests.utils.quote(title)}"
             s_resp = requests.get(summary_url)
             if s_resp.status_code == 200:
                 s_data = s_resp.json()
                 if "extract" in s_data:
-                    print("[LOG] Wikipedia summary fetched successfully.")
+                    print("[LOG] Wikipedia summary fetched.")
                     return s_data["extract"]
-        print("[LOG] No Wikipedia summary found for topic:", topic)
         return ""
     except Exception as e:
         print(f"[ERROR] Exception during Wikipedia summary fetch: {e}")
@@ -198,55 +181,42 @@ def fetch_rss_feed(feed_url: str) -> list:
     try:
         resp = requests.get(feed_url)
         if resp.status_code != 200:
-            print(f"[ERROR] Failed to fetch RSS feed: {feed_url} with status code {resp.status_code}")
+            print(f"[ERROR] Failed to fetch RSS feed {feed_url}")
             return []
         soup = BeautifulSoup(resp.content, "html.parser")
         items = soup.find_all("item")
-        print(f"[LOG] Number of items fetched from {feed_url}: {len(items)}")
+        print(f"[LOG] Number of items: {len(items)} from {feed_url}")
         return items
     except Exception as e:
-        print(f"[ERROR] Exception occurred while fetching RSS feed {feed_url}: {e}")
+        print(f"[ERROR] Exception fetching RSS feed {feed_url}: {e}")
         return []
 
 def find_relevant_article(items, topic: str, min_match=2) -> tuple:
-    """
-    Searches for relevant articles based on topic keywords.
-    :param items: List of RSS feed items
-    :param topic: Topic string
-    :param min_match: Minimum number of keyword matches required
-    :return: (title, description, link) or (None, None, None)
-    """
     print("[LOG] Finding relevant articles...")
     keywords = re.findall(r'\w+', topic.lower())
-    print(f"[LOG] Topic keywords: {keywords}")
-
     for item in items:
         title = item.find("title").get_text().strip() if item.find("title") else ""
         description = item.find("description").get_text().strip() if item.find("description") else ""
         text = f"{title.lower()} {description.lower()}"
         matches = sum(1 for kw in keywords if kw in text)
-        print(f"[DEBUG] Checking article: '{title}' | Matches: {matches}/{len(keywords)}")
         if matches >= min_match:
             link = item.find("link").get_text().strip() if item.find("link") else ""
-            print(f"[LOG] Relevant article found: {title}")
+            print(f"[LOG] Relevant article: {title}")
             return title, description, link
-    print("[LOG] No relevant articles found based on the current matching criteria.")
     return None, None, None
 
 def fetch_article_text(link: str) -> str:
-    print("[LOG] Fetching article text from:", link)
+    print("[LOG] Fetching article text:", link)
     if not link:
-        print("[LOG] No link provided for fetching article text.")
         return ""
     try:
         resp = requests.get(link)
         if resp.status_code != 200:
-            print(f"[ERROR] Failed to fetch article from link: {link} with status code {resp.status_code}")
+            print(f"[ERROR] Failed to fetch article with status {resp.status_code}")
             return ""
         soup = BeautifulSoup(resp.text, 'html.parser')
         paragraphs = soup.find_all("p")
         text = " ".join(p.get_text() for p in paragraphs[:5])
-        print("[LOG] Article text fetched successfully.")
         return text.strip()
     except Exception as e:
         print(f"[ERROR] Error fetching article text: {e}")
@@ -270,7 +240,6 @@ def generate_script(system_prompt: str, input_text: str, tone: str, target_lengt
         "Casual": "like a conversation between close friends, relaxed and informal",
         "Youthful": "like how teenagers might chat, energetic and lively"
     }
-
     chosen_tone = tone_description.get(tone, "casual")
 
     prompt = (
@@ -292,6 +261,7 @@ def generate_script(system_prompt: str, input_text: str, tone: str, target_lengt
         " ]\n"
         "}"
     )
+
     print("[LOG] Sending prompt to Groq:")
     print(prompt)
 
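Because the prompt above demands a bare JSON object, the parsing rewrite in the next hunk simply slices from the first '{' to the last '}' instead of stripping markdown fences first; note it also drops the json.JSONDecodeError handler, so malformed JSON now surfaces as a raw JSONDecodeError rather than a ValueError. A minimal sketch of the extraction on a typical chatty reply:

    import json

    raw = 'Sure! Here is the dialogue:\n```json\n{"dialogue": [{"speaker": "Jane", "text": "Hi!"}]}\n```'
    start, end = raw.find('{'), raw.rfind('}')
    data = json.loads(raw[start:end + 1])  # chatter and fences outside the braces are ignored
    print(data["dialogue"][0]["speaker"])  # Jane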
@@ -303,176 +273,163 @@ def generate_script(system_prompt: str, input_text: str, tone: str, target_lengt
             temperature=0.7
         )
     except Exception as e:
-        print("[ERROR] Groq API error:", e)
         raise ValueError(f"Error communicating with Groq API: {str(e)}")
 
     raw_content = response.choices[0].message.content.strip()
-    print("[DEBUG] Raw API response content:")
-    print(raw_content)
-
-    content = raw_content.replace('```json', '').replace('```', '').strip()
-    start_index = content.find('{')
-    end_index = content.rfind('}')
-
+    start_index = raw_content.find('{')
+    end_index = raw_content.rfind('}')
     if start_index == -1 or end_index == -1:
-        print("[ERROR] Failed to parse dialogue. No JSON found.")
-        print("[ERROR] Entire response content:")
-        print(content)
-        raise ValueError("Failed to parse dialogue: Could not find JSON object in response.")
+        raise ValueError("Failed to parse dialogue: No JSON found.")
 
-    json_str = content[start_index:end_index+1].strip()
-
-    print("[DEBUG] Extracted JSON string:")
-    print(json_str)
-
-    try:
-        data = json.loads(json_str)
-        print("[LOG] Script generated successfully.")
-        return Dialogue(**data)
-    except json.JSONDecodeError as e:
-        print("[ERROR] JSON decoding failed:", e)
-        print("[ERROR] Response content causing failure:")
-        print(content)
-        raise ValueError(f"Failed to parse dialogue: {str(e)}")
-
-# ----------------------------------------------------------------------
-# We ONLY modify the generate_audio_mp3 flow below to insert random filler words
-# and modify punctuation (.,!?) for more natural TTS pauses and intonation.
-# ----------------------------------------------------------------------
+    json_str = raw_content[start_index:end_index+1].strip()
+    data = json.loads(json_str)
+    return Dialogue(**data)
 
+# -------------------------------------------------------------
+# Helper function: Insert random filler words, extra punctuation
+# BUT we'll handle that chunk by chunk (see below).
+# -------------------------------------------------------------
 def _make_text_sound_more_human(text: str) -> str:
     """
-    Inserts small filler words and adds extra punctuation to encourage
-    natural-sounding pauses at commas, periods, exclamations, and question marks.
+    Inserts small filler words and modifies punctuation
+    for more natural-sounding speech.
     """
-
-    # Filler words or short phrases
     fillers = ["uh", "um", "ah", "hmm", "you know", "well", "I mean", "like"]
+    # Insert filler sometimes at start or middle:
+    if text and random.random() < 0.4:
+        filler = random.choice(fillers)
+        if random.random() < 0.5:
+            text = f"{filler}, {text}"
+        else:
+            words = text.split()
+            mid = len(words) // 2
+            text = " ".join(words[:mid] + [f"{filler},"] + words[mid:])
 
-    # 1) Split text by punctuation but keep the punctuation in the result
-    #    We'll handle ".", "?", "!", and commas:
-    pattern = r'([.,?!])'
-    parts = re.split(pattern, text)
-
-    # 2) Process each chunk, occasionally inserting filler words or extra punctuation
-    processed_chunks = []
-    for i in range(len(parts)):
-        chunk = parts[i].strip()
-
-        # If the chunk is punctuation, keep it
-        if chunk in [".", ",", "?", "!"]:
-            # Possibly turn "." into "..." or add "..." after "?"
-            if chunk == "." and random.random() < 0.5:
-                chunk = "..."
-            elif chunk == "?" and random.random() < 0.3:
-                # Sometimes add "?!"
-                chunk = "?!"
-            elif chunk == "!" and random.random() < 0.3:
-                # Sometimes add "!!" for more emphasis
-                chunk = "!!"
-            processed_chunks.append(chunk)
-            continue
-
-        # Sometimes insert a filler at the start or mid-chunk
-        if chunk and random.random() < 0.3:
-            filler = random.choice(fillers)
-            # Insert at the beginning or in the middle
-            if random.random() < 0.5:
-                chunk = f"{filler}, {chunk}"
-            else:
-                # Insert near the middle
-                words = chunk.split()
-                mid = len(words) // 2
-                chunk = " ".join(words[:mid] + [f"{filler},"] + words[mid:])
-
-        processed_chunks.append(chunk)
-
-    # 3) Rejoin them carefully with a space or nothing
-    #    We'll add a small space after punctuation, so TTS sees them as separate tokens
-    out_text = []
-    for i in range(len(processed_chunks)):
-        if i == 0:
-            out_text.append(processed_chunks[i])
-        else:
-            # If the previous chunk was punctuation or the current chunk is punctuation
-            if processed_chunks[i] in [".", "...", "?", "?!", "!", "!!", ","]:
-                out_text.append(processed_chunks[i])
-            else:
-                out_text.append(" " + processed_chunks[i])
-
-    final_text = "".join(out_text)
-    return final_text.strip()
+    # Possibly turn periods into "..." to force a pause
+    text = re.sub(r'\.(\s|$)', lambda m: "..." + m.group(1), text)
+
+    # Possibly turn "?" into "?!" or "!!" for exclamation
+    if random.random() < 0.2:
+        text = text.replace("?", "?!")
+    if random.random() < 0.2:
+        text = text.replace("!", "!!")
+
+    return text.strip()
+
+def _split_into_sentences_and_phrases(text: str):
+    """
+    Splits the text into smaller chunks so each chunk can be TTS-ed
+    individually for better pacing. We'll look for ., !, or ?
+    as sentence boundaries. Also splits by commas for short phrases.
+    """
+    # Split by sentence enders with a capture group to keep delimiters separate.
+    # We can then further split by commas if the sentence is long.
+    # E.g. "Hello there. This is a test?" => ["Hello there.", "This is a test?"]
+    # Then if "Hello there." is too big, we might split by commas as well.
+    boundaries = re.split(r'([.?!])', text)
+
+    # Rebuild into "sentence + punctuation" pairs
+    phrases = []
+    for i in range(0, len(boundaries), 2):
+        if i + 1 < len(boundaries):
+            chunk = (boundaries[i] + boundaries[i+1]).strip()
+        else:
+            chunk = boundaries[i].strip()
+        if chunk:
+            # Now optionally split chunk by commas if it's too big
+            subparts = chunk.split(',')
+            # If there's more than 1 subpart, rejoin them carefully so each subpart can be TTS-ed on its own
+            for idx, sp in enumerate(subparts):
+                part = sp.strip()
+                if part:
+                    # Re-add comma except on the last one
+                    if idx < len(subparts) - 1:
+                        part += ","
+                    phrases.append(part)
+    return phrases
 
 def generate_audio_mp3(text: str, speaker: str) -> str:
     try:
         print(f"[LOG] Generating audio for speaker: {speaker}")
 
-        # Make text more "human-like"
-        text = _make_text_sound_more_human(text)
-
-        # Define Deepgram API endpoint
-        deepgram_api_url = "https://api.deepgram.com/v1/speak"
-
-        # Prepare query parameters
-        params = {
-            "model": "aura-asteria-en",  # Default model; adjust if needed
-        }
-
-        # Override model if needed based on speaker
-        if speaker == "Jane":
-            params["model"] = "aura-asteria-en"
-        elif speaker == "John":
-            params["model"] = "aura-perseus-en"
-        else:
-            raise ValueError(f"Unknown speaker: {speaker}")
-
-        headers = {
-            "Accept": "audio/mpeg",
-            "Content-Type": "application/json",
-            "Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
-        }
-
-        body = {
-            "text": text
-        }
-
-        print("[LOG] Sending TTS request to Deepgram...")
-        response = requests.post(deepgram_api_url, params=params, headers=headers, json=body, stream=True)
-
-        if response.status_code != 200:
-            print(f"[ERROR] Deepgram TTS API returned status code {response.status_code}: {response.text}")
-            raise ValueError(f"Deepgram TTS API error: {response.status_code} - {response.text}")
-
-        content_type = response.headers.get('Content-Type', '')
-        if 'audio/mpeg' not in content_type:
-            print("[ERROR] Unexpected Content-Type received from Deepgram:", content_type)
-            print("[ERROR] Response content:", response.text)
-            raise ValueError("Unexpected Content-Type received from Deepgram.")
-
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
-            for chunk in response.iter_content(chunk_size=8192):
-                if chunk:
-                    mp3_file.write(chunk)
-            mp3_temp_path = mp3_file.name
-        print(f"[LOG] Audio received from Deepgram and saved at: {mp3_temp_path}")
-
-        # Normalize audio volume
-        audio_seg = AudioSegment.from_file(mp3_temp_path, format="mp3")
-        audio_seg = effects.normalize(audio_seg)
+        # Step 1: Split text into small pieces (phrases, sentences)
+        fragments = _split_into_sentences_and_phrases(text)
+
+        # Step 2: For each fragment, transform it to be more human-like, TTS it, then combine
+        all_segments = []
+        for frag in fragments:
+            if not frag.strip():
+                continue
+
+            # Make the chunk more "human"
+            human_chunk = _make_text_sound_more_human(frag)
+
+            # TTS this chunk
+            mp3_path = _tts_chunk(human_chunk, speaker)
+            seg = AudioSegment.from_file(mp3_path, format="mp3")
+            seg = effects.normalize(seg)
+            all_segments.append(seg)
+
+            # Clean up
+            if os.path.exists(mp3_path):
+                os.remove(mp3_path)
+
+        if not all_segments:
+            raise ValueError("No audio segments produced.")
+
+        # Step 3: Combine segments with a short silence between
+        final_audio = all_segments[0]
+        short_silence = AudioSegment.silent(duration=300)  # 300ms silence
+        for seg in all_segments[1:]:
+            final_audio = final_audio + short_silence + seg
 
+        # Step 4: Save combined
         final_mp3_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
-        audio_seg.export(final_mp3_path, format="mp3")
-        print("[LOG] Audio post-processed and saved at:", final_mp3_path)
-
-        if os.path.exists(mp3_temp_path):
-            os.remove(mp3_temp_path)
-            print(f"[LOG] Removed temporary MP3 file: {mp3_temp_path}")
-
+        final_audio.export(final_mp3_path, format="mp3")
+        print("[LOG] Combined audio saved at:", final_mp3_path)
         return final_mp3_path
+
     except Exception as e:
         print("[ERROR] Error generating audio:", e)
         raise ValueError(f"Error generating audio: {str(e)}")
 
+def _tts_chunk(text: str, speaker: str) -> str:
+    """
+    Helper function to do TTS on a single chunk of text
+    (so we can call multiple times).
+    """
+    deepgram_api_url = "https://api.deepgram.com/v1/speak"
+    params = {
+        "model": "aura-asteria-en",  # default female
+    }
+    if speaker == "John":
+        params["model"] = "aura-perseus-en"
+
+    headers = {
+        "Accept": "audio/mpeg",
+        "Content-Type": "application/json",
+        "Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
+    }
+    body = {
+        "text": text
+    }
+
+    response = requests.post(deepgram_api_url, params=params, headers=headers, json=body, stream=True)
+    if response.status_code != 200:
+        raise ValueError(f"Deepgram TTS error: {response.status_code}, {response.text}")
+
+    content_type = response.headers.get('Content-Type', '')
+    if 'audio/mpeg' not in content_type:
+        raise ValueError("Unexpected Content-Type from Deepgram.")
+
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
+        for chunk in response.iter_content(chunk_size=8192):
+            if chunk:
+                mp3_file.write(chunk)
+        mp3_path = mp3_file.name
+
+    return mp3_path
+
 def transcribe_youtube_video(video_url: str) -> str:
     print("[LOG] Transcribing YouTube video:", video_url)
     fd, audio_file = tempfile.mkstemp(suffix=".wav")
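
The reworked flow TTS-es each phrase separately and stitches the clips with pydub. A small offline sketch of the splitting behavior and the silence-joined concatenation, with sine-tone clips standing in for Deepgram output:

    from pydub import AudioSegment
    from pydub.generators import Sine

    # Per the new helper, "Well, hello there. How are you?" splits into
    # ["Well,", "hello there.", "How are you?"] before TTS.

    clips = [Sine(440).to_audio_segment(duration=400) for _ in range(3)]
    silence = AudioSegment.silent(duration=300)  # same 300 ms gap as generate_audio_mp3
    combined = clips[0]
    for seg in clips[1:]:
        combined = combined + silence + seg
    combined.export("combined.mp3", format="mp3")  # needs ffmpeg, as in the real flow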
@@ -494,20 +451,17 @@ def transcribe_youtube_video(video_url: str) -> str:
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             ydl.download([video_url])
     except yt_dlp.utils.DownloadError as e:
-        print("[ERROR] yt-dlp download error:", e)
         raise ValueError(f"Error downloading YouTube video: {str(e)}")
 
     print("[LOG] Audio downloaded at:", audio_file)
     try:
-        # Run ASR on the downloaded audio
         result = asr_pipeline(audio_file)
         transcript = result["text"]
         print("[LOG] Transcription completed.")
         return transcript.strip()
     except Exception as e:
-        print("[ERROR] ASR transcription error:", e)
         raise ValueError(f"Error transcribing YouTube video: {str(e)}")
     finally:
         if os.path.exists(audio_file):
             os.remove(audio_file)
-            print(f"[LOG] Removed temporary audio file: {audio_file}")
+            print(f"[LOG] Removed temp audio file: {audio_file}")
 