siddhartharyaai committed
Commit 388741c · verified · Parent(s): 555c75b

Update utils.py

Files changed (1):
    utils.py  +233 -1
utils.py CHANGED
@@ -259,4 +259,236 @@ def transcribe_youtube_video(video_url: str) -> str:
             raise ValueError(f"RapidAPI transcription error: {response.status_code}, {response.text}")
 
         data = response.json()
-        if not isinstance(data, list) or not data:
+        if not isinstance(data, list) or not data:
+            raise ValueError(f"Unexpected transcript format or empty transcript: {data}")
+
+        transcript_as_text = data[0].get('transcriptionAsText', '').strip()
+        if not transcript_as_text:
+            raise ValueError("transcriptionAsText field is missing or empty.")
+
+        print("[LOG] Transcript retrieval successful.")
+        print(f"[DEBUG] Transcript Length: {len(transcript_as_text)} characters.")
+        snippet = transcript_as_text[:200] + "..." if len(transcript_as_text) > 200 else transcript_as_text
+        print(f"[DEBUG] Transcript Snippet: {snippet}")
+
+        return transcript_as_text
+    except Exception as e:
+        print("[ERROR] RapidAPI transcription error:", e)
+        raise ValueError(f"Error transcribing YouTube video via RapidAPI: {str(e)}")
+
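+# generate_audio_mp3 routes TTS by the UI language selection: Deepgram for
+# "English (American)", Murf for the Indian English / Hindi / Hinglish
+# options. It returns the path to a loudness-normalized temporary MP3 file.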
+def generate_audio_mp3(text: str, speaker: str) -> str:
+    try:
+        import streamlit as st
+        print(f"[LOG] Generating audio for speaker: {speaker}")
+        language_selection = st.session_state.get("language_selection", "English (American)")
+        if language_selection == "English (American)":
+            print("[LOG] Using Deepgram for English (American)")
+            if speaker in ["John", "Jane"]:
+                processed_text = text
+            else:
+                processed_text = _preprocess_text_for_tts(text, speaker)
+            deepgram_api_url = "https://api.deepgram.com/v1/speak"
+            params = {"model": "aura-asteria-en"}
+            if speaker == "John":
+                params["model"] = "aura-zeus-en"
+            headers = {
+                "Accept": "audio/mpeg",
+                "Content-Type": "application/json",
+                "Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
+            }
+            body = {"text": processed_text}
+            response = requests.post(deepgram_api_url, params=params, headers=headers, json=body, stream=True)
+            if response.status_code != 200:
+                raise ValueError(f"Deepgram TTS error: {response.status_code}, {response.text}")
+            content_type = response.headers.get('Content-Type', '')
+            if 'audio/mpeg' not in content_type:
+                raise ValueError("Unexpected Content-Type from Deepgram.")
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:
+                        mp3_file.write(chunk)
+                mp3_path = mp3_file.name
+            audio_seg = AudioSegment.from_file(mp3_path, format="mp3")
+            audio_seg = effects.normalize(audio_seg)
+            final_mp3_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
+            audio_seg.export(final_mp3_path, format="mp3")
+            if os.path.exists(mp3_path):
+                os.remove(mp3_path)
+            return final_mp3_path
+        else:
+            print(f"[LOG] Using Murf API for language: {language_selection}")
+            if language_selection == "Hinglish":
+                from indic_transliteration.sanscript import transliterate, DEVANAGARI, IAST
+                text = transliterate(text, DEVANAGARI, IAST)
+            api_key = os.environ.get("MURF_API_KEY")
+            headers = {
+                "Content-Type": "application/json",
+                "Accept": "application/json",
+                "api-key": api_key
+            }
+            multi_native_locale = "hi-IN" if language_selection in ["Hinglish", "Hindi"] else "en-IN"
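+            # Voice mapping: John gets the male voice, everyone else the
+            # female one; Hindi and Hinglish share the hi-IN voice pair, and
+            # "English (Indian)" and all other selections use the en-IN pair.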
+            if language_selection in ["Hindi", "Hinglish"]:
+                voice_id = "hi-IN-kabir" if speaker == "John" else "hi-IN-shweta"
+            else:
+                voice_id = "en-IN-aarav" if speaker == "John" else "en-IN-isha"
+            payload = {
+                "audioDuration": 0,
+                "channelType": "MONO",
+                "encodeAsBase64": False,
+                "format": "WAV",
+                "modelVersion": "GEN2",
+                "multiNativeLocale": multi_native_locale,
+                "pitch": 0,
+                "pronunciationDictionary": {},
+                "rate": 0,
+                "sampleRate": 48000,
+                "style": "Conversational",
+                "text": text,
+                "variation": 1,
+                "voiceId": voice_id
+            }
+            response = requests.post("https://api.murf.ai/v1/speech/generate", headers=headers, json=payload)
+            if response.status_code != 200:
+                raise ValueError(f"Murf API error: {response.status_code}, {response.text}")
+            json_resp = response.json()
+            audio_url = json_resp.get("audioFile")
+            if not audio_url:
+                raise ValueError("No audio file URL returned by Murf API")
+            audio_response = requests.get(audio_url)
+            if audio_response.status_code != 200:
+                raise ValueError(f"Error fetching audio from {audio_url}")
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
+                wav_file.write(audio_response.content)
+                wav_path = wav_file.name
+            audio_seg = AudioSegment.from_file(wav_path, format="wav")
+            audio_seg = effects.normalize(audio_seg)
+            final_mp3_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
+            audio_seg.export(final_mp3_path, format="mp3")
+            os.remove(wav_path)
+            return final_mp3_path
+    except Exception as e:
+        print("[ERROR] Error generating audio:", e)
+        raise ValueError(f"Error generating audio: {str(e)}")
+
+def transcribe_youtube_video_OLD_YTDLP(video_url: str) -> str:
+    pass
+
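+# _preprocess_text_for_tts rewrites text so the TTS engines read it more
+# naturally: "No." becomes "Number", unfamiliar all-caps acronyms are spelled
+# out letter by letter, laugh/sigh/groan words become vocal cues, and
+# secondary speakers get hesitation pauses injected.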
+def _preprocess_text_for_tts(text: str, speaker: str) -> str:
+    text = re.sub(r"\bNo\.\b", "Number", text)
+    # An inline (?i) is only legal at the start of a pattern in Python 3.11+,
+    # so pass re.IGNORECASE via flags instead.
+    text = re.sub(r"\bSaaS\b", "sass", text, flags=re.IGNORECASE)
+    abbreviations_as_words = {"NASA", "NATO", "UNESCO"}
+    def insert_periods_for_abbrev(m):
+        abbr = m.group(0)
+        if abbr in abbreviations_as_words:
+            return abbr
+        return ".".join(list(abbr)) + "."
+    text = re.sub(r"\b([A-Z]{2,})\b", insert_periods_for_abbrev, text)
+    text = re.sub(r"\.\.", ".", text)
+    def remove_periods_for_tts(m):
+        return m.group().replace(".", " ").strip()
+    text = re.sub(r"[A-Z]\.[A-Z](?:\.[A-Z])*\.", remove_periods_for_tts, text)
+    text = re.sub(r"-", " ", text)
+    text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
+    text = re.sub(r"\bsigh\b", "(* sighs *)", text, flags=re.IGNORECASE)
+    text = re.sub(r"\b(groan|moan)\b", "(* groans *)", text, flags=re.IGNORECASE)
+    if speaker != "Jane":
+        def insert_thinking_pause(m):
+            word = m.group(1)
+            if random.random() < 0.3:
+                filler = random.choice(['hmm,', 'well,', 'let me see,'])
+                return f"{word}..., {filler}"
+            else:
+                return f"{word}...,"
+        keywords_pattern = r"\b(important|significant|crucial|point|topic)\b"
+        text = re.sub(keywords_pattern, insert_thinking_pause, text, flags=re.IGNORECASE)
+        conj_pattern = r"\b(and|but|so|because|however)\b"
+        text = re.sub(conj_pattern, lambda m: f"{m.group()}...", text, flags=re.IGNORECASE)
+    text = re.sub(r"\b(uh|um|ah)\b", "", text, flags=re.IGNORECASE)
+    def capitalize_match(m):
+        return m.group().upper()
+    text = re.sub(r'(^\s*\w)|([.!?]\s*\w)', capitalize_match, text)
+    return text.strip()
+
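+# Example: _spell_digits("2024") -> "two zero two four"; non-digits are dropped.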
+def _spell_digits(d: str) -> str:
+    digit_map = {
+        '0': 'zero', '1': 'one', '2': 'two', '3': 'three',
+        '4': 'four', '5': 'five', '6': 'six', '7': 'seven',
+        '8': 'eight', '9': 'nine'
+    }
+    return " ".join(digit_map[ch] for ch in d if ch in digit_map)
+
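+# mix_with_bg_music drops the music 18 dB below its source level, loops it to
+# cover the speech plus a 2-second lead-in, and overlays the speech starting
+# at the 2-second mark. If the music file fails to load, the speech is
+# returned unmixed.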
+def mix_with_bg_music(spoken: AudioSegment, custom_music_path=None) -> AudioSegment:
+    if custom_music_path:
+        music_path = custom_music_path
+    else:
+        music_path = "bg_music.mp3"
+
+    try:
+        bg_music = AudioSegment.from_file(music_path, format="mp3")
+    except Exception as e:
+        print("[ERROR] Failed to load background music:", e)
+        return spoken
+
+    bg_music = bg_music - 18.0
+    total_length_ms = len(spoken) + 2000
+    looped_music = AudioSegment.empty()
+    while len(looped_music) < total_length_ms:
+        looped_music += bg_music
+    looped_music = looped_music[:total_length_ms]
+    final_mix = looped_music.overlay(spoken, position=2000)
+    return final_mix
+
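+# call_groq_api_for_qa sends a single-turn prompt to Groq's OpenAI-compatible
+# chat endpoint; on any failure it returns a JSON-encoded fallback line so the
+# caller's Q&A flow still gets a parsable speaker/text object.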
+def call_groq_api_for_qa(system_prompt: str) -> str:
+    # Kept in use; model changed.
+    try:
+        headers = {
+            "Authorization": f"Bearer {os.environ.get('GROQ_API_KEY')}",  # Groq API key
+            "Content-Type": "application/json",
+            "Accept": "application/json"
+        }
+        data = {
+            "model": "deepseek-r1-distill-llama-70b",  # using DeepSeek via Groq
+            "messages": [{"role": "user", "content": system_prompt}],
+            "max_tokens": 512,
+            "temperature": 0.7
+        }
+        # Groq's OpenAI-compatible chat completions endpoint
+        response = requests.post("https://api.groq.com/openai/v1/chat/completions",
+                                 headers=headers, data=json.dumps(data))
+        response.raise_for_status()
+        return response.json()["choices"][0]["message"]["content"].strip()
+    except Exception as e:
+        print("[ERROR] Groq API error:", e)
+        fallback = {"speaker": "John", "text": "I'm sorry, I'm having trouble answering right now."}
+        return json.dumps(fallback)
+
+# --- Agent and Tavily Integration ---
+
+def run_research_agent(topic: str, report_type: str = "research_report", max_results: int = 20) -> str:
+    """
+    Runs the Open Deep Research agent to generate a research report.
+
+    Args:
+        topic: The research topic.
+        report_type: The type of report to generate (currently only
+            "research_report" is supported).
+        max_results: The maximum number of search results to use.
+
+    Returns:
+        The generated research report as a string, or an error message if the
+        research fails.
+    """
+    print(f"[LOG] Starting research agent for topic: {topic}")
+    try:
+        agent = OpenDeepResearcher(topic, report_type=report_type, max_results=max_results, tavily_api_key=os.environ.get("TAVILY_API_KEY"))
+        report_content = agent.run()
+        print("[LOG] Research agent completed successfully.")
+
+        # Now, use the report_structure module to generate the structured report.
+        structured_report = generate_report(report_content)
+        return structured_report
+    except Exception as e:
+        print(f"[ERROR] Error in research agent: {e}")
+        return f"Sorry, I encountered an error during research: {e}"