SearchPod1.0

Running

App Files Files Community

siddhartharyaai committed on Feb 7

Commit

12811af

verified ·

1 Parent(s): 23d3b4d

Update utils.py

Browse files

Files changed (1) hide show

utils.py +32 -24

utils.py CHANGED Viewed

@@ -24,12 +24,38 @@ class DialogueItem(BaseModel):
 # Container model whose single field, `dialogue`, is a list of DialogueItem
 # entries. (A `#` comment is used instead of a docstring so nothing is added
 # to the model class itself.)
 class Dialogue(BaseModel):
     dialogue: List[DialogueItem]
 def extract_text_from_url(url):
-    """
-    Extracts readable text from a webpage URL.
-    """
-    logging.info(f"Extracting text from URL: {url}")
     headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
     try:
         response = requests.get(url, headers=headers)
@@ -38,15 +64,11 @@ def extract_text_from_url(url):
         for script in soup(["script", "style"]):
             script.decompose()
         return soup.get_text(separator=' ').strip()
     except requests.exceptions.RequestException as e:
         logging.error(f"Error extracting text from URL: {str(e)}")
         return f"Failed to extract text from URL: {str(e)}"
 def transcribe_youtube_video(video_url):
-    """
-    Uses yt-dlp to extract audio from a YouTube video and transcribe it.
-    """
     temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
     ydl_opts = {
         'format': 'bestaudio/best',
@@ -57,40 +79,26 @@ def transcribe_youtube_video(video_url):
         }],
         'outtmpl': temp_audio_file.name,
     }
     try:
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             ydl.download([video_url])
         return transcribe_audio(temp_audio_file.name)
     except Exception as e:
         logging.error(f"Error downloading/transcribing YouTube video: {str(e)}")
         return f"Error processing YouTube video: {str(e)}"
 def transcribe_audio(file_path):
-    """
-    Uses Deepgram API to transcribe audio.
-    """
     DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
     if not DEEPGRAM_API_KEY:
-        logging.error("Deepgram API key is missing.")
         return "Deepgram API key is missing."
     url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
-    headers = {
-        "Authorization": f"Token {DEEPGRAM_API_KEY}",
-        "Content-Type": "audio/mpeg"
-    }
     try:
         with open(file_path, "rb") as f:
             response = requests.post(url, headers=headers, data=f)
         response.raise_for_status()
         data = response.json()
         return data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
     except Exception as e:
         logging.error(f"Error transcribing audio: {str(e)}")
         return f"Error transcribing audio: {str(e)}"

 # Container model whose single field, `dialogue`, is a list of DialogueItem
 # entries. (A `#` comment is used instead of a docstring so nothing is added
 # to the model class itself.)
 class Dialogue(BaseModel):
     dialogue: List[DialogueItem]
+def generate_script(prompt, text, tone, length, host_name, guest_name, sponsor_style, sponsor_provided):
+    groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+    try:
+        logging.info("Calling LLM for script generation.")
+        response = groq_client.chat.completions.create(
+            messages=[
+                {"role": "system", "content": prompt},
+                {"role": "user", "content": text}
+            ],
+            model="DeepSeek-R1-Distill-Llama-70B",
+            max_tokens=4096,
+            temperature=0.6
+        )
+        if not response.choices or not response.choices[0].message.content:
+            logging.error("LLM returned an empty response.")
+            return Dialogue(dialogue=[DialogueItem(speaker="Jane", display_speaker="Jane", text="Sorry, no data available.")])
+        script_content = response.choices[0].message.content.strip()
+        dialogue_items = parse_script_to_dialogue(script_content, host_name, guest_name)
+        if not dialogue_items:
+            raise ValueError("Script parsing failed.")
+        return Dialogue(dialogue=dialogue_items)
+    except Exception as e:
+        logging.error(f"Failed to generate script: {str(e)}")
+        return Dialogue(dialogue=[DialogueItem(speaker="Jane", display_speaker="Jane", text="I'm sorry, something went wrong.")])
 def extract_text_from_url(url):
     headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
     try:
         response = requests.get(url, headers=headers)
         for script in soup(["script", "style"]):
             script.decompose()
         return soup.get_text(separator=' ').strip()
     except requests.exceptions.RequestException as e:
         logging.error(f"Error extracting text from URL: {str(e)}")
         return f"Failed to extract text from URL: {str(e)}"
 def transcribe_youtube_video(video_url):
     temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
     ydl_opts = {
         'format': 'bestaudio/best',
         }],
         'outtmpl': temp_audio_file.name,
     }
     try:
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             ydl.download([video_url])
         return transcribe_audio(temp_audio_file.name)
     except Exception as e:
         logging.error(f"Error downloading/transcribing YouTube video: {str(e)}")
         return f"Error processing YouTube video: {str(e)}"
 def transcribe_audio(file_path):
     """Upload an audio file to Deepgram's ``/v1/listen`` endpoint and return the transcript.

     Args:
         file_path: Path of the audio file to send. The request declares the
             body as ``audio/mpeg``.

     Returns:
         The ``transcript`` of the first alternative of the first channel in
         the JSON response ("" when that key is absent). This function does
         not raise: when the ``DEEPGRAM_API_KEY`` environment variable is
         unset or empty it returns "Deepgram API key is missing.", and on any
         other failure it logs the error and returns
         "Error transcribing audio: <details>".
     """
     DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
     if not DEEPGRAM_API_KEY:
         return "Deepgram API key is missing."
     url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
     headers = {"Authorization": f"Token {DEEPGRAM_API_KEY}", "Content-Type": "audio/mpeg"}
     try:
         with open(file_path, "rb") as f:
             # requests applies no timeout by default, so a stalled connection
             # would block the caller forever. Allow 10 s to connect and up to
             # 300 s of server silence before giving up; a timeout surfaces
             # through the handler below like any other request error.
             response = requests.post(url, headers=headers, data=f, timeout=(10, 300))
         response.raise_for_status()
         data = response.json()
         return data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
     except Exception as e:
         # Deliberately broad: file, network, HTTP-status and response-shape
         # errors are all reported to the caller as an error string.
         logging.error(f"Error transcribing audio: {str(e)}")
         return f"Error transcribing audio: {str(e)}"