siddhartharyaai committed on
Commit
8139f36
·
verified ·
1 Parent(s): fbcd396

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +620 -89
utils.py CHANGED
@@ -7,128 +7,659 @@ from bs4 import BeautifulSoup
7
  from typing import List, Literal
8
  from pydantic import BaseModel
9
  from pydub import AudioSegment, effects
 
10
  import yt_dlp
11
  import tiktoken
12
- from groq import Groq
13
  import numpy as np
14
  import torch
15
- import logging
16
-
17
- logging.basicConfig(filename="debug.log", level=logging.DEBUG)
18
 
19
  class DialogueItem(BaseModel):
20
- speaker: Literal["Jane", "John"]
21
- display_speaker: str = "Jane"
22
  text: str
23
 
24
  class Dialogue(BaseModel):
25
  dialogue: List[DialogueItem]
26
 
27
- def generate_script(prompt, text, tone, length, host_name, guest_name, sponsor_style, sponsor_provided):
28
- groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 
 
 
 
 
 
 
 
 
 
 
 
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  try:
31
- logging.info("Calling LLM for script generation.")
32
  response = groq_client.chat.completions.create(
33
- messages=[
34
- {"role": "system", "content": prompt},
35
- {"role": "user", "content": text}
36
- ],
37
- model="DeepSeek-R1-Distill-Llama-70B",
38
- max_tokens=4096,
39
- temperature=0.6
40
  )
 
 
 
 
 
 
 
41
 
42
- if not response.choices or not response.choices[0].message.content:
43
- logging.error("LLM returned an empty response.")
44
- return Dialogue(dialogue=[DialogueItem(speaker="Jane", display_speaker="Jane", text="Sorry, no data available.")])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- script_content = response.choices[0].message.content.strip()
47
- dialogue_items = parse_script_to_dialogue(script_content, host_name, guest_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
- if not dialogue_items:
50
- raise ValueError("Script parsing failed.")
51
 
52
- return Dialogue(dialogue=dialogue_items)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
 
 
 
 
 
 
 
 
 
 
54
  except Exception as e:
55
- logging.error(f"Failed to generate script: {str(e)}")
56
- return Dialogue(dialogue=[DialogueItem(speaker="Jane", display_speaker="Jane", text="I'm sorry, something went wrong.")])
57
 
58
- def extract_text_from_url(url):
59
- headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  try:
61
- response = requests.get(url, headers=headers)
62
- response.raise_for_status()
63
- soup = BeautifulSoup(response.text, 'html.parser')
64
- for script in soup(["script", "style"]):
65
- script.decompose()
66
- return soup.get_text(separator=' ').strip()
67
- except requests.exceptions.RequestException as e:
68
- logging.error(f"Error extracting text from URL: {str(e)}")
69
- return f"Failed to extract text from URL: {str(e)}"
70
-
71
- def transcribe_youtube_video(video_url):
72
- temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
73
- ydl_opts = {
74
- 'format': 'bestaudio/best',
75
- 'postprocessors': [{
76
- 'key': 'FFmpegExtractAudio',
77
- 'preferredcodec': 'mp3',
78
- 'preferredquality': '192',
79
- }],
80
- 'outtmpl': temp_audio_file.name,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  try:
83
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
84
- ydl.download([video_url])
85
- return transcribe_audio(temp_audio_file.name)
 
 
 
 
 
 
 
 
 
 
 
86
  except Exception as e:
87
- logging.error(f"Error downloading/transcribing YouTube video: {str(e)}")
88
- return f"Error processing YouTube video: {str(e)}"
89
-
90
- def transcribe_audio(file_path):
91
- DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
92
- if not DEEPGRAM_API_KEY:
93
- return "Deepgram API key is missing."
94
- url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
95
- headers = {"Authorization": f"Token {DEEPGRAM_API_KEY}", "Content-Type": "audio/mpeg"}
 
96
  try:
97
- with open(file_path, "rb") as f:
98
- response = requests.post(url, headers=headers, data=f)
99
- response.raise_for_status()
100
- data = response.json()
101
- return data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  except Exception as e:
103
- logging.error(f"Error transcribing audio: {str(e)}")
104
- return f"Error transcribing audio: {str(e)}"
105
 
106
- def generate_audio_mp3(text, speaker):
107
- groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
108
- response = groq_client.chat.completions.create(
109
- messages=[{"role": "system", "content": f"Generate a realistic voice for {speaker}: {text}"}],
110
- model="llama-3.3-70b-versatile",
111
- max_tokens=512,
112
- temperature=0.6
113
- )
114
 
115
- speech_text = response.choices[0].message.content.strip()
116
- temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
 
 
 
 
 
 
 
117
 
118
  try:
119
- temp_audio.write(speech_text.encode('utf-8'))
120
- temp_audio.close()
 
121
 
122
- audio_test = AudioSegment.from_file(temp_audio.name, format="mp3")
123
- if len(audio_test) == 0:
124
- raise ValueError("Generated MP3 file is empty or corrupted.")
125
 
126
- return temp_audio.name
127
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  except Exception as e:
129
- logging.error(f"Failed to generate MP3 file: {str(e)}")
 
 
 
 
 
 
 
 
 
 
130
 
131
- silent_audio = AudioSegment.silent(duration=2000)
132
- fallback_mp3 = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
133
- silent_audio.export(fallback_mp3.name, format="mp3")
134
- return fallback_mp3.name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  from typing import List, Literal
8
  from pydantic import BaseModel
9
  from pydub import AudioSegment, effects
10
+ from transformers import pipeline
11
  import yt_dlp
12
  import tiktoken
13
+ from groq import Groq # Retained for other functions if needed
14
  import numpy as np
15
  import torch
16
+ import random
 
 
17
 
18
class DialogueItem(BaseModel):
    """A single spoken line of a generated script."""

    # Speaker identifier; validation restricts it to these two values.
    speaker: Literal["Jane", "John"]
    # Name attached to the line for display; defaults to "Jane".
    display_speaker: str = "Jane"
    # The words spoken on this line.
    text: str
22
 
23
class Dialogue(BaseModel):
    """A complete script: the ordered list of its spoken lines."""

    dialogue: List[DialogueItem]
25
 
26
# Speech-recognition pipeline, constructed once when the module is imported.
# Uses the first CUDA device when torch reports one, otherwise the CPU (-1).
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-tiny.en",
    device=0 if torch.cuda.is_available() else -1,
)
31
+
32
def truncate_text(text, max_tokens=2048):
    """Return *text* limited to at most *max_tokens* ``cl100k_base`` tokens.

    Args:
        text: The string to shorten.
        max_tokens: Maximum number of tokens to keep. Negative values are
            treated as zero so the slice never counts from the end.

    Returns:
        The original string when it already fits, otherwise the decoded
        prefix made of the first *max_tokens* tokens.
    """
    print("[LOG] Truncating text if needed.")
    tokenizer = tiktoken.get_encoding("cl100k_base")
    tokens = tokenizer.encode(text)
    limit = max(max_tokens, 0)
    if len(tokens) <= limit:
        return text
    print("[LOG] Text too long, truncating.")
    return tokenizer.decode(tokens[:limit])
40
 
41
def extract_text_from_url(url):
    """Download *url* and return its visible text.

    ``<script>`` and ``<style>`` elements are removed before the text is
    extracted. This is a best-effort helper: any failure (non-200 status,
    network error, timeout, parse error) is logged and an empty string is
    returned instead of raising.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text with elements separated by single spaces, or ``""``.
    """
    print("[LOG] Extracting text from URL:", url)
    try:
        headers = {
            "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                           "AppleWebKit/537.36 (KHTML, like Gecko) "
                           "Chrome/115.0.0.0 Safari/537.36")
        }
        # Without a timeout an unresponsive server blocks the caller forever.
        response = requests.get(url, headers=headers, timeout=30)
        if response.status_code != 200:
            print(f"[ERROR] Failed to fetch URL: {url} with status code {response.status_code}")
            return ""
        soup = BeautifulSoup(response.text, 'html.parser')
        for element in soup(["script", "style"]):
            element.decompose()
        text = soup.get_text(separator=' ')
        print("[LOG] Text extraction from URL successful.")
        return text
    except Exception as e:  # deliberate best-effort boundary
        print(f"[ERROR] Exception during text extraction from URL: {e}")
        return ""
62
+
63
def pitch_shift(audio: "AudioSegment", semitones: int) -> "AudioSegment":
    """Shift the pitch of *audio* by *semitones* by resampling.

    The raw samples are re-labelled with a frame rate scaled by
    ``2 ** (semitones / 12)`` and the result is then converted back to the
    original frame rate.

    Args:
        audio: Segment to shift.
        semitones: Number of semitones; positive raises the pitch.

    Returns:
        A new segment at the original frame rate.
    """
    print(f"[LOG] Shifting pitch by {semitones} semitones.")
    new_sample_rate = int(audio.frame_rate * (2.0 ** (semitones / 12.0)))
    shifted_audio = audio._spawn(audio.raw_data, overrides={'frame_rate': new_sample_rate})
    return shifted_audio.set_frame_rate(audio.frame_rate)
68
+
69
def is_sufficient(text: str, min_word_count: int = 500) -> bool:
    """Return True when *text* has at least *min_word_count* words.

    Words are the whitespace-separated tokens produced by ``str.split()``.
    """
    word_count = len(text.split())
    print(f"[DEBUG] Aggregated word count: {word_count}")
    return word_count >= min_word_count
73
+
74
def query_llm_for_additional_info(topic: str, existing_text: str) -> str:
    """Ask the Groq-hosted LLM for extra background on *topic*.

    Args:
        topic: Subject to expand on.
        existing_text: Information already gathered; it is embedded in the
            prompt.

    Returns:
        The model's answer with surrounding whitespace removed, or ``""``
        when the client cannot be created, the request fails, or the model
        returns no content.
    """
    print("[LOG] Querying LLM for additional information.")
    system_prompt = (
        "You are an AI assistant with extensive knowledge up to 2023-10. "
        "Provide additional relevant information on the following topic based on your knowledge base.\n\n"
        f"Topic: {topic}\n\n"
        f"Existing Information: {existing_text}\n\n"
        "Please add more insightful details, facts, and perspectives to enhance the understanding of the topic."
    )
    try:
        # Client construction is inside the try so that a configuration
        # problem takes the same empty-string fallback as an API failure.
        groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
        response = groq_client.chat.completions.create(
            messages=[{"role": "system", "content": system_prompt}],
            model="llama-3.3-70b-versatile",
            max_tokens=1024,
            temperature=0.7
        )
        content = response.choices[0].message.content
    except Exception as e:  # deliberate best-effort boundary
        print("[ERROR] Groq API error during fallback:", e)
        return ""
    # The content may be None; treat that as "nothing to add".
    additional_info = (content or "").strip()
    print("[DEBUG] Additional information from LLM:")
    print(additional_info)
    return additional_info
98
 
99
+ ## NEW: Updated research_topic that uses Tavily and Open Deep Research Agent
100
def research_topic(topic: str) -> str:
    """Build a research report on *topic* from web search results.

    Steps:
      1. Search Tavily for the topic and collect the result URLs.
      2. Ask Tavily to extract the content of each URL.
      3. Send the combined content to DeepSeek R1 (via OpenRouter) with a
         prompt requesting a structured report.

    Args:
        topic: Subject of the report.

    Returns:
        The report text. When ``TAVILY_API_KEY`` is not set the string
        ``"Tavily API key not configured."`` is returned; when the report
        request fails an ``"Error generating research report: ..."`` string
        is returned. Search and extraction failures are logged and skipped.
    """
    print("[LOG] Researching topic using Tavily API and Open Deep Research Agent:", topic)
    tavily_api_key = os.environ.get("TAVILY_API_KEY")
    if not tavily_api_key:
        print("[ERROR] TAVILY_API_KEY not found in environment variables.")
        return "Tavily API key not configured."

    tavily_timeout = 60  # seconds; avoids hanging on an unresponsive API

    tavily_search_url = "https://api.tavily.com/search"
    search_payload = {
        "query": topic,
        "search_depth": "advanced",
        "topic": "general",
        "days": 3,
        "max_results": 5,
        "include_images": False,
        "include_image_descriptions": False,
        "include_answer": True,
        "include_raw_content": True,
        "include_domains": [],
        "exclude_domains": []
    }
    headers = {
        "Authorization": f"Bearer {tavily_api_key}",
        "Content-Type": "application/json"
    }
    urls = []
    try:
        tavily_response = requests.post(
            tavily_search_url, headers=headers, json=search_payload, timeout=tavily_timeout
        )
        tavily_response.raise_for_status()
        search_results = tavily_response.json()
        for result in search_results.get("results", []):
            # Tavily reports the page address under "url"; "link" is kept as
            # a fallback for compatibility with the previous lookup.
            link = result.get("url") or result.get("link")
            if link:
                urls.append(link)
        print("[LOG] Tavily search returned URLs:", urls)
    except Exception as e:
        print("[ERROR] Tavily search API error:", e)
        urls = []

    extracted_contents = []
    tavily_extract_url = "https://api.tavily.com/extract"
    for url in urls:
        extract_payload = {
            "urls": url,
            "include_images": False,
            "extract_depth": "advanced"
        }
        try:
            extract_response = requests.post(
                tavily_extract_url, headers=headers, json=extract_payload, timeout=tavily_timeout
            )
            extract_response.raise_for_status()
            extract_results = extract_response.json().get("results") or []
            if extract_results:
                first = extract_results[0]
                # The extract endpoint returns the page body as "raw_content";
                # "content" is kept as a fallback.
                content = first.get("raw_content") or first.get("content", "")
                if content:
                    extracted_contents.append(content)
        except Exception as e:
            print(f"[ERROR] Tavily extract API error for URL {url}: {e}")

    combined_content = "\n".join(extracted_contents)
    print("[DEBUG] Combined extracted content length:", len(combined_content))

    research_prompt = (
        f"Using the latest available information from the internet, generate a comprehensive and detailed research report on the topic '{topic}'. "
        "The report should be structured in the style of a world-class research report, including a Table of Contents, Introduction, Methodology, Findings, Discussion, and Conclusion sections. "
        "Ensure that the report is well-organized, cites recent developments, and provides in-depth analysis. "
        "Use the following extracted content as context:\n\n"
        f"{combined_content}\n\n"
        "If the extracted content is insufficient, perform additional research using reliable internet sources. "
        "Make sure to include all relevant and updated information."
    )

    deepseek_headers = {
        "Authorization": f"Bearer {os.environ.get('DEEPSEEK_API_KEY')}",
        "Content-Type": "application/json"
    }
    deepseek_data = {
        "model": "deepseek/deepseek-r1",
        "messages": [{"role": "user", "content": research_prompt}],
        "max_tokens": 4096,
        "temperature": 0.6
    }
    try:
        deepseek_response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=deepseek_headers,
            data=json.dumps(deepseek_data),
            timeout=300,  # long generations are expected, but not unbounded
        )
        deepseek_response.raise_for_status()
        response_json = deepseek_response.json()
        report_content = response_json["choices"][0]["message"]["content"].strip()
        print("[LOG] Research report generated successfully.")
    except Exception as e:
        print("[ERROR] Open Deep Research Agent API error:", e)
        report_content = f"Error generating research report: {str(e)}"

    return report_content
189
 
190
+ ## NEW: Function to generate a PDF report from text using wkhtmltopdf.
191
def generate_pdf_report(report_text: str) -> str:
    """Render *report_text* to a PDF file with the ``wkhtmltopdf`` tool.

    The text is HTML-escaped (so ``<``, ``>`` and ``&`` in the report are
    shown literally and cannot inject markup), newlines become ``<br>``,
    and the result is wrapped in a minimal HTML page that is passed to
    ``wkhtmltopdf``.

    Args:
        report_text: Plain-text report.

    Returns:
        Path to the generated PDF, or ``""`` when conversion fails (for
        example when ``wkhtmltopdf`` is not installed). The intermediate
        HTML file is always removed; the PDF placeholder is removed on
        failure.
    """
    import html
    import subprocess
    import tempfile

    # Computed outside the f-string: a backslash inside an f-string
    # expression is a syntax error on Python versions before 3.12.
    body_html = html.escape(report_text).replace("\n", "<br>")
    html_content = f"""
<html>
<head>
<meta charset="utf-8">
<title>Research Report</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 40px; }}
h1, h2, h3, h4, h5, h6 {{ color: #333; }}
</style>
</head>
<body>
{body_html}
</body>
</html>
"""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as html_file:
        html_file.write(html_content.encode("utf-8"))
        html_path = html_file.name

    # Reserve an output path and close the handle before the tool writes it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as pdf_file:
        pdf_path = pdf_file.name

    try:
        subprocess.run(["wkhtmltopdf", html_path, pdf_path], check=True)
        print("[LOG] PDF report generated at:", pdf_path)
    except Exception as e:
        print("[ERROR] Failed to generate PDF:", e)
        if os.path.exists(pdf_path):
            os.remove(pdf_path)
        pdf_path = ""
    finally:
        os.remove(html_path)

    return pdf_path
229
 
230
+ # (The rest of the file remains unchanged.)
 
231
 
232
def fetch_wikipedia_summary(topic: str) -> str:
    """Return the English Wikipedia summary extract for *topic*.

    The topic is first resolved to a page title with the ``opensearch``
    API (first hit only); the summary is then read from the REST
    ``page/summary`` endpoint.

    Args:
        topic: Free-text subject to look up.

    Returns:
        The ``extract`` field of the summary, or ``""`` when nothing is
        found or any request fails.
    """
    print("[LOG] Fetching Wikipedia summary for:", topic)
    request_timeout = 15  # seconds
    try:
        search_url = (
            f"https://en.wikipedia.org/w/api.php?action=opensearch&search={requests.utils.quote(topic)}"
            "&limit=1&namespace=0&format=json"
        )
        resp = requests.get(search_url, timeout=request_timeout)
        if resp.status_code != 200:
            print(f"[ERROR] Failed to fetch Wikipedia search results for {topic}")
            return ""
        data = resp.json()
        if len(data) > 1 and data[1]:
            title = data[1][0]
            # safe="" also encodes "/" so titles such as "AC/DC" stay a
            # single path segment.
            encoded_title = requests.utils.quote(title, safe="")
            summary_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{encoded_title}"
            s_resp = requests.get(summary_url, timeout=request_timeout)
            if s_resp.status_code == 200:
                s_data = s_resp.json()
                if "extract" in s_data:
                    print("[LOG] Wikipedia summary fetched successfully.")
                    return s_data["extract"]
        return ""
    except Exception as e:  # deliberate best-effort boundary
        print(f"[ERROR] Exception during Wikipedia summary fetch: {e}")
        return ""
257
 
258
def fetch_rss_feed(feed_url: str) -> list:
    """Download an RSS feed and return its ``<item>`` elements.

    Args:
        feed_url: Address of the feed.

    Returns:
        The list of ``item`` tags found by BeautifulSoup's XML parser, or an
        empty list on a non-200 response or any error.
    """
    print("[LOG] Fetching RSS feed:", feed_url)
    try:
        # Without a timeout an unresponsive server blocks the caller forever.
        resp = requests.get(feed_url, timeout=30)
        if resp.status_code != 200:
            print(f"[ERROR] Failed to fetch RSS feed: {feed_url}")
            return []
        soup = BeautifulSoup(resp.content, "xml")
        return soup.find_all("item")
    except Exception as e:  # deliberate best-effort boundary
        print(f"[ERROR] Exception fetching RSS feed {feed_url}: {e}")
        return []
271
 
272
def find_relevant_article(items, topic: str, min_match=2) -> tuple:
    """Return the first feed item that mentions enough topic keywords.

    Keywords are the ``\\w+`` runs of the lower-cased topic. An item matches
    when at least *min_match* keywords occur (as substrings) in its
    lower-cased title plus description.

    Args:
        items: Iterable of parsed feed items exposing ``find(tag)``.
        topic: Subject whose words are searched for.
        min_match: Minimum number of keyword hits required.

    Returns:
        ``(title, description, link)`` of the first match, or
        ``(None, None, None)`` when no item matches.
    """
    print("[LOG] Finding relevant articles...")
    keywords = re.findall(r'\w+', topic.lower())

    def tag_text(item, tag_name):
        # Look the tag up once; a missing tag yields an empty string.
        node = item.find(tag_name)
        return node.get_text().strip() if node else ""

    for item in items:
        title = tag_text(item, "title")
        description = tag_text(item, "description")
        haystack = (title + " " + description).lower()
        matches = sum(1 for kw in keywords if kw in haystack)
        if matches >= min_match:
            link = tag_text(item, "link")
            print(f"[LOG] Relevant article found: {title}")
            return title, description, link
    return None, None, None
285
+
286
def fetch_article_text(link: str) -> str:
    """Return the text of the first five ``<p>`` elements at *link*.

    Args:
        link: Article address; an empty value short-circuits to ``""``.

    Returns:
        The paragraph texts joined by single spaces and stripped, or ``""``
        when no link is given, the response is not 200, or any error occurs.
    """
    print("[LOG] Fetching article text from:", link)
    if not link:
        print("[LOG] No link provided for article text.")
        return ""
    try:
        # Without a timeout an unresponsive server blocks the caller forever.
        resp = requests.get(link, timeout=30)
        if resp.status_code != 200:
            print(f"[ERROR] Failed to fetch article from {link}")
            return ""
        soup = BeautifulSoup(resp.text, 'html.parser')
        paragraphs = soup.find_all("p")
        text = " ".join(p.get_text() for p in paragraphs[:5])
        print("[LOG] Article text fetched successfully.")
        return text.strip()
    except Exception as e:  # deliberate best-effort boundary
        print(f"[ERROR] Error fetching article text: {e}")
        return ""
304
+
305
def generate_script(
    system_prompt: str,
    input_text: str,
    tone: str,
    target_length: str,
    host_name: str = "Jane",
    guest_name: str = "John",
    sponsor_style: str = "Separate Break",
    sponsor_provided=None
):
    """Generate a two-speaker script with DeepSeek R1 via OpenRouter.

    Args:
        system_prompt: Base instructions placed at the top of the prompt.
        input_text: Source material for the conversation.
        tone: One of "Humorous", "Formal", "Casual", "Youthful"; any other
            value falls back to "casual".
        target_length: Text containing the desired number of minutes; the
            first integer found is used (default 3).
        host_name: Display name for the host (voice "Jane").
        guest_name: Display name for the guest (voice "John").
        sponsor_style: "Separate Break" for a distinct ad break; any other
            value blends the sponsor message into the conversation.
        sponsor_provided: Truthy when sponsor content is present.

    Returns:
        A ``Dialogue`` whose items carry the voice identifier in ``speaker``
        and the configured name in ``display_speaker``.

    Raises:
        ValueError: When the API call fails or the reply cannot be parsed
            into a dialogue.
    """
    print("[LOG] Generating script with tone:", tone, "and length:", target_length)
    import streamlit as st

    language = st.session_state.get("language_selection")
    if language in ["English (Indian)", "Hinglish", "Hindi"]:
        # Default names are swapped for Indian ones in these languages.
        if host_name == "Jane" or not host_name:
            host_name = "Isha"
        if guest_name == "John" or not guest_name:
            guest_name = "Aarav"
    # Guard the later .lower() calls against empty/None names.
    host_name = host_name or "Jane"
    guest_name = guest_name or "John"

    words_per_minute = 150
    numeric_minutes = 3
    match = re.search(r"(\d+)", str(target_length))
    if match:
        numeric_minutes = int(match.group(1))

    min_words = max(50, numeric_minutes * 100)
    max_words = numeric_minutes * words_per_minute

    tone_map = {
        "Humorous": "funny and exciting, makes people chuckle",
        "Formal": "business-like, well-structured, professional",
        "Casual": "like a conversation between close friends, relaxed and informal",
        "Youthful": "like how teenagers might chat, energetic and lively"
    }
    chosen_tone = tone_map.get(tone, "casual")

    if sponsor_provided:
        if sponsor_style == "Separate Break":
            sponsor_instructions = (
                "If sponsor content is provided, include it in a separate ad break (~30 seconds). "
                "Use phrasing like 'Now a word from our sponsor...' and end with 'Back to the show' or similar."
            )
        else:
            sponsor_instructions = (
                "If sponsor content is provided, blend it naturally (~30 seconds) into the conversation. "
                "Avoid abrupt transitions."
            )
    else:
        sponsor_instructions = ""

    prompt = (
        f"{system_prompt}\n"
        f"TONE: {chosen_tone}\n"
        f"TARGET LENGTH: {target_length} (~{min_words}-{max_words} words)\n"
        f"INPUT TEXT: {input_text}\n\n"
        f"# Sponsor Style Instruction:\n{sponsor_instructions}\n\n"
        "Please provide the output in the following JSON format without any additional text:\n\n"
        "{\n"
        ' "dialogue": [\n'
        ' {\n'
        ' "speaker": "Jane",\n'
        ' "text": "..." \n'
        ' },\n'
        ' {\n'
        ' "speaker": "John",\n'
        ' "text": "..." \n'
        ' }\n'
        " ]\n"
        "}"
    )

    # Language instructions are appended before logging so the logged prompt
    # is exactly what is sent.
    if language == "Hinglish":
        prompt += "\n\nPlease generate the script in Romanized Hindi.\n"
    elif language == "Hindi":
        prompt += "\n\nPlease generate the script exclusively in Hindi, using only Hindi vocabulary and grammar without any English words or phrases.\n"

    print("[LOG] Sending prompt to Deepseek R1 via OpenRouter:")
    print(prompt)

    try:
        headers = {
            "Authorization": f"Bearer {os.environ.get('DEEPSEEK_API_KEY')}",
            "Content-Type": "application/json"
        }
        data = {
            "model": "deepseek/deepseek-r1",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 2048,
            "temperature": 0.7
        }
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            data=json.dumps(data),
            timeout=300,  # long generations are expected, but not unbounded
        )
        response.raise_for_status()
        raw_content = response.json()["choices"][0]["message"]["content"].strip()
    except Exception as e:
        print("[ERROR] Deepseek API error:", e)
        raise ValueError(f"Error communicating with Deepseek API: {str(e)}") from e

    # The model may wrap the JSON in extra text; keep the outermost braces.
    start_index = raw_content.find('{')
    end_index = raw_content.rfind('}')
    if start_index == -1 or end_index == -1 or end_index < start_index:
        raise ValueError("Failed to parse dialogue: No JSON found.")

    json_str = raw_content[start_index:end_index + 1].strip()

    try:
        data = json.loads(json_str)
        dialogue_list = data.get("dialogue", [])

        host_key = host_name.lower()
        guest_key = guest_name.lower()
        new_dialogue_items = []
        for d in dialogue_list:
            raw_speaker = str(d.get("speaker") or "Jane")
            speaker_key = raw_speaker.strip().lower()
            if speaker_key == host_key:
                speaker, display = "Jane", host_name
            elif speaker_key == guest_key:
                speaker, display = "John", guest_name
            elif speaker_key == "john":
                # The prompt template names the guest "John"; when the model
                # echoes that label the line must still use the guest voice.
                speaker, display = "John", guest_name
            elif speaker_key == "jane":
                speaker, display = "Jane", host_name
            else:
                # Unknown label: keep it for display, use the host voice.
                speaker, display = "Jane", raw_speaker
            item_fields = {**d, "speaker": speaker, "display_speaker": display}
            new_dialogue_items.append(DialogueItem(**item_fields))

        return Dialogue(dialogue=new_dialogue_items)
    except json.JSONDecodeError as e:
        print("[ERROR] JSON decoding (format) failed:", e)
        raise ValueError(f"Failed to parse dialogue: {str(e)}") from e
    except Exception as e:
        print("[ERROR] JSON decoding failed:", e)
        raise ValueError(f"Failed to parse dialogue: {str(e)}") from e
436
 
437
def transcribe_youtube_video(video_url: str) -> str:
    """Fetch the English transcript of a YouTube video through RapidAPI.

    Args:
        video_url: Any URL from which an 11-character video ID can be
            extracted (after ``v=`` or a ``/``).

    Returns:
        The transcript text (``transcriptionAsText`` of the first result).

    Raises:
        ValueError: When the URL contains no video ID, ``RAPIDAPI_KEY`` is
            not set, the request fails, or the response has no transcript.
    """
    print("[LOG] Transcribing YouTube video via RapidAPI:", video_url)
    video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", video_url)
    if not video_id_match:
        raise ValueError(f"Invalid YouTube URL: {video_url}, cannot extract video ID.")

    video_id = video_id_match.group(1)
    print("[LOG] Extracted video ID:", video_id)

    rapidapi_key = os.environ.get("RAPIDAPI_KEY")
    if not rapidapi_key:
        # Fail with a clear message instead of sending a request without a key.
        raise ValueError("Error transcribing YouTube video via RapidAPI: RAPIDAPI_KEY is not set.")

    base_url = "https://youtube-transcriptor.p.rapidapi.com/transcript"
    params = {"video_id": video_id, "lang": "en"}
    headers = {
        "x-rapidapi-host": "youtube-transcriptor.p.rapidapi.com",
        "x-rapidapi-key": rapidapi_key
    }

    try:
        response = requests.get(base_url, headers=headers, params=params, timeout=30)
        print("[LOG] RapidAPI Response Status Code:", response.status_code)
        # Only the head of the body is logged; transcripts can be very long.
        print("[LOG] RapidAPI Response Body:", response.text[:500])

        if response.status_code != 200:
            raise ValueError(f"RapidAPI transcription error: {response.status_code}, {response.text}")

        data = response.json()
        if not isinstance(data, list) or not data:
            raise ValueError(f"Unexpected transcript format or empty transcript: {data}")

        transcript_as_text = (data[0].get('transcriptionAsText') or '').strip()
        if not transcript_as_text:
            raise ValueError("transcriptionAsText field is missing or empty.")
    except Exception as e:
        print("[ERROR] RapidAPI transcription error:", e)
        raise ValueError(f"Error transcribing YouTube video via RapidAPI: {str(e)}") from e

    print("[LOG] Transcript retrieval successful.")
    print(f"[DEBUG] Transcript Length: {len(transcript_as_text)} characters.")
    snippet = transcript_as_text[:200] + "..." if len(transcript_as_text) > 200 else transcript_as_text
    print(f"[DEBUG] Transcript Snippet: {snippet}")

    return transcript_as_text
478
+
479
def _normalize_and_export_mp3(source_path: str, source_format: str) -> str:
    """Normalize the audio at *source_path*, export it as MP3, delete the source."""
    try:
        audio_seg = AudioSegment.from_file(source_path, format=source_format)
        audio_seg = effects.normalize(audio_seg)
        # Reserve the output path and close the handle before exporting.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as out_file:
            final_mp3_path = out_file.name
        audio_seg.export(final_mp3_path, format="mp3")
        return final_mp3_path
    finally:
        # The intermediate file is removed whether or not processing worked.
        if os.path.exists(source_path):
            os.remove(source_path)


def _synthesize_with_deepgram(text: str, speaker: str) -> str:
    """Synthesize *text* with Deepgram TTS and return the path of the MP3."""
    if speaker in ["John", "Jane"]:
        processed_text = text
    else:
        processed_text = _preprocess_text_for_tts(text, speaker)
    deepgram_api_url = "https://api.deepgram.com/v1/speak"
    params = {"model": "aura-zeus-en" if speaker == "John" else "aura-asteria-en"}
    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
    }
    body = {"text": processed_text}
    # The context manager releases the streamed connection on every path.
    with requests.post(deepgram_api_url, params=params, headers=headers,
                       json=body, stream=True, timeout=120) as response:
        if response.status_code != 200:
            raise ValueError(f"Deepgram TTS error: {response.status_code}, {response.text}")
        content_type = response.headers.get('Content-Type', '')
        if 'audio/mpeg' not in content_type:
            raise ValueError("Unexpected Content-Type from Deepgram.")
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    mp3_file.write(chunk)
            mp3_path = mp3_file.name
    return _normalize_and_export_mp3(mp3_path, "mp3")


def _synthesize_with_murf(text: str, speaker: str, language_selection: str) -> str:
    """Synthesize *text* with the Murf API and return the path of the MP3."""
    if language_selection == "Hinglish":
        from indic_transliteration.sanscript import transliterate, DEVANAGARI, IAST
        text = transliterate(text, DEVANAGARI, IAST)
    api_key = os.environ.get("MURF_API_KEY")
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
        "api-key": api_key
    }
    uses_hindi_voice = language_selection in ["Hinglish", "Hindi"]
    multi_native_locale = "hi-IN" if uses_hindi_voice else "en-IN"
    if uses_hindi_voice:
        voice_id = "hi-IN-kabir" if speaker == "John" else "hi-IN-shweta"
    else:
        # "English (Indian)" and any other selection share the en-IN voices.
        voice_id = "en-IN-aarav" if speaker == "John" else "en-IN-isha"
    payload = {
        "audioDuration": 0,
        "channelType": "MONO",
        "encodeAsBase64": False,
        "format": "WAV",
        "modelVersion": "GEN2",
        "multiNativeLocale": multi_native_locale,
        "pitch": 0,
        "pronunciationDictionary": {},
        "rate": 0,
        "sampleRate": 48000,
        "style": "Conversational",
        "text": text,
        "variation": 1,
        "voiceId": voice_id
    }
    response = requests.post("https://api.murf.ai/v1/speech/generate",
                             headers=headers, json=payload, timeout=120)
    if response.status_code != 200:
        raise ValueError(f"Murf API error: {response.status_code}, {response.text}")
    json_resp = response.json()
    audio_url = json_resp.get("audioFile")
    if not audio_url:
        raise ValueError("No audio file URL returned by Murf API")
    audio_response = requests.get(audio_url, timeout=120)
    if audio_response.status_code != 200:
        raise ValueError(f"Error fetching audio from {audio_url}")
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
        wav_file.write(audio_response.content)
        wav_path = wav_file.name
    return _normalize_and_export_mp3(wav_path, "wav")


def generate_audio_mp3(text: str, speaker: str) -> str:
    """Convert *text* to speech and return the path of a normalized MP3.

    The language stored in the Streamlit session under
    ``language_selection`` decides the backend: "English (American)" (the
    default) uses Deepgram, every other value uses Murf.

    Args:
        text: What to say.
        speaker: "John" selects the male voice; anything else the female one.

    Returns:
        Path to a temporary ``.mp3`` file; the caller owns and should delete it.

    Raises:
        ValueError: On any failure (HTTP error, unexpected response, audio
            decoding error); the original exception is chained.
    """
    try:
        import streamlit as st
        print(f"[LOG] Generating audio for speaker: {speaker}")
        language_selection = st.session_state.get("language_selection", "English (American)")
        if language_selection == "English (American)":
            print(f"[LOG] Using Deepgram for English (American)")
            return _synthesize_with_deepgram(text, speaker)
        print(f"[LOG] Using Murf API for language: {language_selection}")
        return _synthesize_with_murf(text, speaker, language_selection)
    except Exception as e:
        print("[ERROR] Error generating audio:", e)
        raise ValueError(f"Error generating audio: {str(e)}") from e
576
+
577
def transcribe_youtube_video_OLD_YTDLP(video_url: str) -> str:
    """Placeholder kept under the old name; it does nothing.

    NOTE(review): despite the ``str`` annotation this stub returns ``None``.
    """
    pass
579
+
580
def _preprocess_text_for_tts(text: str, speaker: str) -> str:
    """Rewrite *text* so a TTS engine reads it more naturally.

    Transformations, in order:
      * "No." before a number (or directly before a word character)
        becomes "Number"; "SaaS" (any case) becomes "sass".
      * Upper-case abbreviations are spelled out letter by letter
        ("API" -> "A P I"), except NASA, NATO and UNESCO.
      * Hyphens become spaces.
      * Laughter, sighs and groans become "(* ... *)" stage cues.
      * For every speaker other than "Jane": pauses (sometimes with a random
        filler) after a few emphasis words and "..." after conjunctions.
      * "uh", "um", "ah" are removed.
      * The first letter of the text and of each sentence is upper-cased.

    Args:
        text: Line to prepare.
        speaker: Speaker name; only the value "Jane" changes behavior.

    Returns:
        The rewritten, stripped text.
    """
    # The lookahead also covers "No. 5"; the previous trailing \b could only
    # match when a word character followed the period directly.
    text = re.sub(r"\bNo\.(?=\w|\s+\d)", "Number", text)
    # Case-insensitivity is passed as a flag: an inline "(?i)" that is not at
    # the start of the pattern is an error on Python 3.11+.
    text = re.sub(r"\bSaaS\b", "sass", text, flags=re.IGNORECASE)

    abbreviations_as_words = {"NASA", "NATO", "UNESCO"}

    def insert_periods_for_abbrev(m):
        abbr = m.group(0)
        if abbr in abbreviations_as_words:
            return abbr
        return ".".join(abbr) + "."

    text = re.sub(r"\b([A-Z]{2,})\b", insert_periods_for_abbrev, text)
    # An abbreviation at the end of a sentence would otherwise end in "..".
    text = re.sub(r"\.\.", ".", text)

    def remove_periods_for_tts(m):
        return m.group().replace(".", " ").strip()

    text = re.sub(r"[A-Z]\.[A-Z](?:\.[A-Z])*\.", remove_periods_for_tts, text)
    text = re.sub(r"-", " ", text)
    text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
    text = re.sub(r"\bsigh\b", "(* sighs *)", text, flags=re.IGNORECASE)
    text = re.sub(r"\b(groan|moan)\b", "(* groans *)", text, flags=re.IGNORECASE)

    if speaker != "Jane":
        def insert_thinking_pause(m):
            word = m.group(1)
            if random.random() < 0.3:
                filler = random.choice(['hmm,', 'well,', 'let me see,'])
                return f"{word}..., {filler}"
            return f"{word}...,"

        keywords_pattern = r"\b(important|significant|crucial|point|topic)\b"
        text = re.sub(keywords_pattern, insert_thinking_pause, text, flags=re.IGNORECASE)
        conj_pattern = r"\b(and|but|so|because|however)\b"
        text = re.sub(conj_pattern, lambda m: f"{m.group()}...", text, flags=re.IGNORECASE)

    text = re.sub(r"\b(uh|um|ah)\b", "", text, flags=re.IGNORECASE)

    def capitalize_match(m):
        return m.group().upper()

    text = re.sub(r'(^\s*\w)|([.!?]\s*\w)', capitalize_match, text)
    return text.strip()
615
+
616
def _spell_digits(d: str) -> str:
    """Spell out each ASCII digit of *d* as an English word.

    Characters other than ``0``-``9`` are skipped.

    Returns:
        The digit words joined by single spaces (``""`` when *d* contains
        no digits).
    """
    digit_map = {
        '0': 'zero', '1': 'one', '2': 'two', '3': 'three',
        '4': 'four', '5': 'five', '6': 'six', '7': 'seven',
        '8': 'eight', '9': 'nine'
    }
    return " ".join(digit_map[ch] for ch in d if ch in digit_map)
623
+
624
def mix_with_bg_music(spoken: "AudioSegment", custom_music_path=None) -> "AudioSegment":
    """Lay *spoken* over looped background music.

    The music is attenuated by 18 dB, looped to the length of the speech
    plus two seconds, and the speech is overlaid starting at the two-second
    mark.

    Args:
        spoken: The speech track.
        custom_music_path: Path of an MP3 to use instead of ``bg_music.mp3``.

    Returns:
        The mixed segment, or *spoken* unchanged when the music cannot be
        loaded or is empty.
    """
    if custom_music_path:
        music_path = custom_music_path
    else:
        music_path = "bg_music.mp3"

    try:
        bg_music = AudioSegment.from_file(music_path, format="mp3")
    except Exception as e:
        print("[ERROR] Failed to load background music:", e)
        return spoken

    if len(bg_music) == 0:
        # A zero-length clip would never fill the target length below.
        print("[ERROR] Failed to load background music:", "music file is empty")
        return spoken

    bg_music = bg_music - 18.0
    total_length_ms = len(spoken) + 2000
    looped_music = AudioSegment.empty()
    while len(looped_music) < total_length_ms:
        looped_music += bg_music
    looped_music = looped_music[:total_length_ms]
    final_mix = looped_music.overlay(spoken, position=2000)
    return final_mix
644
 
645
def call_groq_api_for_qa(system_prompt: str) -> str:
    """Send *system_prompt* to DeepSeek R1 via OpenRouter and return the reply.

    Despite the name, the request goes to OpenRouter, not Groq.

    Args:
        system_prompt: Full prompt, sent as a single user message.

    Returns:
        The stripped reply text. On any failure (HTTP error, timeout,
        malformed or empty response) a JSON string describing an apology
        line from "John" is returned instead of raising.
    """
    try:
        headers = {
            "Authorization": f"Bearer {os.environ.get('DEEPSEEK_API_KEY')}",
            "Content-Type": "application/json",
            "Accept": "application/json"
        }
        data = {
            "model": "deepseek/deepseek-r1",
            "messages": [{"role": "user", "content": system_prompt}],
            "max_tokens": 512,
            "temperature": 0.7
        }
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            data=json.dumps(data),
            timeout=120,  # bounded wait so a Q&A turn cannot hang forever
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]
        if not content:
            raise ValueError("empty response content")
        return content.strip()
    except Exception as e:  # deliberate best-effort boundary
        print("[ERROR] Deepseek API error:", e)
        fallback = {"speaker": "John", "text": "I'm sorry, I'm having trouble answering right now."}
        return json.dumps(fallback)