Spaces:
Running
Running
Update utils.py
Browse files
utils.py
CHANGED
@@ -7,128 +7,659 @@ from bs4 import BeautifulSoup
|
|
7 |
from typing import List, Literal
|
8 |
from pydantic import BaseModel
|
9 |
from pydub import AudioSegment, effects
|
|
|
10 |
import yt_dlp
|
11 |
import tiktoken
|
12 |
-
from groq import Groq
|
13 |
import numpy as np
|
14 |
import torch
|
15 |
-
import
|
16 |
-
|
17 |
-
logging.basicConfig(filename="debug.log", level=logging.DEBUG)
|
18 |
|
19 |
class DialogueItem(BaseModel):
|
20 |
-
speaker: Literal["Jane", "John"]
|
21 |
-
display_speaker: str = "Jane"
|
22 |
text: str
|
23 |
|
24 |
class Dialogue(BaseModel):
|
25 |
dialogue: List[DialogueItem]
|
26 |
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
try:
|
31 |
-
logging.info("Calling LLM for script generation.")
|
32 |
response = groq_client.chat.completions.create(
|
33 |
-
messages=[
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
model="DeepSeek-R1-Distill-Llama-70B",
|
38 |
-
max_tokens=4096,
|
39 |
-
temperature=0.6
|
40 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
-
|
50 |
-
raise ValueError("Script parsing failed.")
|
51 |
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
except Exception as e:
|
55 |
-
|
56 |
-
return
|
57 |
|
58 |
-
def
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
try:
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
return
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
try:
|
83 |
-
|
84 |
-
|
85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
except Exception as e:
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
if
|
93 |
-
|
94 |
-
|
95 |
-
|
|
|
96 |
try:
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
except Exception as e:
|
103 |
-
|
104 |
-
|
105 |
|
106 |
-
def
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
max_tokens=512,
|
112 |
-
temperature=0.6
|
113 |
-
)
|
114 |
|
115 |
-
|
116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
|
118 |
try:
|
119 |
-
|
120 |
-
|
|
|
121 |
|
122 |
-
|
123 |
-
|
124 |
-
raise ValueError("Generated MP3 file is empty or corrupted.")
|
125 |
|
126 |
-
|
127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
except Exception as e:
|
129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
from typing import List, Literal
|
8 |
from pydantic import BaseModel
|
9 |
from pydub import AudioSegment, effects
|
10 |
+
from transformers import pipeline
|
11 |
import yt_dlp
|
12 |
import tiktoken
|
13 |
+
from groq import Groq # Retained for other functions if needed
|
14 |
import numpy as np
|
15 |
import torch
|
16 |
+
import random
|
|
|
|
|
17 |
|
18 |
class DialogueItem(BaseModel):
    # One line of the generated conversation.
    # Comments are used instead of docstrings so the pydantic schema is untouched.

    # Canonical voice slot; validation only accepts "Jane" or "John".
    speaker: Literal["Jane", "John"]
    # Name to show for this line; falls back to "Jane" when omitted.
    display_speaker: str = "Jane"
    # The words spoken in this line.
    text: str
|
22 |
|
23 |
class Dialogue(BaseModel):
    # Container for a whole script: the DialogueItem entries in spoken order.
    dialogue: List[DialogueItem]
|
25 |
|
26 |
+
# Module-level speech-recognition pipeline (Whisper tiny, English-only).
# Device index 0 is used when CUDA is available, otherwise -1 (CPU).
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-tiny.en",
    device=-1 if not torch.cuda.is_available() else 0,
)
|
31 |
+
|
32 |
+
def truncate_text(text, max_tokens=2048):
    """Return *text* limited to at most *max_tokens* ``cl100k_base`` tokens.

    The text is returned unchanged when it already fits; otherwise the first
    *max_tokens* tokens are decoded back into a string.
    """
    print("[LOG] Truncating text if needed.")
    encoding = tiktoken.get_encoding("cl100k_base")
    token_ids = encoding.encode(text)
    if len(token_ids) <= max_tokens:
        return text
    print("[LOG] Text too long, truncating.")
    return encoding.decode(token_ids[:max_tokens])
|
40 |
|
41 |
+
def extract_text_from_url(url):
    """Download *url* and return its visible text, or ``""`` on any failure.

    ``<script>`` and ``<style>`` elements are removed before the text is
    collected. Non-200 responses and exceptions are logged and yield ``""``.
    """
    print("[LOG] Extracting text from URL:", url)
    try:
        headers = {
            "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                           "AppleWebKit/537.36 (KHTML, like Gecko) "
                           "Chrome/115.0.0.0 Safari/537.36")
        }
        # A timeout keeps an unresponsive host from blocking the caller forever.
        response = requests.get(url, headers=headers, timeout=30)
        if response.status_code != 200:
            print(f"[ERROR] Failed to fetch URL: {url} with status code {response.status_code}")
            return ""
        soup = BeautifulSoup(response.text, 'html.parser')
        for script in soup(["script", "style"]):
            script.decompose()
        text = soup.get_text(separator=' ')
        print("[LOG] Text extraction from URL successful.")
        return text
    except Exception as e:
        print(f"[ERROR] Exception during text extraction from URL: {e}")
        return ""
|
62 |
+
|
63 |
+
def pitch_shift(audio: AudioSegment, semitones: int) -> AudioSegment:
    """Return *audio* shifted by *semitones*.

    The raw samples are re-labelled with a frame rate scaled by
    ``2 ** (semitones / 12)`` and the result is then converted back to the
    original frame rate.
    """
    print(f"[LOG] Shifting pitch by {semitones} semitones.")
    original_rate = audio.frame_rate
    scaled_rate = int(original_rate * (2.0 ** (semitones / 12.0)))
    relabelled = audio._spawn(audio.raw_data, overrides={'frame_rate': scaled_rate})
    return relabelled.set_frame_rate(original_rate)
|
68 |
+
|
69 |
+
def is_sufficient(text: str, min_word_count: int = 500) -> bool:
    """Return True when *text* has at least *min_word_count* whitespace-separated words."""
    words = text.split()
    total = len(words)
    print(f"[DEBUG] Aggregated word count: {total}")
    if total < min_word_count:
        return False
    return True
|
73 |
+
|
74 |
+
def query_llm_for_additional_info(topic: str, existing_text: str) -> str:
    """Ask the Groq-hosted LLM for extra background on *topic*.

    Args:
        topic: Subject to expand on.
        existing_text: Information already gathered; included in the prompt.

    Returns:
        The model's reply with surrounding whitespace stripped, or ``""`` when
        the client cannot be created, the request fails, or the reply is empty.
    """
    print("[LOG] Querying LLM for additional information.")
    system_prompt = (
        "You are an AI assistant with extensive knowledge up to 2023-10. "
        "Provide additional relevant information on the following topic based on your knowledge base.\n\n"
        f"Topic: {topic}\n\n"
        f"Existing Information: {existing_text}\n\n"
        "Please add more insightful details, facts, and perspectives to enhance the understanding of the topic."
    )
    try:
        # Client construction is inside the try: it raises when no API key is
        # configured, and this function is a best-effort fallback.
        groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
        response = groq_client.chat.completions.create(
            messages=[{"role": "system", "content": system_prompt}],
            model="llama-3.3-70b-versatile",
            max_tokens=1024,
            temperature=0.7
        )
        content = response.choices[0].message.content
    except Exception as e:
        print("[ERROR] Groq API error during fallback:", e)
        return ""
    # The message content may be None; treat that as "no additional info".
    additional_info = (content or "").strip()
    print("[DEBUG] Additional information from LLM:")
    print(additional_info)
    return additional_info
|
98 |
|
99 |
+
## NEW: Updated research_topic that uses Tavily and Open Deep Research Agent
|
100 |
+
def research_topic(topic: str) -> str:
    """Build a research report on *topic* from Tavily search results and DeepSeek R1.

    Steps: (1) search Tavily for the topic, (2) extract page content for each
    returned URL via Tavily's extract endpoint, (3) send the combined content
    to ``deepseek/deepseek-r1`` through OpenRouter to write the report.

    Returns:
        The report text. When ``TAVILY_API_KEY`` is unset, returns the string
        ``"Tavily API key not configured."``; when report generation fails,
        returns a string starting with ``"Error generating research report:"``.
        Search and extract failures are logged and simply shrink the context.
    """
    print("[LOG] Researching topic using Tavily API and Open Deep Research Agent:", topic)
    tavily_api_key = os.environ.get("TAVILY_API_KEY")
    if not tavily_api_key:
        print("[ERROR] TAVILY_API_KEY not found in environment variables.")
        return "Tavily API key not configured."

    # Upper bound for each Tavily call so a stalled request cannot hang the app.
    tavily_timeout = 60

    tavily_search_url = "https://api.tavily.com/search"
    search_payload = {
        "query": topic,
        "search_depth": "advanced",
        "topic": "general",
        "days": 3,
        "max_results": 5,
        "include_images": False,
        "include_image_descriptions": False,
        "include_answer": True,
        "include_raw_content": True,
        "include_domains": [],
        "exclude_domains": []
    }
    headers = {
        "Authorization": f"Bearer {tavily_api_key}",
        "Content-Type": "application/json"
    }
    try:
        tavily_response = requests.post(
            tavily_search_url, headers=headers, json=search_payload, timeout=tavily_timeout
        )
        tavily_response.raise_for_status()
        search_results = tavily_response.json()
        urls = []
        for result in search_results.get("results", []):
            # Tavily reports the page address under "url"; "link" is kept as a
            # fallback for compatibility with the previous lookup.
            page_url = result.get("url") or result.get("link")
            if page_url:
                urls.append(page_url)
        print("[LOG] Tavily search returned URLs:", urls)
    except Exception as e:
        print("[ERROR] Tavily search API error:", e)
        urls = []

    extracted_contents = []
    tavily_extract_url = "https://api.tavily.com/extract"
    for url in urls:
        extract_payload = {
            "urls": url,
            "include_images": False,
            "extract_depth": "advanced"
        }
        try:
            extract_response = requests.post(
                tavily_extract_url, headers=headers, json=extract_payload, timeout=tavily_timeout
            )
            extract_response.raise_for_status()
            extract_data = extract_response.json()
            results = extract_data.get("results") or []
            if results:
                # Extract results carry the page text under "raw_content";
                # "content" is kept as a fallback for the previous lookup.
                content = results[0].get("raw_content") or results[0].get("content", "")
                if content:
                    extracted_contents.append(content)
        except Exception as e:
            print(f"[ERROR] Tavily extract API error for URL {url}: {e}")

    combined_content = "\n".join(extracted_contents)
    print("[DEBUG] Combined extracted content length:", len(combined_content))

    research_prompt = (
        f"Using the latest available information from the internet, generate a comprehensive and detailed research report on the topic '{topic}'. "
        "The report should be structured in the style of a world-class research report, including a Table of Contents, Introduction, Methodology, Findings, Discussion, and Conclusion sections. "
        "Ensure that the report is well-organized, cites recent developments, and provides in-depth analysis. "
        "Use the following extracted content as context:\n\n"
        f"{combined_content}\n\n"
        "If the extracted content is insufficient, perform additional research using reliable internet sources. "
        "Make sure to include all relevant and updated information."
    )

    deepseek_headers = {
        "Authorization": f"Bearer {os.environ.get('DEEPSEEK_API_KEY')}",
        "Content-Type": "application/json"
    }
    deepseek_data = {
        "model": "deepseek/deepseek-r1",
        "messages": [{"role": "user", "content": research_prompt}],
        "max_tokens": 4096,
        "temperature": 0.6
    }
    try:
        deepseek_response = requests.post("https://openrouter.ai/api/v1/chat/completions",
                                          headers=deepseek_headers, data=json.dumps(deepseek_data))
        deepseek_response.raise_for_status()
        response_json = deepseek_response.json()
        report_content = response_json["choices"][0]["message"]["content"].strip()
        print("[LOG] Research report generated successfully.")
    except Exception as e:
        print("[ERROR] Open Deep Research Agent API error:", e)
        report_content = f"Error generating research report: {str(e)}"

    return report_content
|
189 |
|
190 |
+
## NEW: Function to generate a PDF report from text using wkhtmltopdf.
|
191 |
+
def generate_pdf_report(report_text: str) -> str:
    """
    Generate a PDF file from the given report text using wkhtmltopdf.

    The text is HTML-escaped and its newlines become ``<br>`` tags before it is
    wrapped in a minimal HTML page and passed to the ``wkhtmltopdf`` executable.

    Returns the path to the generated PDF file, or ``""`` when conversion fails
    (for example when ``wkhtmltopdf`` is not installed).
    """
    import html
    import os
    import subprocess
    import tempfile

    # Build the body outside the f-string: a backslash inside an f-string
    # expression is a SyntaxError before Python 3.12. Escaping makes characters
    # such as "<" and "&" render literally instead of being parsed as markup.
    body_html = html.escape(report_text).replace("\n", "<br>")
    html_content = f"""
    <html>
    <head>
    <meta charset="utf-8">
    <title>Research Report</title>
    <style>
    body {{ font-family: Arial, sans-serif; margin: 40px; }}
    h1, h2, h3, h4, h5, h6 {{ color: #333; }}
    </style>
    </head>
    <body>
    {body_html}
    </body>
    </html>
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as html_file:
        html_file.write(html_content.encode("utf-8"))
        html_path = html_file.name

    # Reserve the output path and close the handle straight away so it is not leaked.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as pdf_file:
        pdf_path = pdf_file.name
    try:
        subprocess.run(["wkhtmltopdf", html_path, pdf_path], check=True)
        print("[LOG] PDF report generated at:", pdf_path)
    except Exception as e:
        print("[ERROR] Failed to generate PDF:", e)
        # Do not leave the empty placeholder file behind on failure.
        if os.path.exists(pdf_path):
            os.remove(pdf_path)
        pdf_path = ""
    finally:
        if os.path.exists(html_path):
            os.remove(html_path)

    return pdf_path
|
229 |
|
230 |
+
# Content-gathering helpers (Wikipedia, RSS, article scraping) and the script/audio pipeline follow.
|
|
|
231 |
|
232 |
+
def fetch_wikipedia_summary(topic: str) -> str:
    """Return the English Wikipedia summary extract for the best match of *topic*.

    Uses the ``opensearch`` API to find the top page title, then fetches that
    page's summary from the REST API. Returns ``""`` when nothing is found, a
    request fails, or an exception occurs (all failures are logged).
    """
    print("[LOG] Fetching Wikipedia summary for:", topic)
    try:
        search_url = (
            f"https://en.wikipedia.org/w/api.php?action=opensearch&search={requests.utils.quote(topic)}"
            "&limit=1&namespace=0&format=json"
        )
        resp = requests.get(search_url, timeout=30)
        if resp.status_code != 200:
            print(f"[ERROR] Failed to fetch Wikipedia search results for {topic}")
            return ""
        data = resp.json()
        # opensearch replies with [query, [titles], ...]; no titles means no match.
        if len(data) <= 1 or not data[1]:
            return ""
        title = data[1][0]
        # safe="" also encodes "/" so titles such as "AC/DC" stay one path segment.
        summary_url = (
            "https://en.wikipedia.org/api/rest_v1/page/summary/"
            f"{requests.utils.quote(title, safe='')}"
        )
        s_resp = requests.get(summary_url, timeout=30)
        if s_resp.status_code == 200:
            s_data = s_resp.json()
            if "extract" in s_data:
                print("[LOG] Wikipedia summary fetched successfully.")
                return s_data["extract"]
        return ""
    except Exception as e:
        print(f"[ERROR] Exception during Wikipedia summary fetch: {e}")
        return ""
|
257 |
|
258 |
+
def fetch_rss_feed(feed_url: str) -> list:
    """Download an RSS feed and return its ``<item>`` elements.

    Returns an empty list when the response status is not 200 or when any
    exception occurs; both cases are logged.
    """
    print("[LOG] Fetching RSS feed:", feed_url)
    try:
        # A timeout keeps a slow feed host from blocking the caller indefinitely.
        resp = requests.get(feed_url, timeout=30)
        if resp.status_code != 200:
            print(f"[ERROR] Failed to fetch RSS feed: {feed_url}")
            return []
        soup = BeautifulSoup(resp.content, "xml")
        items = soup.find_all("item")
        return items
    except Exception as e:
        print(f"[ERROR] Exception fetching RSS feed {feed_url}: {e}")
        return []
|
271 |
|
272 |
+
def find_relevant_article(items, topic: str, min_match=2) -> tuple:
    """Return ``(title, description, link)`` of the first item matching *topic*.

    The topic is lower-cased and split into word tokens. An item matches when
    at least *min_match* of those tokens occur as substrings of its lower-cased
    title plus description. Returns ``(None, None, None)`` if no item matches.
    """
    print("[LOG] Finding relevant articles...")
    topic_words = re.findall(r'\w+', topic.lower())

    def child_text(entry, tag_name):
        # Stripped text of the named child element, or "" when it is absent.
        node = entry.find(tag_name)
        return node.get_text().strip() if node else ""

    for entry in items:
        title = child_text(entry, "title")
        description = child_text(entry, "description")
        haystack = f"{title} {description}".lower()
        hits = len([word for word in topic_words if word in haystack])
        if hits < min_match:
            continue
        # The link is only looked up once the item is known to match.
        link = child_text(entry, "link")
        print(f"[LOG] Relevant article found: {title}")
        return title, description, link
    return None, None, None
|
285 |
+
|
286 |
+
def fetch_article_text(link: str) -> str:
    """Return the text of the first five ``<p>`` elements of the page at *link*.

    Returns ``""`` when *link* is empty, the response status is not 200, or an
    exception occurs; each case is logged.
    """
    print("[LOG] Fetching article text from:", link)
    if not link:
        print("[LOG] No link provided for article text.")
        return ""
    try:
        # A timeout keeps an unresponsive article host from hanging the caller.
        resp = requests.get(link, timeout=30)
        if resp.status_code != 200:
            print(f"[ERROR] Failed to fetch article from {link}")
            return ""
        soup = BeautifulSoup(resp.text, 'html.parser')
        paragraphs = soup.find_all("p")
        text = " ".join(p.get_text() for p in paragraphs[:5])
        print("[LOG] Article text fetched successfully.")
        return text.strip()
    except Exception as e:
        print(f"[ERROR] Error fetching article text: {e}")
        return ""
|
304 |
+
|
305 |
+
def generate_script(
    system_prompt: str,
    input_text: str,
    tone: str,
    target_length: str,
    host_name: str = "Jane",
    guest_name: str = "John",
    sponsor_style: str = "Separate Break",
    sponsor_provided=None
):
    """Generate a two-speaker script with DeepSeek R1 (via OpenRouter) and parse it.

    Args:
        system_prompt: Instructions placed at the top of the prompt.
        input_text: Source material for the conversation.
        tone: Key into the tone map ("Humorous", "Formal", "Casual",
            "Youthful"); unknown values fall back to "casual".
        target_length: Free text; its first integer is read as minutes
            (3 is used when it contains none).
        host_name: Display name for the host. Replaced by "Isha" when left at
            the default/empty and the Streamlit ``language_selection`` is
            "English (Indian)", "Hinglish" or "Hindi".
        guest_name: Display name for the guest; "Aarav" under the same rule.
        sponsor_style: "Separate Break" for a dedicated ad break; any other
            value asks for the sponsor content to be blended in.
        sponsor_provided: Truthy when sponsor content is present.

    Returns:
        A ``Dialogue`` whose items use speaker "Jane" (host) or "John" (guest),
        with ``display_speaker`` holding the name to show.

    Raises:
        ValueError: If the API call fails or the reply cannot be parsed.
    """
    print("[LOG] Generating script with tone:", tone, "and length:", target_length)
    import streamlit as st

    language = st.session_state.get("language_selection")
    if language in ["English (Indian)", "Hinglish", "Hindi"]:
        if host_name == "Jane" or not host_name:
            host_name = "Isha"
        if guest_name == "John" or not guest_name:
            guest_name = "Aarav"
    # Empty/None names would break the case-insensitive matching further down.
    host_name = host_name or "Jane"
    guest_name = guest_name or "John"

    words_per_minute = 150
    numeric_minutes = 3
    match = re.search(r"(\d+)", target_length)
    if match:
        numeric_minutes = int(match.group(1))

    min_words = max(50, numeric_minutes * 100)
    # Never advertise an upper bound below the lower bound (e.g. "0 minutes").
    max_words = max(min_words, numeric_minutes * words_per_minute)

    tone_map = {
        "Humorous": "funny and exciting, makes people chuckle",
        "Formal": "business-like, well-structured, professional",
        "Casual": "like a conversation between close friends, relaxed and informal",
        "Youthful": "like how teenagers might chat, energetic and lively"
    }
    chosen_tone = tone_map.get(tone, "casual")

    if sponsor_provided:
        if sponsor_style == "Separate Break":
            sponsor_instructions = (
                "If sponsor content is provided, include it in a separate ad break (~30 seconds). "
                "Use phrasing like 'Now a word from our sponsor...' and end with 'Back to the show' or similar."
            )
        else:
            sponsor_instructions = (
                "If sponsor content is provided, blend it naturally (~30 seconds) into the conversation. "
                "Avoid abrupt transitions."
            )
    else:
        sponsor_instructions = ""

    prompt = (
        f"{system_prompt}\n"
        f"TONE: {chosen_tone}\n"
        f"TARGET LENGTH: {target_length} (~{min_words}-{max_words} words)\n"
        f"INPUT TEXT: {input_text}\n\n"
        f"# Sponsor Style Instruction:\n{sponsor_instructions}\n\n"
        "Please provide the output in the following JSON format without any additional text:\n\n"
        "{\n"
        ' "dialogue": [\n'
        ' {\n'
        ' "speaker": "Jane",\n'
        ' "text": "..." \n'
        ' },\n'
        ' {\n'
        ' "speaker": "John",\n'
        ' "text": "..." \n'
        ' }\n'
        " ]\n"
        "}"
    )

    # Add the language instruction before logging so the log shows what is sent.
    if language == "Hinglish":
        prompt += "\n\nPlease generate the script in Romanized Hindi.\n"
    elif language == "Hindi":
        prompt += "\n\nPlease generate the script exclusively in Hindi, using only Hindi vocabulary and grammar without any English words or phrases.\n"

    print("[LOG] Sending prompt to Deepseek R1 via OpenRouter:")
    print(prompt)

    try:
        headers = {
            "Authorization": f"Bearer {os.environ.get('DEEPSEEK_API_KEY')}",
            "Content-Type": "application/json"
        }
        request_body = {
            "model": "deepseek/deepseek-r1",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 2048,
            "temperature": 0.7
        }
        response = requests.post("https://openrouter.ai/api/v1/chat/completions",
                                 headers=headers, data=json.dumps(request_body))
        response.raise_for_status()
        raw_content = response.json()["choices"][0]["message"]["content"].strip()
    except Exception as e:
        print("[ERROR] Deepseek API error:", e)
        raise ValueError(f"Error communicating with Deepseek API: {str(e)}") from e

    # The model may wrap the JSON in extra text; keep the outermost {...} span.
    start_index = raw_content.find('{')
    end_index = raw_content.rfind('}')
    if start_index == -1 or end_index == -1:
        raise ValueError("Failed to parse dialogue: No JSON found.")

    json_str = raw_content[start_index:end_index+1].strip()

    try:
        parsed = json.loads(json_str)
        dialogue_list = parsed.get("dialogue", [])

        host_key = host_name.lower()
        guest_key = guest_name.lower()
        new_dialogue_items = []
        for d in dialogue_list:
            raw_speaker = d.get("speaker", "Jane")
            speaker_key = str(raw_speaker).strip().lower()
            if speaker_key == host_key:
                d["speaker"] = "Jane"
                d["display_speaker"] = host_name
            elif speaker_key == guest_key:
                d["speaker"] = "John"
                d["display_speaker"] = guest_name
            elif speaker_key == "john":
                # The JSON template uses the placeholder "John"; with a custom
                # guest name this must still map to the guest voice.
                d["speaker"] = "John"
                d["display_speaker"] = guest_name
            elif speaker_key == "jane":
                # Same for the host placeholder from the JSON template.
                d["speaker"] = "Jane"
                d["display_speaker"] = host_name
            else:
                # Unknown speakers are voiced as the host but keep their label.
                d["speaker"] = "Jane"
                d["display_speaker"] = str(raw_speaker)
            new_dialogue_items.append(DialogueItem(**d))

        return Dialogue(dialogue=new_dialogue_items)
    except json.JSONDecodeError as e:
        print("[ERROR] JSON decoding (format) failed:", e)
        raise ValueError(f"Failed to parse dialogue: {str(e)}") from e
    except Exception as e:
        print("[ERROR] JSON decoding failed:", e)
        raise ValueError(f"Failed to parse dialogue: {str(e)}") from e
|
436 |
|
437 |
+
def transcribe_youtube_video(video_url: str) -> str:
    """Fetch the English transcript of a YouTube video through RapidAPI.

    An 11-character video ID is taken from *video_url* and sent to the
    ``youtube-transcriptor`` endpoint; the ``transcriptionAsText`` field of the
    first result is returned.

    Raises:
        ValueError: If no video ID can be extracted, or if the request fails
            or returns an unexpected/empty transcript.
    """
    print("[LOG] Transcribing YouTube video via RapidAPI:", video_url)
    id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", video_url)
    if id_match is None:
        raise ValueError(f"Invalid YouTube URL: {video_url}, cannot extract video ID.")

    video_id = id_match.group(1)
    print("[LOG] Extracted video ID:", video_id)

    endpoint = "https://youtube-transcriptor.p.rapidapi.com/transcript"
    query = {"video_id": video_id, "lang": "en"}
    request_headers = {
        "x-rapidapi-host": "youtube-transcriptor.p.rapidapi.com",
        "x-rapidapi-key": os.environ.get("RAPIDAPI_KEY")
    }

    try:
        response = requests.get(endpoint, headers=request_headers, params=query, timeout=30)
        print("[LOG] RapidAPI Response Status Code:", response.status_code)
        print("[LOG] RapidAPI Response Body:", response.text)

        if response.status_code != 200:
            raise ValueError(f"RapidAPI transcription error: {response.status_code}, {response.text}")

        payload = response.json()
        if not (isinstance(payload, list) and payload):
            raise ValueError(f"Unexpected transcript format or empty transcript: {payload}")

        transcript = payload[0].get('transcriptionAsText', '').strip()
        if not transcript:
            raise ValueError("transcriptionAsText field is missing or empty.")

        print("[LOG] Transcript retrieval successful.")
        print(f"[DEBUG] Transcript Length: {len(transcript)} characters.")
        if len(transcript) > 200:
            snippet = transcript[:200] + "..."
        else:
            snippet = transcript
        print(f"[DEBUG] Transcript Snippet: {snippet}")

        return transcript
    except Exception as e:
        # Errors raised above are re-wrapped here as well.
        print("[ERROR] RapidAPI transcription error:", e)
        raise ValueError(f"Error transcribing YouTube video via RapidAPI: {str(e)}")
|
478 |
+
|
479 |
+
def _export_normalized_mp3(source_path: str, source_format: str) -> str:
    """Normalize the audio at *source_path*, export it to a new temp MP3, and delete the source file."""
    try:
        audio_seg = AudioSegment.from_file(source_path, format=source_format)
        audio_seg = effects.normalize(audio_seg)
        # Reserve the output path and close the handle so it is not leaked.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as out_file:
            final_mp3_path = out_file.name
        try:
            # export() hands back the open output file; close it right away.
            audio_seg.export(final_mp3_path, format="mp3").close()
        except Exception:
            if os.path.exists(final_mp3_path):
                os.remove(final_mp3_path)
            raise
        return final_mp3_path
    finally:
        # The intermediate download is removed whether or not conversion worked.
        if os.path.exists(source_path):
            os.remove(source_path)


def generate_audio_mp3(text: str, speaker: str) -> str:
    """Synthesize *text* for *speaker* and return the path of a normalized MP3.

    The Streamlit ``language_selection`` session value picks the backend:
    "English (American)" (the default) uses Deepgram, every other value uses
    the Murf API. For "Hinglish" the text is first transliterated from
    Devanagari to IAST.

    Args:
        text: The line to speak.
        speaker: "John" selects the male voice; any other value the female one.

    Returns:
        Path to a temporary ``.mp3`` file that the caller is responsible for.

    Raises:
        ValueError: On any failure (HTTP error, unexpected response, audio
            conversion error); the original exception is chained.
    """
    # Network timeout in seconds, so a stalled TTS request cannot hang the app.
    tts_timeout = 60
    try:
        import streamlit as st
        print(f"[LOG] Generating audio for speaker: {speaker}")
        language_selection = st.session_state.get("language_selection", "English (American)")
        if language_selection == "English (American)":
            print("[LOG] Using Deepgram for English (American)")
            if speaker in ["John", "Jane"]:
                processed_text = text
            else:
                processed_text = _preprocess_text_for_tts(text, speaker)
            deepgram_api_url = "https://api.deepgram.com/v1/speak"
            params = {"model": "aura-asteria-en"}
            if speaker == "John":
                params["model"] = "aura-zeus-en"
            headers = {
                "Accept": "audio/mpeg",
                "Content-Type": "application/json",
                "Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
            }
            body = {"text": processed_text}
            # The context manager releases the streamed connection on every path.
            with requests.post(deepgram_api_url, params=params, headers=headers,
                               json=body, stream=True, timeout=tts_timeout) as response:
                if response.status_code != 200:
                    raise ValueError(f"Deepgram TTS error: {response.status_code}, {response.text}")
                content_type = response.headers.get('Content-Type', '')
                if 'audio/mpeg' not in content_type:
                    raise ValueError("Unexpected Content-Type from Deepgram.")
                with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            mp3_file.write(chunk)
                    mp3_path = mp3_file.name
            return _export_normalized_mp3(mp3_path, "mp3")
        else:
            print(f"[LOG] Using Murf API for language: {language_selection}")
            if language_selection == "Hinglish":
                from indic_transliteration.sanscript import transliterate, DEVANAGARI, IAST
                text = transliterate(text, DEVANAGARI, IAST)
            api_key = os.environ.get("MURF_API_KEY")
            headers = {
                "Content-Type": "application/json",
                "Accept": "application/json",
                "api-key": api_key
            }
            # Hindi and Hinglish share the hi-IN voices; everything else uses en-IN.
            if language_selection in ["Hinglish", "Hindi"]:
                multi_native_locale = "hi-IN"
                voice_id = "hi-IN-kabir" if speaker == "John" else "hi-IN-shweta"
            else:
                multi_native_locale = "en-IN"
                voice_id = "en-IN-aarav" if speaker == "John" else "en-IN-isha"
            payload = {
                "audioDuration": 0,
                "channelType": "MONO",
                "encodeAsBase64": False,
                "format": "WAV",
                "modelVersion": "GEN2",
                "multiNativeLocale": multi_native_locale,
                "pitch": 0,
                "pronunciationDictionary": {},
                "rate": 0,
                "sampleRate": 48000,
                "style": "Conversational",
                "text": text,
                "variation": 1,
                "voiceId": voice_id
            }
            response = requests.post("https://api.murf.ai/v1/speech/generate",
                                     headers=headers, json=payload, timeout=tts_timeout)
            if response.status_code != 200:
                raise ValueError(f"Murf API error: {response.status_code}, {response.text}")
            json_resp = response.json()
            audio_url = json_resp.get("audioFile")
            if not audio_url:
                raise ValueError("No audio file URL returned by Murf API")
            audio_response = requests.get(audio_url, timeout=tts_timeout)
            if audio_response.status_code != 200:
                raise ValueError(f"Error fetching audio from {audio_url}")
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
                wav_file.write(audio_response.content)
                wav_path = wav_file.name
            return _export_normalized_mp3(wav_path, "wav")
    except Exception as e:
        print("[ERROR] Error generating audio:", e)
        raise ValueError(f"Error generating audio: {str(e)}") from e
|
576 |
+
|
577 |
+
def transcribe_youtube_video_OLD_YTDLP(video_url: str) -> str:
    """No-op placeholder: ignores *video_url* and returns ``None``.

    NOTE(review): the name suggests this was an earlier yt-dlp based
    implementation; only the empty stub remains in this file.
    """
    return None
|
579 |
+
|
580 |
+
def _preprocess_text_for_tts(text: str, speaker: str) -> str:
    """Rewrite *text* so a TTS engine reads it more naturally.

    Applied in order: expand "No." to "Number", respell "SaaS" as "sass",
    spell out all-caps abbreviations letter by letter (except NASA, NATO and
    UNESCO), replace hyphens with spaces, and turn laugh/sigh/groan words into
    stage directions. For speakers other than "Jane", pauses and occasional
    random filler words are inserted after certain keywords and conjunctions,
    and "uh"/"um"/"ah" are removed. Finally the first letter of the text and of
    each sentence is upper-cased.

    The result is non-deterministic for speakers other than "Jane" because the
    filler words are chosen with ``random``.
    """
    text = re.sub(r"\bNo\.\b", "Number", text)
    # Case-insensitivity is passed as a flag: an inline "(?i)" that is not at
    # the very start of the pattern raises re.error on Python 3.11+.
    text = re.sub(r"\bSaaS\b", "sass", text, flags=re.IGNORECASE)
    abbreviations_as_words = {"NASA", "NATO", "UNESCO"}

    def insert_periods_for_abbrev(m):
        abbr = m.group(0)
        if abbr in abbreviations_as_words:
            return abbr
        return ".".join(list(abbr)) + "."

    text = re.sub(r"\b([A-Z]{2,})\b", insert_periods_for_abbrev, text)
    text = re.sub(r"\.\.", ".", text)

    def remove_periods_for_tts(m):
        return m.group().replace(".", " ").strip()

    # Dotted abbreviations ("A.P.I.") become space-separated letters ("A P I").
    text = re.sub(r"[A-Z]\.[A-Z](?:\.[A-Z])*\.", remove_periods_for_tts, text)
    text = re.sub(r"-", " ", text)
    text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
    text = re.sub(r"\bsigh\b", "(* sighs *)", text, flags=re.IGNORECASE)
    text = re.sub(r"\b(groan|moan)\b", "(* groans *)", text, flags=re.IGNORECASE)
    if speaker != "Jane":
        def insert_thinking_pause(m):
            word = m.group(1)
            # Roughly 30% of matches also get a spoken filler after the pause.
            if random.random() < 0.3:
                filler = random.choice(['hmm,', 'well,', 'let me see,'])
                return f"{word}..., {filler}"
            else:
                return f"{word}...,"

        keywords_pattern = r"\b(important|significant|crucial|point|topic)\b"
        text = re.sub(keywords_pattern, insert_thinking_pause, text, flags=re.IGNORECASE)
        conj_pattern = r"\b(and|but|so|because|however)\b"
        text = re.sub(conj_pattern, lambda m: f"{m.group()}...", text, flags=re.IGNORECASE)
        text = re.sub(r"\b(uh|um|ah)\b", "", text, flags=re.IGNORECASE)

    def capitalize_match(m):
        return m.group().upper()

    text = re.sub(r'(^\s*\w)|([.!?]\s*\w)', capitalize_match, text)
    return text.strip()
|
615 |
+
|
616 |
+
def _spell_digits(d: str) -> str:
    """Return the digits of *d* as space-separated English words.

    Characters other than ``0``-``9`` are skipped, so ``"a1b20"`` becomes
    ``"one two zero"`` and a string without digits becomes ``""``.
    """
    names = ("zero", "one", "two", "three", "four",
             "five", "six", "seven", "eight", "nine")
    word_for = dict(zip("0123456789", names))
    digits_only = filter(word_for.__contains__, d)
    return " ".join(map(word_for.__getitem__, digits_only))
|
623 |
+
|
624 |
+
def mix_with_bg_music(spoken: AudioSegment, custom_music_path=None) -> AudioSegment:
    """Overlay *spoken* on looped background music and return the mix.

    The music is read as MP3 from *custom_music_path*, or from
    ``"bg_music.mp3"`` when no path is given, lowered by 18 dB and looped to
    the length of the speech plus two seconds. The speech starts two seconds
    into the music.

    If the music cannot be loaded or has zero length, *spoken* is returned
    unchanged.
    """
    if custom_music_path:
        music_path = custom_music_path
    else:
        music_path = "bg_music.mp3"

    try:
        bg_music = AudioSegment.from_file(music_path, format="mp3")
    except Exception as e:
        print("[ERROR] Failed to load background music:", e)
        return spoken

    # A zero-length track would never fill the target length, so the loop
    # below would run forever; fall back to speech only.
    if len(bg_music) == 0:
        print("[ERROR] Background music is empty; skipping background mix.")
        return spoken

    bg_music = bg_music - 18.0
    total_length_ms = len(spoken) + 2000
    looped_music = AudioSegment.empty()
    while len(looped_music) < total_length_ms:
        looped_music += bg_music
    looped_music = looped_music[:total_length_ms]
    final_mix = looped_music.overlay(spoken, position=2000)
    return final_mix
|
644 |
|
645 |
+
def call_groq_api_for_qa(system_prompt: str) -> str:
    """Send *system_prompt* to ``deepseek/deepseek-r1`` via OpenRouter and return the reply text.

    Despite the name, the request goes to OpenRouter and uses
    ``DEEPSEEK_API_KEY``. On any exception the error is logged and a JSON
    string with a canned apology attributed to "John" is returned instead.
    """
    try:
        api_key = os.environ.get('DEEPSEEK_API_KEY')
        request_headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
            "Accept": "application/json"
        }
        request_body = json.dumps({
            "model": "deepseek/deepseek-r1",
            "messages": [{"role": "user", "content": system_prompt}],
            "max_tokens": 512,
            "temperature": 0.7
        })
        response = requests.post("https://openrouter.ai/api/v1/chat/completions",
                                 headers=request_headers, data=request_body)
        response.raise_for_status()
        first_choice = response.json()["choices"][0]
        return first_choice["message"]["content"].strip()
    except Exception as e:
        print("[ERROR] Deepseek API error:", e)
        return json.dumps(
            {"speaker": "John", "text": "I'm sorry, I'm having trouble answering right now."}
        )
|