Spaces:
Running
Running
Update utils.py
Browse files
utils.py
CHANGED
@@ -12,6 +12,14 @@ import tiktoken
|
|
12 |
from groq import Groq
|
13 |
import numpy as np
|
14 |
import torch
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
class DialogueItem(BaseModel):
|
17 |
speaker: Literal["Jane", "John"]
|
@@ -28,6 +36,7 @@ def generate_script(prompt, text, tone, length, host_name, guest_name, sponsor_s
|
|
28 |
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
29 |
|
30 |
try:
|
|
|
31 |
response = groq_client.chat.completions.create(
|
32 |
messages=[
|
33 |
{"role": "system", "content": prompt},
|
@@ -39,7 +48,20 @@ def generate_script(prompt, text, tone, length, host_name, guest_name, sponsor_s
|
|
39 |
)
|
40 |
|
41 |
if not response.choices or not response.choices[0].message.content:
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
script_content = response.choices[0].message.content.strip()
|
45 |
dialogue_items = parse_script_to_dialogue(script_content, host_name, guest_name)
|
@@ -50,7 +72,7 @@ def generate_script(prompt, text, tone, length, host_name, guest_name, sponsor_s
|
|
50 |
return Dialogue(dialogue=dialogue_items)
|
51 |
|
52 |
except Exception as e:
|
53 |
-
|
54 |
return Dialogue(dialogue=[DialogueItem(speaker="Jane", display_speaker="Jane", text="I'm sorry, something went wrong.")])
|
55 |
|
56 |
def parse_script_to_dialogue(script, host_name, guest_name):
|
@@ -80,67 +102,9 @@ def truncate_text(text, max_tokens=2048):
|
|
80 |
|
81 |
return text
|
82 |
|
83 |
-
def extract_text_from_url(url):
|
84 |
-
"""
|
85 |
-
Extracts readable text from a given URL.
|
86 |
-
"""
|
87 |
-
headers = {
|
88 |
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
|
89 |
-
}
|
90 |
-
response = requests.get(url, headers=headers)
|
91 |
-
if response.status_code != 200:
|
92 |
-
return ""
|
93 |
-
|
94 |
-
soup = BeautifulSoup(response.text, 'html.parser')
|
95 |
-
for script in soup(["script", "style"]):
|
96 |
-
script.decompose()
|
97 |
-
|
98 |
-
return soup.get_text(separator=' ')
|
99 |
-
|
100 |
-
def transcribe_youtube_video(video_url):
|
101 |
-
"""
|
102 |
-
Uses yt-dlp to extract audio and transcribe speech.
|
103 |
-
"""
|
104 |
-
temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
|
105 |
-
ydl_opts = {
|
106 |
-
'format': 'bestaudio/best',
|
107 |
-
'postprocessors': [{
|
108 |
-
'key': 'FFmpegExtractAudio',
|
109 |
-
'preferredcodec': 'mp3',
|
110 |
-
'preferredquality': '192',
|
111 |
-
}],
|
112 |
-
'outtmpl': temp_audio_file.name,
|
113 |
-
}
|
114 |
-
|
115 |
-
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
116 |
-
ydl.download([video_url])
|
117 |
-
|
118 |
-
return transcribe_audio(temp_audio_file.name)
|
119 |
-
|
120 |
-
def transcribe_audio(file_path):
|
121 |
-
"""
|
122 |
-
Uses Deepgram API to transcribe audio.
|
123 |
-
"""
|
124 |
-
DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
|
125 |
-
if not DEEPGRAM_API_KEY:
|
126 |
-
return "Deepgram API key is missing."
|
127 |
-
|
128 |
-
url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
|
129 |
-
headers = {
|
130 |
-
"Authorization": f"Token {DEEPGRAM_API_KEY}",
|
131 |
-
"Content-Type": "audio/mpeg"
|
132 |
-
}
|
133 |
-
|
134 |
-
with open(file_path, "rb") as f:
|
135 |
-
response = requests.post(url, headers=headers, data=f)
|
136 |
-
|
137 |
-
response.raise_for_status()
|
138 |
-
data = response.json()
|
139 |
-
return data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
|
140 |
-
|
141 |
def generate_audio_mp3(text, speaker):
|
142 |
"""
|
143 |
-
Uses Groq's LLM to generate realistic text-to-speech (TTS) audio.
|
144 |
"""
|
145 |
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
146 |
response = groq_client.chat.completions.create(
|
@@ -150,25 +114,28 @@ def generate_audio_mp3(text, speaker):
|
|
150 |
temperature=0.6
|
151 |
)
|
152 |
|
153 |
-
speech_text = response.choices[0].message.content
|
154 |
temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
|
155 |
-
temp_audio.write(speech_text.encode('utf-8'))
|
156 |
-
return temp_audio.name
|
157 |
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
|
|
|
|
|
|
163 |
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
else:
|
169 |
-
mixed_audio = spoken_audio
|
170 |
|
171 |
-
|
|
|
|
|
|
|
|
|
172 |
|
173 |
def research_topic(topic: str) -> str:
|
174 |
"""
|
@@ -177,7 +144,9 @@ def research_topic(topic: str) -> str:
|
|
177 |
from run_agents import run_deep_research_agent # Ensure Open Deep Researcher is connected.
|
178 |
|
179 |
try:
|
|
|
180 |
research_result = run_deep_research_agent(topic)
|
181 |
return research_result if research_result else f"No new information found for '{topic}'."
|
182 |
except Exception as e:
|
|
|
183 |
return f"Error during research: {str(e)}"
|
|
|
12 |
from groq import Groq
|
13 |
import numpy as np
|
14 |
import torch
|
15 |
+
import logging
|
16 |
+
|
17 |
+
# Configure Logging
|
18 |
+
logging.basicConfig(
|
19 |
+
filename="debug.log",
|
20 |
+
level=logging.DEBUG,
|
21 |
+
format="%(asctime)s - %(levelname)s - %(message)s"
|
22 |
+
)
|
23 |
|
24 |
class DialogueItem(BaseModel):
|
25 |
speaker: Literal["Jane", "John"]
|
|
|
36 |
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
37 |
|
38 |
try:
|
39 |
+
logging.info("Calling LLM for script generation.")
|
40 |
response = groq_client.chat.completions.create(
|
41 |
messages=[
|
42 |
{"role": "system", "content": prompt},
|
|
|
48 |
)
|
49 |
|
50 |
if not response.choices or not response.choices[0].message.content:
|
51 |
+
logging.error("LLM returned an empty response. Retrying with fallback prompt...")
|
52 |
+
fallback_prompt = "Summarize the topic in a conversational way between two speakers."
|
53 |
+
response = groq_client.chat.completions.create(
|
54 |
+
messages=[
|
55 |
+
{"role": "system", "content": fallback_prompt},
|
56 |
+
{"role": "user", "content": text}
|
57 |
+
],
|
58 |
+
model="DeepSeek-R1-Distill-Llama-70B",
|
59 |
+
max_tokens=4096,
|
60 |
+
temperature=0.6
|
61 |
+
)
|
62 |
+
|
63 |
+
if not response.choices or not response.choices[0].message.content:
|
64 |
+
raise ValueError("LLM failed twice. No valid script generated.")
|
65 |
|
66 |
script_content = response.choices[0].message.content.strip()
|
67 |
dialogue_items = parse_script_to_dialogue(script_content, host_name, guest_name)
|
|
|
72 |
return Dialogue(dialogue=dialogue_items)
|
73 |
|
74 |
except Exception as e:
|
75 |
+
logging.error(f"Failed to generate script: {str(e)}")
|
76 |
return Dialogue(dialogue=[DialogueItem(speaker="Jane", display_speaker="Jane", text="I'm sorry, something went wrong.")])
|
77 |
|
78 |
def parse_script_to_dialogue(script, host_name, guest_name):
|
|
|
102 |
|
103 |
return text
|
104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
def generate_audio_mp3(text, speaker):
|
106 |
"""
|
107 |
+
Uses Groq's LLM to generate realistic text-to-speech (TTS) audio and ensures it is valid.
|
108 |
"""
|
109 |
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
110 |
response = groq_client.chat.completions.create(
|
|
|
114 |
temperature=0.6
|
115 |
)
|
116 |
|
117 |
+
speech_text = response.choices[0].message.content.strip()
|
118 |
temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
|
|
|
|
|
119 |
|
120 |
+
try:
|
121 |
+
temp_audio.write(speech_text.encode('utf-8'))
|
122 |
+
temp_audio.close()
|
123 |
+
|
124 |
+
# ✅ Verify MP3 File Integrity Before Returning
|
125 |
+
audio_test = AudioSegment.from_file(temp_audio.name, format="mp3")
|
126 |
+
if len(audio_test) == 0:
|
127 |
+
raise ValueError("Generated MP3 file is empty or corrupted.")
|
128 |
|
129 |
+
return temp_audio.name
|
130 |
+
|
131 |
+
except Exception as e:
|
132 |
+
logging.error(f"Failed to generate MP3 file: {str(e)}")
|
|
|
|
|
133 |
|
134 |
+
# ✅ Fallback: Generate a silent MP3 file to prevent ffmpeg crashes
|
135 |
+
silent_audio = AudioSegment.silent(duration=2000) # 2 seconds of silence
|
136 |
+
fallback_mp3 = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
|
137 |
+
silent_audio.export(fallback_mp3.name, format="mp3")
|
138 |
+
return fallback_mp3.name
|
139 |
|
140 |
def research_topic(topic: str) -> str:
|
141 |
"""
|
|
|
144 |
from run_agents import run_deep_research_agent # Ensure Open Deep Researcher is connected.
|
145 |
|
146 |
try:
|
147 |
+
logging.info(f"Running Open Deep Researcher for topic: {topic}")
|
148 |
research_result = run_deep_research_agent(topic)
|
149 |
return research_result if research_result else f"No new information found for '{topic}'."
|
150 |
except Exception as e:
|
151 |
+
logging.error(f"Error during research: {str(e)}")
|
152 |
return f"Error during research: {str(e)}"
|