siddhartharyaai committed on
Commit
12811af
·
verified ·
1 Parent(s): 23d3b4d

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +32 -24
utils.py CHANGED
@@ -24,12 +24,38 @@ class DialogueItem(BaseModel):
24
  class Dialogue(BaseModel):
25
  dialogue: List[DialogueItem]
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def extract_text_from_url(url):
28
- """
29
- Extracts readable text from a webpage URL.
30
- """
31
- logging.info(f"Extracting text from URL: {url}")
32
-
33
  headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
34
  try:
35
  response = requests.get(url, headers=headers)
@@ -38,15 +64,11 @@ def extract_text_from_url(url):
38
  for script in soup(["script", "style"]):
39
  script.decompose()
40
  return soup.get_text(separator=' ').strip()
41
-
42
  except requests.exceptions.RequestException as e:
43
  logging.error(f"Error extracting text from URL: {str(e)}")
44
  return f"Failed to extract text from URL: {str(e)}"
45
 
46
  def transcribe_youtube_video(video_url):
47
- """
48
- Uses yt-dlp to extract audio from a YouTube video and transcribe it.
49
- """
50
  temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
51
  ydl_opts = {
52
  'format': 'bestaudio/best',
@@ -57,40 +79,26 @@ def transcribe_youtube_video(video_url):
57
  }],
58
  'outtmpl': temp_audio_file.name,
59
  }
60
-
61
  try:
62
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
63
  ydl.download([video_url])
64
-
65
  return transcribe_audio(temp_audio_file.name)
66
-
67
  except Exception as e:
68
  logging.error(f"Error downloading/transcribing YouTube video: {str(e)}")
69
  return f"Error processing YouTube video: {str(e)}"
70
 
71
  def transcribe_audio(file_path):
72
- """
73
- Uses Deepgram API to transcribe audio.
74
- """
75
  DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
76
  if not DEEPGRAM_API_KEY:
77
- logging.error("Deepgram API key is missing.")
78
  return "Deepgram API key is missing."
79
-
80
  url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
81
- headers = {
82
- "Authorization": f"Token {DEEPGRAM_API_KEY}",
83
- "Content-Type": "audio/mpeg"
84
- }
85
-
86
  try:
87
  with open(file_path, "rb") as f:
88
  response = requests.post(url, headers=headers, data=f)
89
-
90
  response.raise_for_status()
91
  data = response.json()
92
  return data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
93
-
94
  except Exception as e:
95
  logging.error(f"Error transcribing audio: {str(e)}")
96
  return f"Error transcribing audio: {str(e)}"
 
class Dialogue(BaseModel):
    """Model wrapping a full script as a list of ``DialogueItem`` entries."""

    # The individual lines of the script, stored as a list of DialogueItem.
    dialogue: List[DialogueItem]
26
 
def _fallback_dialogue(message):
    """Return a single-item Dialogue attributed to the speaker "Jane" carrying *message*."""
    return Dialogue(dialogue=[DialogueItem(speaker="Jane", display_speaker="Jane", text=message)])


def generate_script(prompt, text, tone, length, host_name, guest_name, sponsor_style, sponsor_provided):
    """Generate a dialogue script by sending *prompt* and *text* to the Groq chat API.

    Args:
        prompt: System message sent to the model.
        text: User message sent to the model (the source material).
        tone, length, sponsor_style, sponsor_provided: Accepted for interface
            compatibility; this function does not read them.
        host_name, guest_name: Forwarded to ``parse_script_to_dialogue`` together
            with the model output.

    Returns:
        A ``Dialogue`` built from the parsed model output. On any failure a
        one-item ``Dialogue`` containing an apology line is returned instead;
        this function does not raise.
    """
    try:
        # The client is created inside the try block so that any error raised
        # while constructing it is turned into the fallback dialogue as well,
        # rather than escaping to the caller.
        groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

        logging.info("Calling LLM for script generation.")
        response = groq_client.chat.completions.create(
            messages=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": text},
            ],
            model="DeepSeek-R1-Distill-Llama-70B",
            max_tokens=4096,
            temperature=0.6,
        )

        # Strip before testing for emptiness so a whitespace-only completion is
        # reported as an empty response instead of being handed to the parser.
        choices = response.choices
        script_content = (choices[0].message.content or "").strip() if choices else ""
        if not script_content:
            logging.error("LLM returned an empty response.")
            return _fallback_dialogue("Sorry, no data available.")

        dialogue_items = parse_script_to_dialogue(script_content, host_name, guest_name)
        if not dialogue_items:
            raise ValueError("Script parsing failed.")

        return Dialogue(dialogue=dialogue_items)

    except Exception as e:
        # logging.exception records the traceback alongside the message.
        logging.exception("Failed to generate script: %s", e)
        return _fallback_dialogue("I'm sorry, something went wrong.")
58
  def extract_text_from_url(url):
 
 
 
 
 
59
  headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
60
  try:
61
  response = requests.get(url, headers=headers)
 
64
  for script in soup(["script", "style"]):
65
  script.decompose()
66
  return soup.get_text(separator=' ').strip()
 
67
  except requests.exceptions.RequestException as e:
68
  logging.error(f"Error extracting text from URL: {str(e)}")
69
  return f"Failed to extract text from URL: {str(e)}"
70
 
71
  def transcribe_youtube_video(video_url):
 
 
 
72
  temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
73
  ydl_opts = {
74
  'format': 'bestaudio/best',
 
79
  }],
80
  'outtmpl': temp_audio_file.name,
81
  }
 
82
  try:
83
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
84
  ydl.download([video_url])
 
85
  return transcribe_audio(temp_audio_file.name)
 
86
  except Exception as e:
87
  logging.error(f"Error downloading/transcribing YouTube video: {str(e)}")
88
  return f"Error processing YouTube video: {str(e)}"
89
 
# (connect, read) timeout in seconds for the Deepgram request. Without a timeout
# requests waits indefinitely on a stalled connection.
_DEEPGRAM_TIMEOUT = (10, 300)


def transcribe_audio(file_path):
    """Transcribe the audio file at *file_path* with the Deepgram API.

    The file is streamed as the request body with an ``audio/mpeg`` content
    type. The API key is read from the ``DEEPGRAM_API_KEY`` environment
    variable.

    Args:
        file_path: Path of the audio file to upload.

    Returns:
        The transcript of the first alternative of the first channel (an empty
        string when that entry has no ``transcript`` key). Failures are not
        raised: a missing API key returns ``"Deepgram API key is missing."``
        and any other error returns ``"Error transcribing audio: <details>"``.
    """
    api_key = os.environ.get("DEEPGRAM_API_KEY")
    if not api_key:
        # Log the configuration problem so it is visible like every other
        # failure path, not only in the returned string.
        logging.error("Deepgram API key is missing.")
        return "Deepgram API key is missing."

    url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
    headers = {"Authorization": f"Token {api_key}", "Content-Type": "audio/mpeg"}

    try:
        with open(file_path, "rb") as audio:
            response = requests.post(url, headers=headers, data=audio, timeout=_DEEPGRAM_TIMEOUT)
        response.raise_for_status()
        data = response.json()
        return data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
    except Exception as e:
        # Broad on purpose: callers receive an error string, never an exception.
        logging.error(f"Error transcribing audio: {e}")
        return f"Error transcribing audio: {e}"