Narayana02 committed on
Commit
f7d27a7
·
verified ·
1 Parent(s): ee47f66

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +102 -46
utils.py CHANGED
@@ -1,18 +1,20 @@
1
- from groq import Groq
2
- from pydantic import BaseModel, ValidationError
3
- from typing import List, Literal
4
  import os
5
- import tiktoken
6
- import json
7
  import re
 
8
  import tempfile
 
 
9
  from gtts import gTTS
10
  from bs4 import BeautifulSoup
11
  import requests
 
 
 
12
 
13
- groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])
14
  tokenizer = tiktoken.get_encoding("cl100k_base")
15
 
 
16
  class DialogueItem(BaseModel):
17
  speaker: Literal["Priya", "Ananya"]
18
  text: str
@@ -20,6 +22,7 @@ class DialogueItem(BaseModel):
20
  class Dialogue(BaseModel):
21
  dialogue: List[DialogueItem]
22
 
 
23
  def truncate_text(text, max_tokens=2048):
24
  tokens = tokenizer.encode(text)
25
  if len(tokens) > max_tokens:
@@ -32,69 +35,59 @@ def extract_text_from_url(url):
32
  response.raise_for_status()
33
  soup = BeautifulSoup(response.text, 'html.parser')
34
 
 
35
  for script in soup(["script", "style"]):
36
  script.decompose()
37
 
 
38
  text = soup.get_text()
39
  lines = (line.strip() for line in text.splitlines())
40
  chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
41
- text = '\n'.join(chunk for chunk in chunks if chunk)
42
-
43
- return text
44
  except Exception as e:
45
  raise ValueError(f"Error extracting text from URL: {str(e)}")
46
 
 
 
 
 
 
 
 
 
47
  def generate_script(system_prompt: str, input_text: str, tone: str, target_length: str):
48
  input_text = truncate_text(input_text)
49
  word_limit = 300 if target_length == "Short (1-2 min)" else 750
50
 
 
51
  prompt = f"""
52
  {system_prompt}
53
  TONE: {tone}
54
  TARGET LENGTH: {target_length} (approximately {word_limit} words)
55
  INPUT TEXT: {input_text}
56
  Generate a complete, well-structured podcast script that:
57
- 1. Starts with a friendly, engaging introduction that feels natural, welcoming the listeners as if Priya and Ananya are speaking directly to them.
58
- 2. Covers the main points from the input text in a conversational, relaxed manner with smooth transitions. Priya (American accent) and Ananya (British accent) should engage in a back-and-forth conversation that feels authentic and lively, as if two people are having a real interaction.
59
- 3. Voice adjustments: Ensure that the flow of conversation is natural, with slight pauses for thought and clear enunciation, making it easy for all listeners to follow along. Keep the pace relaxed but steady, with slight variations in speed for emphasis on key points—ensuring clarity and ease of understanding.
60
- 4. Concludes with a smooth and heartfelt summary, wrapping up the discussion in a way that feels genuine and leaves listeners with a sense of closure, while thanking them for tuning in.
61
- 5. The overall voice speed and tone should match the conversation and topic, ensuring the dialogue is easy to comprehend. For more intense moments, you can use a slightly faster pace for energy, and for reflective points, use a slower, thoughtful pace.
62
- 6. Fits within the {word_limit} word limit for the target length of {target_length}.
63
- 7. Strongly emphasizes the {tone} tone throughout the conversation.
64
- For a humorous tone, include jokes, puns, and playful banter, making the conversation feel light-hearted while integrating subtle cultural references and humor that listeners can relate to.
65
- For a casual tone, use colloquial language and friendly expressions that make it feel like a relaxed, informal chat between friends. Include cultural references and inside jokes to keep the conversation fun.
66
- For a formal tone, maintain a professional style with clear, structured arguments, presenting information with respect and authority, but still keeping the conversation friendly and accessible.
67
- Ensure the script feels like a real, flowing podcast conversation without abrupt transitions or unnatural interruptions.
68
- """
69
 
70
-
71
- response = groq_client.chat.completions.create(
72
- messages=[
73
- {"role": "system", "content": prompt},
74
- ],
75
- model="llama-3.1-70b-versatile",
76
- max_tokens=2048,
77
- temperature=0.7
78
- )
79
-
80
- content = response.choices[0].message.content
81
- content = re.sub(r'```json\s*|\s*```', '', content)
82
 
83
  try:
84
- json_data = json.loads(content)
85
  dialogue = Dialogue.model_validate(json_data)
86
- except json.JSONDecodeError as json_error:
87
- match = re.search(r'\{.*\}', content, re.DOTALL)
88
- if match:
89
- try:
90
- json_data = json.loads(match.group())
91
- dialogue = Dialogue.model_validate(json_data)
92
- except (json.JSONDecodeError, ValidationError) as e:
93
- raise ValueError(f"Failed to parse dialogue JSON: {e}\nContent: {content}")
94
- else:
95
- raise ValueError(f"Failed to find valid JSON in the response: {content}")
96
- except ValidationError as e:
97
- raise ValueError(f"Failed to validate dialogue structure: {e}\nContent: {content}")
98
 
99
  return dialogue
100
 
@@ -104,3 +97,66 @@ def generate_audio(text: str, speaker: str) -> str:
104
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
105
  tts.save(temp_audio.name)
106
  return temp_audio.name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
 
 
2
  import re
3
+ import json
4
  import tempfile
5
+ from typing import List, Literal
6
+ from pydantic import BaseModel, ValidationError
7
  from gtts import gTTS
8
  from bs4 import BeautifulSoup
9
  import requests
10
+ import tiktoken
11
+ import gradio as gr
12
+ from transformers import pipeline
13
 
14
+ # Initialize necessary modules
15
  tokenizer = tiktoken.get_encoding("cl100k_base")
16
 
17
+ # Dialogue models
18
class DialogueItem(BaseModel):
    """A single turn of the podcast script: the speaker and the line spoken."""

    # Validation accepts exactly these two speaker names.
    speaker: Literal["Priya", "Ananya"]
    # The text spoken in this turn.
    text: str
 
22
class Dialogue(BaseModel):
    """A whole podcast script, validated as an ordered list of turns."""

    # Turns in the order they should be spoken.
    dialogue: List[DialogueItem]
24
 
25
+ # Utility functions
26
  def truncate_text(text, max_tokens=2048):
27
  tokens = tokenizer.encode(text)
28
  if len(tokens) > max_tokens:
 
35
  response.raise_for_status()
36
  soup = BeautifulSoup(response.text, 'html.parser')
37
 
38
+ # Remove scripts and styles
39
  for script in soup(["script", "style"]):
40
  script.decompose()
41
 
42
+ # Extract text
43
  text = soup.get_text()
44
  lines = (line.strip() for line in text.splitlines())
45
  chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
46
+ return '\n'.join(chunk for chunk in chunks if chunk)
 
 
47
  except Exception as e:
48
  raise ValueError(f"Error extracting text from URL: {str(e)}")
49
 
50
def summarize_text(text, max_length=150):
    """Summarize ``text`` with the ``facebook/bart-large-cnn`` summarization pipeline.

    Args:
        text: The text to summarize.
        max_length: Upper bound handed to the pipeline as ``max_length``.

    Returns:
        The ``summary_text`` field of the first pipeline result, or an empty
        string when ``text`` is empty or only whitespace.
    """
    # Nothing to summarize: skip loading/running the model entirely.
    if not text or not text.strip():
        return ""

    # Building the pipeline loads the model, so create it once and reuse it
    # on later calls instead of reloading it for every request.
    summarizer = getattr(summarize_text, "_summarizer", None)
    if summarizer is None:
        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        summarize_text._summarizer = summarizer

    summary = summarizer(
        text,
        max_length=max_length,
        # Keep the lower bound from exceeding a small caller-supplied max_length.
        min_length=min(50, max_length),
        do_sample=False,
        # Clip inputs longer than the model's maximum input size rather than failing.
        truncation=True,
    )
    return summary[0]['summary_text']
57
+
58
def generate_script(system_prompt: str, input_text: str, tone: str, target_length: str):
    """Build the podcast-script prompt and return a validated ``Dialogue``.

    Args:
        system_prompt: Leading instructions placed at the top of the prompt.
        input_text: Source material; shortened with ``truncate_text`` first.
        tone: Tone label interpolated into the prompt.
        target_length: Length label; ``"Short (1-2 min)"`` selects a 300-word
            limit, any other value selects 750.

    Returns:
        The ``Dialogue`` produced by validating the response JSON.

    Raises:
        ValueError: If the response is not valid JSON or does not match the
            ``Dialogue`` model.

    NOTE(review): in this version the response is a hard-coded mock and
    ``prompt`` is never passed anywhere, so the arguments only influence the
    unused prompt string; the returned dialogue is always the same four lines.
    """
    input_text = truncate_text(input_text)
    word_limit = 300 if target_length == "Short (1-2 min)" else 750

    # Prompt for dialogue generation (built here but not consumed below).
    prompt = f"""
    {system_prompt}
    TONE: {tone}
    TARGET LENGTH: {target_length} (approximately {word_limit} words)
    INPUT TEXT: {input_text}
    Generate a complete, well-structured podcast script that:
    - Starts with a friendly introduction.
    - Covers the main points from the input text in a conversational style.
    - Priya (American accent) and Ananya (British accent) alternate in a lively back-and-forth conversation.
    - Concludes with a heartfelt summary and thanks listeners.
    - Strongly emphasizes the {tone} tone and keeps within the {word_limit} word limit.
    """

    # Mock response standing in for the Groq API call (replace with the real
    # call if needed). It is serialized to JSON so the parsing/validation path
    # below is exercised the same way a real response would be.
    response_content = json.dumps({
        "dialogue": [
            {"speaker": "Priya", "text": "Hi everyone, welcome to our podcast!"},
            {"speaker": "Ananya", "text": "Yes, we're so glad you're here! Let's dive in."},
            {"speaker": "Priya", "text": "Today, we're talking about AI and its impact on society."},
            {"speaker": "Ananya", "text": "Absolutely, it's such a fascinating topic."}
        ]
    })

    try:
        json_data = json.loads(response_content)
        dialogue = Dialogue.model_validate(json_data)
    except (json.JSONDecodeError, ValidationError) as e:
        # Surface both parse and schema failures as one error type for callers.
        raise ValueError(f"Failed to validate dialogue structure: {e}\nContent: {response_content}")

    return dialogue
93
 
 
97
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
98
  tts.save(temp_audio.name)
99
  return temp_audio.name
100
+
101
+ # Main function for podcast generation
102
def generate_podcast(uploaded_file, url, tone, target_length):
    """Turn an uploaded text file or a web page into podcast audio and a transcript.

    Args:
        uploaded_file: Upload from the UI, either an object exposing its path
            as ``.name`` or a plain path string. Takes priority over ``url``.
        url: Page to read with ``extract_text_from_url`` when no file is given.
        tone: Tone label forwarded to ``generate_script``.
        target_length: Length label forwarded to ``generate_script``.

    Returns:
        A ``(audio_path, text)`` tuple in the same order as the interface
        outputs (audio first, transcript second). When the input is missing or
        unreadable, ``audio_path`` is ``None`` and ``text`` holds the message.
    """
    # Extract text from the uploaded file or URL.
    if uploaded_file:
        # Accept both a tempfile-like object (``.name``) and a bare path string.
        file_path = getattr(uploaded_file, "name", uploaded_file)
        try:
            with open(file_path, "r", encoding="utf-8") as file:
                input_text = file.read()
        except UnicodeDecodeError:
            # Binary uploads cannot be read in text mode; report it to the
            # user instead of crashing the request.
            return None, "The uploaded file could not be read as UTF-8 text."
    elif url:
        input_text = extract_text_from_url(url)
    else:
        # The message goes in the transcript slot; the audio slot stays empty.
        return None, "Please provide either a URL or a file."

    # Generate the podcast script.
    system_prompt = "You are an AI script generator for podcasts."
    dialogue = generate_script(system_prompt, input_text, tone, target_length)
    transcript = "\n".join(f"{item.speaker}: {item.text}" for item in dialogue.dialogue)

    # Generate one audio file per dialogue turn.
    audio_files = [generate_audio(item.text, item.speaker) for item in dialogue.dialogue]
    if not audio_files:
        # An empty script has no audio to return.
        return None, transcript

    # The segments are not merged yet: only the first file is returned (demo behavior).
    combined_audio = audio_files[0]
    return combined_audio, transcript
126
+
127
+ # Gradio Interface
128
# Help text shown as the description of the podcast tab.
instructions = """
1. Upload a PDF file or provide a URL to generate a podcast.
2. Choose the podcast tone and desired length.
3. Click submit to generate the podcast and transcript.
"""

# Podcast tab. The four inputs are passed to generate_podcast in this order
# (uploaded_file, url, tone, target_length); its two return values fill the
# outputs in order (audio, then transcript).
# NOTE(review): the picker accepts ".pdf", but generate_podcast reads the
# upload with a text-mode open() -- confirm PDFs are actually supported.
iface = gr.Interface(
    fn=generate_podcast,
    inputs=[
        gr.File(label="Upload PDF file (optional)", file_types=[".pdf", ".txt"]),
        gr.Textbox(label="OR Enter URL"),
        gr.Radio(["humorous", "casual", "formal"], label="Select podcast tone", value="casual"),
        # "Short (1-2 min)" is the exact string generate_script compares against
        # to pick the shorter word limit.
        gr.Radio(["Short (1-2 min)", "Medium (3-5 min)"], label="Podcast length", value="Medium (3-5 min)")
    ],
    outputs=[
        gr.Audio(label="Generated Podcast"),
        gr.Markdown(label="Transcript")
    ],
    title="🎙️ Amuthvani: AI Podcast!",
    description=instructions,
    allow_flagging="never",
    theme=gr.themes.Soft()
)

# Summarization tab: a single text box in, the summary text out.
# max_length is not exposed here, so summarize_text runs with its default.
summarize_interface = gr.Interface(
    fn=summarize_text,
    inputs=gr.Textbox(label="Enter text for briefing"),
    outputs=gr.Textbox(label="Briefing Document Summary"),
    title="📝 Briefing Document"
)

# Combine both interfaces as tabs and start the app.
# NOTE(review): launch() is called at module level, so it runs whenever this
# file is executed or imported.
combined = gr.TabbedInterface([iface, summarize_interface], ["Podcast Generator", "Briefing Document"])
combined.launch()