Update utils.py
Browse files
utils.py
CHANGED
@@ -5,14 +5,19 @@ import os
|
|
5 |
import tiktoken
|
6 |
import json
|
7 |
import re
|
8 |
-
from gtts import gTTS
|
9 |
import tempfile
|
10 |
import requests
|
11 |
from bs4 import BeautifulSoup
|
|
|
|
|
12 |
|
13 |
groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])
|
14 |
tokenizer = tiktoken.get_encoding("cl100k_base")
|
15 |
|
|
|
|
|
|
|
|
|
16 |
class DialogueItem(BaseModel):
|
17 |
speaker: Literal["Maria", "Sarah"]
|
18 |
text: str
|
@@ -95,11 +100,9 @@ def generate_script(system_prompt: str, input_text: str, tone: str, target_lengt
|
|
95 |
return dialogue
|
96 |
|
97 |
def generate_audio(text: str, speaker: str) -> str:
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
|
104 |
-
tts.save(temp_audio.name)
|
105 |
return temp_audio.name
|
|
|
5 |
import tiktoken
|
6 |
import json
|
7 |
import re
|
|
|
8 |
import tempfile
|
9 |
import requests
|
10 |
from bs4 import BeautifulSoup
|
11 |
+
from TTS.api import TTS
|
12 |
+
import torch
|
13 |
|
14 |
groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])
|
15 |
tokenizer = tiktoken.get_encoding("cl100k_base")
|
16 |
|
17 |
+
# Initialize TTS models
|
18 |
+
tts_maria = TTS("tts_models/en/ljspeech/tacotron2-DDC")
|
19 |
+
tts_sarah = TTS("tts_models/en/ljspeech/glow-tts")
|
20 |
+
|
21 |
class DialogueItem(BaseModel):
|
22 |
speaker: Literal["Maria", "Sarah"]
|
23 |
text: str
|
|
|
100 |
return dialogue
|
101 |
|
102 |
def generate_audio(text: str, speaker: str) -> str:
    """Synthesize ``text`` to a WAV file using the voice assigned to ``speaker``.

    Args:
        text: The line of dialogue to speak.
        speaker: ``"Maria"`` selects ``tts_maria``; any other value falls
            back to ``tts_sarah``.

    Returns:
        Path of a newly created ``.wav`` temporary file. The file is created
        with ``delete=False``, so it is not removed automatically and the
        caller is responsible for deleting it.

    Raises:
        Exception: Whatever the TTS backend raises is propagated unchanged,
            after the partially written temporary file has been removed.
    """
    # Reserve a unique path and close the handle *before* synthesis: whether a
    # NamedTemporaryFile can be re-opened by name while still open is
    # platform-dependent (it fails on Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        audio_path = temp_audio.name

    voice = tts_maria if speaker == "Maria" else tts_sarah  # else: Sarah
    try:
        voice.tts_to_file(text=text, file_path=audio_path)
    except Exception:
        # delete=False means nothing else cleans this up; do not leave an
        # orphaned file behind when synthesis fails.
        try:
            os.remove(audio_path)
        except OSError:
            pass  # best-effort cleanup; the synthesis error is what matters
        raise
    return audio_path
|