Update app.py
app.py CHANGED
@@ -15,6 +15,7 @@ import torch # For tensor operations
 import soundfile as sf # For saving audio as .wav files
 import sentencepiece # Required by SpeechT5Processor for tokenization
 
+
 ##########################################
 # Streamlit application title and input
 ##########################################
@@ -58,15 +59,13 @@ def analyze_dominant_emotion(user_review):
 ##########################################
 # Step 2: Response Generation Function
 ##########################################
-
-
 def response_gen(user_review):
     """
     Generate a concise and logical response based on the sentiment of the user's comment.
     """
-    dominant_emotion = analyze_dominant_emotion(user_review) #
+    dominant_emotion = analyze_dominant_emotion(user_review) # Determine the dominant emotion from the user's comment
     emotion_label = dominant_emotion['label'].lower() # Extract the emotion label in lowercase format
-
+
     # Define response templates for each emotion
     emotion_prompts = {
         "anger": (
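The hunk above only touches the call site of analyze_dominant_emotion; the helper itself is outside this diff. For orientation, here is a minimal sketch of what such a helper typically looks like, assuming a Hugging Face text-classification pipeline; the checkpoint name is an illustrative assumption, not taken from this commit:

from transformers import pipeline

# Hypothetical reconstruction of the helper this diff calls into.
# The checkpoint is an assumption; any emotion classifier that
# returns [{"label": ..., "score": ...}] fits the call site.
emotion_classifier = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
)

def analyze_dominant_emotion(user_review):
    """Return the top emotion, e.g. {'label': 'anger', 'score': 0.93}."""
    return emotion_classifier(user_review)[0]

The returned dict matches the dominant_emotion['label'].lower() access pattern in the changed line.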
@@ -139,14 +138,14 @@ def response_gen(user_review):
     ) # Default to neutral if emotion is not found
 
     # Load the tokenizer and language model for response generation
-    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B") # Load tokenizer for
+    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B") # Load tokenizer for text processing
     model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B") # Load language model for response generation
 
     inputs = tokenizer(prompt, return_tensors="pt") # Tokenize the input prompt
     outputs = model.generate(
         **inputs,
-        max_new_tokens=
-        min_length=50, # Ensure
+        max_new_tokens=150, # Limit generated tokens to ensure concise responses
+        min_length=50, # Ensure the generated response is logical and complete
         no_repeat_ngram_size=2, # Avoid repetitive phrases
         temperature=0.7 # Add randomness for more natural responses
     )
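The new generation arguments, shown self-contained below. One caveat worth flagging for review: in transformers, temperature only takes effect when sampling is enabled, so this sketch adds do_sample=True (an addition, not part of the commit). The prompt string is likewise a placeholder, since the real prompt is built from the emotion_prompts templates:

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B")
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B")

prompt = "Customer said: 'The delivery was late.' Write a short, polite reply:"  # placeholder

inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(
    **inputs,
    max_new_tokens=150,      # cap on newly generated tokens, as in the diff
    min_length=50,           # note: counts prompt + generated tokens together
    no_repeat_ngram_size=2,  # block repeated bigrams
    do_sample=True,          # added here so temperature actually applies
    temperature=0.7,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))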
@@ -168,19 +167,12 @@ def sound_gen(response):
     embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation") # Load speaker embeddings
     speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0) # Use a default embedding
 
-    #
-    max_tokens =
-    truncated_response = response[:max_tokens]
-
-    # Process the truncated text for spectrogram generation
-    inputs = processor(text=truncated_response, return_tensors="pt")# Process text for spectrogram generation
-    inputs_embeds_size = inputs["input_ids"].size(1)
-
-    # Ensure tensor dimensions align between input IDs and speaker embeddings
-    speaker_embeddings = speaker_embeddings[:, :inputs_embeds_size] # Match dimensions with input IDs
+    # Limit text tokens to match the model's capacity
+    max_tokens = 200 # Limit the input text length to avoid tensor mismatch
+    truncated_response = response[:max_tokens]
 
-
-    spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)
+    inputs = processor(text=truncated_response, return_tensors="pt") # Process text for spectrogram generation
+    spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings) # Generate the spectrogram
 
     with torch.no_grad():
         speech = vocoder(spectrogram) # Convert spectrogram to waveform
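Two observations on the rewritten sound_gen: despite the comment, response[:max_tokens] slices characters rather than tokens, and the hunk never shows where processor, model, and vocoder are constructed. A minimal end-to-end sketch, under the assumption that they are the standard SpeechT5 checkpoints (microsoft/speecht5_tts and microsoft/speecht5_hifigan; an assumption, not shown in this commit):

import torch
import soundfile as sf
from datasets import load_dataset
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan

# Checkpoint names are assumptions; this commit never shows them.
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

def sound_gen(response):
    truncated_response = response[:200]  # slices characters, not tokens
    inputs = processor(text=truncated_response, return_tensors="pt")
    spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)
    with torch.no_grad():
        speech = vocoder(spectrogram)  # mel spectrogram -> waveform
    sf.write("response.wav", speech.numpy().squeeze(), samplerate=16000)  # SpeechT5 runs at 16 kHz

sound_gen("Thanks for the feedback; we'll look into the delay right away.")

A character slice can still cut mid-word; truncating on the processor's token IDs instead would match the comment's stated intent.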