joey1101 committed on
Commit
a51e4d3
·
verified ·
1 Parent(s): 105a0a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -19
app.py CHANGED
@@ -15,6 +15,7 @@ import torch # For tensor operations
15
  import soundfile as sf # For saving audio as .wav files
16
  import sentencepiece # Required by SpeechT5Processor for tokenization
17
 
 
18
  ##########################################
19
  # Streamlit application title and input
20
  ##########################################
@@ -58,15 +59,13 @@ def analyze_dominant_emotion(user_review):
58
  ##########################################
59
  # Step 2: Response Generation Function
60
  ##########################################
61
-
62
-
63
  def response_gen(user_review):
64
  """
65
  Generate a concise and logical response based on the sentiment of the user's comment.
66
  """
67
- dominant_emotion = analyze_dominant_emotion(user_review) # Get the dominant emotion of the user's comment
68
  emotion_label = dominant_emotion['label'].lower() # Extract the emotion label in lowercase format
69
-
70
  # Define response templates for each emotion
71
  emotion_prompts = {
72
  "anger": (
@@ -139,14 +138,14 @@ def response_gen(user_review):
139
  ) # Default to neutral if emotion is not found
140
 
141
  # Load the tokenizer and language model for response generation
142
- tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B") # Load tokenizer for processing text inputs
143
  model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B") # Load language model for response generation
144
 
145
  inputs = tokenizer(prompt, return_tensors="pt") # Tokenize the input prompt
146
  outputs = model.generate(
147
  **inputs,
148
- max_new_tokens=200,
149
- min_length=50, # Ensure concise and complete responses
150
  no_repeat_ngram_size=2, # Avoid repetitive phrases
151
  temperature=0.7 # Add randomness for more natural responses
152
  )
@@ -168,19 +167,12 @@ def sound_gen(response):
168
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation") # Load speaker embeddings
169
  speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0) # Use a default embedding
170
 
171
- # Ensure the response is not too long for the model's capacity
172
- max_tokens = 300 # Limit the input text tokens to a maximum of 300
173
- truncated_response = response[:max_tokens] # Truncate the response to fit within the limit
174
-
175
- # Process the truncated text for spectrogram generation
176
- inputs = processor(text=truncated_response, return_tensors="pt")# Process text for spectrogram generation
177
- inputs_embeds_size = inputs["input_ids"].size(1)
178
-
179
- # Ensure tensor dimensions align between input IDs and speaker embeddings
180
- speaker_embeddings = speaker_embeddings[:, :inputs_embeds_size] # Match dimensions with input IDs
181
 
182
- # Generate the spectrogram using the SpeechT5 model
183
- spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings) # Generate the spectrogram
184
 
185
  with torch.no_grad():
186
  speech = vocoder(spectrogram) # Convert spectrogram to waveform
 
15
  import soundfile as sf # For saving audio as .wav files
16
  import sentencepiece # Required by SpeechT5Processor for tokenization
17
 
18
+
19
  ##########################################
20
  # Streamlit application title and input
21
  ##########################################
 
59
  ##########################################
60
  # Step 2: Response Generation Function
61
  ##########################################
 
 
62
  def response_gen(user_review):
63
  """
64
  Generate a concise and logical response based on the sentiment of the user's comment.
65
  """
66
+ dominant_emotion = analyze_dominant_emotion(user_review) # Determine the dominant emotion from the user's comment
67
  emotion_label = dominant_emotion['label'].lower() # Extract the emotion label in lowercase format
68
+
69
  # Define response templates for each emotion
70
  emotion_prompts = {
71
  "anger": (
 
138
  ) # Default to neutral if emotion is not found
139
 
140
  # Load the tokenizer and language model for response generation
141
+ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B") # Load tokenizer for text processing
142
  model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B") # Load language model for response generation
143
 
144
  inputs = tokenizer(prompt, return_tensors="pt") # Tokenize the input prompt
145
  outputs = model.generate(
146
  **inputs,
147
+ max_new_tokens=150, # Limit generated tokens to ensure concise responses
148
+ min_length=50, # Ensure the generated response is logical and complete
149
  no_repeat_ngram_size=2, # Avoid repetitive phrases
150
  temperature=0.7 # Add randomness for more natural responses
151
  )
 
167
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation") # Load speaker embeddings
168
  speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0) # Use a default embedding
169
 
170
+ # Limit text tokens to match the model's capacity
171
+ max_tokens = 200 # Limit the input text length to avoid tensor mismatch
172
+ truncated_response = response[:max_tokens]
 
 
 
 
 
 
 
173
 
174
+ inputs = processor(text=truncated_response, return_tensors="pt") # Process text for spectrogram generation
175
+ spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings) # Generate the spectrogram
176
 
177
  with torch.no_grad():
178
  speech = vocoder(spectrogram) # Convert spectrogram to waveform