Update app.py
app.py CHANGED
@@ -15,6 +15,7 @@ import torch # For tensor operations
 import soundfile as sf # For saving audio as .wav files
 import sentencepiece # Required by SpeechT5Processor for tokenization
 
+
 ##########################################
 # Streamlit application title and input
 ##########################################
@@ -58,15 +59,13 @@ def analyze_dominant_emotion(user_review):
 ##########################################
 # Step 2: Response Generation Function
 ##########################################
-
-
 def response_gen(user_review):
     """
     Generate a concise and logical response based on the sentiment of the user's comment.
     """
-    dominant_emotion = analyze_dominant_emotion(user_review) #
+    dominant_emotion = analyze_dominant_emotion(user_review) # Determine the dominant emotion from the user's comment
     emotion_label = dominant_emotion['label'].lower() # Extract the emotion label in lowercase format
-
+
     # Define response templates for each emotion
     emotion_prompts = {
         "anger": (
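The hunk above only touches the call site of analyze_dominant_emotion; the helper itself is outside this diff. For orientation, here is a minimal sketch of what such a helper typically looks like, assuming a Hugging Face text-classification pipeline; the checkpoint name is an illustrative assumption, not taken from this commit:

from transformers import pipeline

# Hypothetical reconstruction of the helper this diff calls into.
# The checkpoint is an assumption; any emotion classifier that
# returns [{"label": ..., "score": ...}] fits the call site.
emotion_classifier = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
)

def analyze_dominant_emotion(user_review):
    """Return the top emotion, e.g. {'label': 'anger', 'score': 0.93}."""
    return emotion_classifier(user_review)[0]

The returned dict matches the dominant_emotion['label'].lower() access pattern in the changed line.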
@@ -139,14 +138,14 @@ def response_gen(user_review):
     ) # Default to neutral if emotion is not found
 
     # Load the tokenizer and language model for response generation
-    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B") # Load tokenizer for
+    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B") # Load tokenizer for text processing
     model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B") # Load language model for response generation
 
     inputs = tokenizer(prompt, return_tensors="pt") # Tokenize the input prompt
     outputs = model.generate(
         **inputs,
-        max_new_tokens=
-        min_length=50, # Ensure
+        max_new_tokens=150, # Limit generated tokens to ensure concise responses
+        min_length=50, # Ensure the generated response is logical and complete
         no_repeat_ngram_size=2, # Avoid repetitive phrases
         temperature=0.7 # Add randomness for more natural responses
     )
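The new generation arguments, shown self-contained below. One caveat worth flagging for review: in transformers, temperature only takes effect when sampling is enabled, so this sketch adds do_sample=True (an addition, not part of the commit). The prompt string is likewise a placeholder, since the real prompt is built from the emotion_prompts templates:

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B")
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B")

prompt = "Customer said: 'The delivery was late.' Write a short, polite reply:"  # placeholder

inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(
    **inputs,
    max_new_tokens=150,      # cap on newly generated tokens, as in the diff
    min_length=50,           # note: counts prompt + generated tokens together
    no_repeat_ngram_size=2,  # block repeated bigrams
    do_sample=True,          # added here so temperature actually applies
    temperature=0.7,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))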
@@ -168,19 +167,12 @@ def sound_gen(response):
     embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation") # Load speaker embeddings
     speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0) # Use a default embedding
 
-    #
-    max_tokens =
-    truncated_response = response[:max_tokens]
-
-    # Process the truncated text for spectrogram generation
-    inputs = processor(text=truncated_response, return_tensors="pt")# Process text for spectrogram generation
-    inputs_embeds_size = inputs["input_ids"].size(1)
-
-    # Ensure tensor dimensions align between input IDs and speaker embeddings
-    speaker_embeddings = speaker_embeddings[:, :inputs_embeds_size] # Match dimensions with input IDs
+    # Limit text tokens to match the model's capacity
+    max_tokens = 200 # Limit the input text length to avoid tensor mismatch
+    truncated_response = response[:max_tokens]
 
-
-    spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)
+    inputs = processor(text=truncated_response, return_tensors="pt") # Process text for spectrogram generation
+    spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings) # Generate the spectrogram
 
     with torch.no_grad():
         speech = vocoder(spectrogram) # Convert spectrogram to waveform
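Two observations on the rewritten sound_gen: despite the comment, response[:max_tokens] slices characters rather than tokens, and the hunk never shows where processor, model, and vocoder are constructed. A minimal end-to-end sketch, under the assumption that they are the standard SpeechT5 checkpoints (microsoft/speecht5_tts and microsoft/speecht5_hifigan; an assumption, not shown in this commit):

import torch
import soundfile as sf
from datasets import load_dataset
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan

# Checkpoint names are assumptions; this commit never shows them.
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

def sound_gen(response):
    truncated_response = response[:200]  # slices characters, not tokens
    inputs = processor(text=truncated_response, return_tensors="pt")
    spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)
    with torch.no_grad():
        speech = vocoder(spectrogram)  # mel spectrogram -> waveform
    sf.write("response.wav", speech.numpy().squeeze(), samplerate=16000)  # SpeechT5 runs at 16 kHz

sound_gen("Thanks for the feedback; we'll look into the delay right away.")

A character slice can still cut mid-word; truncating on the processor's token IDs instead would match the comment's stated intent.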