Update app.py
app.py CHANGED
@@ -1,103 +1,148 @@
app.py before the change (removed lines are marked "-"; removed-line content lost when the diff was captured is noted in brackets):

 ##########################################
 # Step 0: Import required libraries
 ##########################################
-import streamlit as st  #
-from transformers import (
-    pipeline,
-    SpeechT5Processor,
-    SpeechT5ForTextToSpeech,
-    SpeechT5HifiGan,
-    AutoModelForCausalLM,
-    AutoTokenizer
-)  #
-from datasets import load_dataset  #
-import torch  #
-import soundfile as sf  #

 ##########################################
-# [section comment not captured]
 ##########################################
-st. [removed Streamlit setup lines not captured]

 ##########################################
-# [section comment not captured]
 ##########################################
-def [removed function definition and docstring not captured]

 ##########################################
-# [section comment not captured]
 ##########################################
-def [removed function signature and docstring not captured]
-    # Define response templates for each emotion
-    emotion_prompts = {
-        "anger": "I appreciate your feedback and apologize for the inconvenience caused by '{review}'. We're committed to resolving this issue promptly and will ensure it doesn't happen again. Thank you for your patience.",
-        "joy": "Thank you for your positive feedback on '{review}'! We're thrilled to hear you had a great experience and hope to serve you again soon.",
-        "disgust": "We regret that your experience with '{review}' did not meet our standards. We will take immediate steps to address this issue and appreciate your understanding.",
-        "fear": "Your safety is our priority. Regarding your concern about '{review}', we ensure that all our products meet strict safety standards. Please feel free to reach out for further assistance.",
-        "neutral": "Thank you for your feedback on '{review}'. We value your input and would love to hear more about your experience to improve our services.",
-        "sadness": "I'm sorry to hear that you were disappointed with '{review}'. We're here to help and would like to offer you a solution tailored to your needs.",
-        "surprise": "We're glad to hear that '{review}' exceeded your expectations! Thank you for sharing your excitement with us."
-    }
-
-    # Format the prompt with the user's review
-    prompt = emotion_prompts.get(emotion_label, "Neutral").format(review=user_review)
-
-    # Load a pre-trained text generation model
-    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B")  # Load tokenizer
-    model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B")  # Load model
-    inputs = tokenizer(prompt, return_tensors="pt")  # Tokenize the prompt
-
-    outputs = model.generate(**inputs, max_new_tokens=100)  # Generate a response
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)  # Decode the generated text
-    return response.strip()[:200]  # Return a response trimmed to 200 characters

 ##########################################
-# [section comment not captured]
 ##########################################
-def [removed function definition and body not captured]
     with torch.no_grad():
-        [removed vocoder call not captured]
-    sf.write("customer_service_response.wav", speech.numpy(), samplerate=16000)  # Save audio
-    st.audio("customer_service_response.wav")  # Play the audio in Streamlit

 ##########################################
-# Main
 ##########################################
 def main():
-    """ [removed docstring and body not captured]
-    # Run the main function
 if __name__ == "__main__":
-    main()

app.py after the change (added lines are marked "+"):

 ##########################################
 # Step 0: Import required libraries
 ##########################################
+import streamlit as st  # Web app framework
+from transformers import (
+    pipeline,
+    SpeechT5Processor,
+    SpeechT5ForTextToSpeech,
+    SpeechT5HifiGan,
+    AutoModelForCausalLM,
+    AutoTokenizer
+)  # NLP and TTS models
+from datasets import load_dataset  # Speaker embeddings
+import torch  # Tensor operations
+import soundfile as sf  # Audio file handling
+import sentencepiece  # Tokenization dependency

 ##########################################
+# Initialize models and resources globally
 ##########################################
+@st.cache_resource  # Cache resources to reduce reload time
+def load_models():
+    """Load all required models once and cache them"""
+    return {
+        'emotion_classifier': pipeline(
+            "text-classification",
+            model="Thea231/jhartmann_emotion_finetuning"
+        ),
+        'tts_processor': SpeechT5Processor.from_pretrained("microsoft/speecht5_tts"),
+        'tts_model': SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts"),
+        'tts_vocoder': SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan"),
+        'textgen_tokenizer': AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
+        'textgen_model': AutoModelForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B"),
+        'speaker_embeddings': torch.tensor(
+            load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")[7306]["xvector"]
+        ).unsqueeze(0)
+    }

 ##########################################
+# Streamlit UI Configuration
 ##########################################
+def setup_ui():
+    """Configure Streamlit user interface"""
+    st.set_page_config(page_title="Just Comment", page_icon="💬")
+    st.title("Just Comment - Smart Response Generator")
+    st.markdown("""
+        <style>
+        .reportview-container {background: #f8f9fa;}
+        .stTextArea textarea {border: 2px solid #dee2e6;}
+        </style>
+    """, unsafe_allow_html=True)
+    return st.text_area("Enter your customer comment:", "", height=150)

 ##########################################
+# Enhanced Sentiment Analysis
 ##########################################
+def analyze_emotion(text, classifier):
+    """Determine dominant emotion with confidence threshold"""
+    results = classifier(text, return_all_scores=True)[0]
+    top_emotion = max(results, key=lambda x: x['score'])
+    return top_emotion if top_emotion['score'] > 0.6 else {'label': 'neutral', 'score': 1.0}

 ##########################################
+# Improved Response Generation
 ##########################################
+def generate_response(text, models):
+    """Generate context-appropriate response with length control"""
+    emotion = analyze_emotion(text, models['emotion_classifier'])
+    prompt = create_prompt(text, emotion['label'].lower())
+
+    inputs = models['textgen_tokenizer'](prompt, return_tensors="pt")
+    outputs = models['textgen_model'].generate(
+        **inputs,
+        max_new_tokens=200,  # Increased for longer responses
+        temperature=0.7,  # Balance creativity and focus
+        do_sample=True,
+        top_p=0.9,
+        no_repeat_ngram_size=2
+    )
+
+    response = models['textgen_tokenizer'].decode(
+        outputs[0][inputs.input_ids.shape[1]:],
+        skip_special_tokens=True
+    )
+    return postprocess_response(response)

+def create_prompt(text, emotion):
+    """Create emotion-specific prompts with structured guidance"""
+    templates = {
+        "anger": (
+            "Complaint: {input}\nRespond by:\n1. Apologizing sincerely\n"
+            "2. Providing solution steps\n3. Offering compensation\nResponse:"
+        ),
+        "joy": (
+            "Positive feedback: {input}\nRespond by:\n1. Thanking customer\n"
+            "2. Highlighting strengths\n3. Suggesting rewards\nResponse:"
+        ),
+        # Add other emotion templates...
+        "neutral": (
+            "Feedback: {input}\nRespond by:\n1. Acknowledging input\n"
+            "2. Providing information\n3. Requesting details\nResponse:"
+        )
+    }
+    return templates.get(emotion, templates['neutral']).format(input=text)

+def postprocess_response(text):
+    """Ensure response quality and length"""
+    text = text.split("\n\n")[0].strip()  # Take first complete response
+    if len(text) < 50:  # Minimum length check
+        return "Thank you for your feedback. We'll carefully review your comments and follow up shortly."
+    return text[:300]  # Hard length limit

+##########################################
+# Optimized Text-to-Speech
+##########################################
+def generate_speech(text, models):
+    """Convert text to speech with performance optimizations"""
+    inputs = models['tts_processor'](text=text, return_tensors="pt")
+    spectrogram = models['tts_model'].generate_speech(
+        inputs["input_ids"],
+        models['speaker_embeddings']
+    )
     with torch.no_grad():
+        audio = models['tts_vocoder'](spectrogram)
+    sf.write("response.wav", audio.numpy(), 16000)
+    return "response.wav"

 ##########################################
+# Main Application Logic
 ##########################################
 def main():
+    """Main execution flow"""
+    models = load_models()  # Load models once
+    user_input = setup_ui()
+
+    if user_input:
+        with st.spinner("Analyzing sentiment and generating response..."):
+            response = generate_response(user_input, models)
+
+        st.subheader("Generated Response:")
+        st.markdown(f"```\n{response}\n```")
+
+        with st.spinner("Generating voice response..."):
+            audio_file = generate_speech(response, models)
+            st.audio(audio_file, format="audio/wav")

 if __name__ == "__main__":
+    main()
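
For a quick local sanity check of the new pipeline outside the Space, a small driver can exercise the same functions. This is only a sketch, assuming the revised app.py above is importable from the working directory and the referenced Hugging Face checkpoints can be downloaded; load_models() may log a Streamlit bare-mode cache warning when called outside streamlit run.

# smoke_test.py - hypothetical helper, not part of this commit
from app import load_models, generate_response, generate_speech

if __name__ == "__main__":
    models = load_models()  # downloads/caches the HF checkpoints on first use
    review = "The package arrived two weeks late and the box was damaged."
    reply = generate_response(review, models)   # emotion-aware reply via Qwen1.5-0.5B
    print(reply)
    print(generate_speech(reply, models))       # synthesizes and returns response.wav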