Spaces:
Running
Running
########################################## | |
# Step 0: Import required libraries | |
########################################## | |
import streamlit as st # Web app framework | |
from transformers import ( | |
pipeline, | |
SpeechT5Processor, | |
SpeechT5ForTextToSpeech, | |
SpeechT5HifiGan, | |
AutoModelForCausalLM, | |
AutoTokenizer | |
) # NLP and TTS models | |
from datasets import load_dataset # Speaker embeddings | |
import torch # Tensor operations | |
import soundfile as sf # Audio file handling | |
import sentencepiece # Tokenization dependency | |
########################################## | |
# Initial configuration (MUST be first) | |
########################################## | |
# Browser-tab / layout settings, applied in one call. This call has to stay
# ahead of every other Streamlit command in the script.
_PAGE_SETTINGS = {
    "page_title": "π Just Comment - AI Response Generator",
    "page_icon": "π¬",
    "layout": "centered",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_PAGE_SETTINGS)
########################################## | |
# Global model loading with caching | |
########################################## | |
@st.cache_resource
def load_models():
    """Load every ML component the app needs and cache the result.

    Streamlit re-executes the script on each user interaction; the
    ``st.cache_resource`` decorator makes sure the (slow) model loading below
    happens once per server process instead of once per rerun. The cached
    dictionary is shared between reruns and sessions, so callers must treat
    its contents as read-only.

    Returns:
        dict: components keyed by name:
            'emotion'            - text-classification pipeline
            'textgen_tokenizer'  - tokenizer for the causal language model
            'textgen_model'      - causal language model used for replies
            'tts_processor'      - SpeechT5 text processor
            'tts_model'          - SpeechT5 text-to-speech model
            'tts_vocoder'        - HiFi-GAN vocoder for SpeechT5
            'speaker_embeddings' - x-vector tensor with a leading axis added
    """
    # Entry 7306 of the validation split selects the voice used for synthesis
    # (presumably one specific CMU ARCTIC speaker -- confirm before changing).
    xvectors = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    # unsqueeze(0) prepends a dimension of size 1 (presumably the batch axis
    # expected by the TTS model).
    speaker_embeddings = torch.tensor(xvectors[7306]["xvector"]).unsqueeze(0)

    return {
        # Emotion classifier
        'emotion': pipeline(
            "text-classification",
            model="Thea231/jhartmann_emotion_finetuning",
        ),
        # Text generation models
        'textgen_tokenizer': AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
        'textgen_model': AutoModelForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B"),
        # TTS components
        'tts_processor': SpeechT5Processor.from_pretrained("microsoft/speecht5_tts"),
        'tts_model': SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts"),
        'tts_vocoder': SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan"),
        # Speaker embeddings
        'speaker_embeddings': speaker_embeddings,
    }
########################################## | |
# UI Components | |
########################################## | |
def render_interface():
    """Draw the page header and the feedback input box.

    Returns:
        The value Streamlit reports for the text area registered under the
        widget key ``"user_input"``.
    """
    # Header
    st.title("π AI Customer Response Generator")
    st.caption("Analyzes feedback and generates tailored responses")

    # Input widget
    feedback = st.text_area(
        label="π Paste customer feedback here:",
        placeholder="The product arrived damaged...",
        height=150,
        key="user_input",
    )
    return feedback
########################################## | |
# Core Logic Components | |
########################################## | |
def analyze_emotion(text, classifier, threshold=0.6):
    """Determine the dominant emotion, falling back to neutral when unsure.

    Args:
        text: Feedback text to classify.
        classifier: Callable invoked as ``classifier(text,
            return_all_scores=True)``; the first element of its result is
            read as a list of ``{'label': ..., 'score': ...}`` dicts.
        threshold: Score the top emotion must exceed to be reported.
            Defaults to 0.6.

    Returns:
        dict: The highest-scoring ``{'label', 'score'}`` entry when its score
        is above ``threshold``; otherwise ``{'label': 'neutral',
        'score': 1.0}``. The neutral fallback is also returned for blank text
        and when the classifier yields no scores.
    """
    neutral = {'label': 'neutral', 'score': 1.0}

    # Blank input cannot carry an emotion; skip the model call entirely.
    if not text or (isinstance(text, str) and not text.strip()):
        return neutral

    batches = classifier(text, return_all_scores=True)
    results = batches[0] if batches else []

    # default=None keeps max() from raising on an empty score list.
    top_emotion = max(results, key=lambda entry: entry['score'], default=None)
    if top_emotion is None or top_emotion['score'] <= threshold:
        return neutral
    return top_emotion
def generate_prompt(text, emotion):
    """Build the generation prompt that matches the detected emotion.

    The emotion label is compared case-insensitively. "anger" and "joy" have
    dedicated templates; every other label uses the neutral template.

    Args:
        text: Customer feedback inserted into the template.
        emotion: Emotion label string.

    Returns:
        str: The filled-in prompt, ending with ``"Response:"``.
    """
    label = emotion.lower()

    if label == "anger":
        template = (
            "Customer complaint: {input}\n"
            "Respond with:\n"
            "1. Apology\n2. Solution steps\n3. Compensation offer\n"
            "Response:"
        )
    elif label == "joy":
        template = (
            "Positive feedback: {input}\n"
            "Respond with:\n"
            "1. Appreciation\n2. Highlight strengths\n3. Loyalty benefits\n"
            "Response:"
        )
    else:
        # "neutral" itself and any label without a dedicated template.
        template = (
            "Customer comment: {input}\n"
            "Respond with:\n"
            "1. Acknowledge feedback\n2. Offer assistance\n3. Next steps\n"
            "Response:"
        )

    return template.format(input=text)
def process_response(output_text, max_length=300, min_length=50):
    """Clean up generated text so it ends on a complete sentence.

    The hard length limit is applied first and the sentence trim second, so a
    sentence cut in half by the limit is removed as well. ".", "!" and "?"
    all count as sentence endings.

    Args:
        output_text: Raw text produced by the language model.
        max_length: Hard upper bound on the result length, in characters.
            Defaults to 300.
        min_length: Results shorter than this are replaced by a stock reply.
            Defaults to 50.

    Returns:
        str: The trimmed response, or a generic thank-you message when the
        input is empty or the trimmed text is shorter than ``min_length``.
    """
    fallback = "Thank you for your feedback. We'll review this and contact you shortly."
    if not output_text:
        return fallback

    # Length constraint first: truncating after the sentence trim could
    # reintroduce an incomplete sentence.
    text = output_text.strip()[:max_length]

    # Drop any trailing fragment after the last sentence terminator. When no
    # terminator is present the (truncated) text is kept as-is.
    last_stop = max(text.rfind(mark) for mark in ".!?")
    if last_stop != -1:
        text = text[:last_stop + 1]
    text = text.strip()

    # Fallback for short responses
    if len(text) < min_length:
        return fallback
    return text
def generate_text_response(user_input, models):
    """Generate and validate a text reply to the customer's feedback.

    Steps: classify the emotion, build the matching prompt, sample a
    continuation from the language model, then clean it up with
    ``process_response``.

    Args:
        user_input: Customer feedback text.
        models: Mapping that provides 'emotion', 'textgen_tokenizer' and
            'textgen_model'.

    Returns:
        str: The post-processed reply.
    """
    # Emotion analysis
    emotion = analyze_emotion(user_input, models['emotion'])

    # Prompt engineering
    prompt = generate_prompt(user_input, emotion['label'])

    # Text generation. The attention mask is passed explicitly so generate()
    # does not have to guess which positions are real tokens.
    tokenizer = models['textgen_tokenizer']
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = models['textgen_model'].generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=200,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
    )

    # Decode only the newly generated tokens. The output sequence starts with
    # the prompt tokens; slicing them off is more reliable than splitting on
    # the "Response:" marker, which picks the wrong segment if the model
    # repeats the marker in its continuation.
    prompt_length = inputs.input_ids.shape[-1]
    continuation = tokenizer.decode(
        outputs[0][prompt_length:],
        skip_special_tokens=True,
    )
    return process_response(continuation.strip())
def generate_audio_response(text, models):
    """Synthesize speech for ``text`` and store it as a WAV file.

    Args:
        text: Text to speak.
        models: Mapping that provides 'tts_processor', 'tts_model',
            'tts_vocoder' and 'speaker_embeddings'.

    Returns:
        str: Path of the written file, always ``"response.wav"`` (the file is
        overwritten on each call).
    """
    output_path = "response.wav"

    # Text -> model inputs
    encoded = models['tts_processor'](text=text, return_tensors="pt")

    # Model inputs -> spectrogram, voiced with the stored speaker embedding
    mel = models['tts_model'].generate_speech(
        encoded["input_ids"],
        models['speaker_embeddings'],
    )

    # Spectrogram -> waveform; no gradients are needed for inference
    vocoder = models['tts_vocoder']
    with torch.no_grad():
        audio = vocoder(mel)

    # Persist at a 16 kHz sample rate and hand the path back to the caller
    sf.write(output_path, audio.numpy(), samplerate=16000)
    return output_path
########################################## | |
# Main Application Flow | |
########################################## | |
def main():
    """Run one pass of the app: load models, draw the UI, answer the input.

    Nothing is generated until the user has entered feedback that contains
    something other than whitespace.
    """
    # Load models
    ml_models = load_models()

    # Render UI
    user_input = render_interface()

    # Whitespace-only input is treated like no input so that a stray space or
    # newline does not trigger a full generation + TTS run.
    feedback = user_input.strip() if user_input else ""
    if not feedback:
        return

    # Text generation
    with st.status("π Analyzing feedback...", expanded=True) as status:
        text_response = generate_text_response(feedback, ml_models)
        status.update(label="β Analysis Complete", state="complete")

    # Display text response
    st.subheader("π Generated Response")
    st.markdown(f"```\n{text_response}\n```")

    # Audio generation
    with st.spinner("π Generating voice response..."):
        audio_file = generate_audio_response(text_response, ml_models)
        st.audio(audio_file, format="audio/wav")


if __name__ == "__main__":
    main()