##########################################
# Step 0: Import required libraries
##########################################
import streamlit as st  # Web app framework
from transformers import (
    pipeline,
    SpeechT5Processor,
    SpeechT5ForTextToSpeech,
    SpeechT5HifiGan,
    AutoModelForCausalLM,
    AutoTokenizer,
)  # NLP and TTS models
from datasets import load_dataset  # Speaker embeddings
import torch  # Tensor operations
import soundfile as sf  # Audio file handling
import sentencepiece  # Tokenization dependency

##########################################
# Initial configuration (MUST be first)
##########################################
st.set_page_config(
    page_title="🚀 Just Comment - AI Response Generator",
    page_icon="💬",
    layout="centered",
    initial_sidebar_state="collapsed",
)

##########################################
# Tunable constants
##########################################
EMOTION_MODEL_ID = "Thea231/jhartmann_emotion_finetuning"
TEXTGEN_MODEL_ID = "Qwen/Qwen1.5-0.5B"
TTS_MODEL_ID = "microsoft/speecht5_tts"
TTS_VOCODER_ID = "microsoft/speecht5_hifigan"
SPEAKER_DATASET_ID = "Matthijs/cmu-arctic-xvectors"
SPEAKER_INDEX = 7306  # Row of the validation split used as the voice

EMOTION_CONFIDENCE_THRESHOLD = 0.6  # Below this, treat feedback as neutral
MAX_RESPONSE_CHARS = 300  # Hard limit on the displayed/spoken reply
MIN_RESPONSE_CHARS = 50  # Shorter replies are replaced by the fallback
SENTENCE_ENDINGS = ".!?"
FALLBACK_RESPONSE = (
    "Thank you for your feedback. We'll review this and contact you shortly."
)
TTS_SAMPLE_RATE = 16000  # Sample rate written to the WAV file
DEFAULT_AUDIO_PATH = "response.wav"

PROMPT_TEMPLATES = {
    "anger": (
        "Customer complaint: {input}\n"
        "Respond with:\n"
        "1. Apology\n2. Solution steps\n3. Compensation offer\n"
        "Response:"
    ),
    "joy": (
        "Positive feedback: {input}\n"
        "Respond with:\n"
        "1. Appreciation\n2. Highlight strengths\n3. Loyalty benefits\n"
        "Response:"
    ),
    "neutral": (
        "Customer comment: {input}\n"
        "Respond with:\n"
        "1. Acknowledge feedback\n2. Offer assistance\n3. Next steps\n"
        "Response:"
    ),
}


##########################################
# Global model loading with caching
##########################################
@st.cache_resource(show_spinner=False)
def load_models():
    """Load and cache all ML models.

    Returns:
        A dict with the keys ``emotion``, ``textgen_tokenizer``,
        ``textgen_model``, ``tts_processor``, ``tts_model``, ``tts_vocoder``
        and ``speaker_embeddings``. ``st.cache_resource`` keeps the result
        so the models are not reloaded on every Streamlit rerun.
    """
    xvector_rows = load_dataset(SPEAKER_DATASET_ID, split="validation")
    # unsqueeze(0) adds the leading batch dimension before the vector is
    # handed to the TTS model.
    speaker_embeddings = torch.tensor(
        xvector_rows[SPEAKER_INDEX]["xvector"]
    ).unsqueeze(0)

    return {
        # Emotion classifier
        'emotion': pipeline("text-classification", model=EMOTION_MODEL_ID),
        # Text generation models
        'textgen_tokenizer': AutoTokenizer.from_pretrained(TEXTGEN_MODEL_ID),
        'textgen_model': AutoModelForCausalLM.from_pretrained(TEXTGEN_MODEL_ID),
        # TTS components
        'tts_processor': SpeechT5Processor.from_pretrained(TTS_MODEL_ID),
        'tts_model': SpeechT5ForTextToSpeech.from_pretrained(TTS_MODEL_ID),
        'tts_vocoder': SpeechT5HifiGan.from_pretrained(TTS_VOCODER_ID),
        # Speaker embeddings
        'speaker_embeddings': speaker_embeddings,
    }


##########################################
# UI Components
##########################################
def render_interface():
    """Create user interface elements.

    Returns:
        The current contents of the feedback text area.
    """
    st.title("🚀 AI Customer Response Generator")
    st.caption("Analyzes feedback and generates tailored responses")
    return st.text_area(
        "📝 Paste customer feedback here:",
        placeholder="The product arrived damaged...",
        height=150,
        key="user_input",
    )


##########################################
# Core Logic Components
##########################################
def analyze_emotion(text, classifier):
    """Determine dominant emotion with confidence threshold.

    Args:
        text: Customer feedback to classify.
        classifier: Text-classification pipeline (the ``emotion`` entry of
            ``load_models()``).

    Returns:
        The highest-scoring ``{'label', 'score'}`` dict when its score
        exceeds ``EMOTION_CONFIDENCE_THRESHOLD``; otherwise a synthetic
        ``{'label': 'neutral', 'score': 1.0}``.
    """
    # ``top_k=None`` asks for the score of every label; it replaces the
    # deprecated ``return_all_scores=True``.
    results = classifier(text, top_k=None)
    # Depending on the call style the pipeline may wrap the per-label list
    # in an outer list; unwrap it so both shapes are accepted.
    if results and isinstance(results[0], list):
        results = results[0]

    top_emotion = max(results, key=lambda item: item['score'])
    if top_emotion['score'] > EMOTION_CONFIDENCE_THRESHOLD:
        return top_emotion
    return {'label': 'neutral', 'score': 1.0}


def generate_prompt(text, emotion):
    """Create structured prompts for different emotions.

    Args:
        text: Customer feedback inserted into the template.
        emotion: Emotion label; matched case-insensitively. Labels without
            a dedicated template use the ``neutral`` one.

    Returns:
        The filled-in prompt, ending with ``Response:``.
    """
    template = PROMPT_TEMPLATES.get(emotion.lower(), PROMPT_TEMPLATES['neutral'])
    return template.format(input=text)


def process_response(output_text):
    """Ensure response quality and proper formatting.

    The text is first cut to ``MAX_RESPONSE_CHARS`` and only then trimmed
    back to the last sentence ending, so the length limit can no longer
    leave a half-finished sentence at the end.

    Args:
        output_text: Raw text produced by the language model.

    Returns:
        The cleaned reply, or ``FALLBACK_RESPONSE`` when fewer than
        ``MIN_RESPONSE_CHARS`` characters remain.
    """
    # Length constraint first: everything after this point works on the
    # text that can actually be shown.
    text = output_text.strip()[:MAX_RESPONSE_CHARS]

    # Remove the trailing incomplete sentence, if any sentence ending exists.
    last_ending = max(text.rfind(mark) for mark in SENTENCE_ENDINGS)
    if last_ending != -1:
        text = text[:last_ending + 1]
    text = text.strip()

    # Fallback for short responses
    if len(text) < MIN_RESPONSE_CHARS:
        return FALLBACK_RESPONSE
    return text


def generate_text_response(user_input, models):
    """Generate and validate text response.

    Args:
        user_input: Customer feedback entered in the UI.
        models: Dict returned by ``load_models()``.

    Returns:
        The post-processed reply text (see ``process_response``).
    """
    tokenizer = models['textgen_tokenizer']

    # Emotion analysis
    emotion = analyze_emotion(user_input, models['emotion'])

    # Prompt engineering
    prompt = generate_prompt(user_input, emotion['label'])

    # Text generation. The whole tokenizer output is forwarded so that the
    # attention mask reaches ``generate`` together with the input ids.
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = models['textgen_model'].generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
    )

    # Decode and keep only what follows the last "Response:" marker
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return process_response(full_response.split("Response:")[-1].strip())


def generate_audio_response(text, models, output_path=DEFAULT_AUDIO_PATH):
    """Convert text to speech.

    Args:
        text: Reply text to synthesize.
        models: Dict returned by ``load_models()``.
        output_path: Where the WAV file is written. Defaults to
            ``response.wav`` in the working directory.

    Returns:
        ``output_path``, after the audio has been written to it.
    """
    # NOTE(review): the default path is the same for every caller, so
    # overlapping sessions would overwrite each other's file; pass a unique
    # ``output_path`` if that can happen in your deployment.

    # Process text input
    inputs = models['tts_processor'](text=text, return_tensors="pt")

    # Generate spectrogram
    spectrogram = models['tts_model'].generate_speech(
        inputs["input_ids"], models['speaker_embeddings']
    )

    # Generate waveform
    with torch.no_grad():
        waveform = models['tts_vocoder'](spectrogram)

    # Save and return audio
    sf.write(output_path, waveform.numpy(), samplerate=TTS_SAMPLE_RATE)
    return output_path


##########################################
# Main Application Flow
##########################################
def main():
    """Run one pass of the app: load models, read input, show text and audio."""
    # Load models once
    ml_models = load_models()

    # Render UI
    user_input = render_interface()

    # Process input; whitespace-only submissions are ignored
    if user_input and user_input.strip():
        # Text generation
        with st.status("🔍 Analyzing feedback...", expanded=True) as status:
            text_response = generate_text_response(user_input, ml_models)
            status.update(label="✅ Analysis Complete", state="complete")

        # Display text response
        st.subheader("📝 Generated Response")
        st.markdown(f"```\n{text_response}\n```")

        # Audio generation
        with st.spinner("🔊 Generating voice response..."):
            audio_file = generate_audio_response(text_response, ml_models)
            st.audio(audio_file, format="audio/wav")


if __name__ == "__main__":
    main()