##########################################
# Step 0: Import required libraries
##########################################
import streamlit as st  # Web app framework
from transformers import (
    pipeline,
    SpeechT5Processor,
    SpeechT5ForTextToSpeech,
    SpeechT5HifiGan,
    AutoModelForCausalLM,
    AutoTokenizer
)  # NLP and TTS models
from datasets import load_dataset  # Speaker embeddings
import torch  # Tensor operations
import soundfile as sf  # Audio file handling
import sentencepiece  # Tokenization dependency

##########################################
# Initial configuration (MUST be first)
##########################################
# Page-wide settings: title, icon, a centered layout and a sidebar that starts
# collapsed. Keep this call above every other `st.*` call in the script --
# Streamlit expects set_page_config to run before any other Streamlit command.
st.set_page_config(
    page_title="🚀 Just Comment - AI Response Generator",
    page_icon="💬",
    layout="centered",
    initial_sidebar_state="collapsed"
)

##########################################
# Global model loading with caching
##########################################
@st.cache_resource(show_spinner=False)
def load_models():
    """Instantiate every model the app needs and return them in one dict.

    The function is wrapped in ``st.cache_resource`` (spinner disabled), so the
    returned dict is cached by Streamlit rather than rebuilt by each call.

    Returns:
        dict with the keys ``emotion``, ``textgen_tokenizer``,
        ``textgen_model``, ``tts_processor``, ``tts_model``, ``tts_vocoder``
        and ``speaker_embeddings``.
    """
    textgen_checkpoint = "Qwen/Qwen1.5-0.5B"
    tts_checkpoint = "microsoft/speecht5_tts"

    # Emotion classifier
    emotion_classifier = pipeline(
        "text-classification",
        model="Thea231/jhartmann_emotion_finetuning",
    )

    # Causal LM used to draft the written reply
    reply_tokenizer = AutoTokenizer.from_pretrained(textgen_checkpoint)
    reply_model = AutoModelForCausalLM.from_pretrained(textgen_checkpoint)

    # Text-to-speech stack: processor, acoustic model, vocoder
    speech_processor = SpeechT5Processor.from_pretrained(tts_checkpoint)
    speech_model = SpeechT5ForTextToSpeech.from_pretrained(tts_checkpoint)
    speech_vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

    # One x-vector (row 7306 of the validation split) with a leading axis
    # added by unsqueeze(0)
    xvector_rows = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    voice = torch.tensor(xvector_rows[7306]["xvector"]).unsqueeze(0)

    loaded = {}
    loaded['emotion'] = emotion_classifier
    loaded['textgen_tokenizer'] = reply_tokenizer
    loaded['textgen_model'] = reply_model
    loaded['tts_processor'] = speech_processor
    loaded['tts_model'] = speech_model
    loaded['tts_vocoder'] = speech_vocoder
    loaded['speaker_embeddings'] = voice
    return loaded

##########################################
# UI Components
##########################################
def render_interface():
    """Draw the page header and the feedback input box.

    Returns:
        Whatever ``st.text_area`` returns for the feedback box (registered
        under the widget key ``user_input``).
    """
    st.title("🚀 AI Customer Response Generator")
    st.caption("Analyzes feedback and generates tailored responses")

    box_options = {
        "placeholder": "The product arrived damaged...",
        "height": 150,
        "key": "user_input",
    }
    feedback = st.text_area("📝 Paste customer feedback here:", **box_options)
    return feedback

##########################################
# Core Logic Components
##########################################
def analyze_emotion(text, classifier, threshold=0.6):
    """Return the dominant emotion for *text*, or neutral when confidence is low.

    Args:
        text: Feedback text to classify.
        classifier: Callable text-classification pipeline. It is invoked as
            ``classifier(text, top_k=None, truncation=True)`` and is expected
            to return one ``{'label': ..., 'score': ...}`` dict per class,
            either as a flat list or wrapped in a one-element outer list.
        threshold: Score the top label has to exceed (strictly) to be trusted.

    Returns:
        The highest-scoring ``{'label', 'score'}`` dict, or
        ``{'label': 'neutral', 'score': 1.0}`` when the best score does not
        exceed *threshold* or the classifier returned no scores.
    """
    neutral = {'label': 'neutral', 'score': 1.0}

    # top_k=None asks for the scores of every class; it replaces the
    # deprecated return_all_scores=True. truncation=True keeps long feedback
    # within the classifier's maximum input length instead of failing.
    scores = classifier(text, top_k=None, truncation=True)

    # For a single input the per-class scores may come back flat or nested
    # one level deep depending on the arguments/pipeline version; accept both.
    if scores and isinstance(scores[0], (list, tuple)):
        scores = scores[0]
    if not scores:
        return neutral

    top_emotion = max(scores, key=lambda item: item['score'])
    return top_emotion if top_emotion['score'] > threshold else neutral

def generate_prompt(text, emotion):
    """Build the generation prompt that matches the detected emotion.

    Args:
        text: Customer feedback to embed in the prompt.
        emotion: Emotion label; matched case-insensitively against
            ``anger``, ``joy`` and ``neutral``. Any other label uses the
            neutral template.

    Returns:
        The selected template with *text* substituted for ``{input}``.
    """
    neutral_template = (
        "Customer comment: {input}\n"
        "Respond with:\n"
        "1. Acknowledge feedback\n2. Offer assistance\n3. Next steps\n"
        "Response:"
    )
    templates_by_emotion = {
        "anger": (
            "Customer complaint: {input}\n"
            "Respond with:\n"
            "1. Apology\n2. Solution steps\n3. Compensation offer\n"
            "Response:"
        ),
        "joy": (
            "Positive feedback: {input}\n"
            "Respond with:\n"
            "1. Appreciation\n2. Highlight strengths\n3. Loyalty benefits\n"
            "Response:"
        ),
        "neutral": neutral_template,
    }

    label = emotion.lower()
    if label in templates_by_emotion:
        chosen = templates_by_emotion[label]
    else:
        # Labels without a dedicated template share the neutral wording.
        chosen = neutral_template
    return chosen.format(input=text)

def process_response(output_text, max_length=300, min_length=50):
    """Clean a generated reply so it is bounded in length and ends on a sentence.

    The length cap is applied first and the sentence trimming second, so a
    sentence (or word) cut in half by the cap is removed as well. Sentences
    may end in ``.``, ``!`` or ``?``.

    Args:
        output_text: Raw text produced by the language model.
        max_length: Hard upper bound on the number of characters kept.
        min_length: Replies shorter than this are replaced by a stock message.

    Returns:
        The cleaned reply, or a generic thank-you message when the input is
        empty or the cleaned reply is shorter than *min_length*.
    """
    fallback = "Thank you for your feedback. We'll review this and contact you shortly."
    if not output_text:
        return fallback

    # Cap first: trimming afterwards also repairs whatever the cap cut off.
    text = output_text.strip()[:max_length]

    # Drop the trailing fragment after the last sentence terminator, if any.
    last_stop = max(text.rfind(mark) for mark in ".!?")
    if last_stop != -1:
        text = text[:last_stop + 1]
    text = text.strip()

    if len(text) < min_length:
        return fallback
    return text

def generate_text_response(user_input, models):
    """Generate a written reply to a piece of customer feedback.

    Steps: classify the emotion, build the matching prompt, sample a
    continuation from the language model, then clean it with
    ``process_response``. Sampling is enabled, so repeated calls with the
    same input can return different text.

    Args:
        user_input: Customer feedback text.
        models: Dict providing ``emotion``, ``textgen_tokenizer`` and
            ``textgen_model``.

    Returns:
        The cleaned reply text.
    """
    tokenizer = models['textgen_tokenizer']

    # Emotion analysis
    emotion = analyze_emotion(user_input, models['emotion'])

    # Prompt engineering
    prompt = generate_prompt(user_input, emotion['label'])

    # Text generation. The whole tokenizer output is forwarded so that the
    # attention mask reaches generate() together with the input ids.
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = models['textgen_model'].generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.7,
        do_sample=True,
        top_p=0.9
    )

    # Decode only the tokens produced after the prompt, so the reply does not
    # depend on how the prompt text round-trips through the tokenizer.
    prompt_length = inputs["input_ids"].shape[-1]
    generated = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # If the model repeats the "Response:" marker, keep the text after the
    # last occurrence.
    return process_response(generated.split("Response:")[-1].strip())

def generate_audio_response(text, models):
    """Synthesize *text* as speech and write it to ``response.wav``.

    Args:
        text: Text to speak.
        models: Dict providing ``tts_processor``, ``tts_model``,
            ``tts_vocoder`` and ``speaker_embeddings``.

    Returns:
        The path of the written file. The path is fixed, so every call
        overwrites the previous audio.
    """
    output_path = "response.wav"

    # Tokenize the text for the TTS model
    tts_inputs = models['tts_processor'](text=text, return_tensors="pt")

    # Text -> spectrogram, conditioned on the speaker embedding
    synthesizer = models['tts_model']
    mel = synthesizer.generate_speech(
        tts_inputs["input_ids"],
        models['speaker_embeddings'],
    )

    # Spectrogram -> waveform, without tracking gradients
    with torch.no_grad():
        audio = models['tts_vocoder'](mel)

    # Persist the samples as a WAV file written at a 16000 Hz sample rate
    sf.write(output_path, audio.numpy(), samplerate=16000)
    return output_path

##########################################
# Main Application Flow
##########################################
def main():
    """Run one pass of the app: load models, draw the UI, answer the feedback.

    Nothing is generated while the feedback box is empty or contains only
    whitespace.
    """
    # Load models once
    ml_models = load_models()

    # Render UI
    user_input = render_interface()

    # Whitespace-only input is truthy but carries no feedback; treat it as
    # empty so the models are not run on a blank prompt.
    feedback = user_input.strip() if user_input else ""
    if not feedback:
        return

    # Text generation
    with st.status("🔍 Analyzing feedback...", expanded=True) as status:
        text_response = generate_text_response(feedback, ml_models)
        status.update(label="✅ Analysis Complete", state="complete")

    # Display text response
    st.subheader("📝 Generated Response")
    st.markdown(f"```\n{text_response}\n```")

    # Audio generation
    with st.spinner("🔊 Generating voice response..."):
        audio_file = generate_audio_response(text_response, ml_models)
        st.audio(audio_file, format="audio/wav")

# Start the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()