##########################################
# Step 0: Import required libraries
##########################################
import streamlit as st  # Web app framework
from transformers import (
    pipeline,
    SpeechT5Processor,
    SpeechT5ForTextToSpeech,
    SpeechT5HifiGan,
    AutoModelForCausalLM,
    AutoTokenizer
)  # NLP and TTS models
from datasets import load_dataset  # Speaker embeddings
import torch  # Tensor operations
import soundfile as sf  # Audio file handling
import sentencepiece  # Required by the SpeechT5 tokenizer; imported so a missing backend fails fast

##########################################
# Initial configuration (MUST be first)
##########################################
st.set_page_config(
    page_title="🚀 Just Comment - AI Response Generator",
    page_icon="💬",
    layout="centered",
    initial_sidebar_state="collapsed"
)

##########################################
# Global model loading with caching
##########################################
@st.cache_resource(show_spinner=False)
def load_models():
    """Load and cache all ML models"""
    return {
        # Emotion classifier
        'emotion': pipeline(
            "text-classification", 
            model="Thea231/jhartmann_emotion_finetuning"
        ),
        
        # Text generation models
        'textgen_tokenizer': AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
        'textgen_model': AutoModelForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B"),
        
        # TTS components
        'tts_processor': SpeechT5Processor.from_pretrained("microsoft/speecht5_tts"),
        'tts_model': SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts"),
        'tts_vocoder': SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan"),
        
        # Speaker embeddings
        'speaker_embeddings': torch.tensor(
            load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")[7306]["xvector"]
        ).unsqueeze(0)
    }
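
# st.cache_resource keeps a single shared copy of these models per server
# process, so reruns triggered by widget interaction reuse them instead of
# downloading and initializing everything again.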

##########################################
# UI Components
##########################################
def render_interface():
    """Create user interface elements"""
    st.title("🚀 AI Customer Response Generator")
    st.caption("Analyzes feedback and generates tailored responses")
    
    return st.text_area(
        "πŸ“ Paste customer feedback here:",
        placeholder="The product arrived damaged...",
        height=150,
        key="user_input"
    )

##########################################
# Core Logic Components
##########################################
def analyze_emotion(text, classifier):
    """Determine dominant emotion with confidence threshold"""
    results = classifier(text, top_k=None)  # top_k=None returns all label scores (return_all_scores is deprecated)
    top_emotion = max(results, key=lambda x: x['score'])
    return top_emotion if top_emotion['score'] > 0.6 else {'label': 'neutral', 'score': 1.0}
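
# Example (illustrative scores, not real model output): for "The parcel arrived
# crushed and nobody replied", the classifier might return
# [{'label': 'anger', 'score': 0.93}, ...], so analyze_emotion yields
# {'label': 'anger', 'score': 0.93}; scores at or below 0.6 fall back to 'neutral'.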

def generate_prompt(text, emotion):
    """Create structured prompts for different emotions"""
    prompt_templates = {
        "anger": (
            "Customer complaint: {input}\n"
            "Respond with:\n"
            "1. Apology\n2. Solution steps\n3. Compensation offer\n"
            "Response:"
        ),
        "joy": (
            "Positive feedback: {input}\n"
            "Respond with:\n"
            "1. Appreciation\n2. Highlight strengths\n3. Loyalty benefits\n"
            "Response:"
        ),
        "neutral": (
            "Customer comment: {input}\n"
            "Respond with:\n"
            "1. Acknowledge feedback\n2. Offer assistance\n3. Next steps\n"
            "Response:"
        )
    }
    return prompt_templates.get(emotion.lower(), prompt_templates['neutral']).format(input=text)
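
# Example: generate_prompt("Item arrived broken", "anger") renders:
#   Customer complaint: Item arrived broken
#   Respond with:
#   1. Apology
#   2. Solution steps
#   3. Compensation offer
#   Response: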

def process_response(output_text):
    """Ensure response quality and proper formatting"""
    # Hard limit at 300 characters first, so the sentence trim below cannot
    # leave a fresh mid-sentence cut at the end
    output_text = output_text[:300].strip()

    # Drop any trailing incomplete sentence
    if '.' in output_text:
        output_text = output_text.rsplit('.', 1)[0] + '.'

    # Fallback for responses too short to be useful
    if len(output_text) < 50:
        return "Thank you for your feedback. We'll review this and contact you shortly."

    return output_text
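
# Example:
#   process_response("We apologize for the damage. A replacement ships today at no cost. We also")
#   -> "We apologize for the damage. A replacement ships today at no cost."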

def generate_text_response(user_input, models):
    """Generate and validate text response"""
    # Emotion analysis
    emotion = analyze_emotion(user_input, models['emotion'])
    
    # Prompt engineering
    prompt = generate_prompt(user_input, emotion['label'])
    
    # Text generation
    inputs = models['textgen_tokenizer'](prompt, return_tensors="pt")
    outputs = models['textgen_model'].generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,  # silence the missing-mask warning
        max_new_tokens=200,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
        pad_token_id=models['textgen_tokenizer'].eos_token_id  # Qwen defines no dedicated pad token
    )
    
    # Decode; the model echoes the prompt, so keep only the text after the last "Response:" marker
    full_response = models['textgen_tokenizer'].decode(outputs[0], skip_special_tokens=True)
    return process_response(full_response.split("Response:")[-1].strip())

def generate_audio_response(text, models):
    """Convert text to speech"""
    # Process text input
    inputs = models['tts_processor'](text=text, return_tensors="pt")
    
    # Generate spectrogram
    spectrogram = models['tts_model'].generate_speech(
        inputs["input_ids"],
        models['speaker_embeddings']
    )
    
    # Generate waveform
    with torch.no_grad():
        waveform = models['tts_vocoder'](spectrogram)
    
    # Save and return audio
    sf.write("response.wav", waveform.numpy(), samplerate=16000)
    return "response.wav"

##########################################
# Main Application Flow
##########################################
def main():
    # Load models once
    ml_models = load_models()
    
    # Render UI
    user_input = render_interface()
    
    # Process input
    if user_input:
        # Text generation
        with st.status("🔍 Analyzing feedback...", expanded=True) as status:
            text_response = generate_text_response(user_input, ml_models)
            status.update(label="✅ Analysis Complete", state="complete")
        
        # Display text response
        st.subheader("📝 Generated Response")
        st.markdown(f"```\n{text_response}\n```")
        
        # Audio generation
        with st.spinner("🔊 Generating voice response..."):
            audio_file = generate_audio_response(text_response, ml_models)
            st.audio(audio_file, format="audio/wav")

if __name__ == "__main__":
    main()
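
# To run locally (assuming this file is saved as app.py and the dependencies
# are installed):
#   pip install streamlit transformers datasets torch soundfile sentencepiece
#   streamlit run app.py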