##########################################
# Step 0: Import required libraries
##########################################
import streamlit as st  # For building the web application interface
from transformers import (  # For text classification, text-to-speech, and text generation
    pipeline,
    SpeechT5Processor,
    SpeechT5ForTextToSpeech,
    SpeechT5HifiGan,
    AutoModelForCausalLM,
    AutoTokenizer
)
from datasets import load_dataset  # To load speaker embeddings dataset
import torch  # For tensor operations
import soundfile as sf  # For saving audio as .wav files
import sentencepiece  # Required by SpeechT5Processor for tokenization
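
# Note: Streamlit re-executes this script on every interaction, so the models loaded in
# the functions below are re-instantiated on each run. A minimal sketch (assuming
# Streamlit >= 1.18, where st.cache_resource is available) of caching one heavy
# resource, shown for illustration only:
#
#     @st.cache_resource
#     def load_emotion_classifier():
#         # Loaded once per server process and reused across reruns
#         return pipeline(
#             "text-classification",
#             model="Thea231/jhartmann_emotion_finetuning",
#             return_all_scores=True
#         )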

##########################################
# Streamlit application title and input
##########################################
# Display a deep blue title in a large, visually appealing font
st.markdown(
    "<h1 style='text-align: center; color: #00008B; font-size: 50px;'>🚀 Just Comment</h1>",
    unsafe_allow_html=True
)  # Set deep blue title

# Display a gentle, warm subtitle below the title
st.markdown(
    "<h3 style='text-align: center; color: #5D6D7E; font-style: italic;'>I'm listening to you, my friend~</h3>",
    unsafe_allow_html=True
)  # Set a friendly subtitle

# Add a text area for user input with placeholder and tooltip
text = st.text_area(
    "Enter your comment",
    placeholder="Type something here...",
    height=100,
    help="Write a comment you would like us to respond to!"  # Provide tooltip
)  # Create text input field

##########################################
# Step 1: Sentiment Analysis Function
##########################################
def analyze_dominant_emotion(user_review):
    """
    Analyze the dominant emotion in the user's comment using a fine-tuned text classification model.
    """
    emotion_classifier = pipeline(
        "text-classification",
        model="Thea231/jhartmann_emotion_finetuning",
        return_all_scores=True
    )  # Load the sentiment classification model
    emotion_results = emotion_classifier(user_review)[0]  # Get sentiment scores for the input text
    dominant_emotion = max(emotion_results, key=lambda x: x['score'])  # Identify the emotion with highest score
    return dominant_emotion  # Return the dominant emotion (as a dict with label and score)
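
# Illustration only: for a hypothetical input, analyze_dominant_emotion returns a single
# dict of the form sketched below (labels and scores depend on the fine-tuned model):
#
#     analyze_dominant_emotion("The parcel arrived two weeks late and the box was crushed.")
#     # -> {'label': 'anger', 'score': 0.93}  # hypothetical values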

##########################################
# Step 2: Response Generation Functions
##########################################
def prompt_gen(user_review):
    """
    Generate the text generation prompt based on the user's comment and detected emotion.
    """
    # Get dominant emotion for the input
    dominant_emotion = analyze_dominant_emotion(user_review)  # Analyze user's comment
    # Define response templates for 7 emotions
    emotion_strategies = {
        "anger": {
            "prompt": (
                "Customer complaint: '{review}'\n\n"
                "As a customer service representative, craft a professional response that:\n"
                "- Begins with a sincere apology and acknowledgment.\n"
                "- Clearly explains a solution process with concrete steps.\n"
                "- Offers appropriate compensation or redemption.\n"
                "- Keeps a humble and solution-focused tone (1-3 sentences).\n\n"
                "Response:"
            )
        },
        "disgust": {
            "prompt": (
                "Customer quality concern: '{review}'\n\n"
                "As a customer service representative, craft a response that:\n"
                "- Immediately acknowledges the product issue.\n"
                "- Explains measures taken in quality control.\n"
                "- Provides clear return/replacement instructions.\n"
                "- Offers a goodwill gesture (1-3 sentences).\n\n"
                "Response:"
            )
        },
        "fear": {
            "prompt": (
                "Customer safety concern: '{review}'\n\n"
                "As a customer service representative, craft a reassuring response that:\n"
                "- Directly addresses the safety worries.\n"
                "- References relevant certifications or standards.\n"
                "- Offers a dedicated support contact.\n"
                "- Provides a satisfaction guarantee (1-3 sentences).\n\n"
                "Response:"
            )
        },
        "joy": {
            "prompt": (
                "Customer review: '{review}'\n\n"
                "As a customer service representative, craft a concise response that:\n"
                "- Thanks the customer for their feedback.\n"
                "- Acknowledges both positive and constructive points.\n"
                "- Invites them to explore loyalty or referral programs (1-3 sentences).\n\n"
                "Response:"
            )
        },
        "neutral": {
            "prompt": (
                "Customer feedback: '{review}'\n\n"
                "As a customer service representative, craft a balanced response that:\n"
                "- Provides additional relevant product information.\n"
                "- Highlights key service features.\n"
                "- Politely requests more detailed feedback.\n"
                "- Maintains a professional tone (1-3 sentences).\n\n"
                "Response:"
            )
        },
        "sadness": {
            "prompt": (
                "Customer disappointment: '{review}'\n\n"
                "As a customer service representative, craft an empathetic response that:\n"
                "- Shows genuine understanding of the issue.\n"
                "- Proposes a personalized recovery solution.\n"
                "- Offers extended support options.\n"
                "- Maintains a positive outlook (1-3 sentences).\n\n"
                "Response:"
            )
        },
        "surprise": {
            "prompt": (
                "Customer enthusiastic feedback: '{review}'\n\n"
                "As a customer service representative, craft a response that:\n"
                "- Matches the customer's positive energy.\n"
                "- Highlights unexpected product benefits.\n"
                "- Invites the customer to join community events or programs.\n"
                "- Maintains the brand's voice (1-3 sentences).\n\n"
                "Response:"
            )
        }
    }  # Dictionary mapping each emotion to a prompt template
    # Get the template for the detected emotion, default to 'neutral' if not found
    template = emotion_strategies.get(dominant_emotion["label"].lower(), emotion_strategies["neutral"])["prompt"]
    prompt = template.format(review=user_review)  # Insert the user review into the template
    print(f"Generated prompt: {prompt}")  # Debug print using f-string
    return prompt  # Return the generated prompt
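
# Illustration only: for a hypothetical review classified as "anger", prompt_gen returns
# the anger template with the review text substituted in, roughly:
#
#     prompt_gen("The parcel arrived two weeks late and the box was crushed.")
#     # -> "Customer complaint: 'The parcel arrived two weeks late and the box was crushed.'\n\n"
#     #    "As a customer service representative, craft a professional response that:\n"
#     #    ...
#     #    "Response:"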

def response_gen(user_review):
    """
    Generate a response using text generation based on the user's comment.
    """
    prompt = prompt_gen(user_review)  # Get the generated prompt using the detected emotion template
    # Load the tokenizer and language model for text generation
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B")  # Load tokenizer for text generation
    model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B")  # Load causal language model for generation
    inputs = tokenizer(prompt, return_tensors="pt")  # Tokenize the prompt
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,  # Allow up to 100 new tokens for the answer
        min_new_tokens=30,  # Ensure a minimum number of newly generated tokens (min_length would count the prompt too)
        no_repeat_ngram_size=2,  # Avoid repeated phrases
        do_sample=True,  # Enable sampling so that the temperature setting takes effect
        temperature=0.7  # Use a moderate temperature for creativity
    )  # Generate the response from the model
    input_length = inputs.input_ids.shape[1]  # Determine length of the input prompt
    response = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)  # Extract only generated answer text
    print(f"Generated response: {response}")  # Debug print using f-string
    return response  # Return the generated response
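
# Illustration only (sampling makes the output non-deterministic): response_gen chains
# prompt_gen and the Qwen1.5-0.5B model and returns only the newly generated text, e.g.:
#
#     response_gen("The parcel arrived two weeks late and the box was crushed.")
#     # -> "We sincerely apologize for the delay and the damaged packaging..."  # hypothetical output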

##########################################
# Step 3: Text-to-Speech Conversion Function
##########################################
def sound_gen(response):
    """
    Convert the generated response to speech and embed an audio player for playback.
    """
    # Load SpeechT5 processor, TTS model, and vocoder
    processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")  # Load TTS processor
    model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")  # Load TTS model
    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")  # Load vocoder
    # Process the full generated response text for TTS
    inputs = processor(text=response, return_tensors="pt")  # Convert text to model input tensors
    # Load real x-vector speaker embeddings; SpeechT5 expects a 512-dimensional speaker embedding
    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")  # Load the x-vector dataset
    speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)  # Select one (1, 512) speaker embedding
    spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)  # Generate the speech spectrogram
    with torch.no_grad():
        speech = vocoder(spectrogram)  # Convert spectrogram to waveform
    # Save the audio as a .wav file with 16kHz sampling rate
    sf.write("customer_service_response.wav", speech.numpy(), samplerate=16000)  # Write the waveform to file
    st.audio("customer_service_response.wav", start_time=0)  # Embed an auto-playing audio widget

##########################################
# Main Function
##########################################
def main():
    """
    Main function to orchestrate text generation and text-to-speech conversion.
    It displays only the generated response and plays its audio.
    """
    if text:  # Check if the user has entered a comment
        response = response_gen(text)  # Generate a response using text generation based on emotion
        st.markdown(
            f"<p style='color:#3498DB; font-size:20px;'>{response}</p>",
            unsafe_allow_html=True
        )  # Display the generated response in styled format
        sound_gen(response)  # Convert the generated response to speech and embed the audio player
        print(f"Final generated response: {response}")  # Debug print using f-string

# Execute the main function when the script is run
if __name__ == "__main__":
    main()  # Call the main function
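
# To launch the app locally (the script filename below is an assumption), run from a shell:
#     streamlit run app.py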