##########################################
# Step 0: Import required libraries
##########################################
import streamlit as st  # For building the web application
from transformers import ( 
    pipeline, 
    SpeechT5Processor, 
    SpeechT5ForTextToSpeech, 
    SpeechT5HifiGan, 
    AutoModelForCausalLM, 
    AutoTokenizer 
)  # For emotion analysis, text-to-speech, and text generation
from datasets import load_dataset  # For loading datasets (e.g., speaker embeddings)
import torch  # For tensor operations
import soundfile as sf  # For saving audio as .wav files

##########################################
# Streamlit application title and input
##########################################
st.title("🚀 Just Comment")  # Application title displayed to users
st.write("I'm listening to you, my friend~")  # Application description for users
text = st.text_area("Enter your comment", "")  # Text area for user input of comments

##########################################
# Step 1: Sentiment Analysis Function
##########################################
def analyze_dominant_emotion(user_review):
    """ Analyze the dominant emotion in the user's review using a text classification model. """
    emotion_classifier = pipeline(
        "text-classification",
        model="Thea231/jhartmann_emotion_finetuning",
        return_all_scores=True
    )  # Load the emotion classification model (returns scores for every label)
    emotion_results = emotion_classifier(user_review)[0]  # Get emotion scores for the review
    dominant_emotion = max(emotion_results, key=lambda x: x['score'])  # Find the emotion with the highest confidence
    return dominant_emotion  # Return the dominant emotion (as a dict with label and score)
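
# For reference, a rough sketch of the output this function relies on (the labels follow
# the same taxonomy as the templates in response_gen below; the scores are illustrative):
#   emotion_classifier("The product broke after one day!")[0]
#   # -> [{'label': 'anger', 'score': 0.93}, {'label': 'sadness', 'score': 0.04}, ...]
#   # max(..., key=lambda x: x['score']) then selects the 'anger' entry.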

##########################################
# Step 2: Response Generation Function
##########################################
def response_gen(user_review):
    """ Generate a response based on the sentiment of the user's review. """
    dominant_emotion = analyze_dominant_emotion(user_review)  # Get dominant emotion for the input
    emotion_label = dominant_emotion['label'].lower()  # Extract emotion label

    # Define response templates for each emotion
    emotion_prompts = {
        "anger": "I appreciate your feedback and apologize for the inconvenience caused by '{review}'. We're committed to resolving this issue promptly and will ensure it doesn't happen again. Thank you for your patience.",
        "joy": "Thank you for your positive feedback on '{review}'! We're thrilled to hear you had a great experience and hope to serve you again soon.",
        "disgust": "We regret that your experience with '{review}' did not meet our standards. We will take immediate steps to address this issue and appreciate your understanding.",
        "fear": "Your safety is our priority. Regarding your concern about '{review}', we ensure that all our products meet strict safety standards. Please feel free to reach out for further assistance.",
        "neutral": "Thank you for your feedback on '{review}'. We value your input and would love to hear more about your experience to improve our services.",
        "sadness": "I'm sorry to hear that you were disappointed with '{review}'. We're here to help and would like to offer you a solution tailored to your needs.",
        "surprise": "We're glad to hear that '{review}' exceeded your expectations! Thank you for sharing your excitement with us."
    }

    # Format the prompt with the user's review
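    # For illustration, with emotion_label == "joy" and a review of "Great shoes!", the
    # resulting prompt reads: "Thank you for your positive feedback on 'Great shoes!'! ..."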
    prompt = emotion_prompts.get(emotion_label, emotion_prompts["neutral"]).format(review=user_review)  # Fill the template; unknown labels fall back to the neutral one

    # Load a pre-trained text generation model
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B")  # Load tokenizer
    model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B")  # Load model
    inputs = tokenizer(prompt, return_tensors="pt")  # Tokenize the prompt

    outputs = model.generate(**inputs, max_new_tokens=100)  # Generate a continuation of the prompt
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)  # Decode the full sequence (prompt plus generated text)
    return response.strip()[:200]  # Return at most the first 200 characters as the final response
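
# A minimal caching sketch (illustrative only, not wired into response_gen above): reloading
# Qwen on every Streamlit rerun is slow, and st.cache_resource (Streamlit >= 1.18) can keep
# the tokenizer and model in memory across reruns:
#
#   @st.cache_resource
#   def load_generator():
#       tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B")
#       model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B")
#       return tokenizer, model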

##########################################
# Step 3: Text-to-Speech Conversion Function
##########################################
def sound_gen(response):
    """ Convert the generated response to speech and save as a .wav file. """
    # Load the pre-trained TTS models
    processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")  # Load processor
    model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")  # Load TTS model
    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")  # Load vocoder

    # Load speaker embeddings (index 7306 is the voice used in the SpeechT5 examples)
    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")  # Load x-vector dataset
    speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)  # One speaker's x-vector with a batch dimension

    # Process the input text and generate a spectrogram
    inputs = processor(text=response, return_tensors="pt")  # Process the text
    spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)  # Generate spectrogram

    # Use the vocoder to generate a waveform
    with torch.no_grad():
        speech = vocoder(spectrogram)  # Generate speech waveform

    # Save the generated speech as a 16 kHz .wav file (SpeechT5's native sampling rate)
    sf.write("customer_service_response.wav", speech.numpy(), samplerate=16000)  # Write the waveform to disk
    st.audio("customer_service_response.wav")  # Play the audio in Streamlit
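
# For reference, the vocoder returns a 1-D float tensor of audio samples at 16 kHz, so a
# roughly five-second response corresponds to about 80,000 samples (illustrative figure).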

##########################################
# Main Function
##########################################
def main():
    """ Main function to orchestrate the workflow of sentiment analysis, response generation, and text-to-speech. """
    if text:  # Check if the user entered a comment
        response = response_gen(text)  # Generate a response
        st.write(f"Generated response: {response}")  # Display the generated response
        sound_gen(response)  # Convert the response to speech and play it

# Run the main function
if __name__ == "__main__":
    main()  # Execute the main function
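
# To launch the app locally (assuming this file is saved as app.py):
#   streamlit run app.py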