import gradio as gr
from fastapi import FastAPI
import librosa
import openai
from transformers import pipeline
import requests
import os
from pydantic import BaseModel
import numpy as np

# Initialize FastAPI
app = FastAPI()

# Initialize the text emotion classifier (device=-1 keeps inference on CPU)
text_emotion_classifier = pipeline(
    "text-classification",
    model="bhadresh-savani/distilbert-base-uncased-emotion",
    device=-1,
)
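# The pipeline returns a list of label/score dicts, e.g.
# [{"label": "joy", "score": 0.99}] (values shown are illustrative).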

# Environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ELEVEN_LABS_API_KEY = os.getenv("ELEVEN_LABS_API_KEY")
VOICE_ID = os.getenv("VOICE_ID", "9BWtsMINqrJLrRacOk9x")

def analyze_text_emotion(text):
    """Classify the emotion of a text snippet with the DistilBERT emotion model."""
    try:
        emotion_result = text_emotion_classifier(text)
        emotion_data = emotion_result[0]
        return f"Emotion: {emotion_data['label']}\nConfidence: {emotion_data['score']:.2f}"
    except Exception as e:
        return f"Error: {str(e)}"

def analyze_voice_emotion(audio):
    """Estimate an emotion label from simple acoustic features of an audio clip."""
    try:
        if audio is None:
            return "Please upload an audio file"

        # Gradio's numpy audio format is (sample_rate, samples).
        sr, y = audio

        # librosa expects mono float audio; normalize integer PCM and downmix stereo.
        if np.issubdtype(y.dtype, np.integer):
            y = y.astype(np.float32) / np.iinfo(y.dtype).max
        else:
            y = y.astype(np.float32)
        if y.ndim > 1:
            y = y.mean(axis=1)

        # Spectral centroid is used here as a rough proxy for pitch/brightness.
        pitch = float(librosa.feature.spectral_centroid(y=y, sr=sr).mean())
        intensity = float(librosa.feature.rms(y=y).mean())
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        tempo = float(np.atleast_1d(tempo)[0])  # beat_track may return an array

        # Crude threshold heuristic, not a trained model.
        if pitch < 150 and intensity < 0.02:
            emotion = "sadness"
        elif pitch > 200 and intensity > 0.05:
            emotion = "anger"
        elif pitch > 150 and intensity < 0.03:
            emotion = "joy"
        else:
            emotion = "anxiety"

        return f"Emotion: {emotion}\nPitch: {pitch:.2f}\nIntensity: {intensity:.2f}\nTempo: {tempo:.2f}"
    except Exception as e:
        return f"Error analyzing audio: {str(e)}"

def chat_and_tts(message):
    """Generate a chat reply with OpenAI and synthesize it with ElevenLabs TTS."""
    try:
        if not OPENAI_API_KEY or not ELEVEN_LABS_API_KEY:
            return "API keys not configured", None

        # Note: this uses the legacy ChatCompletion interface (requires openai < 1.0).
        openai.api_key = OPENAI_API_KEY
        chat_response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": message},
            ]
        )
        response_text = chat_response['choices'][0]['message']['content'].strip()

        url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
        headers = {
            "xi-api-key": ELEVEN_LABS_API_KEY,
            "Content-Type": "application/json"
        }
        data = {
            "text": response_text,
            "voice_settings": {
                "stability": 0.75,
                "similarity_boost": 0.75
            }
        }
        response = requests.post(url, json=data, headers=headers, timeout=60)
        
        if response.status_code != 200:
            return response_text, None
            
        audio_path = "response.mp3"
        with open(audio_path, "wb") as f:
            f.write(response.content)
            
        return response_text, audio_path
    except Exception as e:
        return f"Error: {str(e)}", None

# Create Gradio interface
demo = gr.Blocks(title="AI Therapist")

with demo:
    gr.Markdown("# AI Virtual Therapist")
    
    with gr.Tab("Text Emotion Analysis"):
        text_input = gr.Textbox(label="Enter text")
        text_button = gr.Button("Analyze Text Emotion")
        text_output = gr.Textbox(label="Emotion Analysis Result")
        text_button.click(analyze_text_emotion, inputs=text_input, outputs=text_output)
    
    with gr.Tab("Voice Emotion Analysis"):
        audio_input = gr.Audio(label="Upload Audio", type="numpy")
        audio_button = gr.Button("Analyze Voice Emotion")
        audio_output = gr.Textbox(label="Voice Analysis Result")
        audio_button.click(analyze_voice_emotion, inputs=audio_input, outputs=audio_output)
    
    with gr.Tab("Chat with TTS"):
        chat_input = gr.Textbox(label="Enter your message")
        chat_button = gr.Button("Send Message")
        chat_output = gr.Textbox(label="Assistant Response")
        audio_output = gr.Audio(label="Voice Response")
        chat_button.click(chat_and_tts, inputs=chat_input, outputs=[chat_output, audio_output])

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
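
# Alternative serving sketch (assumption, not the original entry point): to serve
# the optional REST endpoint above and the Gradio UI from one ASGI server, the
# Blocks app could be mounted onto FastAPI and run with uvicorn instead of
# demo.launch(), e.g.:
#
#   import uvicorn
#   app = gr.mount_gradio_app(app, demo, path="/")
#   uvicorn.run(app, host="0.0.0.0", port=7860)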