import gradio as gr
from fastapi import FastAPI
import librosa
import openai
from transformers import pipeline
import requests
import os
from pydantic import BaseModel
import numpy as np
# Initialize FastAPI
app = FastAPI()
# Initialize emotion classifier
text_emotion_classifier = pipeline(
    "text-classification",
    model="bhadresh-savani/distilbert-base-uncased-emotion",
    device=-1,
)
# Environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ELEVEN_LABS_API_KEY = os.getenv("ELEVEN_LABS_API_KEY")
VOICE_ID = os.getenv("VOICE_ID", "9BWtsMINqrJLrRacOk9x")
def analyze_text_emotion(text):
    """Classify the emotion of a piece of text with the DistilBERT emotion model."""
    try:
        emotion_result = text_emotion_classifier(text)
        emotion_data = emotion_result[0]
        return f"Emotion: {emotion_data['label']}\nConfidence: {emotion_data['score']:.2f}"
    except Exception as e:
        return f"Error: {str(e)}"
def analyze_voice_emotion(audio):
    """Estimate an emotion from simple acoustic features of an uploaded recording."""
    try:
        if audio is None:
            return "Please upload an audio file"
        # gr.Audio(type="numpy") yields a (sample_rate, samples) tuple
        sr, y = audio
        # librosa expects mono float audio; Gradio may hand back int16 and/or stereo
        y = np.asarray(y, dtype=np.float32)
        if y.ndim > 1:
            y = y.mean(axis=1)
        if np.max(np.abs(y)) > 1.0:
            y = y / 32768.0
        # Spectral centroid is used here as a rough pitch/brightness proxy
        pitch = float(librosa.feature.spectral_centroid(y=y, sr=sr).mean())
        intensity = float(librosa.feature.rms(y=y).mean())
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        tempo = float(np.atleast_1d(tempo)[0])  # beat_track may return an array
        # Crude threshold-based mapping from acoustic features to an emotion label
        if pitch < 150 and intensity < 0.02:
            emotion = "sadness"
        elif pitch > 200 and intensity > 0.05:
            emotion = "anger"
        elif pitch > 150 and intensity < 0.03:
            emotion = "joy"
        else:
            emotion = "anxiety"
        return f"Emotion: {emotion}\nPitch: {pitch:.2f}\nIntensity: {intensity:.2f}\nTempo: {tempo:.2f}"
    except Exception as e:
        return f"Error analyzing audio: {str(e)}"
def chat_and_tts(message):
    """Get a reply from the OpenAI chat model, then synthesize it with ElevenLabs TTS."""
    try:
        if not OPENAI_API_KEY or not ELEVEN_LABS_API_KEY:
            return "API keys not configured", None
        # Chat completion (legacy pre-1.0 openai SDK interface)
        openai.api_key = OPENAI_API_KEY
        chat_response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": message},
            ],
        )
        response_text = chat_response['choices'][0]['message']['content'].strip()
        # Text-to-speech via the ElevenLabs REST API
        url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
        headers = {
            "xi-api-key": ELEVEN_LABS_API_KEY,
            "Content-Type": "application/json"
        }
        data = {
            "text": response_text,
            "voice_settings": {
                "stability": 0.75,
                "similarity_boost": 0.75
            }
        }
        response = requests.post(url, json=data, headers=headers)
        if response.status_code != 200:
            # Fall back to a text-only reply if speech synthesis fails
            return response_text, None
        audio_path = "response.mp3"
        with open(audio_path, "wb") as f:
            f.write(response.content)
        return response_text, audio_path
    except Exception as e:
        return f"Error: {str(e)}", None
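# The FastAPI `app`, `BaseModel`, and `numpy` imports above are otherwise unused in
# this file. As a hedged sketch of what a REST counterpart to the Gradio UI could
# look like (the `TextRequest` model and `/analyze-text` route are assumptions, not
# part of the original Space), one might add:
class TextRequest(BaseModel):
    text: str

@app.post("/analyze-text")
def analyze_text_api(request: TextRequest):
    # Reuses the same classifier as the "Text Emotion Analysis" tab
    return {"result": analyze_text_emotion(request.text)}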
# Create Gradio interface
demo = gr.Blocks(title="AI Therapist")

with demo:
    gr.Markdown("# AI Virtual Therapist")

    with gr.Tab("Text Emotion Analysis"):
        text_input = gr.Textbox(label="Enter text")
        text_button = gr.Button("Analyze Text Emotion")
        text_output = gr.Textbox(label="Emotion Analysis Result")
        text_button.click(analyze_text_emotion, inputs=text_input, outputs=text_output)

    with gr.Tab("Voice Emotion Analysis"):
        audio_input = gr.Audio(label="Upload Audio", type="numpy")
        audio_button = gr.Button("Analyze Voice Emotion")
        audio_output = gr.Textbox(label="Voice Analysis Result")
        audio_button.click(analyze_voice_emotion, inputs=audio_input, outputs=audio_output)

    with gr.Tab("Chat with TTS"):
        chat_input = gr.Textbox(label="Enter your message")
        chat_button = gr.Button("Send Message")
        chat_output = gr.Textbox(label="Assistant Response")
        tts_audio_output = gr.Audio(label="Voice Response")  # distinct name avoids shadowing the voice-analysis output
        chat_button.click(chat_and_tts, inputs=chat_input, outputs=[chat_output, tts_audio_output])
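# Optional (an assumption about intent, not in the original file): since a FastAPI
# `app` is created above, the Gradio UI could be mounted on it and both served by a
# single ASGI server, e.g.:
#   app = gr.mount_gradio_app(app, demo, path="/")
#   # then serve with uvicorn instead of calling demo.launch() below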
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)