meraj12 committed
Commit b46560b · verified · 1 Parent(s): b20e1c3

Update app.py

Files changed (1)
  1. app.py +40 -90
app.py CHANGED
@@ -1,104 +1,54 @@
  import streamlit as st
  import tempfile
  import os
- import uuid
- import json
- from datetime import datetime
- from transformers import pipeline
- from bark import SAMPLE_RATE, generate_audio, preload_models
- import scipy.io.wavfile
  import requests
- from dotenv import load_dotenv
- import deepgram
- import torch

- # Load environment variables
- load_dotenv()

- # Set Deepgram API Key
- DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY")

- # Load Bark TTS models

- # Modify this part in the Bark generation code where the model is loaded
- checkpoint = torch.load(ckpt_path, map_location=device, weights_only=False)

- preload_models()
-
- # Conversation history
- if "history" not in st.session_state:
-     st.session_state.history = []
-
- st.set_page_config(page_title="Voice Chat App", layout="centered")
- st.title("🗣️ Voice-Based Conversational App")
-
- st.sidebar.header("🎛️ Settings")
- language = st.sidebar.selectbox("Select Language", ["en", "es", "fr", "de", "it"])
- emotion = st.sidebar.selectbox("Select Emotion", ["neutral", "happy", "sad", "angry"])
- voice_avatar = st.sidebar.selectbox("Select Voice Avatar", ["v2/en_speaker_1", "v2/en_speaker_9", "v2/en_speaker_5", "v2/en_speaker_0"])
-
- st.markdown("Speak or upload a voice file to start chatting with the AI")
-
- # Voice input
- voice_input = st.file_uploader("Upload a voice file", type=["wav", "mp3", "m4a"])
-
- if voice_input:
-     # Save temporary file
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
-         tmp_file.write(voice_input.read())
-         tmp_path = tmp_file.name
-
-     # Transcribe using Deepgram
      st.info("Transcribing...")
-
-     # Upload file to Deepgram API
-     client = deepgram.Deepgram(DEEPGRAM_API_KEY)
-     with open(tmp_path, 'rb') as f:
-         response = client.transcription.sync_prerecorded(f, language='en')
-
-     # Extract transcription from the response
-     user_text = response['results']['channels'][0]['alternatives'][0]['transcript']
      st.success(f"You said: {user_text}")

-     # Chat with Groq API (LLaMA3 or Mixtral)
-     st.info("Generating response...")
-     GROQ_API_KEY = os.getenv("GROQ_API_KEY")
-     headers = {
-         "Authorization": f"Bearer {GROQ_API_KEY}",
-         "Content-Type": "application/json"
-     }
-     data = {
-         "model": "mixtral-8x7b-32768",
-         "messages": [{"role": "user", "content": user_text}],
-         "temperature": 0.7
-     }
-     response = requests.post("https://api.groq.com/openai/v1/chat/completions", headers=headers, data=json.dumps(data))
-     reply_text = response.json()["choices"][0]["message"]["content"]
-     st.success(f"AI: {reply_text}")
-
-     # Save history
-     st.session_state.history.append({"user": user_text, "bot": reply_text})
-
-     # Convert to speech using Bark
-     st.info("Generating voice reply with Bark...")
-     audio_array = generate_audio(reply_text, history_prompt=voice_avatar)
-     output_path = f"output_{uuid.uuid4().hex}.wav"
-     scipy.io.wavfile.write(output_path, rate=SAMPLE_RATE, data=audio_array)
-
-     # Play response
-     audio_file = open(output_path, "rb")
-     audio_bytes = audio_file.read()
-     st.audio(audio_bytes, format="audio/wav")
-
-     # Clean up
      os.remove(tmp_path)
-     os.remove(output_path)
-
- # Show conversation history
- if st.session_state.history:
-     st.subheader("📜 Conversation History")
-     for i, item in enumerate(st.session_state.history):
-         st.markdown(f"**You:** {item['user']}")
-         st.markdown(f"**AI:** {item['bot']}")
-         st.markdown("---")
  import streamlit as st
+ import whisper
+ import openai
  import tempfile
  import os
  import requests
+ from gtts import gTTS
+ from pydub import AudioSegment
+ from pydub.playback import play

+ # Set your Groq-compatible OpenAI API key
+ openai.api_key = os.getenv("GROQ_API_KEY", "your-groq-api-key")

+ # Load Whisper model
+ model = whisper.load_model("base")

+ # Title
+ st.title("🎙️ Voice-to-Voice Conversational App")

+ # Upload or record voice
+ uploaded_file = st.file_uploader("Upload your voice message (MP3/WAV)", type=["mp3", "wav"])

+ if uploaded_file:
+     # Save audio to a temp file
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+         tmp.write(uploaded_file.read())
+         tmp_path = tmp.name

+     # Transcribe with Whisper
      st.info("Transcribing...")
+     result = model.transcribe(tmp_path)
+     user_text = result["text"]
      st.success(f"You said: {user_text}")

+     # Ask Groq/OpenAI
+     st.info("Thinking...")
+     response = openai.ChatCompletion.create(
+         model="mixtral-8x7b-32768",  # Groq supports this
+         messages=[{"role": "user", "content": user_text}]
+     )
+     reply_text = response["choices"][0]["message"]["content"]
+     st.success(f"AI says: {reply_text}")
+
+     # Convert to voice (TTS)
+     tts = gTTS(reply_text)
+     tts_path = "response.mp3"
+     tts.save(tts_path)
+
+     # Play the voice
+     audio = AudioSegment.from_file(tts_path)
+     st.audio(tts_path, format="audio/mp3")
+
+     # Clean up temp file
      os.remove(tmp_path)
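
Note on the new Groq call: setting only openai.api_key leaves the pre-1.0 openai client pointed at api.openai.com, so the mixtral-8x7b-32768 request would not actually reach Groq. Below is a minimal sketch of routing the same ChatCompletion call through Groq's OpenAI-compatible endpoint (the base URL the removed requests-based code called); the openai<1.0 client and the placeholder prompt are assumptions, while the model name and the GROQ_API_KEY variable come from the diff.

import os
import openai

# Assumption: the pre-1.0 openai client (openai<1.0), which exposes
# module-level api_key / api_base and openai.ChatCompletion.
openai.api_key = os.getenv("GROQ_API_KEY", "your-groq-api-key")
# Route requests to Groq's OpenAI-compatible endpoint (same base URL the
# previous requests-based version used); without this, calls go to api.openai.com.
openai.api_base = "https://api.groq.com/openai/v1"

response = openai.ChatCompletion.create(
    model="mixtral-8x7b-32768",                       # model name from the diff
    messages=[{"role": "user", "content": "Hello"}],  # placeholder prompt
)
print(response["choices"][0]["message"]["content"])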