meraj12 commited on
Commit
ab48168
·
verified ·
1 Parent(s): 0a3696b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -32
app.py CHANGED
@@ -1,4 +1,6 @@
1
  import streamlit as st
 
 
2
  import whisper
3
  import openai
4
  import tempfile
@@ -6,54 +8,59 @@ import os
6
  from gtts import gTTS
7
  from pydub import AudioSegment
8
 
9
- # Set your Groq-compatible OpenAI API key
10
- openai.api_key = os.getenv("GROQ_API_KEY", "your-groq-api-key")
11
-
12
  # Load Whisper model
13
  model = whisper.load_model("base")
14
 
15
- st.title("🎀 Voice-to-Voice Chat App")
 
 
 
 
 
 
16
 
17
- # Option to record or upload
18
- mode = st.radio("Choose input method:", ["πŸŽ™οΈ Record Voice", "πŸ“ Upload Voice"])
19
 
20
- audio_data = None
 
 
 
 
 
 
 
 
21
 
22
- if mode == "πŸŽ™οΈ Record Voice":
23
- audio_data = st.audio_recorder("Record your voice", format="audio/wav")
24
- elif mode == "πŸ“ Upload Voice":
25
- uploaded_file = st.file_uploader("Upload your voice message (MP3/WAV)", type=["mp3", "wav"])
26
- if uploaded_file:
27
- audio_data = uploaded_file.read()
28
 
29
- # If there's audio data
30
- if audio_data:
31
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
32
- tmp.write(audio_data)
33
- tmp_path = tmp.name
34
 
35
- # Transcribe using Whisper
36
- st.info("Transcribing...")
37
- result = model.transcribe(tmp_path)
 
 
 
 
 
 
38
  user_text = result["text"]
39
  st.success(f"You said: {user_text}")
40
 
41
- # Use Groq API (OpenAI-compatible)
42
- st.info("Thinking...")
43
  response = openai.ChatCompletion.create(
44
  model="mixtral-8x7b-32768",
45
  messages=[{"role": "user", "content": user_text}]
46
  )
47
- reply_text = response["choices"][0]["message"]["content"]
48
- st.success(f"AI says: {reply_text}")
49
 
50
  # Convert to speech
51
- tts = gTTS(reply_text)
52
- tts_path = "response.mp3"
53
  tts.save(tts_path)
54
-
55
- # Play the response
56
  st.audio(tts_path, format="audio/mp3")
57
-
58
- # Clean up
59
- os.remove(tmp_path)
 
"""Voice-to-voice conversational Streamlit app.

Records microphone audio in the browser via streamlit-webrtc, transcribes it
with Whisper, sends the text to a Groq (OpenAI-compatible) chat model, and
plays back the reply with gTTS.
"""

import streamlit as st
from streamlit_webrtc import webrtc_streamer, WebRtcMode, ClientSettings
import av
import whisper
import openai
import tempfile
import os
import wave
from gtts import gTTS
from pydub import AudioSegment

# Load the Whisper speech-to-text model once at startup.
model = whisper.load_model("base")

# Groq exposes an OpenAI-compatible API; read the key from the environment.
openai.api_key = os.getenv("GROQ_API_KEY", "your-groq-api-key")

st.title("🎙️ Voice-to-Voice Conversational App")

st.info("🎤 Please record your question below:")

# Stream microphone audio from the browser over WebRTC (audio only).
webrtc_ctx = webrtc_streamer(
    key="speech",
    mode=WebRtcMode.SENDRECV,
    client_settings=ClientSettings(
        media_stream_constraints={"audio": True, "video": False},
        rtc_configuration={"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]},
    ),
    audio_receiver_size=1024,
)

# Accumulate raw PCM across Streamlit reruns, together with the stream
# parameters needed later to write a *valid* WAV container.
if "audio_buffer" not in st.session_state:
    st.session_state.audio_buffer = b""
    st.session_state.sample_rate = 48000  # WebRTC default; updated from frames
    st.session_state.channels = 2         # updated from frames below

if webrtc_ctx.audio_receiver:
    # get_frames raises when no frame arrives within the timeout; treat that
    # as "nothing new" instead of crashing the rerun.
    try:
        audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=1)
    except Exception:
        audio_frames = []
    for frame in audio_frames:
        # NOTE(review): assumes packed s16 PCM from to_ndarray(); if frames
        # arrive planar the channel interleaving would differ — confirm
        # against the streamlit-webrtc/PyAV versions in use.
        st.session_state.audio_buffer += frame.to_ndarray().tobytes()
        st.session_state.sample_rate = frame.sample_rate
        st.session_state.channels = len(frame.layout.channels)

if st.button("🛑 Process Voice"):
    if not st.session_state.audio_buffer:
        # Guard: pressing the button before recording would otherwise hand
        # Whisper a zero-byte file.
        st.warning("No audio captured yet — please record something first.")
        st.stop()

    # BUG FIX: the original wrote headerless PCM bytes into a ".wav" file,
    # which Whisper/ffmpeg cannot decode. Wrap the samples in a proper
    # RIFF/WAV container with the captured rate and channel count.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        audio_path = f.name
    with wave.open(audio_path, "wb") as wf:
        wf.setnchannels(st.session_state.channels)
        wf.setsampwidth(2)  # 16-bit samples
        wf.setframerate(st.session_state.sample_rate)
        wf.writeframes(st.session_state.audio_buffer)

    st.audio(audio_path)

    st.info("Transcribing with Whisper...")
    result = model.transcribe(audio_path)
    user_text = result["text"]
    st.success(f"You said: {user_text}")

    st.info("Generating AI response...")
    response = openai.ChatCompletion.create(
        model="mixtral-8x7b-32768",
        messages=[{"role": "user", "content": user_text}]
    )
    reply = response['choices'][0]['message']['content']
    st.success(f"AI says: {reply}")

    # Convert the reply to speech and play it back.
    tts = gTTS(reply)
    tts_path = "reply.mp3"
    tts.save(tts_path)
    st.audio(tts_path, format="audio/mp3")

    # Clean up the temp recording and reset the buffer for the next turn
    # (the original leaked the file and re-used stale audio on every press).
    os.remove(audio_path)
    st.session_state.audio_buffer = b""