meraj12 commited on
Commit
ab48168
·
verified ·
1 Parent(s): 0a3696b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -32
app.py CHANGED
@@ -1,4 +1,6 @@
1
  import streamlit as st
 
 
2
  import whisper
3
  import openai
4
  import tempfile
@@ -6,54 +8,59 @@ import os
6
  from gtts import gTTS
7
  from pydub import AudioSegment
8
 
9
- # Set your Groq-compatible OpenAI API key
10
- openai.api_key = os.getenv("GROQ_API_KEY", "your-groq-api-key")
11
-
12
  # Load Whisper model
13
  model = whisper.load_model("base")
14
 
15
- st.title("🎀 Voice-to-Voice Chat App")
 
 
 
 
 
 
16
 
17
- # Option to record or upload
18
- mode = st.radio("Choose input method:", ["πŸŽ™οΈ Record Voice", "πŸ“ Upload Voice"])
19
 
20
- audio_data = None
 
 
 
 
 
 
 
 
21
 
22
- if mode == "πŸŽ™οΈ Record Voice":
23
- audio_data = st.audio_recorder("Record your voice", format="audio/wav")
24
- elif mode == "πŸ“ Upload Voice":
25
- uploaded_file = st.file_uploader("Upload your voice message (MP3/WAV)", type=["mp3", "wav"])
26
- if uploaded_file:
27
- audio_data = uploaded_file.read()
28
 
29
- # If there's audio data
30
- if audio_data:
31
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
32
- tmp.write(audio_data)
33
- tmp_path = tmp.name
34
 
35
- # Transcribe using Whisper
36
- st.info("Transcribing...")
37
- result = model.transcribe(tmp_path)
 
 
 
 
 
 
38
  user_text = result["text"]
39
  st.success(f"You said: {user_text}")
40
 
41
- # Use Groq API (OpenAI-compatible)
42
- st.info("Thinking...")
43
  response = openai.ChatCompletion.create(
44
  model="mixtral-8x7b-32768",
45
  messages=[{"role": "user", "content": user_text}]
46
  )
47
- reply_text = response["choices"][0]["message"]["content"]
48
- st.success(f"AI says: {reply_text}")
49
 
50
  # Convert to speech
51
- tts = gTTS(reply_text)
52
- tts_path = "response.mp3"
53
  tts.save(tts_path)
54
-
55
- # Play the response
56
  st.audio(tts_path, format="audio/mp3")
57
-
58
- # Clean up
59
- os.remove(tmp_path)
 
"""Voice-to-voice conversational Streamlit app.

Records microphone audio in the browser via streamlit-webrtc, transcribes it
with Whisper, sends the text to a Groq (OpenAI-compatible) chat model, and
plays back the reply with gTTS.
"""

import streamlit as st
from streamlit_webrtc import webrtc_streamer, WebRtcMode, ClientSettings
import av
import whisper
import openai
import tempfile
import os
import wave
from gtts import gTTS
from pydub import AudioSegment

# Load the Whisper speech-to-text model once at startup.
model = whisper.load_model("base")

# Groq exposes an OpenAI-compatible API; read the key from the environment.
openai.api_key = os.getenv("GROQ_API_KEY", "your-groq-api-key")

st.title("🎙️ Voice-to-Voice Conversational App")

st.info("🎤 Please record your question below:")

# Stream microphone audio from the browser over WebRTC (audio only).
webrtc_ctx = webrtc_streamer(
    key="speech",
    mode=WebRtcMode.SENDRECV,
    client_settings=ClientSettings(
        media_stream_constraints={"audio": True, "video": False},
        rtc_configuration={"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]},
    ),
    audio_receiver_size=1024,
)

# Accumulate raw PCM across Streamlit reruns, together with the stream
# parameters needed later to write a *valid* WAV container.
if "audio_buffer" not in st.session_state:
    st.session_state.audio_buffer = b""
    st.session_state.sample_rate = 48000  # WebRTC default; updated from frames
    st.session_state.channels = 2         # updated from frames below

if webrtc_ctx.audio_receiver:
    # get_frames raises when no frame arrives within the timeout; treat that
    # as "nothing new" instead of crashing the rerun.
    try:
        audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=1)
    except Exception:
        audio_frames = []
    for frame in audio_frames:
        # NOTE(review): assumes packed s16 PCM from to_ndarray(); if frames
        # arrive planar the channel interleaving would differ — confirm
        # against the streamlit-webrtc/PyAV versions in use.
        st.session_state.audio_buffer += frame.to_ndarray().tobytes()
        st.session_state.sample_rate = frame.sample_rate
        st.session_state.channels = len(frame.layout.channels)

if st.button("🛑 Process Voice"):
    if not st.session_state.audio_buffer:
        # Guard: pressing the button before recording would otherwise hand
        # Whisper a zero-byte file.
        st.warning("No audio captured yet — please record something first.")
        st.stop()

    # BUG FIX: the original wrote headerless PCM bytes into a ".wav" file,
    # which Whisper/ffmpeg cannot decode. Wrap the samples in a proper
    # RIFF/WAV container with the captured rate and channel count.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        audio_path = f.name
    with wave.open(audio_path, "wb") as wf:
        wf.setnchannels(st.session_state.channels)
        wf.setsampwidth(2)  # 16-bit samples
        wf.setframerate(st.session_state.sample_rate)
        wf.writeframes(st.session_state.audio_buffer)

    st.audio(audio_path)

    st.info("Transcribing with Whisper...")
    result = model.transcribe(audio_path)
    user_text = result["text"]
    st.success(f"You said: {user_text}")

    st.info("Generating AI response...")
    response = openai.ChatCompletion.create(
        model="mixtral-8x7b-32768",
        messages=[{"role": "user", "content": user_text}]
    )
    reply = response['choices'][0]['message']['content']
    st.success(f"AI says: {reply}")

    # Convert the reply to speech and play it back.
    tts = gTTS(reply)
    tts_path = "reply.mp3"
    tts.save(tts_path)
    st.audio(tts_path, format="audio/mp3")

    # Clean up the temp recording and reset the buffer for the next turn
    # (the original leaked the file and re-used stale audio on every press).
    os.remove(audio_path)
    st.session_state.audio_buffer = b""