meraj12 committed on
Commit
7f4232c
·
verified ·
1 Parent(s): d7b84b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -27
app.py CHANGED
@@ -1,45 +1,67 @@
1
  import streamlit as st
2
- from streamlit_mic import st_mic
 
3
  import whisper
4
  import openai
5
- from gtts import gTTS
6
  import tempfile
7
  import os
 
8
 
9
- # Set your API Key (Groq-compatible)
10
  openai.api_key = os.getenv("GROQ_API_KEY", "your-groq-api-key")
11
 
12
  # Load Whisper model
13
  model = whisper.load_model("base")
14
 
15
- st.title("🎙️ Voice-to-Voice Conversational App")
 
 
 
 
 
 
 
16
 
17
- st.info("🎤 Record your voice and click 'Stop' to process:")
 
 
 
18
 
19
- audio_data = st_mic()
 
 
20
 
21
- if audio_data:
22
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
23
- tmp.write(audio_data)
24
- tmp_path = tmp.name
 
 
 
 
 
25
 
26
- st.audio(tmp_path)
 
 
 
27
 
28
- st.info("🔠 Transcribing...")
29
- result = model.transcribe(tmp_path)
30
- user_input = result["text"]
31
- st.success(f"You said: {user_input}")
 
32
 
33
- st.info("🧠 Thinking...")
34
- response = openai.ChatCompletion.create(
35
- model="mixtral-8x7b-32768",
36
- messages=[{"role": "user", "content": user_input}]
37
- )
38
- answer = response["choices"][0]["message"]["content"]
39
- st.success(f"AI says: {answer}")
 
40
 
41
- # Convert to voice
42
- tts = gTTS(answer)
43
- tts_path = "output.mp3"
44
- tts.save(tts_path)
45
- st.audio(tts_path, format="audio/mp3")
 
1
  import streamlit as st
2
+ import pyaudio
3
+ import wave
4
  import whisper
5
  import openai
 
6
  import tempfile
7
  import os
8
+ from gtts import gTTS
9
 
10
# Configure the chat-completion client. The key is read from the
# GROQ_API_KEY environment variable; when that variable is unset the
# placeholder string below is used instead.
# NOTE(review): no API base URL is configured here, so the openai client
# talks to its default endpoint -- confirm which provider this key is for.
openai.api_key = os.getenv("GROQ_API_KEY", "your-groq-api-key")


@st.cache_resource
def _load_whisper_model():
    """Return the Whisper "base" model, loading it only once.

    Streamlit re-executes the whole script on every widget interaction;
    caching the loaded model avoids reloading it on each rerun.
    """
    return whisper.load_model("base")


# Load Whisper model
model = _load_whisper_model()
15
 
16
# Function to record audio
def record_audio(filename="recorded.wav", duration=5, rate=44100,
                 channels=1, chunk=1024):
    """Record from the default input device and save the result as a WAV file.

    Audio is captured as 16-bit samples (``pyaudio.paInt16``) in buffers of
    ``chunk`` frames and written to ``filename`` once recording is complete.

    Args:
        filename: Path of the WAV file to create (overwritten if it exists).
        duration: Recording length in seconds.
        rate: Sampling rate in Hz.
        channels: Number of input channels.
        chunk: Number of frames read from the stream per call.
    """
    sample_format = pyaudio.paInt16
    audio = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive.
        sample_width = audio.get_sample_size(sample_format)
        stream = audio.open(format=sample_format,
                            channels=channels,
                            rate=rate,
                            input=True,
                            frames_per_buffer=chunk)
        try:
            frames = [
                stream.read(chunk)
                for _ in range(int(rate / chunk * duration))
            ]
        finally:
            # Always release the stream, even if a read fails mid-recording.
            stream.stop_stream()
            stream.close()
    finally:
        # Always release PortAudio, even if opening the stream failed.
        audio.terminate()

    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))
39
+
40
# Streamlit app UI: record -> transcribe -> chat completion -> speech.
st.title("🎙️ Voice-to-Voice Conversational App")
st.info("🎤 Click the button to record your voice!")

if st.button("Record"):
    with st.spinner("Recording..."):
        record_audio("user_input.wav")
    st.success("Recording finished!")

    # Transcribing with Whisper
    st.info("Transcribing...")
    result = model.transcribe("user_input.wav")
    user_input = result["text"].strip()

    if not user_input:
        # Nothing intelligible was captured; do not send an empty prompt to
        # the chat model or an empty string to the speech synthesiser.
        st.warning("No speech was detected. Please try recording again.")
    else:
        st.success(f"You said: {user_input}")

        # AI response with OpenAI
        # NOTE(review): the API key at the top of the file is read from
        # GROQ_API_KEY while the model requested here is "gpt-3.5-turbo" --
        # confirm that the key and model belong to the same provider.
        st.info("Thinking...")
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": user_input}],
        )
        answer = response['choices'][0]['message']['content']
        st.success(f"AI says: {answer}")

        # Convert AI response to speech
        tts = gTTS(answer)
        tts.save("response.mp3")
        st.audio("response.mp3", format="audio/mp3")