File size: 1,917 Bytes
9b07f04
7f4232c
 
0a3696b
b46560b
b67ca14
9b07f04
7f4232c
5bb85a8
7f4232c
eab7da3
 
b46560b
 
b67ca14
7f4232c
 
 
 
 
 
 
 
ab48168
7f4232c
 
 
 
b20e1c3
7f4232c
 
 
b20e1c3
7f4232c
 
 
 
 
 
 
 
 
ab48168
7f4232c
 
 
 
ab48168
7f4232c
 
 
 
 
b67ca14
7f4232c
 
 
 
 
 
 
 
b46560b
7f4232c
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import streamlit as st
import pyaudio
import wave
import whisper
import openai
import tempfile
import os
from gtts import gTTS

# Set OpenAI API Key
# NOTE(review): the key is read from GROQ_API_KEY, but the ChatCompletion call
# below targets the default OpenAI endpoint with model "gpt-3.5-turbo". If Groq
# is the intended provider, openai.api_base would need to point at Groq's
# OpenAI-compatible URL — confirm which provider is actually meant.
# The fallback string "your-groq-api-key" is a placeholder and will fail auth.
openai.api_key = os.getenv("GROQ_API_KEY", "your-groq-api-key")

# Load Whisper model
# Loaded once at module import; "base" is the smallest general-purpose checkpoint,
# so startup cost is paid a single time rather than per request.
model = whisper.load_model("base")

# Function to record audio
def record_audio(filename="recorded.wav", duration=5, rate=44100, chunk=1024):
    """Record mono 16-bit audio from the default input device into a WAV file.

    Args:
        filename: Path of the WAV file to write.
        duration: Recording length in seconds.
        rate: Sample rate in Hz (optional; default matches previous behavior).
        chunk: Frames per buffer read (optional; default matches previous behavior).
    """
    audio = pyaudio.PyAudio()
    # Query the sample width while the PortAudio instance is definitely live,
    # rather than after terminate() as the original did.
    sample_width = audio.get_sample_size(pyaudio.paInt16)
    frames = []
    try:
        stream = audio.open(format=pyaudio.paInt16,
                            channels=1,
                            rate=rate,
                            input=True,
                            frames_per_buffer=chunk)
        try:
            # Read enough fixed-size buffers to cover `duration` seconds.
            for _ in range(int(rate / chunk * duration)):
                frames.append(stream.read(chunk))
        finally:
            # Close the stream even if a read fails (e.g. input overflow).
            stream.stop_stream()
            stream.close()
    finally:
        # Always release PortAudio, even when opening or reading the stream raises.
        audio.terminate()

    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))

# Streamlit app UI
st.title("πŸŽ™οΈ Voice-to-Voice Conversational App")
st.info("🎀 Click the button to record your voice!")

if st.button("Record"):
    # Keep the "Recording..." spinner scoped to the actual recording only;
    # previously it stayed up through transcription and the LLM call, which
    # made the UI misreport what was happening.
    with st.spinner("Recording..."):
        record_audio("user_input.wav")
    st.success("Recording finished!")

    # Transcribing with Whisper
    st.info("Transcribing...")
    result = model.transcribe("user_input.wav")
    user_input = result["text"]
    st.success(f"You said: {user_input}")

    # AI response with OpenAI
    st.info("Thinking...")
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": user_input}]
    )
    answer = response['choices'][0]['message']['content']
    st.success(f"AI says: {answer}")

    # Convert AI response to speech and play it back in the browser.
    tts = gTTS(answer)
    tts.save("response.mp3")
    st.audio("response.mp3", format="audio/mp3")