meraj12 committed
Commit b46560b · verified · 1 Parent(s): b20e1c3

Update app.py

Files changed (1)
  1. app.py +40 -90
app.py CHANGED
@@ -1,104 +1,54 @@
  import streamlit as st
  import tempfile
  import os
- import uuid
- import json
- from datetime import datetime
- from transformers import pipeline
- from bark import SAMPLE_RATE, generate_audio, preload_models
- import scipy.io.wavfile
  import requests
- from dotenv import load_dotenv
- import deepgram
- import torch

- # Load environment variables
- load_dotenv()

- # Set Deepgram API Key
- DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY")

- # Load Bark TTS models

- # Modify this part in the Bark generation code where the model is loaded
- checkpoint = torch.load(ckpt_path, map_location=device, weights_only=False)

- preload_models()
-
- # Conversation history
- if "history" not in st.session_state:
-     st.session_state.history = []
-
- st.set_page_config(page_title="Voice Chat App", layout="centered")
- st.title("🗣️ Voice-Based Conversational App")
-
- st.sidebar.header("🎛️ Settings")
- language = st.sidebar.selectbox("Select Language", ["en", "es", "fr", "de", "it"])
- emotion = st.sidebar.selectbox("Select Emotion", ["neutral", "happy", "sad", "angry"])
- voice_avatar = st.sidebar.selectbox("Select Voice Avatar", ["v2/en_speaker_1", "v2/en_speaker_9", "v2/en_speaker_5", "v2/en_speaker_0"])
-
- st.markdown("Speak or upload a voice file to start chatting with the AI")
-
- # Voice input
- voice_input = st.file_uploader("Upload a voice file", type=["wav", "mp3", "m4a"])
-
- if voice_input:
-     # Save temporary file
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
-         tmp_file.write(voice_input.read())
-         tmp_path = tmp_file.name
-
-     # Transcribe using Deepgram
      st.info("Transcribing...")
-
-     # Upload file to Deepgram API
-     client = deepgram.Deepgram(DEEPGRAM_API_KEY)
-     with open(tmp_path, 'rb') as f:
-         response = client.transcription.sync_prerecorded(f, language='en')
-
-     # Extract transcription from the response
-     user_text = response['results']['channels'][0]['alternatives'][0]['transcript']
      st.success(f"You said: {user_text}")

-     # Chat with Groq API (LLaMA3 or Mixtral)
-     st.info("Generating response...")
-     GROQ_API_KEY = os.getenv("GROQ_API_KEY")
-     headers = {
-         "Authorization": f"Bearer {GROQ_API_KEY}",
-         "Content-Type": "application/json"
-     }
-     data = {
-         "model": "mixtral-8x7b-32768",
-         "messages": [{"role": "user", "content": user_text}],
-         "temperature": 0.7
-     }
-     response = requests.post("https://api.groq.com/openai/v1/chat/completions", headers=headers, data=json.dumps(data))
-     reply_text = response.json()["choices"][0]["message"]["content"]
-     st.success(f"AI: {reply_text}")
-
-     # Save history
-     st.session_state.history.append({"user": user_text, "bot": reply_text})
-
-     # Convert to speech using Bark
-     st.info("Generating voice reply with Bark...")
-     audio_array = generate_audio(reply_text, history_prompt=voice_avatar)
-     output_path = f"output_{uuid.uuid4().hex}.wav"
-     scipy.io.wavfile.write(output_path, rate=SAMPLE_RATE, data=audio_array)
-
-     # Play response
-     audio_file = open(output_path, "rb")
-     audio_bytes = audio_file.read()
-     st.audio(audio_bytes, format="audio/wav")
-
-     # Clean up
      os.remove(tmp_path)
-     os.remove(output_path)
-
- # Show conversation history
- if st.session_state.history:
-     st.subheader("📜 Conversation History")
-     for i, item in enumerate(st.session_state.history):
-         st.markdown(f"**You:** {item['user']}")
-         st.markdown(f"**AI:** {item['bot']}")
-         st.markdown("---")
  import streamlit as st
+ import whisper
+ import openai
  import tempfile
  import os
  import requests
+ from gtts import gTTS
+ from pydub import AudioSegment
+ from pydub.playback import play

+ # Set your Groq-compatible OpenAI API key
+ openai.api_key = os.getenv("GROQ_API_KEY", "your-groq-api-key")

+ # Load Whisper model
+ model = whisper.load_model("base")

+ # Title
+ st.title("🎙️ Voice-to-Voice Conversational App")

+ # Upload or record voice
+ uploaded_file = st.file_uploader("Upload your voice message (MP3/WAV)", type=["mp3", "wav"])

+ if uploaded_file:
+     # Save audio to a temp file
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+         tmp.write(uploaded_file.read())
+         tmp_path = tmp.name

+     # Transcribe with Whisper
      st.info("Transcribing...")
+     result = model.transcribe(tmp_path)
+     user_text = result["text"]
      st.success(f"You said: {user_text}")

+     # Ask Groq/OpenAI
+     st.info("Thinking...")
+     response = openai.ChatCompletion.create(
+         model="mixtral-8x7b-32768",  # Groq supports this
+         messages=[{"role": "user", "content": user_text}]
+     )
+     reply_text = response["choices"][0]["message"]["content"]
+     st.success(f"AI says: {reply_text}")
+
+     # Convert to voice (TTS)
+     tts = gTTS(reply_text)
+     tts_path = "response.mp3"
+     tts.save(tts_path)
+
+     # Play the voice
+     audio = AudioSegment.from_file(tts_path)
+     st.audio(tts_path, format="audio/mp3")
+
+     # Clean up temp file
      os.remove(tmp_path)
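
Note on the new Groq call: setting only openai.api_key leaves the pre-1.0 openai client pointed at api.openai.com, so the mixtral-8x7b-32768 request would not actually reach Groq. Below is a minimal sketch of routing the same ChatCompletion call through Groq's OpenAI-compatible endpoint (the base URL the removed requests-based code called); the openai<1.0 client and the placeholder prompt are assumptions, while the model name and the GROQ_API_KEY variable come from the diff.

import os
import openai

# Assumption: the pre-1.0 openai client (openai<1.0), which exposes
# module-level api_key / api_base and openai.ChatCompletion.
openai.api_key = os.getenv("GROQ_API_KEY", "your-groq-api-key")
# Route requests to Groq's OpenAI-compatible endpoint (same base URL the
# previous requests-based version used); without this, calls go to api.openai.com.
openai.api_base = "https://api.groq.com/openai/v1"

response = openai.ChatCompletion.create(
    model="mixtral-8x7b-32768",                       # model name from the diff
    messages=[{"role": "user", "content": "Hello"}],  # placeholder prompt
)
print(response["choices"][0]["message"]["content"])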