Spaces:

TabasumDev
/

V2VBot

Sleeping

App Files Files Community

TabasumDev committed on Sep 27, 2024

Commit

cb78863

verified ·

1 Parent(s): 7239c1c

Initialized app.py

Browse files

Files changed (1) hide show

app.py +61 -0

app.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import os
+import gradio as gr
+from pydub import AudioSegment  # For handling audio files
+from gtts import gTTS
+import whisper  # Correct import from openai-whisper package
+from groq import Groq
+import tempfile  # For managing temporary audio file creation
+# Load the Whisper "base" speech-to-text model once at import time so that
+# every request reuses the same loaded model.
+whisper_model = whisper.load_model("base")
+# Secrets must never be committed to source control: the Groq API key is read
+# from the GROQ_API_KEY environment variable (on Hugging Face Spaces, define it
+# as a Space secret). If the variable is unset, None is passed and the Groq
+# client is left to report the missing credential.
+# NOTE(review): any key that was ever hard-coded in this file should be
+# treated as leaked and revoked.
+client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+def transcribe_audio(audio_file):
+    """Transcribe the audio at *audio_file* with the module-level Whisper model.
+
+    The path is handed to Whisper unchanged (no conversion is done here), and
+    the "text" entry of Whisper's result is returned.
+    """
+    transcription = whisper_model.transcribe(audio_file)
+    spoken_text = transcription['text']
+    return spoken_text
+def get_response(prompt):
+    """Ask the Llama 3 8B model (via the Groq chat API) to answer *prompt*.
+
+    The prompt is sent as a single user message, with no system prompt and no
+    conversation history. Returns the content of the first returned choice.
+    """
+    user_message = {"role": "user", "content": prompt}
+    completion = client.chat.completions.create(
+        model="llama3-8b-8192",
+        messages=[user_message],
+    )
+    first_choice = completion.choices[0]
+    return first_choice.message.content
+def text_to_speech(text):
+    """Synthesize *text* with gTTS and return the path of a WAV file.
+
+    gTTS produces MP3 data, so the speech is first saved to a temporary
+    ".mp3" file and then converted with pydub; the returned ".wav" path
+    therefore really contains WAV audio. The returned file is not deleted
+    here; the caller is responsible for it.
+
+    Raises whatever gTTS or pydub raise (for example on empty text, on a
+    network failure, or if pydub cannot decode the MP3).
+    """
+    tts = gTTS(text)
+    # Reserve both paths and close the handles before anything writes to them:
+    # writing by name to a still-open NamedTemporaryFile fails on Windows.
+    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as mp3_file:
+        mp3_path = mp3_file.name
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
+        wav_path = wav_file.name
+    try:
+        tts.save(mp3_path)
+        # NOTE(review): pydub relies on ffmpeg to decode MP3; confirm that it
+        # is installed in the deployment image.
+        AudioSegment.from_file(mp3_path, format="mp3").export(wav_path, format="wav")
+    except Exception:
+        os.remove(wav_path)  # do not leave an empty output file behind
+        raise
+    finally:
+        os.remove(mp3_path)  # the intermediate MP3 is never needed afterwards
+    return wav_path
+def chatbot(audio_file):
+    """Run one voice-to-voice turn: transcribe, query the LLM, speak the reply.
+
+    Args:
+        audio_file: Path of the user's audio, or None/empty when no audio
+            was supplied.
+
+    Returns:
+        Path of the generated reply audio, or None when there is nothing to
+        answer (no input file, empty transcription, or empty model reply).
+    """
+    # NOTE(review): with live=True the UI can presumably invoke this without a
+    # file (e.g. when the input is cleared); skip instead of crashing Whisper.
+    if not audio_file:
+        return None
+    # 1. Transcribe audio to text
+    user_input = transcribe_audio(audio_file)
+    print(f"Transcribed text: {user_input}")  # Debugging output
+    # Silence or noise can transcribe to nothing; don't send an empty prompt.
+    if not user_input or not user_input.strip():
+        return None
+    # 2. Get response from Llama 8B based on transcribed input
+    response = get_response(user_input)
+    print(f"Llama response: {response}")  # Debugging output
+    # gTTS rejects empty text, so there is nothing to synthesize in that case.
+    if not response or not response.strip():
+        return None
+    # 3. Convert the response text to speech
+    audio_output = text_to_speech(response)
+    print(f"Generated audio output: {audio_output}")  # Debugging output
+    return audio_output  # File path of the reply audio for Gradio to play
+# Gradio interface: one audio input and one audio output, both exchanged with
+# the handler as file paths.
+audio_in = gr.Audio(type="filepath", format="wav")  # microphone recording or uploaded file
+audio_out = gr.Audio(type="filepath", format="wav")  # plays the file path returned by chatbot
+iface = gr.Interface(
+    fn=chatbot,
+    inputs=audio_in,
+    outputs=audio_out,
+    title="Voice to Voice Chatbot",
+    description="Upload a .wav file or record using the microphone, and the chatbot will respond with audio!",
+    live=True,
+)
+# Start the web app as soon as the module is executed.
+iface.launch()