TabasumDev committed · verified
Commit b8bbb80 · 1 Parent(s): 6896307

Update app.py

Files changed (1): app.py (+22 −27)

app.py CHANGED
@@ -1,24 +1,26 @@
 import os
 import gradio as gr
-from pydub import AudioSegment  # For handling audio files
+import numpy as np
 from gtts import gTTS
+import torch
 import whisper  # Correct import from openai-whisper package
 from groq import Groq
-import tempfile  # For managing temporary audio file creation
-
-# Load Whisper model
-whisper_model = whisper.load_model("base")
+import io
+import tempfile  # To handle temporary audio file saving
 
+# Initialize Groq API client
 client = Groq(api_key="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")
-
+# Load Whisper model
+whisper_model = whisper.load_model("base")  # Use 'whisper' directly
 
 def transcribe_audio(audio_file):
-    # Since the audio is already in .wav, we directly pass it to Whisper
-    result = whisper_model.transcribe(audio_file)
+    # Load audio
+    audio, sr = sf.read(audio_file)
+    # Transcribe audio using Whisper
+    result = whisper_model.transcribe(audio, language="en")
     return result['text']
 
 def get_response(prompt):
-    # Generate response using Llama 8B via Groq API
     chat_completion = client.chat.completions.create(
         messages=[{"role": "user", "content": prompt}],
         model="llama3-8b-8192",
@@ -26,36 +28,29 @@ def get_response(prompt):
     return chat_completion.choices[0].message.content
 
 def text_to_speech(text):
-    # Convert text to speech using gTTS
     tts = gTTS(text)
-    # Save TTS output to a temporary file
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
-        tts.save(temp_audio_file.name)
-        return temp_audio_file.name  # Return the file path of the .wav file
+    audio_buffer = io.BytesIO()
+    tts.save(audio_buffer)
+    audio_buffer.seek(0)
+    return audio_buffer
 
 def chatbot(audio_file):
-    # 1. Transcribe audio to text
+    # Transcribe audio to text
    user_input = transcribe_audio(audio_file)
-    print(f"Transcribed text: {user_input}")  # Debugging output
-
-    # 2. Get response from Llama 8B based on transcribed input
+    # Get response from Llama 8B
    response = get_response(user_input)
-    print(f"Llama response: {response}")  # Debugging output
-
-    # 3. Convert the response text to speech
+    # Convert response to speech
    audio_output = text_to_speech(response)
-    print(f"Generated audio output: {audio_output}")  # Debugging output
-
-    return audio_output  # Return the .wav audio file path for Gradio to play
+    return audio_output
 
 # Gradio interface
 iface = gr.Interface(
     fn=chatbot,
-    inputs=gr.Audio(type="filepath", format="wav"),  # Accept .wav audio file input (mic or upload)
-    outputs=gr.Audio(type="filepath", format="wav"),  # Output is the file path to the generated .wav audio
+    inputs=gr.Audio(type="filepath"),  # Remove 'source' argument
+    outputs=gr.Audio(type="filepath"),
     live=True,
     title="Voice to Voice Chatbot",
-    description="Upload a .wav file or record using the microphone, and the chatbot will respond with audio!"
+    description="Speak into the microphone, and the chatbot will respond!"
 )
 
 iface.launch()
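
Note: as committed, the new version still does not run end-to-end. `transcribe_audio` calls `sf.read` without importing `soundfile` (and Whisper expects a float32 array at 16 kHz, which `sf.read` does not guarantee); `gTTS.save()` takes a file path, not a `BytesIO` (the in-memory equivalent is `write_to_fp()`); and `chatbot` returns a `BytesIO` even though the output component is `gr.Audio(type="filepath")`. A minimal sketch of one possible follow-up fix, not part of this commit, is to let Whisper decode the file itself and keep the filepath contract on the output side (gTTS writes MP3, which Gradio plays back):

import tempfile

import whisper
from gtts import gTTS

whisper_model = whisper.load_model("base")

def transcribe_audio(audio_file):
    # Whisper accepts a file path and decodes/resamples it via ffmpeg,
    # so no manual soundfile/numpy loading is needed.
    result = whisper_model.transcribe(audio_file, language="en")
    return result["text"]

def text_to_speech(text):
    # gTTS.save() expects a path and writes MP3; returning that path
    # matches gr.Audio(type="filepath") on the output side.
    tts = gTTS(text)
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        tts.save(tmp.name)
    return tmp.name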
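
The `# Remove 'source' argument` comment tracks the Gradio 4.x API change: `gr.Audio` no longer accepts `source=`, and input sources are requested with the plural `sources` list instead. A hypothetical usage, not in the commit, that restores explicit microphone/upload input:

inputs=gr.Audio(sources=["microphone", "upload"], type="filepath")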
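
Finally, the Groq API key appears in plaintext on both sides of the diff. On a Space it would normally be stored as a repository secret and read from the environment, which the already-imported `os` module supports. A sketch, assuming a `GROQ_API_KEY` secret is configured:

import os
from groq import Groq

# Read the key from a GROQ_API_KEY environment variable / Space secret
# instead of hard-coding it in app.py.
client = Groq(api_key=os.environ["GROQ_API_KEY"])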