TabasumDev committed
Commit b2010ac · verified · 1 parent: b8bbb80

Update app.py

Files changed (1): app.py (+30 -23)
app.py CHANGED
@@ -1,26 +1,25 @@
 import os
+import tempfile  # For managing temporary audio file creation
 import gradio as gr
-import numpy as np
+from pydub import AudioSegment  # For handling audio files
 from gtts import gTTS
-import torch
 import whisper  # Correct import from openai-whisper package
 from groq import Groq
-import io
-import tempfile  # To handle temporary audio file saving

-# Initialize Groq API client
-client = Groq(api_key="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")
 # Load Whisper model
-whisper_model = whisper.load_model("base")  # Use 'whisper' directly
+whisper_model = whisper.load_model("base")
+
+# Retrieve the API key from environment variables (ensure you've added it to Secrets)
+groq_api_key = "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"
+client = Groq(api_key=groq_api_key)

 def transcribe_audio(audio_file):
-    # Load audio
-    audio, sr = sf.read(audio_file)
-    # Transcribe audio using Whisper
-    result = whisper_model.transcribe(audio, language="en")
+    """Transcribe audio to text using Whisper model."""
+    result = whisper_model.transcribe(audio_file)
     return result['text']

 def get_response(prompt):
+    """Generate response using Llama 8B via Groq API."""
     chat_completion = client.chat.completions.create(
         messages=[{"role": "user", "content": prompt}],
         model="llama3-8b-8192",
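
Editor's note on the key handling above: the new comment says the key comes from environment variables, but this revision still hardcodes it, so the secret now sits in the repository history and should be rotated. A minimal sketch of what the comment describes, assuming a Spaces secret named GROQ_API_KEY (that name is an assumption, not part of this commit):

    import os
    from groq import Groq

    # GROQ_API_KEY is an assumed secret name; set it under the Space's Settings > Secrets.
    groq_api_key = os.environ.get("GROQ_API_KEY")
    if not groq_api_key:
        raise RuntimeError("GROQ_API_KEY is not set; add it to the Space's secrets.")
    client = Groq(api_key=groq_api_key)
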
@@ -28,29 +27,37 @@ def get_response(prompt):
     return chat_completion.choices[0].message.content

 def text_to_speech(text):
+    """Convert text to speech using gTTS."""
     tts = gTTS(text)
-    audio_buffer = io.BytesIO()
-    tts.save(audio_buffer)
-    audio_buffer.seek(0)
-    return audio_buffer
+    # Save TTS output to a temporary file
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
+        tts.save(temp_audio_file.name)
+    return temp_audio_file.name  # Return the file path of the .wav file

 def chatbot(audio_file):
-    # Transcribe audio to text
+    """Main function to handle audio input, generate response, and return audio output."""
+    # 1. Transcribe audio to text
     user_input = transcribe_audio(audio_file)
-    # Get response from Llama 8B
+    print(f"Transcribed text: {user_input}")  # Debugging output
+
+    # 2. Get response from Llama 8B based on transcribed input
     response = get_response(user_input)
-    # Convert response to speech
+    print(f"Llama response: {response}")  # Debugging output
+
+    # 3. Convert the response text to speech
     audio_output = text_to_speech(response)
-    return audio_output
+    print(f"Generated audio output: {audio_output}")  # Debugging output
+
+    return audio_output  # Return the .wav audio file path for Gradio to play

 # Gradio interface
 iface = gr.Interface(
     fn=chatbot,
-    inputs=gr.Audio(type="filepath"),  # Remove 'source' argument
-    outputs=gr.Audio(type="filepath"),
+    inputs=gr.Audio(type="filepath", format="wav"),  # Accept .wav audio file input (mic or upload)
+    outputs=gr.Audio(type="filepath", format="wav"),  # Output is the file path to the generated .wav audio
     live=True,
     title="Voice to Voice Chatbot",
-    description="Speak into the microphone, and the chatbot will respond!"
+    description="Upload a .wav file or record using the microphone, and the chatbot will respond with audio!"
 )

-iface.launch()
+iface.launch()
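
Editor's note on text_to_speech: gTTS only produces MP3 data, so the ".wav"-suffixed temporary file above actually contains MP3 bytes under a .wav name. The commit imports pydub's AudioSegment but never uses it; a hedged sketch of how it could produce a real WAV file (assumes ffmpeg is available for pydub's MP3 decoding):

    import tempfile
    from gtts import gTTS
    from pydub import AudioSegment

    def text_to_speech(text):
        # gTTS can only emit MP3, so write the MP3 first...
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
            mp3_path = f.name
        gTTS(text).save(mp3_path)
        # ...then decode it and re-export as an actual WAV file.
        wav_path = mp3_path[:-4] + ".wav"
        AudioSegment.from_mp3(mp3_path).export(wav_path, format="wav")
        return wav_path

Alternatively, keeping the suffix as ".mp3" and dropping format="wav" from the gr.Audio components would avoid the conversion entirely, since Gradio can serve MP3 file paths directly.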
 
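This revision also leans on dependencies the diff does not declare. A sketch of the packages it appears to need (PyPI names; nothing is pinned by the commit itself):

    # requirements.txt (assumed, not part of this commit)
    gradio
    gTTS
    groq
    openai-whisper
    pydub

Note that both whisper_model.transcribe(audio_file) on a file path and pydub's MP3 handling shell out to ffmpeg, which a Hugging Face Space typically gets from a packages.txt containing the single line "ffmpeg".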