TabasumDev committed · verified
Commit b8bbb80 · 1 Parent(s): 6896307

Update app.py

Files changed (1): app.py (+22 −27)

app.py CHANGED
@@ -1,24 +1,26 @@
 import os
 import gradio as gr
-from pydub import AudioSegment  # For handling audio files
+import numpy as np
 from gtts import gTTS
+import torch
 import whisper  # Correct import from openai-whisper package
 from groq import Groq
-import tempfile  # For managing temporary audio file creation
-
-# Load Whisper model
-whisper_model = whisper.load_model("base")
+import io
+import tempfile  # To handle temporary audio file saving
 
+# Initialize Groq API client
 client = Groq(api_key="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")
-
+# Load Whisper model
+whisper_model = whisper.load_model("base")  # Use 'whisper' directly
 
 def transcribe_audio(audio_file):
-    # Since the audio is already in .wav, we directly pass it to Whisper
-    result = whisper_model.transcribe(audio_file)
+    # Load audio
+    audio, sr = sf.read(audio_file)
+    # Transcribe audio using Whisper
+    result = whisper_model.transcribe(audio, language="en")
     return result['text']
 
 def get_response(prompt):
-    # Generate response using Llama 8B via Groq API
     chat_completion = client.chat.completions.create(
         messages=[{"role": "user", "content": prompt}],
         model="llama3-8b-8192",
@@ -26,36 +28,29 @@ def get_response(prompt):
     return chat_completion.choices[0].message.content
 
 def text_to_speech(text):
-    # Convert text to speech using gTTS
     tts = gTTS(text)
-    # Save TTS output to a temporary file
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
-        tts.save(temp_audio_file.name)
-        return temp_audio_file.name  # Return the file path of the .wav file
+    audio_buffer = io.BytesIO()
+    tts.save(audio_buffer)
+    audio_buffer.seek(0)
+    return audio_buffer
 
 def chatbot(audio_file):
-    # 1. Transcribe audio to text
+    # Transcribe audio to text
    user_input = transcribe_audio(audio_file)
-    print(f"Transcribed text: {user_input}")  # Debugging output
-
-    # 2. Get response from Llama 8B based on transcribed input
+    # Get response from Llama 8B
    response = get_response(user_input)
-    print(f"Llama response: {response}")  # Debugging output
-
-    # 3. Convert the response text to speech
+    # Convert response to speech
    audio_output = text_to_speech(response)
-    print(f"Generated audio output: {audio_output}")  # Debugging output
-
-    return audio_output  # Return the .wav audio file path for Gradio to play
+    return audio_output
 
 # Gradio interface
 iface = gr.Interface(
     fn=chatbot,
-    inputs=gr.Audio(type="filepath", format="wav"),  # Accept .wav audio file input (mic or upload)
-    outputs=gr.Audio(type="filepath", format="wav"),  # Output is the file path to the generated .wav audio
+    inputs=gr.Audio(type="filepath"),  # Remove 'source' argument
+    outputs=gr.Audio(type="filepath"),
     live=True,
     title="Voice to Voice Chatbot",
-    description="Upload a .wav file or record using the microphone, and the chatbot will respond with audio!"
+    description="Speak into the microphone, and the chatbot will respond!"
 )
 
 iface.launch()
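
Note: as committed, the new version still does not run end-to-end. `transcribe_audio` calls `sf.read` without importing `soundfile` (and Whisper expects a float32 array at 16 kHz, which `sf.read` does not guarantee); `gTTS.save()` takes a file path, not a `BytesIO` (the in-memory equivalent is `write_to_fp()`); and `chatbot` returns a `BytesIO` even though the output component is `gr.Audio(type="filepath")`. A minimal sketch of one possible follow-up fix, not part of this commit, is to let Whisper decode the file itself and keep the filepath contract on the output side (gTTS writes MP3, which Gradio plays back):

import tempfile

import whisper
from gtts import gTTS

whisper_model = whisper.load_model("base")

def transcribe_audio(audio_file):
    # Whisper accepts a file path and decodes/resamples it via ffmpeg,
    # so no manual soundfile/numpy loading is needed.
    result = whisper_model.transcribe(audio_file, language="en")
    return result["text"]

def text_to_speech(text):
    # gTTS.save() expects a path and writes MP3; returning that path
    # matches gr.Audio(type="filepath") on the output side.
    tts = gTTS(text)
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        tts.save(tmp.name)
    return tmp.name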
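
The `# Remove 'source' argument` comment tracks the Gradio 4.x API change: `gr.Audio` no longer accepts `source=`, and input sources are requested with the plural `sources` list instead. A hypothetical usage, not in the commit, that restores explicit microphone/upload input:

inputs=gr.Audio(sources=["microphone", "upload"], type="filepath")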
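
Finally, the Groq API key appears in plaintext on both sides of the diff. On a Space it would normally be stored as a repository secret and read from the environment, which the already-imported `os` module supports. A sketch, assuming a `GROQ_API_KEY` secret is configured:

import os
from groq import Groq

# Read the key from a GROQ_API_KEY environment variable / Space secret
# instead of hard-coding it in app.py.
client = Groq(api_key=os.environ["GROQ_API_KEY"])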