# Voice-to-voice chatbot Space: Whisper (STT) -> Llama 3 via Groq -> gTTS (TTS)
import os
import tempfile  # For managing temporary audio file creation

import gradio as gr
import whisper  # Correct import from openai-whisper package
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment  # For handling audio files

# Load Whisper model once at startup (shared by all requests)
whisper_model = whisper.load_model("base")

# Retrieve the API key from environment variables (add GROQ_API_KEY to the
# Space's Secrets). Never hardcode credentials in source — the previous
# inline key was exposed and must be considered compromised/revoked.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError("GROQ_API_KEY environment variable is not set")

client = Groq(api_key=groq_api_key)
def transcribe_audio(audio_file):
    """Run the Whisper model on *audio_file* and return the recognized text."""
    transcription = whisper_model.transcribe(audio_file)
    return transcription["text"]
def get_response(prompt):
    """Send *prompt* to Llama 3 8B through the Groq API and return its reply text."""
    messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        messages=messages,
        model="llama3-8b-8192",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def text_to_speech(text):
    """Synthesize *text* to speech and return the path of a .wav file.

    gTTS can only emit MP3 data, so the original code that saved directly to a
    ``.wav``-named file produced a mislabeled MP3. Here the audio is written to
    a temporary ``.mp3`` file and then transcoded to real WAV with pydub's
    ``AudioSegment``, matching the format the Gradio output component expects.

    Parameters:
        text: The text to synthesize.

    Returns:
        str: Filesystem path to the generated .wav file (caller is
        responsible for eventual cleanup).
    """
    tts = gTTS(text)
    # delete=False so the paths survive after the handles close; we close the
    # handles first and let gTTS/pydub write to the paths (required on Windows,
    # where an open NamedTemporaryFile cannot be reopened by another writer).
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as mp3_file:
        mp3_path = mp3_file.name
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        wav_path = wav_file.name
    tts.save(mp3_path)  # gTTS always writes MP3 bytes, whatever the extension
    AudioSegment.from_mp3(mp3_path).export(wav_path, format="wav")
    os.remove(mp3_path)  # intermediate MP3 is no longer needed
    return wav_path  # Return the file path of the real .wav file
def chatbot(audio_file):
    """Full pipeline: audio in -> transcription -> LLM reply -> audio out.

    Parameters:
        audio_file: Path to the recorded/uploaded audio clip.

    Returns:
        Path to a .wav file containing the spoken response, which Gradio plays.
    """
    # Step 1: speech -> text
    user_text = transcribe_audio(audio_file)
    print(f"Transcribed text: {user_text}")  # Debugging output

    # Step 2: text -> LLM answer
    reply_text = get_response(user_text)
    print(f"Llama response: {reply_text}")  # Debugging output

    # Step 3: answer -> speech
    reply_audio_path = text_to_speech(reply_text)
    print(f"Generated audio output: {reply_audio_path}")  # Debugging output

    return reply_audio_path
# Gradio interface: accepts a .wav clip (mic or upload) and plays the reply.
audio_in = gr.Audio(type="filepath", format="wav")   # input as a file path
audio_out = gr.Audio(type="filepath", format="wav")  # output is a .wav path

iface = gr.Interface(
    fn=chatbot,
    inputs=audio_in,
    outputs=audio_out,
    live=True,  # re-run automatically when the input changes
    title="Voice to Voice Chatbot",
    description="Upload a .wav file or record using the microphone, and the chatbot will respond with audio!"
)

iface.launch()