# Hugging Face Space: Tamil speech -> English text translation demo
# (Whisper ASR "translate" task, served through a Gradio microphone UI).
from functools import lru_cache

import gradio as gr
import torchaudio
from transformers import WhisperProcessor, WhisperForConditionalGeneration
# Whisper checkpoint used for the speech-translation task.
_MODEL_ID_ASR = "openai/whisper-small"


@lru_cache(maxsize=1)
def _load_asr():
    """Load the Whisper processor and model once and cache them.

    The original code re-downloaded and re-instantiated both on every call,
    which made each translation extremely slow and wasted memory.
    """
    processor = WhisperProcessor.from_pretrained(_MODEL_ID_ASR)
    model = WhisperForConditionalGeneration.from_pretrained(_MODEL_ID_ASR)
    return processor, model


def translate(audio):
    """Translate spoken Tamil audio to English text with Whisper.

    Parameters
    ----------
    audio : dict
        Expected shape: ``{"audio": {"array": 1-D float array,
        "sampling_rate": int}}`` — the format built by
        ``speech_to_speech_translation``.

    Returns
    -------
    str
        The English translation of the utterance.
    """
    processor_asr, model_asr = _load_asr()
    # Force Whisper into Tamil-source, translate-to-English decoding mode.
    forced_decoder_ids = processor_asr.get_decoder_prompt_ids(
        language="tamil", task="translate"
    )
    input_features = processor_asr(
        audio["audio"]["array"],
        sampling_rate=audio["audio"]["sampling_rate"],
        return_tensors="pt",
    ).input_features
    predicted_ids = model_asr.generate(
        input_features, forced_decoder_ids=forced_decoder_ids
    )
    transcription = processor_asr.batch_decode(predicted_ids, skip_special_tokens=True)
    return transcription[0]
def speech_to_speech_translation(audio_filepath):
    """Load a recorded audio file and return its English translation.

    Parameters
    ----------
    audio_filepath : str
        Path to the audio file recorded by the Gradio microphone widget.

    Returns
    -------
    str
        English translation produced by ``translate``.
    """
    waveform, sampling_rate = torchaudio.load(audio_filepath)
    # Whisper's feature extractor expects 16 kHz input.
    if sampling_rate != 16000:
        resampler = torchaudio.transforms.Resample(
            orig_freq=sampling_rate, new_freq=16000
        )
        waveform = resampler(waveform)
        sampling_rate = 16000
    # torchaudio.load returns a (channels, samples) tensor. The original code
    # passed that 2-D array straight to the processor, so stereo recordings
    # were treated as a batch of two clips; downmix to a mono 1-D array.
    mono = waveform.mean(dim=0)
    audio_dict = {
        "audio": {
            "array": mono.numpy(),
            "sampling_rate": sampling_rate,
        }
    }
    return translate(audio_dict)
title = "Tamil Translator / Whisper AI"
description = """
This is just a simple gradio app that can record spoken tamil audio and translates it into english language
"""

demo = gr.Blocks()

mic_translate = gr.Interface(
    fn=speech_to_speech_translation,
    # NOTE(review): Gradio 4.x renamed ``source=`` to ``sources=[...]``; the
    # old keyword raises a TypeError on modern Gradio, which matches the
    # "Runtime error" this Space showed. Pin gradio<4 if the old API is needed.
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=gr.Textbox(label="Translation"),
    allow_flagging="never",
    title=title,
    description=description,
)

with demo:
    gr.TabbedInterface([mic_translate], [" "])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)