import gradio as gr
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import torchaudio

# Load the Whisper checkpoint once at startup rather than on every request.
model_id_asr = "openai/whisper-small"
processor_asr = WhisperProcessor.from_pretrained(model_id_asr)
model_asr = WhisperForConditionalGeneration.from_pretrained(model_id_asr)

# Prompt the decoder: the input is Tamil and the task is translation to English.
forced_decoder_ids = processor_asr.get_decoder_prompt_ids(language="tamil", task="translate")


def translate(audio):
    input_features = processor_asr(
        audio["audio"]["array"], sampling_rate=audio["audio"]["sampling_rate"], return_tensors="pt"
    ).input_features
    predicted_ids = model_asr.generate(input_features, forced_decoder_ids=forced_decoder_ids)
    transcription = processor_asr.batch_decode(predicted_ids, skip_special_tokens=True)
    return transcription[0]
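
# Quick sanity check for translate() without launching the UI (a sketch;
# "sample_ta.wav" is a hypothetical 16 kHz Tamil clip, not shipped with
# this Space):
#
#     waveform, sr = torchaudio.load("sample_ta.wav")
#     print(translate({"audio": {"array": waveform.mean(dim=0).numpy(),
#                                "sampling_rate": sr}}))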


def speech_to_text_translation(audio_filepath):
    waveform, sampling_rate = torchaudio.load(audio_filepath)
    # Whisper expects 16 kHz audio, so resample anything else.
    if sampling_rate != 16000:
        resampler = torchaudio.transforms.Resample(orig_freq=sampling_rate, new_freq=16000)
        waveform = resampler(waveform)
        sampling_rate = 16000
    # torchaudio returns a (channels, samples) tensor; downmix to mono and
    # drop the channel dimension before handing the array to Whisper.
    audio_dict = {
        "audio": {
            "array": waveform.mean(dim=0).numpy(),
            "sampling_rate": sampling_rate,
        }
    }
    return translate(audio_dict)
title = "Tamil Translator / Whisper AI"
description = """
A simple Gradio app that records spoken Tamil audio and translates it into English.
"""

demo = gr.Blocks()

mic_translate = gr.Interface(
    fn=speech_to_text_translation,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=gr.Textbox(label="Translation"),
    allow_flagging="never",
    title=title,
    description=description,
)

with demo:
    gr.TabbedInterface([mic_translate], ["Microphone"])

demo.launch(debug=True, share=False)
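
# Note: this file targets the Gradio 3.x API. On Gradio 4.x, gr.Audio's
# `source` argument was renamed to `sources` and takes a list, a common
# cause of runtime errors when a Space is rebuilt against a newer Gradio.
# A 4.x-compatible sketch of the input component (assuming gradio >= 4.0):
#
#     inputs=gr.Audio(sources=["microphone"], type="filepath")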