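"""Gradio demo app: transcribe speech with Whisper and translate the transcript
into a user-selected target language via NLLB-200 (FLORES-200 language codes)."""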
import time

import gradio as gr
import pandas as pd
import torch
from transformers import pipeline
from transformers.utils import logging

# Silence non-error messages from transformers.
logging.set_verbosity_error()
# Speech recognition: Whisper transcribes the audio in its original spoken language.
asr = pipeline(task="automatic-speech-recognition",
               model='openai/whisper-large-v3')

# Translation: NLLB-200 translates between FLORES-200 language codes.
translator = pipeline(task="translation",
                      model="facebook/nllb-200-3.3B", max_length=5120,
                      # model="facebook/nllb-200-distilled-600M",  # smaller, faster alternative
                      torch_dtype=torch.bfloat16)
# Language-name -> FLORES-200 code lookup table loaded from the bundled CSV.
flores_200_df = pd.read_csv("Flores200_language_codes.csv", encoding='cp1252')
flores_200 = dict(zip(flores_200_df['Language'], flores_200_df['FLORES_200_code']))
flores_200_languages = list(flores_200.keys())
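# Illustrative entries, assuming the CSV uses the standard FLORES-200 names/codes:
#   flores_200["English"] -> "eng_Latn", flores_200["French"] -> "fra_Latn"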
def transcribe_audio(filepath, src_language, tgt_language):
    # The dropdowns use type='index', so src/tgt arrive as row indices into flores_200_df.
    source_language = flores_200_df.loc[int(src_language), 'Language']
    target_language = flores_200_df.loc[int(tgt_language), 'Language']
    print(f"Selected Source Language: {source_language}, Target Language: {target_language}")
    time.sleep(5)
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        return ""
    # Whisper processes long audio in 30-second chunks and returns one transcript
    # in the spoken (source) language.
    transcript = asr(
        filepath,
        # max_new_tokens=256,
        chunk_length_s=30,
        batch_size=8,
    )['text']
    print(transcript)

    # Translate sentence by sentence from the selected source language to the
    # selected target language.
    src_code = flores_200_df.loc[int(src_language), 'FLORES_200_code']
    tgt_code = flores_200_df.loc[int(tgt_language), 'FLORES_200_code']
    translations = []
    for tscript in transcript.split('.'):
        if not tscript.strip():  # skip empty fragments left by the final period
            continue
        translation = translator(tscript, src_lang=src_code,
                                 tgt_lang=tgt_code)[0]['translation_text']
        translations.append(translation + '.')
    output = ' '.join(translations)
    print(output)
    return output
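# Example call (hypothetical file and indices): transcribe_audio("sample.wav", 0, 1)
# transcribes sample.wav and returns its translation into the language in row 1 of the CSV.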
demo = gr.Blocks()
mic_transcribe = gr.Interface(
    title="Transcribe Audio of Any Language into Any Language - test and demo app by Srinivas.V",
    description="Speak into your system mic, select your source & target languages, and submit (if an error appears, retry).",
    fn=transcribe_audio,
    inputs=[gr.Audio(sources="microphone", type="filepath"),
            gr.Dropdown(flores_200_df.Language.tolist(), type='index', label='Select Source Language'),
            gr.Dropdown(flores_200_df.Language.tolist(), type='index', label='Select Target Language')],
    outputs=gr.Textbox(label="Transcription in Selected Target Language", lines=3),
    allow_flagging="never")
file_transcribe = gr.Interface(
    title="Transcribe Audio of Any Language into Any Language - test and demo app by Srinivas.V",
    description="Upload an audio file, select your source & target languages, and submit (if an error appears, retry).",
    fn=transcribe_audio,
    inputs=[gr.Audio(sources="upload", type="filepath"),
            gr.Dropdown(flores_200_df.Language.tolist(), type='index', label='Select Source Language'),
            gr.Dropdown(flores_200_df.Language.tolist(), type='index', label='Select Target Language')],
    outputs=gr.Textbox(label="Transcription in Selected Target Language", lines=3),
    allow_flagging="never")
with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Speak Through Microphone", "Upload Audio File"],
    )

demo.launch(debug=True)
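# Run with `python app.py`; by default Gradio serves the UI at http://127.0.0.1:7860.
# debug=True keeps the process attached and prints errors to the console.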