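# Gradio demo app: transcribe speech with Whisper (openai/whisper-large-v3)
# and translate the transcript into a target language chosen from the
# FLORES-200 list using NLLB-200 (facebook/nllb-200-3.3B).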
from transformers import pipeline
from transformers.utils import logging
import torch
import pandas as pd
import time
import gradio as gr
logging.set_verbosity_error()
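# Speech-recognition pipeline: Whisper large-v3 turns the audio into text.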
asr = pipeline(task="automatic-speech-recognition",
               model="openai/whisper-large-v3")
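# Translation pipeline: NLLB-200 translates text between FLORES-200 language codes.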
translator = pipeline(task="translation",
                      model="facebook/nllb-200-3.3B", max_length=5120,
                      # model="facebook/nllb-200-distilled-600M",
                      torch_dtype=torch.bfloat16)
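# Language table mapping human-readable names to FLORES-200 codes,
# used to populate the dropdowns and to pick the translator's tgt_lang.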
flores_200_df = pd.read_csv("Flores200_language_codes.csv", encoding='cp1252')
flores_200 = dict(zip(flores_200_df['Language'], flores_200_df['FLORES_200_code']))
flores_200_languages = list(flores_200.keys())
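# Handler shared by both interfaces: transcribe the recorded/uploaded audio
# and translate it, sentence by sentence, into the selected target language.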
def transcribe_audio(filepath, src_language, tgt_language):
    # The dropdowns return list indices; map them back to language names.
    target_language = flores_200_df.loc[int(tgt_language), 'Language']
    source_language = flores_200_df.loc[int(src_language), 'Language']
    print(f"Selected Source Language: {source_language}, Target Language: {target_language}")
    time.sleep(5)
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        return ""
    # Ask Whisper to translate the speech to English so that the hard-coded
    # src_lang="eng_Latn" passed to NLLB below is correct.
    english_transcript = asr(
        filepath,
        # max_new_tokens=256,
        chunk_length_s=30,
        batch_size=8,
        generate_kwargs={"task": "translate"},
    )['text']
    print(english_transcript)
    # Translate sentence by sentence to keep each request short.
    tgt_code = flores_200_df.loc[int(tgt_language), 'FLORES_200_code']
    transcripts = english_transcript.split('.')
    translations = []
    for tscript in transcripts:
        if not tscript.strip():
            continue  # skip the empty fragment left after the final '.'
        translation = translator(tscript, src_lang="eng_Latn",
                                 tgt_lang=tgt_code)[0]['translation_text']
        translations.append(translation + '.')
    output = ' '.join(translations)
    print(output)
    return output
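# Build the UI: two tabbed interfaces sharing transcribe_audio, one for
# microphone input and one for uploaded audio files.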
demo = gr.Blocks()
mic_transcribe = gr.Interface(
    title="Transcribe Audio of Any Language into Any Language - test and demo app by Srinivas.V ..",
    description="Speak into your microphone, select your source & target languages and submit (if an error appears, retry)",
    fn=transcribe_audio,
    inputs=[gr.Audio(sources="microphone", type="filepath"),
            gr.Dropdown(flores_200_df.Language.tolist(), type='index', label='Select Source Language'),
            gr.Dropdown(flores_200_df.Language.tolist(), type='index', label='Select Target Language')],
    outputs=gr.Textbox(label="Transcription in Selected Target Language", lines=3),
    allow_flagging="never")
file_transcribe = gr.Interface(
    title="Transcribe Audio of Any Language into Any Language - test and demo app by Srinivas.V ..",
    description="Upload an audio file, select your source & target languages and submit (if an error appears, retry)",
    fn=transcribe_audio,
    inputs=[gr.Audio(sources="upload", type="filepath"),
            gr.Dropdown(flores_200_df.Language.tolist(), type='index', label='Select Source Language'),
            gr.Dropdown(flores_200_df.Language.tolist(), type='index', label='Select Target Language')],
    outputs=gr.Textbox(label="Transcription in Selected Target Language", lines=3),
    allow_flagging="never")
with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Speak Through Microphone", "Upload Audio File"],
    )
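# debug=True keeps the app running in the foreground and surfaces errors in the console.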
demo.launch(debug=True)