from transformers import pipeline
from transformers.utils import logging
import torch
import pandas as pd
import time
import gradio as gr

logging.set_verbosity_error()

# Speech-to-text: Whisper large-v3 transcribes audio in the spoken language
asr = pipeline(task="automatic-speech-recognition",
               model="openai/whisper-large-v3")
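# Whisper detects the spoken language automatically. If detection proves
# unreliable for short clips, recent transformers releases let you force it
# via generate_kwargs (hedged: exact behaviour depends on your version), e.g.
#   asr(filepath, generate_kwargs={"language": "hindi"})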

# Translation: NLLB-200 (3.3B) covers 200+ languages addressed by FLORES-200
# codes; bfloat16 halves memory relative to fp32
translator = pipeline(task="translation",
                      model="facebook/nllb-200-3.3B",
                      max_length=5120,
                      torch_dtype=torch.bfloat16)
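# On a CUDA machine both pipelines can be pinned to the GPU by passing
# device=0 to pipeline(); they are left on the default device here, which is
# an assumption about the deployment environment.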

# Map human-readable language names to FLORES-200 codes (e.g. "English" -> "eng_Latn")
flores_200_df = pd.read_csv("Flores200_language_codes.csv", encoding="cp1252")
flores_200 = dict(zip(flores_200_df["Language"], flores_200_df["FLORES_200_code"]))
flores_200_languages = list(flores_200.keys())
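# Assumed layout of the CSV (the file itself is not shipped with this snippet),
# two columns keyed by language name:
#   Language,FLORES_200_code
#   English,eng_Latn
#   Hindi,hin_Deva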


def transcribe_audio(filepath, src_language, tgt_language):
    # The dropdowns use type='index', so src_language/tgt_language arrive as
    # integer row positions into flores_200_df
    source_language = flores_200_df.loc[int(src_language), "Language"]
    target_language = flores_200_df.loc[int(tgt_language), "Language"]
    print(f"Selected Source Language: {source_language}, Target Language: {target_language}")

    # Brief pause before processing the audio
    time.sleep(5)
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        return ""

    # Transcribe in the spoken language; 30 s chunks with batching keep long
    # recordings within Whisper's context window
    transcript = asr(
        filepath,
        chunk_length_s=30,
        batch_size=8,
    )["text"]
    print(transcript)

    # Translate sentence by sentence to stay within the translator's max_length
    sentences = transcript.split(".")

    src_code = flores_200_df.loc[int(src_language), "FLORES_200_code"]
    tgt_code = flores_200_df.loc[int(tgt_language), "FLORES_200_code"]

    translations = []
    for sentence in sentences:
        if not sentence.strip():
            continue  # skip the empty fragment after a trailing period
        translation = translator(sentence, src_lang=src_code,
                                 tgt_lang=tgt_code)[0]["translation_text"]
        translations.append(translation + ".")

    output = " ".join(translations)
    print(output)
    return output
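# Quick smoke test without the UI (hypothetical file path and row indices):
# print(transcribe_audio("sample.wav", src_language=0, tgt_language=1))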


demo = gr.Blocks()

mic_transcribe = gr.Interface(
    title="Transcribe Audio of Any Language into Any Language - test and demo app by Srinivas.V",
    description="Speak into your system microphone, select the source and target languages, and submit (if an error appears, retry)",
    fn=transcribe_audio,
    inputs=[gr.Audio(sources="microphone", type="filepath"),
            gr.Dropdown(flores_200_languages, type="index", label="Select Source Language"),
            gr.Dropdown(flores_200_languages, type="index", label="Select Target Language")],
    outputs=gr.Textbox(label="Transcription in Selected Target Language", lines=3),
    allow_flagging="never")

file_transcribe = gr.Interface(
    title="Transcribe Audio of Any Language into Any Language - test and demo app by Srinivas.V",
    description="Upload an audio file, select the source and target languages, and submit (if an error appears, retry)",
    fn=transcribe_audio,
    inputs=[gr.Audio(sources="upload", type="filepath"),
            gr.Dropdown(flores_200_languages, type="index", label="Select Source Language"),
            gr.Dropdown(flores_200_languages, type="index", label="Select Target Language")],
    outputs=gr.Textbox(label="Transcription in Selected Target Language", lines=3),
    allow_flagging="never")

with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Speak Through Microphone", "Upload Audio File"],
    )

demo.launch(debug=True)
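# When running on a remote host or in a notebook, a public link can be
# requested with demo.launch(share=True, debug=True).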