vsrinivas committed on
Commit 1383518 · verified · 1 Parent(s): c471716

Create app.py


Initial code upload

Files changed (1)
  1. app.py +84 -0
app.py ADDED
@@ -0,0 +1,84 @@
+ from transformers import pipeline
+ from transformers.utils import logging
+ import torch
+ import pandas as pd
+ import time
+ import gradio as gr
+
+ logging.set_verbosity_error()
+
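+ # Speech recognition: Whisper large-v3 transcribes the input audio in the spoken language.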
+ asr = pipeline(task="automatic-speech-recognition",
+                model='openai/whisper-large-v3')
+
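+ # Translation: NLLB-200 (3.3B) in bfloat16; the distilled 600M checkpoint is a lighter drop-in alternative.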
+ translator = pipeline(task="translation",
+                       model="facebook/nllb-200-3.3B", max_length=5120,
+                       # model="facebook/nllb-200-distilled-600M",
+                       torch_dtype=torch.bfloat16)
+
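+ # Language table: maps human-readable language names to FLORES-200 codes for NLLB.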
+ flores_200_df = pd.read_csv("Flores200_language_codes.csv", encoding='cp1252')
+ flores_200 = dict(zip(flores_200_df['Language'], flores_200_df['FLORES_200_code']))
+ flores_200_languages = list(flores_200.keys())
+
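+ # Gradio callback: the dropdowns below use type='index', so src_language and
+ # tgt_language arrive as row indices into flores_200_df.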
+ def transcribe_audio(filepath, src_language, tgt_language):
+
+     target_language = flores_200_df.loc[int(tgt_language), 'Language']
+     source_language = flores_200_df.loc[int(src_language), 'Language']
+     print(f"Selected Source Language: {source_language}, Target Language: {target_language}")
+
+     time.sleep(5)
+     if filepath is None:
+         gr.Warning("No audio found, please retry.")
+         return ""
+
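+     # Chunked long-form transcription: 30-second windows, batched for throughput.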
+     transcript = asr(
+         filepath,
+         # max_new_tokens=256,
+         chunk_length_s=30,
+         batch_size=8,
+     )['text']
+     print(transcript)
+
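+     # Whisper transcribes in the spoken language, so translate from the selected
+     # source language's FLORES-200 code, one sentence at a time.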
+     transcripts = transcript.split('.')
+
+     translations = []
+     for tscript in transcripts:
+         translation = translator(tscript,
+                                  src_lang=flores_200_df.loc[int(src_language), 'FLORES_200_code'],
+                                  tgt_lang=flores_200_df.loc[int(tgt_language), 'FLORES_200_code'])[0]['translation_text']
+         translations.append(translation + '.')
+
+     output = ' '.join(translations)
+
+     print(output)
+     return output
+
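+ # Two interfaces share the same callback: one records from the microphone, one takes an uploaded file.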
+ demo = gr.Blocks()
+
+ mic_transcribe = gr.Interface(
+     title="Transcribe Audio of Any Language into Any Language - test and demo app by Srinivas.V ..",
+     description="Speak into your system mic, select your source & target languages and submit (if an error appears, retry)",
+     fn=transcribe_audio,
+     inputs=[gr.Audio(sources="microphone", type="filepath"),
+             gr.Dropdown(flores_200_df.Language.tolist(), type='index', label='Select Source Language'),
+             gr.Dropdown(flores_200_df.Language.tolist(), type='index', label='Select Target Language')],
+     outputs=gr.Textbox(label="Transcription in Selected Target Language", lines=3),
+     allow_flagging="never")
+
+ file_transcribe = gr.Interface(
+     title="Transcribe Audio of Any Language into Any Language - test and demo app by Srinivas.V ..",
+     description="Upload an audio file, select your source & target languages and submit (if an error appears, retry)",
+     fn=transcribe_audio,
+     inputs=[gr.Audio(sources="upload", type="filepath"),
+             gr.Dropdown(flores_200_df.Language.tolist(), type='index', label='Select Source Language'),
+             gr.Dropdown(flores_200_df.Language.tolist(), type='index', label='Select Target Language')],
+     outputs=gr.Textbox(label="Transcription in Selected Target Language", lines=3),
+     allow_flagging="never")
+
+ with demo:
+     gr.TabbedInterface(
+         [mic_transcribe, file_transcribe],
+         ["Speak Through Microphone", "Upload Audio File"],
+     )
+ demo.launch(debug=True)