Ritesh-hf committed
Commit 73c6b98 · verified · 1 Parent(s): 25f8e15

Update app.py

Files changed (1)
  1. app.py +72 -72
app.py CHANGED
@@ -1,72 +1,72 @@
  import gradio as gr
  from transformers import pipeline
  import numpy as np
  import librosa
  import pandas as pd


- MODEL_NAME = "openai/whisper-tiny"
+ MODEL_NAME = "openai/whisper-large-v3"
  BATCH_SIZE = 8
  # device = 0 if torch.cuda.is_available() else "cpu"

  pipe = pipeline(
      task="automatic-speech-recognition",
      model=MODEL_NAME,
      chunk_length_s=30,
      # device=device,
  )

  # eng_classifier = pipeline("text-classification", model="Hate-speech-CNERG/bert-base-uncased-hatexplain")

  def format_output_to_list(data):
      formatted_list = "\n".join([f"{item['timestamp'][0]}s - {item['timestamp'][1]}s \t : {item['text']}" for item in data])
      return formatted_list

  def transcribe(inputs, task):
      if inputs is None:
          raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

      output = pipe(inputs, batch_size=BATCH_SIZE, return_timestamps="word", generate_kwargs={"task": task})
      text = output['text']
      timestamps = format_output_to_list(output['chunks'])
      return [text, timestamps]

  examples = [
      ["arabic_english_audios/audios/arabic_audio_1.wav"],
      ["arabic_english_audios/audios/arabic_audio_2.wav"],
      ["arabic_english_audios/audios/arabic_audio_3.wav"],
      ["arabic_english_audios/audios/arabic_audio_4.wav"],
      ["arabic_english_audios/audios/arabic_hate_audio_1.mp3"],
      ["arabic_english_audios/audios/arabic_hate_audio_2.mp3"],
      ["arabic_english_audios/audios/arabic_hate_audio_3.mp3"],
      ["arabic_english_audios/audios/english_audio_1.wav"],
      ["arabic_english_audios/audios/english_audio_2.mp3"],
      ["arabic_english_audios/audios/english_audio_3.mp3"],
      ["arabic_english_audios/audios/english_audio_4.mp3"],
      ["arabic_english_audios/audios/english_audio_5.mp3"],
      ["arabic_english_audios/audios/english_audio_6.wav"]
  ]

  with gr.Blocks(theme=gr.themes.Default()) as demo:
      gr.HTML("<h1 style='text-align: center;'>Transcribe Audio with Timestamps using whisper-large-v3</h1>")
      gr.Markdown("")
      with gr.Row():
          with gr.Column():
              audio_input = gr.Audio(sources=["upload", 'microphone'], type="filepath", label="Audio file")
              task = gr.Radio(["transcribe", "translate"], label="Task")
              with gr.Row():
                  clear_button = gr.ClearButton(value="Clear")
                  submit_button = gr.Button("Submit", variant="primary", )

          with gr.Column():
              transcript_output = gr.Text(label="Transcript")
              timestamp_output = gr.Text(label="Timestamp")

      examples = gr.Examples(examples, inputs=audio_input, outputs=[transcript_output, timestamp_output], fn=transcribe, examples_per_page=20)

      submit_button.click(fn=transcribe, inputs=audio_input, outputs=[transcript_output, timestamp_output])
      clear_button.add([audio_input, transcript_output, timestamp_output])


  if __name__ == "__main__":
      demo.launch()
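
Note: in the code above, `transcribe(inputs, task)` takes two positional arguments, but `submit_button.click` wires in only `audio_input`, so the "Task" radio value never reaches the function and a submit would raise a TypeError for the missing `task` argument. A minimal fix sketch, using the component names from this diff (the sketch itself is not part of the commit):

    # Sketch: pass both the audio filepath and the selected task so that
    # transcribe(inputs, task) receives its second argument.
    submit_button.click(
        fn=transcribe,
        inputs=[audio_input, task],
        outputs=[transcript_output, timestamp_output],
    )

The gr.Examples call has the same shape (fn=transcribe with a single input component), which would surface the same error if the examples were run through the function, e.g. with example caching enabled.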