kostissz committed on
Commit
dcbed68
·
verified ·
1 Parent(s): d11c040

Add init demo version

Browse files
Files changed (1) hide show
  1. app.py +131 -0
app.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ from pathlib import Path
3
+ from typing import Tuple
4
+
5
+ import gradio as gr
6
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
7
+ from whisper_bidec import decode_wav, get_logits_processor, load_corpus_from_sentences
8
+
9
+
10
def _parse_file(file_path: str) -> list[str]:
    """Read a biasing-text file and return one sentence per line or CSV row.

    Args:
        file_path: Path to a ``.txt`` / ``.md`` / ``.csv`` file. A path whose
            extension is ``.csv`` (case-insensitive) is parsed with the
            ``csv`` module; anything else is read as plain text.

    Returns:
        The non-empty sentences with surrounding whitespace removed. For CSV
        input the cells of each row are re-joined with a comma, so every row
        yields exactly one string.
    """
    if file_path.lower().endswith(".csv"):
        # newline="" leaves line-ending handling to the csv module, so quoted
        # fields that contain line breaks are parsed correctly.
        with open(file_path, "r", encoding="utf-8", newline="") as f:
            # csv.reader yields a list of cells per row; join them back so the
            # result really is list[str] rather than a list of lists.
            candidates = [",".join(row) for row in csv.reader(f)]
    else:
        # Use the same explicit encoding as the CSV branch instead of the
        # platform default.
        with open(file_path, "r", encoding="utf-8") as f:
            candidates = f.read().splitlines()

    # Remove line endings / padding and skip blank entries.
    stripped = (candidate.strip() for candidate in candidates)
    return [sentence for sentence in stripped if sentence]
23
+
24
+
25
def transcribe(
    processor_name: str,
    audio: str,
    bias_strength: float,
    bias_text: str | None,
    bias_text_file: str | None,
) -> Tuple[str, str]:
    """Transcribe ``audio`` once without biasing and once with biasing.

    Args:
        processor_name: Identifier passed to ``from_pretrained`` for both the
            Whisper processor and the Whisper model.
        audio: Audio input forwarded unchanged to ``decode_wav``.
        bias_strength: Forwarded to ``get_logits_processor`` as
            ``bias_towards_lm``.
        bias_text: Comma-separated biasing sentences. When it contains any
            non-blank text it takes priority over ``bias_text_file``.
        bias_text_file: Path to a file with biasing sentences, parsed by
            ``_parse_file``. Used only when ``bias_text`` is empty or blank.

    Returns:
        ``(text_no_bias, text_with_bias)``. ``text_with_bias`` is an empty
        string when no biasing sentences were supplied.
    """
    processor = WhisperProcessor.from_pretrained(processor_name)
    model = WhisperForConditionalGeneration.from_pretrained(processor_name)

    sentences: list[str] = []
    if bias_text and bias_text.strip():
        # Drop the padding around each comma-separated sentence and ignore
        # empty fragments such as the one produced by a trailing comma.
        sentences = [part.strip() for part in bias_text.split(",") if part.strip()]
    elif bias_text_file and Path(bias_text_file).is_file():
        # The guard on bias_text_file matters: it is None when no file was
        # provided and Path(None) raises TypeError.
        sentences = _parse_file(bias_text_file)

    if sentences:
        corpus = load_corpus_from_sentences(sentences, processor)
        logits_processor = get_logits_processor(
            corpus=corpus, processor=processor, bias_towards_lm=bias_strength
        )
        text_with_bias = decode_wav(
            model, processor, audio, logits_processor=logits_processor
        )
    else:
        # Nothing to bias towards: leave the biased output blank.
        text_with_bias = ""

    text_no_bias = decode_wav(model, processor, audio, logits_processor=None)

    return text_no_bias, text_with_bias
56
+
57
+
58
def setup_gradio_demo():
    """Build the Whisper Bidec Gradio interface and launch it.

    The page walks through five steps (model, audio, biasing text or file,
    bias strength, transcription) and connects the "Transcribe" button to
    ``transcribe``, showing its two return values side by side.
    """
    # Styling for the narrow middle column that holds the "OR" separator.
    centered_column_css = """
    #centered-column {
        display: flex;
        justify-content: center;
        align-items: center;
        flex-direction: column;
        text-align: center;
    }
    """
    text_hint = (
        "You can add multiple possible sentences by separating them with a comma <,>."
    )
    file_hint = "Note that each new line (.txt / .md) or row (.csv) will be treated as a separate sentence to bias towards to."

    with gr.Blocks(css=centered_column_css) as demo:
        gr.Markdown("# Whisper Bidec Demo")

        gr.Markdown("## Step 1: Select a Whisper model")
        model_name_box = gr.Textbox(
            label="Whisper Model from Hugging Face",
            value="openai/whisper-tiny.en",
        )

        gr.Markdown("## Step 2: Upload your audio file")
        audio_input = gr.Audio(label="Upload a WAV file", type="filepath")

        gr.Markdown("## Step 3: Set your biasing text")
        with gr.Row():
            with gr.Column(scale=20):
                gr.Markdown(text_hint)
                bias_text_box = gr.Textbox(label="Write your biasing text here")
            with gr.Column(scale=1, elem_id="centered-column"):
                gr.Markdown("## OR")
            with gr.Column(scale=20):
                gr.Markdown(file_hint)
                bias_file_input = gr.File(
                    file_types=[".txt", ".md", ".csv"],
                    label="Upload a file with multiple lines of text",
                )

        gr.Markdown("## Step 4: Set how much you want to bias towards the LM")
        strength_slider = gr.Slider(
            label="Bias strength",
            value=0.5,
            minimum=0.0,
            maximum=1.0,
            step=0.1,
            interactive=True,
        )

        gr.Markdown("## Step 5: Get your transcription before and after biasing")
        run_button = gr.Button("Transcribe")

        with gr.Row():
            with gr.Column():
                plain_result = gr.Text(label="Output")
            with gr.Column():
                biased_result = gr.Text(label="Biased output")

        # Input order must match the positional parameters of transcribe().
        click_inputs = [
            model_name_box,
            audio_input,
            strength_slider,
            bias_text_box,
            bias_file_input,
        ]
        click_outputs = [plain_result, biased_result]
        run_button.click(fn=transcribe, inputs=click_inputs, outputs=click_outputs)

    demo.launch()
128
+
129
+
130
# Start the Gradio demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    setup_gradio_demo()