gobeldan committed on
Commit
e75d2f9
·
verified ·
1 Parent(s): 241ece2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -57
app.py CHANGED
@@ -12,7 +12,25 @@ import pynvml
12
  logging.basicConfig(level=logging.INFO)
13
  last_model = None
14
  model = None
15
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  def get_free_gpu_memory():
18
  pynvml.nvmlInit()
@@ -99,62 +117,31 @@ def transcribe_webui_simple_progress(modelName, languageName, urlData, multipleF
99
  return files_out, vtt, txt
100
 
101
 
102
- with gr.Blocks(title="Fast Whisper WebUI") as demo:
103
- description = "faster-whisper is a reimplementation of OpenAI's Whisper model using CTranslate2, which is a fast inference engine for Transformer models."
104
- article = "Read the [documentation here](https://github.com/SYSTRAN/faster-whisper)."
105
- whisper_models = [
106
- "tiny", "tiny.en",
107
- "base", "base.en", "Systran/faster-whisper-base.en", "Systran/faster-whisper-base",
108
- "small", "small.en", "distil-small.en", "Systran/faster-distil-whisper-small.en",
109
- "medium", "medium.en", "distil-medium.en", "Systran/faster-distil-whisper-medium.en", "Systran/faster-whisper-medium",
110
- "large",
111
- "large-v1", "Systran/faster-whisper-large-v1",
112
- "large-v2", "distil-large-v2", "Systran/faster-distil-whisper-large-v2",
113
- "large-v3", "distil-large-v3", "Systran/faster-distil-whisper-large-v3", "distil-whisper/distil-large-v3-ct2",
114
- "ivrit-ai/whisper-large-v3-turbo-ct2", "deepdml/faster-whisper-large-v3-turbo-ct2",
115
- "distil-whisper/distil-large-v3.5-ct2",
116
- ]
117
- compute_types = [
118
- "auto", "default", "int8", "int8_float32",
119
- "int8_float16", "int8_bfloat16", "int16",
120
- "float16", "float32", "bfloat16"
121
- ]
122
-
123
-
124
- # settings
125
- # cant put Dropdown in inputs
126
- # with gr.Accordion("Settings", open=False):
127
- # task = gr.Dropdown(choices=["transcribe", "translate"], label="Task", value="transcribe", interactive = True),
128
- # chunk_length = gr.Number(label='chunk_length',value=30, interactive = True),
129
- # compute_type = gr.Dropdown(label="compute_type", choices=compute_types, value="auto", interactive = True),
130
- # beam_size = gr.Number(label='beam_size',value=5, interactive = True),
131
- # vad_filter = gr.Checkbox(label='vad_filter',info='Use vad_filter', value=True),
132
- # vad_min_silence_duration_ms = gr.Number(label='Vad min_silence_duration_ms',value=500, interactive = True),
133
-
134
- gr.Interface(
135
- fn=transcribe_webui_simple_progress,
136
- description=description,
137
- article=article,
138
- inputs=[
139
- gr.Dropdown(choices=whisper_models, value="distil-large-v2", label="Model", info="Select whisper model", interactive = True,),
140
- gr.Dropdown(choices=["Automatic Detection"] + sorted(get_language_names()), value="Automatic Detection", label="Language", info="Select audio voice language", interactive = True,),
141
- gr.Text(label="URL", info="(YouTube, etc.)", interactive = True),
142
- gr.File(label="Upload Files", file_count="multiple"),
143
- gr.Audio(sources=["upload", "microphone"], type="filepath", label="Input Audio"),
144
-
145
- gr.Dropdown(choices=["transcribe", "translate"], label="Task", value="transcribe", interactive = True),
146
- gr.Number(label='chunk_length',value=30, interactive = True),
147
- gr.Dropdown(label="compute_type", choices=compute_types, value="auto", interactive = True),
148
- gr.Number(label='beam_size',value=5, interactive = True),
149
- gr.Checkbox(label='vad_filter',info='Use vad_filter', value=True),
150
- gr.Number(label='Vad min_silence_duration_ms',value=500, interactive = True),
151
- ],
152
- outputs=[
153
- gr.File(label="Download"),
154
- gr.Text(label="Transcription"),
155
- gr.Text(label="Segments"),
156
- ]
157
- )
158
 
159
  if __name__ == "__main__":
160
  demo.queue(default_concurrency_limit=get_workers_count())
 
12
  logging.basicConfig(level=logging.INFO)
13
  last_model = None
14
  model = None
15
+ description = "faster-whisper is a reimplementation of OpenAI's Whisper model using CTranslate2, which is a fast inference engine for Transformer models."
16
+ article = "Read the [documentation here](https://github.com/SYSTRAN/faster-whisper)."
17
+ whisper_models = [
18
+ "tiny", "tiny.en",
19
+ "base", "base.en", "Systran/faster-whisper-base.en", "Systran/faster-whisper-base",
20
+ "small", "small.en", "distil-small.en", "Systran/faster-distil-whisper-small.en",
21
+ "medium", "medium.en", "distil-medium.en", "Systran/faster-distil-whisper-medium.en", "Systran/faster-whisper-medium",
22
+ "large",
23
+ "large-v1", "Systran/faster-whisper-large-v1",
24
+ "large-v2", "distil-large-v2", "Systran/faster-distil-whisper-large-v2",
25
+ "large-v3", "distil-large-v3", "Systran/faster-distil-whisper-large-v3", "distil-whisper/distil-large-v3-ct2",
26
+ "ivrit-ai/whisper-large-v3-turbo-ct2", "deepdml/faster-whisper-large-v3-turbo-ct2",
27
+ "distil-whisper/distil-large-v3.5-ct2",
28
+ ]
29
+ compute_types = [
30
+ "auto", "default", "int8", "int8_float32",
31
+ "int8_float16", "int8_bfloat16", "int16",
32
+ "float16", "float32", "bfloat16"
33
+ ]
34
 
35
  def get_free_gpu_memory():
36
  pynvml.nvmlInit()
 
117
  return files_out, vtt, txt
118
 
119
 
120
+
121
+ demo = gr.Interface(
122
+ fn=transcribe_webui_simple_progress,
123
+ description=description,
124
+ article=article,
125
+ inputs=[
126
+ gr.Dropdown(choices=whisper_models, value="distil-whisper/distil-large-v3.5-ct2", label="Model", info="Select whisper model", interactive = True,),
127
+ gr.Dropdown(choices=["Automatic Detection"] + sorted(get_language_names()), value="Automatic Detection", label="Language", info="Select audio voice language", interactive = True,),
128
+ gr.Text(label="URL", info="(YouTube, etc.)", interactive = True),
129
+ gr.File(label="Upload Files", file_count="multiple"),
130
+ gr.Audio(sources=["upload", "microphone"], type="filepath", label="Input Audio"),
131
+ gr.Dropdown(choices=["transcribe", "translate"], label="Task", value="transcribe", interactive = True),
132
+ gr.Number(label='chunk_length',value=30, interactive = True),
133
+ gr.Dropdown(label="compute_type", choices=compute_types, value="auto", interactive = True),
134
+ gr.Number(label='beam_size',value=5, interactive = True),
135
+ gr.Checkbox(label='vad_filter',info='Use vad_filter', value=True),
136
+ gr.Number(label='Vad min_silence_duration_ms',value=500, interactive = True),
137
+ ],
138
+ outputs=[
139
+ gr.File(label="Download"),
140
+ gr.Text(label="Transcription"),
141
+ gr.Text(label="Segments"),
142
+ ],
143
+ title="Fast Whisper WebUI"
144
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
  if __name__ == "__main__":
147
  demo.queue(default_concurrency_limit=get_workers_count())