Spaces:

gobeldan
/

Fast-Whisper-Small-Webui

Running

App Files Community

gobeldan committed 26 days ago

Commit

e75d2f9

verified ·

1 Parent(s): 241ece2

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -57

app.py CHANGED Viewed

@@ -12,7 +12,25 @@ import pynvml
 logging.basicConfig(level=logging.INFO)
 last_model = None
 model = None
 def get_free_gpu_memory():
     pynvml.nvmlInit()
@@ -99,62 +117,31 @@ def transcribe_webui_simple_progress(modelName, languageName, urlData, multipleF
     return files_out, vtt, txt
-with gr.Blocks(title="Fast Whisper WebUI") as demo:
-    description = "faster-whisper is a reimplementation of OpenAI's Whisper model using CTranslate2, which is a fast inference engine for Transformer models."
-    article = "Read the [documentation here](https://github.com/SYSTRAN/faster-whisper)."
-    whisper_models = [
-        "tiny", "tiny.en",
-        "base", "base.en", "Systran/faster-whisper-base.en", "Systran/faster-whisper-base",
-        "small", "small.en", "distil-small.en", "Systran/faster-distil-whisper-small.en",
-        "medium", "medium.en", "distil-medium.en", "Systran/faster-distil-whisper-medium.en", "Systran/faster-whisper-medium",
-        "large",
-        "large-v1", "Systran/faster-whisper-large-v1",
-        "large-v2", "distil-large-v2", "Systran/faster-distil-whisper-large-v2",
-        "large-v3", "distil-large-v3", "Systran/faster-distil-whisper-large-v3", "distil-whisper/distil-large-v3-ct2",
-        "ivrit-ai/whisper-large-v3-turbo-ct2", "deepdml/faster-whisper-large-v3-turbo-ct2",
-        "distil-whisper/distil-large-v3.5-ct2",
-    ]
-    compute_types = [
-        "auto", "default", "int8", "int8_float32",
-        "int8_float16", "int8_bfloat16", "int16",
-        "float16", "float32", "bfloat16"
-    ]
-    # settings
-    # cant put Dropdown in inputs
-    # with gr.Accordion("Settings", open=False):
-        # task = gr.Dropdown(choices=["transcribe", "translate"], label="Task", value="transcribe", interactive = True),
-        # chunk_length = gr.Number(label='chunk_length',value=30, interactive = True),
-        # compute_type = gr.Dropdown(label="compute_type", choices=compute_types, value="auto", interactive = True),
-        # beam_size = gr.Number(label='beam_size',value=5, interactive = True),
-        # vad_filter = gr.Checkbox(label='vad_filter',info='Use vad_filter', value=True),
-        # vad_min_silence_duration_ms = gr.Number(label='Vad min_silence_duration_ms',value=500, interactive = True),
-    gr.Interface(
-        fn=transcribe_webui_simple_progress,
-        description=description,
-        article=article,
-        inputs=[
-            gr.Dropdown(choices=whisper_models, value="distil-large-v2", label="Model", info="Select whisper model", interactive = True,),
-            gr.Dropdown(choices=["Automatic Detection"] + sorted(get_language_names()), value="Automatic Detection", label="Language", info="Select audio voice language", interactive = True,),
-            gr.Text(label="URL", info="(YouTube, etc.)", interactive = True),
-            gr.File(label="Upload Files", file_count="multiple"),
-            gr.Audio(sources=["upload", "microphone"], type="filepath", label="Input Audio"),
-            gr.Dropdown(choices=["transcribe", "translate"], label="Task", value="transcribe", interactive = True),
-            gr.Number(label='chunk_length',value=30, interactive = True),
-            gr.Dropdown(label="compute_type", choices=compute_types, value="auto", interactive = True),
-            gr.Number(label='beam_size',value=5, interactive = True),
-            gr.Checkbox(label='vad_filter',info='Use vad_filter', value=True),
-            gr.Number(label='Vad min_silence_duration_ms',value=500, interactive = True),
-        ],
-        outputs=[
-            gr.File(label="Download"),
-            gr.Text(label="Transcription"),
-            gr.Text(label="Segments"),
-        ]
-    )
 if __name__ == "__main__":
     demo.queue(default_concurrency_limit=get_workers_count())

 logging.basicConfig(level=logging.INFO)
 last_model = None
 model = None
+description = "faster-whisper is a reimplementation of OpenAI's Whisper model using CTranslate2, which is a fast inference engine for Transformer models."
+article = "Read the [documentation here](https://github.com/SYSTRAN/faster-whisper)."
+whisper_models = [
+    "tiny", "tiny.en",
+    "base", "base.en", "Systran/faster-whisper-base.en", "Systran/faster-whisper-base",
+    "small", "small.en", "distil-small.en", "Systran/faster-distil-whisper-small.en",
+    "medium", "medium.en", "distil-medium.en", "Systran/faster-distil-whisper-medium.en", "Systran/faster-whisper-medium",
+    "large",
+    "large-v1", "Systran/faster-whisper-large-v1",
+    "large-v2", "distil-large-v2", "Systran/faster-distil-whisper-large-v2",
+    "large-v3", "distil-large-v3", "Systran/faster-distil-whisper-large-v3", "distil-whisper/distil-large-v3-ct2",
+    "ivrit-ai/whisper-large-v3-turbo-ct2", "deepdml/faster-whisper-large-v3-turbo-ct2",
+    "distil-whisper/distil-large-v3.5-ct2",
+]
+compute_types = [
+    "auto", "default", "int8", "int8_float32",
+    "int8_float16", "int8_bfloat16", "int16",
+    "float16", "float32", "bfloat16"
+]
 def get_free_gpu_memory():
     pynvml.nvmlInit()
     return files_out, vtt, txt
+demo = gr.Interface(
+    fn=transcribe_webui_simple_progress,
+    description=description,
+    article=article,
+    inputs=[
+        gr.Dropdown(choices=whisper_models, value="distil-whisper/distil-large-v3.5-ct2", label="Model", info="Select whisper model", interactive = True,),
+        gr.Dropdown(choices=["Automatic Detection"] + sorted(get_language_names()), value="Automatic Detection", label="Language", info="Select audio voice language", interactive = True,),
+        gr.Text(label="URL", info="(YouTube, etc.)", interactive = True),
+        gr.File(label="Upload Files", file_count="multiple"),
+        gr.Audio(sources=["upload", "microphone"], type="filepath", label="Input Audio"),
+        gr.Dropdown(choices=["transcribe", "translate"], label="Task", value="transcribe", interactive = True),
+        gr.Number(label='chunk_length',value=30, interactive = True),
+        gr.Dropdown(label="compute_type", choices=compute_types, value="auto", interactive = True),
+        gr.Number(label='beam_size',value=5, interactive = True),
+        gr.Checkbox(label='vad_filter',info='Use vad_filter', value=True),
+        gr.Number(label='Vad min_silence_duration_ms',value=500, interactive = True),
+    ],
+    outputs=[
+        gr.File(label="Download"),
+        gr.Text(label="Transcription"),
+        gr.Text(label="Segments"),
+    ],
+    title="Fast Whisper WebUI"
+)
 if __name__ == "__main__":
     demo.queue(default_concurrency_limit=get_workers_count())