aka7774 committed (verified)
Commit f45e20d · Parent: bbd4e37

Update app.py

Files changed (1): app.py (+4, -40)
app.py CHANGED
@@ -1,48 +1,12 @@
-import torch
+import fn
 import gradio as gr
-from faster_whisper import WhisperModel
 
-model = None
-model_size = 'large-v3'
-
-def load_model(_model_size):
-    global model_size, model
-
-    if model_size != _model_size:
-        model_size = _model_size
-
-    if torch.cuda.is_available():
-        model = WhisperModel(model_size, device="cuda", compute_type="float16")
-        # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
-    else:
-        model = WhisperModel(model_size, device="cpu", compute_type="int8")
-
-def speech_to_text(audio_file, _model_size):
-    global model_size, model
-
-    load_model(_model_size)
-
-    with torch.no_grad():
-        segments, info = model.transcribe(
-            audio_file,
-            language='ja',
-            beam_size=5,
-            vad_filter=True,
-            without_timestamps=False,
-        )
-
-    text = ''
-    for segment in segments:
-        text += f"{segment.start:.2f}\t{segment.end:.2f}\t{segment.text}\n"
-
-    return text
-
-load_model(model_size)
+fn.load_model('large-v3')
 
 gr.Interface(
-    fn=speech_to_text,
+    fn=fn.speech_to_text,
     inputs=[
         gr.Audio(sources="upload", type="filepath"),
         gr.Dropdown(value=model_size, choices=["tiny", "base", "small", "medium", "large", "large-v2", "large-v3"]),
     ],
-    outputs="text").launch()
+    outputs=["text", "text"]).launch()
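For reference, here is a minimal sketch of what the `fn` module (presumably `fn.py`, which is not part of this diff) would need to export for the updated `app.py` to run. It assumes the logic removed from `app.py` moved there largely unchanged; the two-value return is an assumption inferred from `outputs=["text", "text"]`. Note that the unchanged `gr.Dropdown(value=model_size, ...)` line still references `model_size`, which the new `app.py` no longer defines, so the new file would also need to define or import that name.

# fn.py -- hypothetical sketch, not shown in this commit.
# Relocates the model-loading and transcription logic removed from app.py.
import torch
from faster_whisper import WhisperModel

model = None
model_size = None

def load_model(_model_size):
    # (Re)load the Whisper model only when the requested size changes.
    global model_size, model
    if model is None or model_size != _model_size:
        model_size = _model_size
        if torch.cuda.is_available():
            model = WhisperModel(model_size, device="cuda", compute_type="float16")
        else:
            model = WhisperModel(model_size, device="cpu", compute_type="int8")

def speech_to_text(audio_file, _model_size):
    # Transcribe Japanese audio. Returns (timestamped text, plain text);
    # the two-value return is an assumption based on outputs=["text", "text"].
    load_model(_model_size)
    segments, info = model.transcribe(
        audio_file,
        language='ja',
        beam_size=5,
        vad_filter=True,
        without_timestamps=False,
    )
    timestamped = ''
    plain = ''
    for segment in segments:
        timestamped += f"{segment.start:.2f}\t{segment.end:.2f}\t{segment.text}\n"
        plain += segment.text
    return timestamped, plain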