File size: 2,907 Bytes
2e9aa1a
 
 
 
 
 
03485f2
2e9aa1a
03485f2
2e9aa1a
03485f2
2e9aa1a
 
03485f2
2e9aa1a
 
 
 
 
 
 
 
 
 
03485f2
 
 
 
 
 
 
 
 
 
 
 
2e9aa1a
 
03485f2
2e9aa1a
03485f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import time
import solara
import numpy as np
from ipywebrtc import AudioRecorder, CameraStream, AudioStream
from tempfile import NamedTemporaryFile
from pywhispercpp.model import Model
from solara.lab import use_task, Task

# Available whisper.cpp model variants (quantized q5_1/q8_0 builds and full-precision).
whisper_models = ["tiny.en-q5_1", "tiny.en-q8_0","tiny.en", "base.en-q5_1", "base.en", "small.en-q5_1", "small.en"]
# UI selection: the model the user has picked in the dropdown.
whisper_model = solara.reactive("tiny.en-q8_0")
# The model that has actually finished loading (shown in the success banner).
current_whisper_model = solara.reactive("tiny.en-q8_0")
# Accumulated transcription text displayed on the page.
transcription = solara.reactive("")
# Seconds taken by the last transcription ("" until the first run completes).
generation_time = solara.reactive("")
# Module-level whisper.cpp model instance; replaced when the user selects a new model.
w = Model('tiny.en-q8_0')
@solara.component
def Page():
    """Solara page: record a voice message in the browser and transcribe it
    with whisper.cpp.

    Sidebar holds the model selector (reloaded asynchronously via use_task),
    the audio recorder widget, and the "Send voice message" button; the main
    column shows the transcription and timing.
    """
    with solara.Sidebar():
        title = "Whisper STT"
        with solara.Head():
            solara.Title(f"{title}")
        with solara.Column(style={"width": "100%", "padding": "50px"}):
            solara.Markdown(f"#{title}")
            solara.Markdown("## Send a voice message")
            solara.Markdown("### Recorder")
            with solara.Row():
                def load_model():
                    # FIX: the original assigned `w = Model(...)` to a LOCAL
                    # variable, so selecting a new model in the UI never
                    # changed the model used by transcribe_voice. Rebind the
                    # module-level instance instead.
                    global w
                    w = Model(whisper_model.value)
                    current_whisper_model.value = whisper_model.value
                    return 1
                solara.Select(label="Select model:", value=whisper_model, values=whisper_models, style="width: 10%")
            # Re-runs load_model whenever the dropdown selection changes.
            result : Task[int] = use_task(load_model, dependencies=[whisper_model.value])
            if result.finished:
                solara.Success(f"Current model: {current_whisper_model.value}")
            else:
                solara.ProgressLinear(result.pending)
            # Browser microphone capture (audio only, no video).
            camera = CameraStream(constraints={'audio': True,'video':False})
            recorder = AudioRecorder(stream=camera)
            recorder.playing = False
            # FIX: bare display() is an IPython-only builtin; use solara's.
            solara.display(recorder)
            def transcribe_voice():
                # FIX: guard against "Send" being clicked before anything was
                # recorded (recorder.audio.value is empty) — previously raised.
                audio_bytes = recorder.audio.value
                if not audio_bytes:
                    transcription.value = "No audio recorded yet."
                    return
                transcription.value = ""
                generation_time.value = ""
                # whisper.cpp reads from a file path, so spill the recorded
                # webm bytes to a temp file for the duration of the call.
                with NamedTemporaryFile(suffix=".webm") as temp:
                    temp.write(audio_bytes)
                    temp.flush()
                    start_time = time.time()
                    segments = w.transcribe(temp.name)
                    for segment in segments:
                        transcription.value += segment.text
                    end_time = time.time()
                    generation_time.value = np.round(end_time - start_time, 2)
                transcription.value += " "
            with solara.Row():
                solara.Button("Send voice message", on_click=transcribe_voice)
    with solara.Column(style="padding: 50px"):
        solara.Markdown(f"### Transcription:")
        solara.Text(f"{transcription.value}", style="color: blue; font-size: 1.5rem")
        if generation_time.value != "":
            solara.Text(f"Generation time: {generation_time.value} seconds", style="color: blue; position: fixed; bottom: 8rem")