import time
import solara
import numpy as np
from ipywebrtc import AudioRecorder, CameraStream, AudioStream
from tempfile import NamedTemporaryFile
from pywhispercpp.model import Model
from solara.lab import use_task, Task

# Model names offered in the "Select model" dropdown.
whisper_models = [
    "tiny.en-q5_1",
    "tiny.en-q8_0",
    "tiny.en",
    "base.en-q5_1",
    "base.en",
    "small.en-q5_1",
    "small.en",
]

# Model chosen in the dropdown (may still be loading).
whisper_model = solara.reactive("tiny.en-q8_0")
# Model that has actually finished loading and is used for transcription.
current_whisper_model = solara.reactive("tiny.en-q8_0")
# Text of the latest transcription and how long it took to produce.
transcription = solara.reactive("")
generation_time = solara.reactive("")

# Module-level model instance; replaced by ``load_model`` when the selection changes.
w = Model('tiny.en-q8_0')


@solara.component
def Page():
    """Render the Whisper speech-to-text page.

    The page lets the user pick a model, record audio from the microphone,
    and send the recording for transcription. The transcription text and the
    time it took are shown below the recorder.
    """
    # NOTE(review): the original file's indentation was lost, so the nesting
    # of the ``with`` blocks below is the most plausible reconstruction.
    with solara.Sidebar():
        title = "Whisper STT"
    with solara.Head():
        solara.Title(f"{title}")

    with solara.Column(style={"width": "100%", "padding": "50px"}):
        solara.Markdown(f"#{title}")
        solara.Markdown("## Send a voice message")
        solara.Markdown("### Recorder")

        with solara.Row():
            def load_model():
                """Load the selected model and publish it as the current one."""
                # Without ``global`` the assignment would create a local and
                # ``transcribe_voice`` would keep using the initial model.
                global w
                selected = whisper_model.value
                w = Model(selected)
                current_whisper_model.value = selected
                return 1

            solara.Select(
                label="Select model:",
                value=whisper_model,
                values=whisper_models,
                style="width: 10%",
            )
            # Re-run the (slow) model load whenever the selection changes.
            result: Task[int] = use_task(load_model, dependencies=[whisper_model.value])
            if result.finished:
                solara.Success(f"Current model: {current_whisper_model.value}")
            else:
                solara.ProgressLinear(result.pending)

        # Audio-only media stream feeding the recorder widget.
        camera = CameraStream(constraints={'audio': True, 'video': False})
        recorder = AudioRecorder(stream=camera)
        recorder.playing = False
        # Use Solara's own display so this does not rely on a ``display``
        # name being injected by the runner or a notebook.
        solara.display(recorder)

        def transcribe_voice():
            """Transcribe the recorded audio and update the reactive outputs."""
            transcription.value = ""
            generation_time.value = ""

            audio_bytes = recorder.audio.value
            if not audio_bytes:
                # Nothing has been recorded yet; there is nothing to transcribe.
                return

            with NamedTemporaryFile(suffix=".webm") as temp:
                # Write through the existing handle (reopening by name is
                # redundant and not portable) and flush so the transcriber
                # sees the full contents when it opens the path.
                temp.write(audio_bytes)
                temp.flush()

                start_time = time.time()
                segments = w.transcribe(f"{temp.name}")
                # Build the text once instead of updating the reactive value
                # (and triggering a re-render) for every segment.
                text = "".join(segment.text for segment in segments)
                end_time = time.time()

            generation_time.value = np.round(end_time - start_time, 2)
            transcription.value = text + " "

        with solara.Row():
            solara.Button("Send voice message", on_click=transcribe_voice)

    with solara.Column(style="padding: 50px"):
        solara.Markdown(f"### Transcription:")
        solara.Text(f"{transcription.value}", style="color: blue; font-size: 1.5rem")
        if generation_time.value != "":
            solara.Text(
                f"Generation time: {generation_time.value} seconds",
                style="color: blue; position: fixed; bottom: 8rem",
            )