import time
from tempfile import NamedTemporaryFile

import numpy as np
import solara
from ipywebrtc import AudioRecorder, CameraStream, AudioStream
from pywhispercpp.model import Model

# Available whisper.cpp models and reactive app state
whisper_models = ['tiny.en-q8_0', "base.en-q5_1", "small.en-q5_1"]
whisper_model = solara.reactive("tiny.en-q8_0")
transcription = solara.reactive("")
generation_time = solara.reactive("")


@solara.component
def Page():
    with solara.Sidebar():
        title = "Whisper STT"
        with solara.Head():
            solara.Title(f"{title}")

    with solara.Column(style={"width": "100%", "padding": "50px"}):
        solara.Markdown(f"# {title}")
        solara.Markdown("## Send a voice message")
        solara.Markdown("### Recorder")

        # Load the selected whisper.cpp model (re-created when the selection changes)
        w = Model(whisper_model.value)

        # Request microphone access only (no video) and attach a recorder widget
        camera = CameraStream(constraints={'audio': True, 'video': False})
        recorder = AudioRecorder(stream=camera)
        solara.display(recorder)

        def MyButton():
            def transcribe_voice():
                transcription.value = ""
                generation_time.value = ""
                # Write the recorded audio to a temporary .webm file and transcribe it
                with NamedTemporaryFile(suffix=".webm") as temp:
                    with open(temp.name, 'wb') as f:
                        f.write(recorder.audio.value)
                    start_time = time.time()
                    segments = w.transcribe(temp.name)
                    for segment in segments:
                        transcription.value += segment.text
                    end_time = time.time()
                    generation_time.value = np.round(end_time - start_time, 2)
                    transcription.value += " "

            with solara.Row():
                solara.Button("Send voice message", on_click=transcribe_voice)
                solara.Select(label="Select model:", value=whisper_model,
                              values=whisper_models, style="width: 10%")

        MyButton()

        solara.Markdown("### Transcription:")
        solara.Text(f"{transcription.value}", style="color: blue; font-size: 1.5rem")
        if generation_time.value != "":
            solara.Text(f"Generation time: {generation_time.value} seconds",
                        style="color: blue; position: fixed; bottom: 8rem")
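
# To try this component locally (assuming the script is saved as app.py; the
# filename is an assumption), serve it with the Solara CLI:
#
#   solara run app.py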