alonsosilva commited on
Commit
2e9aa1a
·
1 Parent(s): cfd87ed
Files changed (5) hide show
  1. Dockerfile +18 -0
  2. app.py +47 -0
  3. packages.txt +1 -0
  4. requirements.txt +3 -0
  5. style.css +28 -0
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the Solara + whisper.cpp speech-to-text app.
FROM python:3.12
# Copy the standalone `uv` binary from its official image, pinned to 0.4.20 for reproducible builds.
COPY --from=ghcr.io/astral-sh/uv:0.4.20 /uv /bin/uv

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user
ENV PATH="/home/user/.local/bin:$PATH"
# Make `uv pip install` target the image-wide Python instead of requiring a virtualenv.
ENV UV_SYSTEM_PYTHON=1

WORKDIR /app

# Install dependencies before copying the app so this layer is cached
# independently of source-code changes.
COPY --chown=user ./requirements.txt requirements.txt
RUN uv pip install -r requirements.txt

COPY --chown=user . /app
# Switch to the "user" user
USER user

# Serve the Solara app on 0.0.0.0:7860 (presumably for Hugging Face Spaces,
# whose default app port is 7860 — confirm against the Space settings).
CMD ["solara", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import solara
3
+ import numpy as np
4
+ from ipywebrtc import AudioRecorder, CameraStream, AudioStream
5
+ from tempfile import NamedTemporaryFile
6
+ from pywhispercpp.model import Model
7
+
8
# Whisper model identifiers selectable in the UI (names suggest quantized
# English-only builds — q8_0/q5_1; semantics defined by pywhispercpp).
whisper_models = ['tiny.en-q8_0', "base.en-q5_1", "small.en-q5_1"]
# Currently selected model name; read by Page() on every render.
whisper_model = solara.reactive("tiny.en-q8_0")
# Accumulated transcription text displayed in the UI.
transcription = solara.reactive("")
# Duration of the last transcription in seconds; "" until the first run finishes.
generation_time = solara.reactive("")
12
@solara.component
def Page():
    """Render the Whisper speech-to-text page.

    Layout: a sidebar holding the page title, and a main column with an
    in-browser audio recorder (ipywebrtc), a "send" button that transcribes
    the recorded audio with pywhispercpp, and the resulting text plus timing.
    State lives in the module-level reactives (whisper_model, transcription,
    generation_time), so changing any of them re-renders this component.

    NOTE(review): original indentation was lost in the paste; the
    Sidebar/Column nesting below is a reconstruction — confirm against the
    deployed app.
    """
    with solara.Sidebar():
        title = "Whisper STT"
        with solara.Head():
            solara.Title(f"{title}")
    with solara.Column(style={"width": "100%", "padding": "50px"}):
        solara.Markdown(f"#{title}")
        solara.Markdown("## Send a voice message")
        solara.Markdown("### Recorder")
        # NOTE(review): the whisper model is re-instantiated on EVERY render
        # (including each transcription, since transcription.value changes
        # trigger a re-render) — consider solara.use_memo keyed on
        # whisper_model.value.
        w = Model(whisper_model.value)
        # Audio-only stream; the recorder captures it in the browser.
        camera = CameraStream(constraints={'audio': True,'video':False})
        recorder = AudioRecorder(stream=camera)
        # NOTE(review): bare `display` relies on solara/IPython injecting it;
        # solara.display(recorder) would be the explicit form — confirm this
        # resolves when run via `solara run`.
        display(recorder)
        def MyButton():
            def transcribe_voice():
                # Reset outputs so a failed/empty run doesn't show stale text.
                transcription.value = ""
                generation_time.value = ""
                # Dump the recorded webm bytes to a temp file because
                # Model.transcribe takes a file path, not a buffer.
                with NamedTemporaryFile(suffix=".webm") as temp:
                    with open(f"{temp.name}", 'wb') as f:
                        f.write(recorder.audio.value)
                    start_time = time.time()
                    segments = w.transcribe(f"{temp.name}")
                    # Append segment-by-segment; each assignment re-renders,
                    # giving incremental display of the transcription.
                    for segment in segments:
                        transcription.value += segment.text
                    end_time = time.time()
                    # Rounded float replaces the "" sentinel (see check below).
                    generation_time.value = np.round(end_time - start_time, 2)
                # Trailing space separates consecutive voice messages.
                transcription.value += " "
            with solara.Row():
                solara.Button("Send voice message", on_click=transcribe_voice)
                solara.Select(label="Select model:", value=whisper_model, values=whisper_models, style="width: 10%")
        MyButton()
        solara.Markdown(f"### Transcription:")
        solara.Text(f"{transcription.value}", style="color: blue; font-size: 1.5rem")
        # Only show timing after at least one transcription has completed.
        if generation_time.value != "":
            solara.Text(f"Generation time: {generation_time.value} seconds", style="color: blue; position: fixed; bottom: 8rem")
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ solara
2
+ ipywebrtc
3
+ pywhispercpp
style.css ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Base page spacing and system font stack. */
body {
  padding: 2rem;
  font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
}

/* Compact top-level heading. */
h1 {
  font-size: 16px;
  margin-top: 0;
}

/* Muted gray body text with tight vertical rhythm. */
p {
  color: rgb(107, 114, 128);
  font-size: 15px;
  margin-bottom: 10px;
  margin-top: 5px;
}

/* Centered, rounded card container. */
.card {
  max-width: 620px;
  margin: 0 auto;
  padding: 16px;
  border: 1px solid lightgray;
  border-radius: 16px;
}

/* Drop the trailing margin of the last paragraph inside a card. */
.card p:last-child {
  margin-bottom: 0;
}