alonsosilva commited on
Commit
2e9aa1a
·
1 Parent(s): cfd87ed
Files changed (5) hide show
  1. Dockerfile +18 -0
  2. app.py +47 -0
  3. packages.txt +1 -0
  4. requirements.txt +3 -0
  5. style.css +28 -0
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the Solara + whisper.cpp speech-to-text app.
FROM python:3.12
# Copy the standalone `uv` binary from its official image, pinned to 0.4.20 for reproducible builds.
COPY --from=ghcr.io/astral-sh/uv:0.4.20 /uv /bin/uv

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user
ENV PATH="/home/user/.local/bin:$PATH"
# Make `uv pip install` target the image-wide Python instead of requiring a virtualenv.
ENV UV_SYSTEM_PYTHON=1

WORKDIR /app

# Install dependencies before copying the app so this layer is cached
# independently of source-code changes.
COPY --chown=user ./requirements.txt requirements.txt
RUN uv pip install -r requirements.txt

COPY --chown=user . /app
# Switch to the "user" user
USER user

# Serve the Solara app on 0.0.0.0:7860 (presumably for Hugging Face Spaces,
# whose default app port is 7860 — confirm against the Space settings).
CMD ["solara", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import solara
3
+ import numpy as np
4
+ from ipywebrtc import AudioRecorder, CameraStream, AudioStream
5
+ from tempfile import NamedTemporaryFile
6
+ from pywhispercpp.model import Model
7
+
8
# Whisper model identifiers selectable in the UI (names suggest quantized
# English-only builds — q8_0/q5_1; semantics defined by pywhispercpp).
whisper_models = ['tiny.en-q8_0', "base.en-q5_1", "small.en-q5_1"]
# Currently selected model name; read by Page() on every render.
whisper_model = solara.reactive("tiny.en-q8_0")
# Accumulated transcription text displayed in the UI.
transcription = solara.reactive("")
# Duration of the last transcription in seconds; "" until the first run finishes.
generation_time = solara.reactive("")
12
@solara.component
def Page():
    """Render the Whisper speech-to-text page.

    Layout: a sidebar holding the page title, and a main column with an
    in-browser audio recorder (ipywebrtc), a "send" button that transcribes
    the recorded audio with pywhispercpp, and the resulting text plus timing.
    State lives in the module-level reactives (whisper_model, transcription,
    generation_time), so changing any of them re-renders this component.

    NOTE(review): original indentation was lost in the paste; the
    Sidebar/Column nesting below is a reconstruction — confirm against the
    deployed app.
    """
    with solara.Sidebar():
        title = "Whisper STT"
        with solara.Head():
            solara.Title(f"{title}")
    with solara.Column(style={"width": "100%", "padding": "50px"}):
        solara.Markdown(f"#{title}")
        solara.Markdown("## Send a voice message")
        solara.Markdown("### Recorder")
        # NOTE(review): the whisper model is re-instantiated on EVERY render
        # (including each transcription, since transcription.value changes
        # trigger a re-render) — consider solara.use_memo keyed on
        # whisper_model.value.
        w = Model(whisper_model.value)
        # Audio-only stream; the recorder captures it in the browser.
        camera = CameraStream(constraints={'audio': True,'video':False})
        recorder = AudioRecorder(stream=camera)
        # NOTE(review): bare `display` relies on solara/IPython injecting it;
        # solara.display(recorder) would be the explicit form — confirm this
        # resolves when run via `solara run`.
        display(recorder)
        def MyButton():
            def transcribe_voice():
                # Reset outputs so a failed/empty run doesn't show stale text.
                transcription.value = ""
                generation_time.value = ""
                # Dump the recorded webm bytes to a temp file because
                # Model.transcribe takes a file path, not a buffer.
                with NamedTemporaryFile(suffix=".webm") as temp:
                    with open(f"{temp.name}", 'wb') as f:
                        f.write(recorder.audio.value)
                    start_time = time.time()
                    segments = w.transcribe(f"{temp.name}")
                    # Append segment-by-segment; each assignment re-renders,
                    # giving incremental display of the transcription.
                    for segment in segments:
                        transcription.value += segment.text
                    end_time = time.time()
                    # Rounded float replaces the "" sentinel (see check below).
                    generation_time.value = np.round(end_time - start_time, 2)
                # Trailing space separates consecutive voice messages.
                transcription.value += " "
            with solara.Row():
                solara.Button("Send voice message", on_click=transcribe_voice)
                solara.Select(label="Select model:", value=whisper_model, values=whisper_models, style="width: 10%")
        MyButton()
        solara.Markdown(f"### Transcription:")
        solara.Text(f"{transcription.value}", style="color: blue; font-size: 1.5rem")
        # Only show timing after at least one transcription has completed.
        if generation_time.value != "":
            solara.Text(f"Generation time: {generation_time.value} seconds", style="color: blue; position: fixed; bottom: 8rem")
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ solara
2
+ ipywebrtc
3
+ pywhispercpp
style.css ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Base page spacing and system font stack. */
body {
  padding: 2rem;
  font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
}

/* Compact top-level heading. */
h1 {
  font-size: 16px;
  margin-top: 0;
}

/* Muted gray body text with tight vertical rhythm. */
p {
  color: rgb(107, 114, 128);
  font-size: 15px;
  margin-bottom: 10px;
  margin-top: 5px;
}

/* Centered, rounded card container. */
.card {
  max-width: 620px;
  margin: 0 auto;
  padding: 16px;
  border: 1px solid lightgray;
  border-radius: 16px;
}

/* Drop the trailing margin of the last paragraph inside a card. */
.card p:last-child {
  margin-bottom: 0;
}