Yehor committed on
Commit
441367e
·
1 Parent(s): 6d6b32e
.dockerignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .ruff_cache/
2
+ .venv/
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ .idea/
2
+ .venv/
3
+ .ruff_cache/
4
+
5
+ flagged/
Dockerfile ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.13.2-bookworm

ENV DEBIAN_FRONTEND=noninteractive

# System packages: git/wget/curl for fetching assets, Python build
# dependencies, and ffmpeg (needed by gradio's media handling).
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends \
    git \
    git-lfs \
    wget \
    curl \
    ca-certificates \
    # python build dependencies \
    build-essential \
    libssl-dev \
    zlib1g-dev \
    libbz2-dev \
    libreadline-dev \
    libsqlite3-dev \
    libncursesw5-dev \
    xz-utils \
    tk-dev \
    libxml2-dev \
    libxmlsec1-dev \
    libffi-dev \
    liblzma-dev \
    # gradio dependencies \
    ffmpeg \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

RUN python -m ensurepip --upgrade && python -m pip install --upgrade pip

# Run as an unprivileged user (expected by Hugging Face Spaces).
RUN useradd -m -u 1001 hf-space
USER hf-space

ENV HOME=/home/hf-space \
    PATH=/home/hf-space/.local/bin:${PATH} \
    PYTHONPATH=/home/hf-space/app \
    PYTHONUNBUFFERED=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    SYSTEM=spaces \
    HF_HOME=/home/hf-space/app/hf-home

WORKDIR ${HOME}/app

# Install Python dependencies BEFORE copying the full source tree so the
# slow pip layer is cached across source-only rebuilds.
COPY --chown=hf-space:hf-space requirements.txt ${HOME}/app/requirements.txt
RUN pip install --no-cache-dir -r ${HOME}/app/requirements.txt

COPY --chown=hf-space:hf-space . ${HOME}/app

# -p: do not fail if the directory already exists (e.g. shipped with sources).
RUN mkdir -p ${HF_HOME} && chmod a+rwx ${HF_HOME}

# Gradio serves on 7860 by default (see README's `docker run -p 8888:7860`).
EXPOSE 7860

CMD ["python", "app.py"]
README.md CHANGED
@@ -1,10 +1,42 @@
1
  ---
2
- title: See Asr Outputs
3
- emoji: 🐨
4
- colorFrom: blue
5
- colorTo: red
6
  sdk: docker
7
- pinned: false
 
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ license: apache-2.0
3
+ title: See ASR outputs
 
 
4
  sdk: docker
5
+ emoji: 👀
6
+ colorFrom: green
7
+ colorTo: gray
8
+ short_description: 'See generated JSONL files made by ASR models as a dataframe'
9
  ---
10
 
11
+ ## Install
12
+
13
+ ```shell
14
+ uv venv --python 3.13.2
15
+
16
+ source .venv/bin/activate
17
+
18
+ uv pip install -r requirements.txt
19
+
20
+ # in development mode
21
+ uv pip install -r requirements-dev.txt
22
+ ```
23
+
24
+ ## Development
25
+
26
+ ```shell
27
+ gradio app.py
28
+ ```
29
+
30
+ ## Production
31
+
32
+ ### Build image
33
+
34
+ ```shell
35
+ docker build -t see-asr-outputs .
36
+ ```
37
+
38
+ ### Run
39
+
40
+ ```shell
41
+ docker run -it --rm -p 8888:7860 see-asr-outputs
42
+ ```
app.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import sys
import re  # NOTE(review): appears unused in this file — confirm before removing

from importlib.metadata import version

import polars as pl
import gradio as gr

# Config
# Maximum number of concurrent invocations of the inference handler.
concurrency_limit = 5

title = "See ASR Outputs"

# https://www.tablesgenerator.com/markdown_tables
authors_table = """
## Authors

Follow them on social networks and **contact** if you need any help or have any questions:

| <img src="https://avatars.githubusercontent.com/u/7875085?v=4" width="100"> **Yehor Smoliakov** |
|-------------------------------------------------------------------------------------------------|
| https://t.me/smlkw in Telegram |
| https://x.com/yehor_smoliakov at X |
| https://github.com/egorsmkv at GitHub |
| https://huggingface.co/Yehor at Hugging Face |
| or use [email protected] |
""".strip()

# Example inputs shown in the UI: [file path, batch-mode flag].
examples = [
    ["evaluation_results.jsonl", False],
    ["evaluation_results_batch.jsonl", True],
]

description_head = f"""
# {title}

## Overview

See generated JSONL files made by ASR models as a dataframe.
""".strip()

description_foot = f"""
{authors_table}
""".strip()

# Placeholder text — NOTE(review): not referenced by the UI code below.
metrics_value = """
Metrics will appear here.
""".strip()

# Runtime environment details rendered at the bottom of the page.
tech_env = f"""
#### Environment

- Python: {sys.version}
""".strip()

# Library versions rendered at the bottom of the page.
tech_libraries = f"""
#### Libraries

- gradio: {version("gradio")}
- polars: {version("polars")}
""".strip()
def inference(file_name, _batch_mode):
    """Load an ASR evaluation JSONL file and return it as a polars DataFrame.

    Args:
        file_name: Path to the uploaded JSONL (newline-delimited JSON) file.
        _batch_mode: When True, validate against the batch-result schema
            (plural columns: "filenames", "durations", …); otherwise the
            per-file schema ("filename", "duration", …).

    Returns:
        A polars DataFrame with bookkeeping columns removed and the
        "inference_total" column rounded to 2 decimal places.

    Raises:
        gr.Error: If no file was provided or required columns are missing.
    """
    if not file_name:
        raise gr.Error("Please paste your JSONL file.")

    df = pl.read_ndjson(file_name)

    required_columns = [
        "filename",
        "inference_start",
        "inference_end",
        "inference_total",
        "duration",
        "reference",
        "prediction",
    ]
    required_columns_batch = [
        "inference_start",
        "inference_end",
        "inference_total",
        "filenames",
        "durations",
        "references",
        "predictions",
    ]

    if _batch_mode:
        if not all(col in df.columns for col in required_columns_batch):
            raise gr.Error(
                f"Please provide a JSONL file with the following columns: {required_columns_batch}"
            )
    else:
        if not all(col in df.columns for col in required_columns):
            raise gr.Error(
                f"Please provide a JSONL file with the following columns: {required_columns}"
            )

    # Exclude bookkeeping columns. "filename" exists only in non-batch results
    # ("filenames" in batch mode), so drop non-strictly — a strict drop raised
    # ColumnNotFoundError for batch files (bug fix).
    df = df.drop(["inference_start", "inference_end", "filename"], strict=False)

    # Round "inference_total" to 2 decimal places for display.
    df = df.with_columns(pl.col("inference_total").round(2))

    return df
+
109
+
110
+ demo = gr.Blocks(
111
+ title=title,
112
+ analytics_enabled=False,
113
+ theme=gr.themes.Base(),
114
+ )
115
+
116
+ with demo:
117
+ gr.Markdown(description_head)
118
+
119
+ gr.Markdown("## Usage")
120
+
121
+ with gr.Row():
122
+ df = gr.DataFrame(
123
+ label="Dataframe",
124
+ )
125
+
126
+ with gr.Row():
127
+ with gr.Column():
128
+ jsonl_file = gr.File(label="A JSONL file")
129
+
130
+ batch_mode = gr.Checkbox(
131
+ label="Use batch mode",
132
+ )
133
+
134
+
135
+ gr.Button("Show").click(
136
+ inference,
137
+ concurrency_limit=concurrency_limit,
138
+ inputs=[jsonl_file, batch_mode],
139
+ outputs=df,
140
+ )
141
+
142
+ with gr.Row():
143
+ gr.Examples(
144
+ label="Choose an example",
145
+ inputs=[jsonl_file, batch_mode],
146
+ examples=examples,
147
+ )
148
+
149
+ gr.Markdown(description_foot)
150
+
151
+ gr.Markdown("### Gradio app uses:")
152
+ gr.Markdown(tech_env)
153
+ gr.Markdown(tech_libraries)
154
+
155
+ if __name__ == "__main__":
156
+ demo.queue()
157
+ demo.launch()
evaluation_results.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
evaluation_results_batch.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
requirements-dev.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ruff
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==5.23.0
2
+
3
+ polars==1.26.0