Yehor committed on
Commit
441367e
·
1 Parent(s): 6d6b32e
.dockerignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .ruff_cache/
2
+ .venv/
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ .idea/
2
+ .venv/
3
+ .ruff_cache/
4
+
5
+ flagged/
Dockerfile ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.13.2-bookworm

ENV DEBIAN_FRONTEND=noninteractive

# System packages: git/wget/curl for fetching assets, Python build
# dependencies, and ffmpeg (needed by gradio's media handling).
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends \
    git \
    git-lfs \
    wget \
    curl \
    ca-certificates \
    # python build dependencies \
    build-essential \
    libssl-dev \
    zlib1g-dev \
    libbz2-dev \
    libreadline-dev \
    libsqlite3-dev \
    libncursesw5-dev \
    xz-utils \
    tk-dev \
    libxml2-dev \
    libxmlsec1-dev \
    libffi-dev \
    liblzma-dev \
    # gradio dependencies \
    ffmpeg \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

RUN python -m ensurepip --upgrade && python -m pip install --upgrade pip

# Run as an unprivileged user (expected by Hugging Face Spaces).
RUN useradd -m -u 1001 hf-space
USER hf-space

ENV HOME=/home/hf-space \
    PATH=/home/hf-space/.local/bin:${PATH} \
    PYTHONPATH=/home/hf-space/app \
    PYTHONUNBUFFERED=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    SYSTEM=spaces \
    HF_HOME=/home/hf-space/app/hf-home

WORKDIR ${HOME}/app

# Install Python dependencies BEFORE copying the full source tree so the
# slow pip layer is cached across source-only rebuilds.
COPY --chown=hf-space:hf-space requirements.txt ${HOME}/app/requirements.txt
RUN pip install --no-cache-dir -r ${HOME}/app/requirements.txt

COPY --chown=hf-space:hf-space . ${HOME}/app

# -p: do not fail if the directory already exists (e.g. shipped with sources).
RUN mkdir -p ${HF_HOME} && chmod a+rwx ${HF_HOME}

# Gradio serves on 7860 by default (see README's `docker run -p 8888:7860`).
EXPOSE 7860

CMD ["python", "app.py"]
README.md CHANGED
@@ -1,10 +1,42 @@
1
  ---
2
- title: See Asr Outputs
3
- emoji: 🐨
4
- colorFrom: blue
5
- colorTo: red
6
  sdk: docker
7
- pinned: false
 
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ license: apache-2.0
3
+ title: See ASR outputs
 
 
4
  sdk: docker
5
+ emoji: 👀
6
+ colorFrom: green
7
+ colorTo: gray
8
+ short_description: 'See generated JSONL files made by ASR models as a dataframe'
9
  ---
10
 
11
+ ## Install
12
+
13
+ ```shell
14
+ uv venv --python 3.13.2
15
+
16
+ source .venv/bin/activate
17
+
18
+ uv pip install -r requirements.txt
19
+
20
+ # in development mode
21
+ uv pip install -r requirements-dev.txt
22
+ ```
23
+
24
+ ## Development
25
+
26
+ ```shell
27
+ gradio app.py
28
+ ```
29
+
30
+ ## Production
31
+
32
+ ### Build image
33
+
34
+ ```shell
35
+ docker build -t see-asr-outputs .
36
+ ```
37
+
38
+ ### Run
39
+
40
+ ```shell
41
+ docker run -it --rm -p 8888:7860 see-asr-outputs
42
+ ```
app.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import sys
import re  # NOTE(review): appears unused in this file — confirm before removing

from importlib.metadata import version

import polars as pl
import gradio as gr

# Config
# Maximum number of concurrent invocations of the inference handler.
concurrency_limit = 5

title = "See ASR Outputs"

# https://www.tablesgenerator.com/markdown_tables
authors_table = """
## Authors

Follow them on social networks and **contact** if you need any help or have any questions:

| <img src="https://avatars.githubusercontent.com/u/7875085?v=4" width="100"> **Yehor Smoliakov** |
|-------------------------------------------------------------------------------------------------|
| https://t.me/smlkw in Telegram |
| https://x.com/yehor_smoliakov at X |
| https://github.com/egorsmkv at GitHub |
| https://huggingface.co/Yehor at Hugging Face |
| or use [email protected] |
""".strip()

# Example inputs shown in the UI: [file path, batch-mode flag].
examples = [
    ["evaluation_results.jsonl", False],
    ["evaluation_results_batch.jsonl", True],
]

description_head = f"""
# {title}

## Overview

See generated JSONL files made by ASR models as a dataframe.
""".strip()

description_foot = f"""
{authors_table}
""".strip()

# Placeholder text — NOTE(review): not referenced by the UI code below.
metrics_value = """
Metrics will appear here.
""".strip()

# Runtime environment details rendered at the bottom of the page.
tech_env = f"""
#### Environment

- Python: {sys.version}
""".strip()

# Library versions rendered at the bottom of the page.
tech_libraries = f"""
#### Libraries

- gradio: {version("gradio")}
- polars: {version("polars")}
""".strip()
def inference(file_name, _batch_mode):
    """Load an ASR evaluation JSONL file and return it as a polars DataFrame.

    Args:
        file_name: Path to the uploaded JSONL (newline-delimited JSON) file.
        _batch_mode: When True, validate against the batch-result schema
            (plural columns: "filenames", "durations", …); otherwise the
            per-file schema ("filename", "duration", …).

    Returns:
        A polars DataFrame with bookkeeping columns removed and the
        "inference_total" column rounded to 2 decimal places.

    Raises:
        gr.Error: If no file was provided or required columns are missing.
    """
    if not file_name:
        raise gr.Error("Please paste your JSONL file.")

    df = pl.read_ndjson(file_name)

    required_columns = [
        "filename",
        "inference_start",
        "inference_end",
        "inference_total",
        "duration",
        "reference",
        "prediction",
    ]
    required_columns_batch = [
        "inference_start",
        "inference_end",
        "inference_total",
        "filenames",
        "durations",
        "references",
        "predictions",
    ]

    if _batch_mode:
        if not all(col in df.columns for col in required_columns_batch):
            raise gr.Error(
                f"Please provide a JSONL file with the following columns: {required_columns_batch}"
            )
    else:
        if not all(col in df.columns for col in required_columns):
            raise gr.Error(
                f"Please provide a JSONL file with the following columns: {required_columns}"
            )

    # Exclude bookkeeping columns. "filename" exists only in non-batch results
    # ("filenames" in batch mode), so drop non-strictly — a strict drop raised
    # ColumnNotFoundError for batch files (bug fix).
    df = df.drop(["inference_start", "inference_end", "filename"], strict=False)

    # Round "inference_total" to 2 decimal places for display.
    df = df.with_columns(pl.col("inference_total").round(2))

    return df
+
109
+
110
+ demo = gr.Blocks(
111
+ title=title,
112
+ analytics_enabled=False,
113
+ theme=gr.themes.Base(),
114
+ )
115
+
116
+ with demo:
117
+ gr.Markdown(description_head)
118
+
119
+ gr.Markdown("## Usage")
120
+
121
+ with gr.Row():
122
+ df = gr.DataFrame(
123
+ label="Dataframe",
124
+ )
125
+
126
+ with gr.Row():
127
+ with gr.Column():
128
+ jsonl_file = gr.File(label="A JSONL file")
129
+
130
+ batch_mode = gr.Checkbox(
131
+ label="Use batch mode",
132
+ )
133
+
134
+
135
+ gr.Button("Show").click(
136
+ inference,
137
+ concurrency_limit=concurrency_limit,
138
+ inputs=[jsonl_file, batch_mode],
139
+ outputs=df,
140
+ )
141
+
142
+ with gr.Row():
143
+ gr.Examples(
144
+ label="Choose an example",
145
+ inputs=[jsonl_file, batch_mode],
146
+ examples=examples,
147
+ )
148
+
149
+ gr.Markdown(description_foot)
150
+
151
+ gr.Markdown("### Gradio app uses:")
152
+ gr.Markdown(tech_env)
153
+ gr.Markdown(tech_libraries)
154
+
155
+ if __name__ == "__main__":
156
+ demo.queue()
157
+ demo.launch()
evaluation_results.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
evaluation_results_batch.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
requirements-dev.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ruff
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==5.23.0
2
+
3
+ polars==1.26.0