import sys import re from importlib.metadata import version import evaluate import polars as pl import gradio as gr from natsort import natsorted # Load evaluators wer = evaluate.load("wer") cer = evaluate.load("cer") # Config concurrency_limit = 5 title = "Evaluate ASR Outputs" # https://www.tablesgenerator.com/markdown_tables authors_table = """ ## Authors Follow them on social networks and **contact** if you need any help or have any questions: | **Yehor Smoliakov** | |-------------------------------------------------------------------------------------------------| | https://t.me/smlkw in Telegram | | https://x.com/yehor_smoliakov at X | | https://github.com/egorsmkv at GitHub | | https://huggingface.co/Yehor at Hugging Face | | or use egorsmkv@gmail.com | """.strip() examples = [ ["evaluation_results.jsonl", True, False], ] description_head = f""" # {title} ## Overview Upload a JSONL file generated by the ASR model. """.strip() description_foot = f""" {authors_table} """.strip() metrics_value = """ Metrics will appear here. """.strip() tech_env = f""" #### Environment - Python: {sys.version} """.strip() tech_libraries = f""" #### Libraries - evaluate: {version("evaluate")} - gradio: {version("gradio")} - jiwer: {version("jiwer")} - polars: {version("polars")} """.strip() def clean_value(x): s = ( x.replace("’", "'") .strip() .lower() .replace(":", " ") .replace(",", " ") .replace(".", " ") .replace("?", " ") .replace("!", " ") .replace("–", " ") .replace("«", " ") .replace("»", " ") .replace("—", " ") .replace("…", " ") .replace("/", " ") .replace("\\", " ") .replace("(", " ") .replace(")", " ") .replace("́", "") .replace('"', " ") ) s = re.sub(r" +", " ", s) s = s.strip() # print(s) return s def inference(file_name, clear_punctuation, show_chars, progress=gr.Progress()): if not file_name: raise gr.Error("Please paste your JSON file.") progress(0, desc="Calculating...") df = pl.read_ndjson(file_name) inference_seconds = df["inference_total"].sum() duration_seconds = df["duration"].sum() rtf = inference_seconds / duration_seconds references = df["reference"] if clear_punctuation: predictions = df["prediction"].map_elements(clean_value, return_dtype=pl.String) else: predictions = df["prediction"] # Evaluate wer_value = round(wer.compute(predictions=predictions, references=references), 4) cer_value = round(cer.compute(predictions=predictions, references=references), 4) inference_time = inference_seconds audio_duration = duration_seconds rtf = inference_time / audio_duration results = [] results.append(f"Metrics using `evaluate` library:") results.append("") results.append(f"- WER: {wer_value} metric, {round(wer_value * 100, 4)}%") results.append(f"- CER: {cer_value} metric, {round(cer_value * 100, 4)}%") results.append("") results.append(f"- Accuracy on words: {round(100 - 100 * wer_value, 4)}%") results.append(f"- Accuracy on chars: {round(100 - 100 * cer_value, 4)}%") results.append("") results.append( f"- Inference time: {round(inference_time, 4)} seconds, {round(inference_time / 60, 4)} mins, {round(inference_time / 60 / 60, 4)} hours" ) results.append( f"- Audio duration: {round(audio_duration, 4)} seconds, {round(audio_duration / 60 / 60, 4)} hours" ) results.append("") results.append(f"- RTF: {round(rtf, 4)}") if show_chars: all_chars = set() for pred in predictions: for c in pred: all_chars.add(c) sorted_chars = natsorted(list(all_chars)) results.append("") results.append(f"Chars in predictions:") results.append(f"{sorted_chars}") return "\n".join(results) demo = gr.Blocks( title=title, analytics_enabled=False, theme=gr.themes.Base(), ) with demo: gr.Markdown(description_head) gr.Markdown("## Usage") with gr.Row(): with gr.Column(): jsonl_file = gr.File(label="A JSONL file") clear_punctuation = gr.Checkbox( label="Clear punctuation, some chars and convert to lowercase", ) show_chars = gr.Checkbox( label="Show chars in predictions", ) metrics = gr.Textbox( label="Metrics", placeholder=metrics_value, show_copy_button=True, ) gr.Button("Calculate").click( inference, concurrency_limit=concurrency_limit, inputs=[jsonl_file, clear_punctuation, show_chars], outputs=metrics, ) with gr.Row(): gr.Examples( label="Choose an example", inputs=[jsonl_file, clear_punctuation, show_chars], examples=examples, ) gr.Markdown(description_foot) gr.Markdown("### Gradio app uses:") gr.Markdown(tech_env) gr.Markdown(tech_libraries) if __name__ == "__main__": demo.queue() demo.launch()