# NOTE(review): the next comment line was stray bare text at the very top of
# the file ("Evaluation Results - {dataset_description}"). It looks like text
# leaked from the results-page template; kept as a comment so the module
# parses -- confirm and remove.
# Evaluation Results - {dataset_description}

"""Visualization module.

Provides functions to render HTML visualizations of word alignment between
reference and hypothesis texts, and to generate the complete results HTML page
with an embedded audio element and progress status.
"""

# NOTE(review): every string template below contains only whitespace and
# placeholders where HTML / JavaScript / CSS markup would be expected. The
# markup appears to have been lost before this file reached review; the
# templates are reproduced exactly as found and must be restored from the
# original source -- TODO confirm.

import hashlib
import html
from itertools import zip_longest
from typing import Optional

from jiwer import process_words

# Placeholder cell used on the side of a column that has no word.
_BLANK_CELL = ' '


def _build_alignment_columns(word_output) -> list:
    """Turn the first sentence's alignment chunks into display columns.

    Args:
        word_output: The object returned by ``jiwer.process_words``; only
            ``alignments[0]``, ``references[0]`` and ``hypotheses[0]`` are read.

    Returns:
        A list of ``(ref_cell, hyp_cell, ref_position)`` tuples, one per
        displayed column. ``ref_position`` is the index of the reference word
        the column belongs to; columns holding only a hypothesis word reuse the
        index of the preceding reference word (or 0 at the very start).
    """
    ref_tokens = word_output.references[0]
    hyp_tokens = word_output.hypotheses[0]

    columns = []
    ref_position = 0  # Index of the next reference word to be placed.

    for chunk in word_output.alignments[0]:
        if chunk.type == "equal":
            for word in ref_tokens[chunk.ref_start_idx:chunk.ref_end_idx]:
                # Escape so tokens such as "<unk>" or "&" survive as text.
                safe_word = html.escape(word)
                ref_cell = f'{safe_word}'
                hyp_cell = f'{safe_word}'
                columns.append((ref_cell, hyp_cell, ref_position))
                ref_position += 1

        elif chunk.type == "delete":
            for word in ref_tokens[chunk.ref_start_idx:chunk.ref_end_idx]:
                ref_cell = f'{html.escape(word)}'
                columns.append((ref_cell, _BLANK_CELL, ref_position))
                ref_position += 1

        elif chunk.type == "insert":
            # Inserted words have no reference word of their own, so they are
            # linked to the previous reference position (0 at the beginning).
            # ref_position is deliberately NOT advanced for inserts.
            last_ref_pos = max(0, ref_position - 1)
            for word in hyp_tokens[chunk.hyp_start_idx:chunk.hyp_end_idx]:
                hyp_cell = f'{html.escape(word)}'
                columns.append((_BLANK_CELL, hyp_cell, last_ref_pos))

        elif chunk.type == "substitute":
            ref_words = ref_tokens[chunk.ref_start_idx:chunk.ref_end_idx]
            hyp_words = hyp_tokens[chunk.hyp_start_idx:chunk.hyp_end_idx]
            for ref_word, hyp_word in zip_longest(ref_words, hyp_words, fillvalue=""):
                if ref_word:
                    # Only actual reference words advance the position.
                    ref_cell = f'{html.escape(ref_word)}'
                    hyp_cell = f'{html.escape(hyp_word)}' if hyp_word else _BLANK_CELL
                    columns.append((ref_cell, hyp_cell, ref_position))
                    ref_position += 1
                elif hyp_word:
                    # Extra hypothesis word with no reference partner: link it
                    # to the previous reference position.
                    last_ref_pos = max(0, ref_position - 1)
                    hyp_cell = f'{html.escape(hyp_word)}'
                    columns.append((_BLANK_CELL, hyp_cell, last_ref_pos))

    return columns


def render_visualize_jiwer_result_html(ref: str, hyp: str, title: str = "", model_id: Optional[str] = None) -> str:
    """Generate an HTML visualization of the alignment between two texts.

    Args:
        ref: The reference text.
        hyp: The hypothesis (transcribed) text.
        title: A title for the evaluation block (e.g., model name).
        model_id: A unique identifier for the model. When omitted, the first
            eight hex digits of the MD5 digest of ``title`` are used.

    Returns:
        A string containing the WER/WIL summary, the title, the
        substitution/deletion/insertion counts, and one cell pair per
        alignment column. Reference and hypothesis words are HTML-escaped.
    """
    # Use a digest of the title as model_id if none was provided.
    if model_id is None:
        model_id = hashlib.md5(title.encode()).hexdigest()[:8]
    # NOTE(review): model_id is not referenced by any surviving template;
    # presumably it was part of per-word element IDs -- TODO confirm.

    # Process word alignment via jiwer.
    word_output = process_words(ref, hyp)
    columns = _build_alignment_columns(word_output)

    metrics_results_str = f"WER: {word_output.wer * 100:0.04f}%, WIL: {word_output.wil * 100:0.04f}%"
    summary_operations_str = f"Subs: {word_output.substitutions}, Dels: {word_output.deletions}, Insrt: {word_output.insertions}"

    html_blocks = [
        f"\n\n"
        f"\n\n{metrics_results_str}\n\n"
        f"\n\n{title}\n\n"
        f"\n\n{summary_operations_str}\n\n"
    ]

    # Collect the pieces and join once instead of repeated string +=.
    # NOTE(review): ref_pos is not referenced by the surviving cell template;
    # presumably it was emitted as an attribute on each cell -- TODO confirm.
    container_parts = ['\n\n']
    for ref_cell, hyp_cell, ref_pos in columns:
        container_parts.append(
            '\n\n'
            f'{ref_cell}\n'
            f'{hyp_cell}\n'
            '\n\n'
        )
    container_parts.append('\n\n')
    html_blocks.append("".join(container_parts))

    return '\n\n' + "\n".join(html_blocks) + '\n\n'


def generate_results_html(dataset_description: str, html_blocks: list, audio_file: str, timestamp: str, progress: Optional[tuple] = None) -> str:
    """Generate the complete results page from per-model evaluation blocks.

    Args:
        dataset_description: A string describing the dataset.
        html_blocks: A list of HTML strings (one per model evaluation); they
            are concatenated as-is, without escaping.
        audio_file: The filename of the saved audio sample.
        timestamp: The timestamp string used in titles.
        progress: A tuple ``(done, total)`` indicating the number of models
            evaluated so far, or ``None`` to omit the progress line.

    Returns:
        The assembled page as a single string.
    """
    progress_html = ""
    auto_scroll_to_bottom_on_load = ""
    if progress:
        done, total = progress
        progress_html = f"\n\nProgress: {done} of {total} models evaluated.\n\n"
        if done < total:
            # Only set while evaluation is still in progress.
            auto_scroll_to_bottom_on_load = """ """

    refresh_page_control = """ """

    # NOTE(review): audio_file is not referenced by the surviving template;
    # presumably it was the source of the audio element -- TODO confirm.
    audio_element = f"""\n\n"""

    # Reference-based word highlighting script (content missing, see module note).
    highlighting_js = """ """

    # Hover-effect styles (content missing, see module note).
    highlighting_css = """ """

    results_html = f""" Evaluation Results - {dataset_description} - {timestamp} {highlighting_css}

Evaluation Results - {dataset_description} - {timestamp}

{progress_html}{refresh_page_control} {audio_element}

{''.join(html_blocks)}

{highlighting_js} {auto_scroll_to_bottom_on_load} """
    return results_html