Spaces:
Running
Running
"""
Visualization module.

Provides functions to render HTML visualizations of the word alignment between reference and hypothesis texts,
and to generate the complete results HTML page with an embedded audio element and progress status.
"""
from itertools import zip_longest | |
from jiwer import process_words | |
import hashlib | |
def render_visualize_jiwer_result_html(ref: str, hyp: str, title: str = "", model_id: str = None) -> str:
    """
    Generate an HTML visualization of the alignment between reference and hypothesis texts.

    Every piece of text that originates from the inputs (words, title, model id) is
    HTML-escaped before it is placed into markup, so words containing quotes, ``<`` or
    ``&`` cannot break the generated attributes or inject elements.

    Args:
        ref: The reference text.
        hyp: The hypothesis (transcribed) text.
        title: A title for the evaluation block (e.g., model name).
        model_id: An identifier for the model, emitted as the ``data-model-id`` attribute.
            When None, the first 8 hex digits of the MD5 of ``title`` are used.

    Returns:
        An HTML string visualizing word-level alignments and error metrics.
    """
    # Local import so this function stays self-contained regardless of the
    # module-level import block.
    from html import escape

    # Derive a short, stable id from the title if none was provided
    if model_id is None:
        model_id = hashlib.md5(title.encode()).hexdigest()[:8]
    safe_model_id = escape(str(model_id))
    safe_title = escape(title)

    # Placeholder cells. &nbsp; (rather than a plain space, which HTML collapses)
    # keeps an otherwise empty cell from losing its line height.
    deleted_cell = '<span style="background-color: #ffb3d7; padding: 0 4px;">&nbsp;</span>'
    empty_ref_cell = '<span>&nbsp;</span>'

    def inserted_cell(ref_pos: int, word: str) -> str:
        """Render a hypothesis word that has no reference counterpart."""
        return (
            f'<span class="word-item hyp-word" data-ref-pos="{ref_pos}" data-inserted="true" '
            f'style="background-color: #99f7c8; padding: 0 4px;">{escape(word)}</span>'
        )

    # Process word alignment via jiwer
    word_output = process_words(ref, hyp)
    ref_tokens = word_output.references[0]
    hyp_tokens = word_output.hypotheses[0]

    # Each column is (reference cell HTML, hypothesis cell HTML, linked reference position)
    columns = []
    ref_position = 0  # index of the next reference word to be emitted
    for chunk in word_output.alignments[0]:
        if chunk.type == "equal":
            for word in ref_tokens[chunk.ref_start_idx:chunk.ref_end_idx]:
                text = escape(word)
                ref_cell = f'<span class="word-item ref-word" data-ref-pos="{ref_position}" data-ref-word="{text}">{text}</span>'
                hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{ref_position}" data-ref-word="{text}">{text}</span>'
                columns.append((ref_cell, hyp_cell, ref_position))
                ref_position += 1
        elif chunk.type == "delete":
            for word in ref_tokens[chunk.ref_start_idx:chunk.ref_end_idx]:
                text = escape(word)
                ref_cell = f'<span class="word-item ref-word" data-ref-pos="{ref_position}" data-ref-word="{text}">{text}</span>'
                columns.append((ref_cell, deleted_cell, ref_position))
                ref_position += 1
        elif chunk.type == "insert":
            # Inserted words are linked to the previous reference position
            # (position 0 when the insertion precedes every reference word).
            # ref_position is NOT advanced for inserts.
            last_ref_pos = max(0, ref_position - 1)
            for word in hyp_tokens[chunk.hyp_start_idx:chunk.hyp_end_idx]:
                columns.append((empty_ref_cell, inserted_cell(last_ref_pos, word), last_ref_pos))
        elif chunk.type == "substitute":
            ref_words = ref_tokens[chunk.ref_start_idx:chunk.ref_end_idx]
            hyp_words = hyp_tokens[chunk.hyp_start_idx:chunk.hyp_end_idx]
            # zip_longest tolerates spans of unequal length; the shorter side is padded with "".
            for ref_word, hyp_word in zip_longest(ref_words, hyp_words, fillvalue=""):
                if ref_word:  # only real reference words advance the position
                    ref_text = escape(ref_word)
                    ref_cell = (
                        f'<span class="word-item ref-word" data-ref-pos="{ref_position}" '
                        f'data-ref-word="{ref_text}" style="background-color: #dddddd;">{ref_text}</span>'
                    )
                    if hyp_word:
                        hyp_cell = (
                            f'<span class="word-item hyp-word" data-ref-pos="{ref_position}" data-subst="true" '
                            f'style="background-color: #ffc04d; padding: 0 4px;">{escape(hyp_word)}</span>'
                        )
                    else:
                        hyp_cell = deleted_cell
                    columns.append((ref_cell, hyp_cell, ref_position))
                    ref_position += 1
                elif hyp_word:
                    # Extra hypothesis word with no reference pair: link to the previous position
                    last_ref_pos = max(0, ref_position - 1)
                    columns.append((empty_ref_cell, inserted_cell(last_ref_pos, hyp_word), last_ref_pos))

    # Header row: metrics on one side, title in the middle, operation counts on the other
    metrics_results_str = f"WER: {word_output.wer * 100:.4f}%, WIL: {word_output.wil * 100:.4f}%"
    summary_operations_str = (
        f"Subs: {word_output.substitutions}, Dels: {word_output.deletions}, Insrt: {word_output.insertions}"
    )
    header_html = (
        f"<div dir='ltr' class='model-result' data-model-id='{safe_model_id}' "
        "style='font-size: 1.25em; margin-bottom: 10px; display: flex; justify-content: space-between; gap: 1.5em;'>"
        f"<div style='flex: 0 0 content;'>{metrics_results_str}</div>"
        f"<div>{safe_title}</div>"
        f"<div style='flex: 0 0 content;'>{summary_operations_str}</div></div>"
    )

    # One flex item per aligned column: reference word on top, hypothesis word below
    pair_cells = [
        (
            f'<div class="word-pair" data-ref-pos="{ref_pos}" style="display: flex; flex-direction: column; '
            'align-items: center; border-bottom: 1px solid grey; '
            'padding-left: 1em; font-family: monospace;">'
            f'<div style="text-align: center;">{ref_cell}</div>'
            f'<div style="text-align: center;">{hyp_cell}</div>'
            '</div>'
        )
        for ref_cell, hyp_cell, ref_pos in columns
    ]
    alignment_html = (
        f'<div class="word-alignment-container" data-model-id="{safe_model_id}" '
        'style="display: flex; flex-wrap: wrap; margin-bottom: 10px;">'
        + "".join(pair_cells)
        + '</div>'
    )

    return (
        f'<div class="model-block" data-model-id="{safe_model_id}" '
        'style="background: white; color: black; margin-bottom: 20px;">'
        + "\n".join([header_html, alignment_html])
        + '</div>'
    )
def generate_results_html(dataset_description: str, html_blocks: list, audio_file: str, timestamp: str, progress: tuple = None) -> str:
    """
    Generate the complete HTML results page including an audio player, all evaluation blocks, and progress status.

    ``dataset_description``, ``timestamp`` and ``audio_file`` are HTML-escaped before being
    placed in the page. ``html_blocks`` is inserted verbatim because it is expected to
    already be rendered markup.

    Args:
        dataset_description: A string describing the dataset.
        html_blocks: A list of HTML strings (one per model evaluation).
        audio_file: The filename of the saved audio sample, used as the audio ``src``.
        timestamp: The timestamp string used in titles.
        progress: A tuple (done, total) indicating the number of models evaluated so far.
            When falsy, no progress line is rendered.

    Returns:
        A complete HTML document as a string.
    """
    # Local import so this function stays self-contained regardless of the
    # module-level import block.
    from html import escape

    safe_description = escape(str(dataset_description))
    safe_timestamp = escape(str(timestamp))
    safe_audio_src = escape(str(audio_file))

    progress_html = ""
    auto_scroll_to_bottom_on_load = ""
    if progress:
        done, total = progress
        progress_html = (
            f"<div style='margin-bottom:20px;'><strong>Progress:</strong> {done} of {total} models evaluated.</div>"
        )
        if done < total:
            # While evaluation is still running, jump to the newest result on load
            auto_scroll_to_bottom_on_load = """
<script type="text/javascript">
    document.getElementById('results-container').scrollTop = document.getElementById('results-container').scrollHeight;
</script>
"""

    refresh_page_control = """
<button onclick="location.reload();">Refresh Page</button>
"""

    audio_element = f"""
<div style="margin-bottom: 20px;">
    <audio controls>
        <source src="{safe_audio_src}" type="audio/mp3">
        Your browser does not support the audio element.
    </audio>
</div>
"""

    # JavaScript for reference-based word highlighting with sticky (click) selection.
    # Kept as a plain (non-f) string so the JS braces and ${...} templates need no escaping.
    highlighting_js = """
<script type="text/javascript">
document.addEventListener('DOMContentLoaded', function() {
    // Track the currently selected reference position
    let selectedRefPos = null;

    // Helper function to apply highlighting
    function highlightPosition(refPos, isSticky = false) {
        // Apply highlighting style
        const highlightStyle = 'underline';
        // Highlight all elements with the matching reference position
        document.querySelectorAll(`.word-item[data-ref-pos="${refPos}"]`).forEach(el => {
            el.style.textDecoration = highlightStyle;
            el.style.textDecorationThickness = '2px';
            el.style.textDecorationColor = isSticky ? 'red' : 'blue';
        });
    }

    // Helper function to remove highlighting
    function removeHighlighting(refPos) {
        // Don't remove highlighting if this is the selected position
        if (refPos === selectedRefPos) return;
        document.querySelectorAll(`.word-item[data-ref-pos="${refPos}"]`).forEach(el => {
            el.style.textDecoration = 'none';
        });
    }

    // Helper function to clear all sticky highlighting
    function clearStickyHighlighting() {
        if (selectedRefPos !== null) {
            document.querySelectorAll(`.word-item[data-ref-pos="${selectedRefPos}"]`).forEach(el => {
                el.style.textDecoration = 'none';
            });
            selectedRefPos = null;
        }
    }

    // Use event delegation for all word-alignment-containers
    document.querySelectorAll('.word-alignment-container').forEach(container => {
        // Mouseover (replaces mouseenter on individual elements)
        container.addEventListener('mouseover', function(event) {
            const target = event.target.closest('.word-item');
            if (!target) return;
            const refPos = target.dataset.refPos;
            if (!refPos) return;
            // Keep the sticky colour when hovering the selected position;
            // otherwise the hover colour would overwrite it and never be restored.
            highlightPosition(refPos, refPos === selectedRefPos);
        });

        // Mouseout (replaces mouseleave on individual elements)
        container.addEventListener('mouseout', function(event) {
            const target = event.target.closest('.word-item');
            if (!target) return;
            const refPos = target.dataset.refPos;
            if (!refPos) return;
            removeHighlighting(refPos);
        });

        // Click for sticky highlighting
        container.addEventListener('click', function(event) {
            const target = event.target.closest('.word-item');
            if (!target) return;
            const refPos = target.dataset.refPos;
            if (!refPos) return;
            // If this position is already selected, clear it
            if (selectedRefPos === refPos) {
                clearStickyHighlighting();
            } else {
                // Clear any existing sticky highlighting
                clearStickyHighlighting();
                // Set new selected position
                selectedRefPos = refPos;
                // Apply sticky highlighting
                highlightPosition(refPos, true);
            }
        });
    });

    // Add a click handler on the document to clear sticky highlighting when clicking elsewhere
    document.addEventListener('click', function(e) {
        // If the click wasn't on a word item or word pair, clear sticky highlighting
        if (!e.target.closest('.word-item') && !e.target.closest('.word-pair') && selectedRefPos !== null) {
            clearStickyHighlighting();
        }
    });
});
</script>
"""

    # CSS for hover effects
    highlighting_css = """
<style>
.word-item {
    cursor: pointer;
    transition: all 0.2s;
}
</style>
"""

    # Evaluation blocks are already-rendered HTML and are inserted as-is
    blocks_html = "".join(html_blocks)

    results_html = f"""
<html dir="rtl" lang="he">
<head>
<meta charset="utf-8">
<title>Evaluation Results - {safe_description} - {safe_timestamp}</title>
{highlighting_css}
</head>
<body>
<h3>Evaluation Results - {safe_description} - {safe_timestamp}</h3>
{progress_html}{refresh_page_control}
{audio_element}
<div id="results-container" style="max-height: 80vh; overflow-y: auto;">
{blocks_html}
</div>
{highlighting_js}
{auto_scroll_to_bottom_on_load}
</body>
</html>
"""
    return results_html