File size: 12,793 Bytes
2f5cf2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
"""
Visualization module.
Provides functions to render HTML visualizations of word alignment between reference and hypothesis texts,
and to generate the complete results HTML page with an embedded audio element and progress status.
"""

from itertools import zip_longest
from jiwer import process_words
import hashlib

def render_visualize_jiwer_result_html(ref: str, hyp: str, title: str = "", model_id: str = None) -> str:
    """
    Generate an HTML visualization of the alignment between reference and hypothesis texts.

    The texts are aligned with jiwer's ``process_words``; every aligned pair is rendered
    as a two-row column (reference on top, hypothesis below). Each word span carries a
    ``data-ref-pos`` attribute holding the index of the reference word it is linked to.
    Inserted hypothesis words are linked to the preceding reference word (or to
    position 0 when nothing precedes them).

    All words, the title and the model id are HTML-escaped before being placed into
    the markup, so tokens containing ``"``, ``<``, ``>`` or ``&`` cannot break
    attributes or inject markup.

    Args:
        ref: The reference text.
        hyp: The hypothesis (transcribed) text.
        title: A title for the evaluation block (e.g., model name).
        model_id: A unique identifier for the model, emitted as ``data-model-id``.
            When None, the first 8 hex digits of the MD5 of ``title`` are used.

    Returns:
        An HTML string visualizing word-level alignments and error metrics.
    """
    # Local import keeps this function self-contained with respect to the file's imports.
    from html import escape

    # Derive a stable id from the title if none was provided
    if model_id is None:
        model_id = hashlib.md5(title.encode()).hexdigest()[:8]

    safe_model_id = escape(str(model_id), quote=True)
    safe_title = escape(title)

    # Placeholder cells for the side that has no word in a column
    empty_ref_cell = '<span>&nbsp;</span>'
    deleted_hyp_cell = '<span style="background-color: #ffb3d7; padding: 0 4px;">&nbsp;</span>'

    # Process word alignment via jiwer
    word_output = process_words(ref, hyp)
    alignment_chunks = word_output.alignments[0]
    ref_tokens = word_output.references[0]
    hyp_tokens = word_output.hypotheses[0]

    columns = []
    ref_position = 0  # Index of the next reference word to be emitted

    for chunk in alignment_chunks:
        if chunk.type == "equal":
            for word in ref_tokens[chunk.ref_start_idx : chunk.ref_end_idx]:
                safe_word = escape(word, quote=True)
                ref_cell = f'<span class="word-item ref-word" data-ref-pos="{ref_position}" data-ref-word="{safe_word}">{safe_word}</span>'
                hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{ref_position}" data-ref-word="{safe_word}">{safe_word}</span>'
                columns.append((ref_cell, hyp_cell, ref_position))
                ref_position += 1

        elif chunk.type == "delete":
            for word in ref_tokens[chunk.ref_start_idx : chunk.ref_end_idx]:
                safe_word = escape(word, quote=True)
                ref_cell = f'<span class="word-item ref-word" data-ref-pos="{ref_position}" data-ref-word="{safe_word}">{safe_word}</span>'
                columns.append((ref_cell, deleted_hyp_cell, ref_position))
                ref_position += 1

        elif chunk.type == "insert":
            # Inserted words are linked to the previous reference position
            # (position 0 when the insertion happens before any reference word).
            last_ref_pos = max(0, ref_position - 1)
            for word in hyp_tokens[chunk.hyp_start_idx : chunk.hyp_end_idx]:
                safe_word = escape(word, quote=True)
                hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{last_ref_pos}" data-inserted="true" style="background-color: #99f7c8; padding: 0 4px;">{safe_word}</span>'
                columns.append((empty_ref_cell, hyp_cell, last_ref_pos))
                # Note: ref_position is NOT incremented for inserts

        elif chunk.type == "substitute":
            ref_words = ref_tokens[chunk.ref_start_idx : chunk.ref_end_idx]
            hyp_words = hyp_tokens[chunk.hyp_start_idx : chunk.hyp_end_idx]

            # zip_longest tolerates unequal ranges; the missing side is filled with "".
            for ref_word, hyp_word in zip_longest(ref_words, hyp_words, fillvalue=""):
                if ref_word:  # Only advance the position for actual reference words
                    safe_ref = escape(ref_word, quote=True)
                    ref_cell = f'<span class="word-item ref-word" data-ref-pos="{ref_position}" data-ref-word="{safe_ref}" style="background-color: #dddddd;">{safe_ref}</span>'
                    if hyp_word:
                        safe_hyp = escape(hyp_word, quote=True)
                        hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{ref_position}" data-subst="true" style="background-color: #ffc04d; padding: 0 4px;">{safe_hyp}</span>'
                    else:
                        hyp_cell = deleted_hyp_cell
                    columns.append((ref_cell, hyp_cell, ref_position))
                    ref_position += 1
                elif hyp_word:  # Extra hypothesis word with no reference pair
                    # Link to the previous reference position
                    last_ref_pos = max(0, ref_position - 1)
                    safe_hyp = escape(hyp_word, quote=True)
                    hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{last_ref_pos}" data-inserted="true" style="background-color: #99f7c8; padding: 0 4px;">{safe_hyp}</span>'
                    columns.append((empty_ref_cell, hyp_cell, last_ref_pos))

    # Header row: metrics on one side, title in the middle, operation counts on the other
    metrics_results_str = f"WER: {word_output.wer * 100:0.04f}%, WIL: {word_output.wil * 100:0.04f}%"
    summary_operations_str = f"Subs: {word_output.substitutions}, Dels: {word_output.deletions}, Insrt: {word_output.insertions}"

    header_html = (
        f"<div dir='ltr' class='model-result' data-model-id='{safe_model_id}' style='font-size: 1.25em; margin-bottom: 10px; display: flex; justify-content: space-between; gap: 1.5em;'>"
        f"<div style='flex: 0 0 content;'>{metrics_results_str}</div>"
        f"<div>{safe_title}</div>"
        f"<div style='flex: 0 0 content;'>{summary_operations_str}</div></div>"
    )

    # One flex item per aligned column; joined once instead of repeated string +=
    column_html = "".join(
        f'<div class="word-pair" data-ref-pos="{ref_pos}" style="display: flex; flex-direction: column; align-items: center; border-bottom: 1px solid grey; '
        'padding-left: 1em; font-family: monospace;">'
        f'<div style="text-align: center;">{ref_cell}</div>'
        f'<div style="text-align: center;">{hyp_cell}</div>'
        '</div>'
        for ref_cell, hyp_cell, ref_pos in columns
    )
    flex_container = (
        f'<div class="word-alignment-container" data-model-id="{safe_model_id}" style="display: flex; flex-wrap: wrap; margin-bottom: 10px;">'
        + column_html
        + '</div>'
    )

    html_blocks = [header_html, flex_container]
    html_string = f'<div class="model-block" data-model-id="{safe_model_id}" style="background: white; color: black; margin-bottom: 20px;">' + "\n".join(html_blocks) + '</div>'

    return html_string

def generate_results_html(dataset_description: str, html_blocks: list, audio_file: str, timestamp: str, progress: tuple = None) -> str:
    """
    Generate the complete HTML results page including an audio player, all evaluation blocks, and progress status.

    ``dataset_description``, ``timestamp`` and ``audio_file`` are HTML-escaped before
    being placed into the page. ``html_blocks`` is inserted verbatim, because it is
    expected to already contain markup.

    Args:
        dataset_description: A string describing the dataset (shown in the title and heading).
        html_blocks: A list of HTML strings (one per model evaluation), concatenated as-is.
        audio_file: The filename of the saved audio sample, used as the audio source.
        timestamp: The timestamp string used in titles.
        progress: A tuple (done, total) indicating the number of models evaluated so far.
            When given and ``done < total``, a script is appended that scrolls the
            results container to its bottom.

    Returns:
        A complete HTML document as a string.
    """
    # Local import keeps this function self-contained with respect to the file's imports.
    from html import escape

    safe_description = escape(dataset_description)
    safe_timestamp = escape(timestamp)
    safe_audio_file = escape(audio_file, quote=True)

    progress_html = ""
    auto_scroll_to_bottom_on_load = ""
    if progress:
        done, total = progress
        progress_html = f"<div style='margin-bottom:20px;'><strong>Progress:</strong> {done} of {total} models evaluated.</div>"
        if done < total:
            # Evaluation still running: jump to the most recently appended block.
            auto_scroll_to_bottom_on_load = """
            <script type="text/javascript">
                document.getElementById('results-container').scrollTop = document.getElementById('results-container').scrollHeight;
            </script>
            """

    refresh_page_control = """
    <button onclick="location.reload();">Refresh Page</button>
    """
    audio_element = f"""
    <div style="margin-bottom: 20px;">
        <audio controls>
            <source src="{safe_audio_file}" type="audio/mp3">
            Your browser does not support the audio element.
        </audio>
    </div>
    """

    # JavaScript for reference-based word highlighting with sticky functionality.
    # This is a plain (non-f) string, so the JS template literals need no brace escaping.
    highlighting_js = """
    <script type="text/javascript">
        document.addEventListener('DOMContentLoaded', function() {
        // Track the currently selected reference position
        let selectedRefPos = null;
        
        // Helper function to apply highlighting
        function highlightPosition(refPos, isSticky = false) {
            // Apply highlighting style
            const highlightStyle = 'underline';
            
            // Highlight all elements with the matching reference position
            document.querySelectorAll(`.word-item[data-ref-pos="${refPos}"]`).forEach(el => {
                el.style.textDecoration = highlightStyle;
                el.style.textDecorationThickness = '2px';
                el.style.textDecorationColor = isSticky ? 'red' : 'blue';
            });
        }
        
        // Helper function to remove highlighting
        function removeHighlighting(refPos) {
            // Don't remove highlighting if this is the selected position
            if (refPos === selectedRefPos) return;
            
            document.querySelectorAll(`.word-item[data-ref-pos="${refPos}"]`).forEach(el => {
                el.style.textDecoration = 'none';
            });
        }
        
        // Helper function to clear all sticky highlighting
        function clearStickyHighlighting() {
            if (selectedRefPos !== null) {
                document.querySelectorAll(`.word-item[data-ref-pos="${selectedRefPos}"]`).forEach(el => {
                    el.style.textDecoration = 'none';
                });
                
                selectedRefPos = null;
            }
        }
        
        // Use event delegation for all word-alignment-containers
        document.querySelectorAll('.word-alignment-container').forEach(container => {
            // Mouseover (replaces mouseenter on individual elements)
            container.addEventListener('mouseover', function(event) {
                const target = event.target.closest('.word-item');
                if (!target) return;
                
                const refPos = target.dataset.refPos;
                if (!refPos) return;
                
                // Keep the sticky colour when hovering the selected position;
                // otherwise it would be repainted as a hover and never restored.
                highlightPosition(refPos, refPos === selectedRefPos);
            });
            
            // Mouseout (replaces mouseleave on individual elements)
            container.addEventListener('mouseout', function(event) {
                const target = event.target.closest('.word-item');
                if (!target) return;
                
                const refPos = target.dataset.refPos;
                if (!refPos) return;
                
                removeHighlighting(refPos);
            });
            
            // Click for sticky highlighting
            container.addEventListener('click', function(event) {
                const target = event.target.closest('.word-item');
                if (!target) return;
                
                const refPos = target.dataset.refPos;
                if (!refPos) return;
                
                // If this position is already selected, clear it
                if (selectedRefPos === refPos) {
                    clearStickyHighlighting();
                } else {
                    // Clear any existing sticky highlighting
                    clearStickyHighlighting();
                    
                    // Set new selected position
                    selectedRefPos = refPos;
                    
                    // Apply sticky highlighting
                    highlightPosition(refPos, true);
                }
            });
        });
        
        // Add a click handler on the document to clear sticky highlighting when clicking elsewhere
        document.addEventListener('click', function(e) {
            // If the click wasn't on a word item or word pair, clear sticky highlighting
            if (!e.target.closest('.word-item') && !e.target.closest('.word-pair') && selectedRefPos !== null) {
                clearStickyHighlighting();
            }
        });
    });
    </script>
    """

    # CSS for hover effects
    highlighting_css = """
    <style>
        .word-item {
            cursor: pointer;
            transition: all 0.2s;
        }
    </style>
    """

    # The highlighting script is placed after the results container in the body.
    results_html = f"""
    <html dir="rtl" lang="he"> 
    <head>
        <meta charset="utf-8">
        <title>Evaluation Results - {safe_description} - {safe_timestamp}</title>
        {highlighting_css}
    </head>
    <body>
        <h3>Evaluation Results - {safe_description} - {safe_timestamp}</h3>
        {progress_html}{refresh_page_control}
        {audio_element}
        <div id="results-container" style="max-height: 80vh; overflow-y: auto;">
        {''.join(html_blocks)}
        </div>
        {highlighting_js}
        {auto_scroll_to_bottom_on_load}
    </body>
    </html>
    """
    return results_html