Spaces:

yoad
/

visualize_eval_results

Running

visualize_eval_results / src / visual_eval / evaluator.py

Yoad

First commit with actual logic

2f5cf2f 4 days ago

2.06 kB

	"""
	Evaluator module.
	Provides functions to evaluate a given model on a dataset sample using the Faster Whisper model,
	and generate HTML visualization blocks of the word alignment.
	"""

	import concurrent.futures
	import gc
	import io
	import queue
	import threading
	from typing import Dict, Generator, List

	import soundfile as sf
	from hebrew import Hebrew
	from tqdm import tqdm
	from transformers.models.whisper.english_normalizer import BasicTextNormalizer

	from visual_eval.visualization import render_visualize_jiwer_result_html


	class HebrewTextNormalizer(BasicTextNormalizer):
	    """Text normalizer for Hebrew, layered on top of ``BasicTextNormalizer``.

	    Calling an instance applies, in order:

	    1. niqqud removal via ``Hebrew(text).no_niqqud()``,
	    2. removal of invisible Unicode formatting/directional characters,
	    3. removal of ASCII double and single quote characters,
	    4. the parent ``BasicTextNormalizer.__call__`` normalization.
	    """

	    # Invisible formatting characters that carry no textual content and are
	    # deleted before the parent normalizer runs.
	    _SUPERFLUOUS_CHARS = (
	        "\u061c"  # Arabic letter mark
	        "\u200b\u200c\u200d"  # Zero-width space, non-joiner, joiner
	        "\u200e\u200f"  # LTR and RTL marks
	        "\u202a\u202b\u202c\u202d\u202e"  # LTR/RTL embedding, pop, override
	        "\u2066\u2067\u2068\u2069"  # Isolate controls
	        "\ufeff"  # Zero-width no-break space
	    )

	    # ASCII double and single quote characters.
	    _QUOTE_CHARS = "\"'"

	    def __init__(self, *args, **kwargs):
	        """Build the deletion tables and initialise the parent normalizer.

	        Args:
	            *args: Positional arguments forwarded unchanged to
	                ``BasicTextNormalizer.__init__``.
	            **kwargs: Keyword arguments forwarded unchanged to
	                ``BasicTextNormalizer.__init__``.
	        """
	        # The signature must be variadic: with ``(self, args, *kwargs)`` a
	        # first positional argument was mandatory and keyword arguments were
	        # rejected, so ``HebrewTextNormalizer()`` could not be constructed.
	        super().__init__(*args, **kwargs)

	        # Three-argument maketrans maps every listed character to None,
	        # i.e. str.translate deletes it.
	        self.superfluous_hebrew_unicode_symbols_translator = str.maketrans(
	            "", "", self._SUPERFLUOUS_CHARS
	        )
	        self.quotes_translator = str.maketrans("", "", self._QUOTE_CHARS)

	    def __remove_niqqud(self, text: str) -> str:
	        """Return *text* with niqqud stripped by the ``hebrew`` package."""
	        return Hebrew(text).no_niqqud().string

	    def __remove_superfluous_hebrew_unicode_symbols(self, text: str) -> str:
	        """Return *text* without the invisible formatting characters."""
	        return text.translate(self.superfluous_hebrew_unicode_symbols_translator)

	    def __remove_quotes(self, text: str) -> str:
	        """Return *text* without ``"`` and ``'`` characters."""
	        return text.translate(self.quotes_translator)

	    def __call__(self, text: str) -> str:
	        """Normalize *text* and return the result.

	        The Hebrew-specific clean-up runs first; the parent normalizer is
	        applied last to the already-cleaned string.
	        """
	        text = self.__remove_niqqud(text)
	        text = self.__remove_superfluous_hebrew_unicode_symbols(text)
	        text = self.__remove_quotes(text)
	        return super().__call__(text)