Spaces:

aletrn
/

ai-pronunciation-trainer

Running

App Files Files Community

ai-pronunciation-trainer / static /javascript /callbacks.js

alessandro trinca tornidor

feat: port whisper and faster-whisper support from https://github.com/Thiagohgl/ai-pronunciation-trainer

85b7206 3 months ago

history blame contribute delete

24.3 kB



	// Audio recording / playback state shared by the callbacks in this file
	let mediaRecorder; // MediaRecorder instance, created in startMediaDevice()
	let audioChunks; // data chunks collected while a recording is running
	let audioBlob; // Blob assembled from audioChunks when the recording stops
	let stream; // microphone stream returned by getUserMedia()
	let audioRecorded; // Audio element wrapping the last recorded or uploaded audio
	const ctx = new AudioContext();
	let currentAudioForPlaying; // buffer handed to playback()
	let lettersOfWordAreCorrect = [];

	// UI-related variables
	const page_title = "AI Pronunciation Trainer";
	// Colours indexed by the accuracy category of a word
	const accuracy_colors = ["green", "orange", "red"];
	// Accuracy limits used to pick the feedback sound
	let badScoreThreshold = 30;
	let mediumScoreThreshold = 70;
	// Running totals shown in the "section_accuracy" element
	let currentSample = 0;
	let currentScore = 0;
	// Difficulty category sent to the backend and its score weight
	let sample_difficult = 0;
	let scoreMultiplier = 1;
	// Flags describing the current state of the page
	let playAnswerSounds = true;
	let isNativeSelectedForPlayback = true;
	let isRecording = false;
	let serverIsInitialized = false;
	let serverWorking = true;
	let languageFound = true;
	let currentSoundRecorded = false;
	// Data of the sample currently displayed
	let currentText;
	let currentIpa;
	let real_transcripts_ipa;
	let matched_transcripts_ipa;
	let wordCategories;
	let startTime;
	let endTime;
	// jQuery clone of "#single_word_ipa_pair", taken in initializeServer()
	let SingleWordIpaPairBackup;

	// API related variables
	let AILanguage = "de"; // Standard is German

	/**
	 * Extract the API key from a raw `document.cookie` string.
	 * The key is whatever sits between the first and the second "=" of the string.
	 * NOTE(review): this assumes the key cookie is the first (or only) cookie — confirm against the flask code that sets it.
	 * @param {string} cookieString - value of `document.cookie`
	 * @returns {string|undefined} the key, or undefined when the string contains no "="
	 */
	const readSTScoreAPIKeyFromCookie = (cookieString) => cookieString.split("=")[1];

	// Read the Public Key from an env variable (it's managed within the python/flask code - STScoreAPIKey).
	// If, for some reason, you would like a private one, send me a message and we can discuss some possibilities.
	// A `const` declared inside the `try` block is block-scoped and therefore invisible to the fetch calls
	// further down, so the key is published on `window` instead. A global `STScoreAPIKey` that already
	// exists (for example one injected by the page) is left untouched.
	try {
		if (typeof STScoreAPIKey === "undefined") {
			window.STScoreAPIKey = readSTScoreAPIKeyFromCookie(document.cookie);
		}
	} catch (error) {
		console.log("STScoreAPIKey::error:", error, "#")
	}

	// Base URLs of the two backends; an empty string means "same origin as the page".
	// Alternatives kept for reference: 'http://127.0.0.1:3001', 'https://a3hj0l2j2m.execute-api.eu-central-1.amazonaws.com/Prod'
	let apiMainPathSample = "";
	// Alternative kept for reference: 'https://wrg7ayuv7i.execute-api.eu-central-1.amazonaws.com/Prod'
	let apiMainPathSTS = "";

	// Texts shown to the user (some of them are also compared against the editable sentence box)
	const defaultOriginalScript = "Click on the bar on the right to generate a new sentence (please use chrome web browser).";
	const defaultErrorScript = "Server error. Either the daily quota of the server is over or there was some internal error. You can try to generate a new sample in a few seconds. If the error persist, try comming back tomorrow or download the local version from Github :)";
	const editErrorScript = "Please edit this text before generating the IPA for a custom sentence!";
	const browserUnsupported = "Browser unsupported";
	const recordingError = "Recording error, please try again or restart page.";

	// Variables to playback accuracy sounds
	// Folder the feedback sounds are fetched from (remote alternative: 'https://stscore-sounds-bucket.s3.eu-central-1.amazonaws.com')
	let soundsPath = "../static";
	// Decoded feedback sounds, filled in by cacheSoundFiles()
	let soundFileGood = null;
	let soundFileOkay = null;
	let soundFileBad = null;

	// Speech generation
	var synth = window.speechSynthesis;
	let voice_idx = 0;
	// Voice selected by changeLanguage(); null until a matching voice was found
	let voice_synth = null;

	//############################ UI general control functions ###################
	/**
	 * Re-enable the main controls once a blocking operation has finished.
	 * The "play recorded audio" control is only re-enabled when a recording exists.
	 * @param {boolean} unlockIPACustomText - when true the "buttonCustomText" element is enabled as well
	 */
	const unblockUI = (unlockIPACustomText = false) => {
		const nextButton = document.getElementById("buttonNext");

		document.getElementById("recordAudio").classList.remove('disabled');
		document.getElementById("playSampleAudio").classList.remove('disabled');
		nextButton.onclick = () => getNextSample();
		// blockUI() puts 'disabled' on the button itself, so it has to be taken off here too
		nextButton.classList.remove('disabled');
		document.getElementById("nextButtonDiv").classList.remove('disabled');
		document.getElementById("original_script").classList.remove('disabled');
		nextButton.style["background-color"] = '#58636d';

		if (currentSoundRecorded) {
			document.getElementById("playRecordedAudio").classList.remove('disabled');
		}

		enableElementWithClass("input-uploader-audio-file");
		if (unlockIPACustomText) {
			enableElementWithClass("buttonCustomText");
		}
	};

	/**
	 * Disable the main controls while a request, a recording or a playback is running.
	 * The "next" button additionally loses its click handler.
	 */
	const blockUI = () => {
		// Tag one element with the 'disabled' CSS class
		const markDisabled = (elementId) => {
			document.getElementById(elementId).classList.add('disabled');
		};

		markDisabled("recordAudio");
		markDisabled("playSampleAudio");
		document.getElementById("buttonNext").onclick = null;
		markDisabled("original_script");
		markDisabled("playRecordedAudio");
		markDisabled("buttonNext");

		disableElementWithClass("input-uploader-audio-file");
	};

	/**
	 * Switch the page to its error state: everything is blocked except the "next" button,
	 * the IPA areas are reset and the given message is shown in the sentence box.
	 * @param {string} errorMsg - message written into "original_script" (defaults to the generic server error text)
	 */
	const UIError = (errorMsg = defaultErrorScript) => {
		blockUI();

		// If error, user can only try to get a new sample
		const nextButton = document.getElementById("buttonNext");
		nextButton.onclick = () => getNextSample();
		// blockUI() has just put 'disabled' on the button; take it off again so the button is usable
		nextButton.classList.remove('disabled');
		nextButton.style["background-color"] = '#58636d';

		document.getElementById("recorded_ipa_script").innerHTML = "";
		document.getElementById("single_word_ipa_pair_error").style["display"] = "inline";
		document.getElementById("single_word_ipa_pair_separator").style["display"] = "none";
		document.getElementById("single_word_ipa_reference_recorded").style["display"] = "none";
		document.getElementById("single_word_ipa_current").style["display"] = "none";
		document.getElementById("ipa_script").innerText = "Error";

		document.getElementById("main_title").innerText = 'Server Error';
		document.getElementById("original_script").innerHTML = errorMsg;
	};

	/**
	 * Disable a form control and drop its 'darkgreen' highlight.
	 * @param {string} id - DOM id of the element (despite the function name, an id and not a class)
	 */
	const disableElementWithClass = (id) => {
		const target = document.getElementById(id);
		target.disabled = true;
		target.classList.remove('darkgreen');
	};

	/**
	 * Enable a form control again and give it the 'darkgreen' highlight.
	 * @param {string} id - DOM id of the element (despite the function name, an id and not a class)
	 */
	const enableElementWithClass = (id) => {
		const target = document.getElementById(id);
		target.removeAttribute("disabled");
		target.classList.add('darkgreen');
	};

	/**
	 * Unblock the page and show the "browser unsupported" message in the title.
	 */
	const UINotSupported = () => {
		unblockUI();
		const titleElement = document.getElementById("main_title");
		titleElement.innerText = browserUnsupported;
	};

	/**
	 * Recover from a failed recording: unblock the page, show the recording error
	 * message in the title and set the media device up again.
	 */
	const UIRecordingError = () => {
		unblockUI();
		const titleElement = document.getElementById("main_title");
		titleElement.innerText = recordingError;
		startMediaDevice();
	};



	//################### Application state functions #######################
	/**
	 * Add a pronunciation score, weighted by the current difficulty multiplier, to the running total.
	 * The total is kept as a rounded integer; NaN scores are ignored.
	 * @param {number} currentPronunciationScore - accuracy of the last answer
	 */
	function updateScore(currentPronunciationScore) {
		if (!Number.isNaN(currentPronunciationScore)) {
			const weightedScore = currentPronunciationScore * scoreMultiplier;
			currentScore = Math.round(currentScore + weightedScore);
		}
	}

	/**
	 * Download and decode the three feedback sounds (good / okay / bad answer).
	 * The downloads are independent, so they run in parallel. The three globals are assigned
	 * together once everything succeeded, which keeps `soundFileBad == null` a reliable
	 * "nothing cached yet" marker for the callers.
	 * @returns {Promise<void>} rejects when a download or a decoding fails
	 */
	const cacheSoundFiles = async () => {
		// Fetch one file below soundsPath and decode it with the shared AudioContext
		const loadSound = async (fileName) => {
			const response = await fetch(soundsPath + '/' + fileName);
			const encodedSound = await response.arrayBuffer();
			return ctx.decodeAudioData(encodedSound);
		};

		const [goodSound, okaySound, badSound] = await Promise.all([
			loadSound('ASR_good.wav'),
			loadSound('ASR_okay.wav'),
			loadSound('ASR_bad.wav'),
		]);

		soundFileGood = goodSound;
		soundFileOkay = okaySound;
		soundFileBad = badSound;
	}

	/**
	 * Tell whether the sentence box holds nothing worth transcribing: one of the
	 * placeholder / error messages, or a text without any letter, digit or underscore.
	 * @returns {boolean} true when a custom-text request must be refused
	 */
	const getCustomTextIsDisabled = () => {
		const checkText = document.getElementById("original_script").innerText.trim();
		// Unicode-aware on purpose: with the ASCII-only \w a text made of non-ASCII letters would look empty
		const cleanedText = checkText.replace(/[^\p{L}\p{N}_\s]/gu, ' ').trim();
		const placeholderTexts = [defaultOriginalScript, defaultErrorScript, editErrorScript];
		return placeholderTexts.includes(checkText) || cleanedText === "";
	}

	/**
	 * Request the IPA transcription of the text the user typed into "original_script".
	 * The server is initialized on first use; any failure ends in the error UI.
	 * On success the previous recording is dropped because it belongs to another sentence.
	 */
	const getCustomText = async () => {
		blockUI();

		if (!serverIsInitialized) {
			await initializeServer();
		}
		if (!serverWorking) {
			UIError();
			return;
		}
		if (soundFileBad == null) {
			cacheSoundFiles();
		}
		if (getCustomTextIsDisabled()) {
			UIError(editErrorScript);
			return;
		}

		// Bank the score of the sentence that is about to be replaced
		const accuracyElement = document.getElementById("pronunciation_accuracy");
		updateScore(parseFloat(accuracyElement.innerHTML));

		document.getElementById("main_title").innerText = "Get IPA transcription for custom text...";

		try {
			const customText = document.getElementById("original_script").innerText;
			const requestBody = JSON.stringify({
				"language": AILanguage,
				"transcript": customText
			});
			const response = await fetch(apiMainPathSample + '/getSample', {
				method: "post",
				body: requestBody,
				headers: { "X-Api-Key": STScoreAPIKey }
			});
			const data = await response.json();
			formatTranscriptData(data);
			audioRecorded = undefined;
		} catch (err) {
			console.log("getCustomText::err:", err)
			UIError();
		}
	}

	/**
	 * Fetch a new sentence of the selected difficulty from the backend and display it.
	 * The server is initialized on first use; any failure ends in the error UI.
	 */
	const getNextSample = async () => {
		blockUI();

		if (!serverIsInitialized) {
			await initializeServer();
		}
		if (!serverWorking) {
			UIError();
			return;
		}
		if (soundFileBad == null) {
			cacheSoundFiles();
		}

		// Bank the score of the sentence that is about to be replaced
		const accuracyElement = document.getElementById("pronunciation_accuracy");
		updateScore(parseFloat(accuracyElement.innerHTML));

		document.getElementById("main_title").innerText = "Processing new sample...";

		// Radio button -> difficulty category sent to the backend and the weight applied to the score
		const difficultyOptions = [
			{ radioId: 'lengthCat1', difficulty: 0, multiplier: 1.3 },
			{ radioId: 'lengthCat2', difficulty: 1, multiplier: 1 },
			{ radioId: 'lengthCat3', difficulty: 2, multiplier: 1.3 },
			{ radioId: 'lengthCat4', difficulty: 3, multiplier: 1.6 },
		];
		const selectedOption = difficultyOptions.find(({ radioId }) => document.getElementById(radioId).checked);
		if (selectedOption !== undefined) {
			sample_difficult = selectedOption.difficulty;
			scoreMultiplier = selectedOption.multiplier;
		}

		try {
			const requestBody = JSON.stringify({
				"category": sample_difficult.toString(), "language": AILanguage
			});
			const response = await fetch(apiMainPathSample + '/getSample', {
				method: "post",
				body: requestBody,
				headers: { "X-Api-Key": STScoreAPIKey }
			});
			const data = await response.json();
			formatTranscriptData(data);
		} catch (err) {
			console.log("getNextSample::err:", err)
			UIError();
		}
	};

	/**
	 * Display a sample received from the backend: sentence, IPA transcription and translation.
	 * Also clears the results of the previous attempt, refreshes the score line and unblocks the page.
	 * @param {object} data - backend answer; `real_transcript`, `ipa_transcript` and `transcript_translation` are read
	 */
	const formatTranscriptData = (data) => {
		const byId = (elementId) => document.getElementById(elementId);

		const sentenceElement = byId("original_script");
		currentText = data.real_transcript;
		sentenceElement.innerText = currentText;

		currentIpa = data.ipa_transcript;

		const ipaElement = byId("ipa_script");
		ipaElement.ariaLabel = "ipa_script";
		ipaElement.innerText = `/ ${currentIpa} /`;

		// Nothing has been recorded for the new sentence yet
		const recordedIpaElement = byId("recorded_ipa_script");
		recordedIpaElement.ariaLabel = "recorded_ipa_script";
		recordedIpaElement.innerText = "";

		const accuracyElement = byId("pronunciation_accuracy");
		accuracyElement.ariaLabel = "pronunciation_accuracy";
		accuracyElement.innerHTML = "";

		// restore a clean state for document.getElementById("single_word_ipa_pair") to avoid errors when playing the word audio
		$(document).ready(() => {
			$("#single_word_ipa_pair").replaceWith(SingleWordIpaPairBackup.clone());
		});

		// The counter is shown before being incremented
		byId("section_accuracy").innerText = `| Score: ${currentScore.toString()} - (${currentSample.toString()})`;
		currentSample += 1;

		byId("main_title").innerText = page_title;
		byId("translated_script").innerText = data.transcript_translation;

		currentSoundRecorded = false;
		unblockUI(true);
		byId("playRecordedAudio").classList.add('disabled');
	}

	/**
	 * Toggle the recorder: stop when a recording is running, start one otherwise.
	 * @returns {Promise<*>} resolves with whatever the called function returns
	 */
	const updateRecordingState = async () => {
		if (isRecording) {
			return stopRecording();
		}
		return recordSample();
	}

	/**
	 * Show the IPA pair of one word: the reference transcription in black and the
	 * matched (spoken) transcription in the colour of its accuracy category.
	 * @param {string|number} word_idx - position of the word inside the sentence
	 */
	const generateWordModal = (word_idx) => {
		const referenceIpa = real_transcripts_ipa[word_idx];
		wrapWordForPlayingLink(referenceIpa, word_idx, false, "black");

		const spokenIpa = matched_transcripts_ipa[word_idx];
		const categoryIndex = parseInt(wordCategories[word_idx]);
		wrapWordForPlayingLink(spokenIpa, word_idx, true, accuracy_colors[categoryIndex]);
	}

	/**
	 * Start a new recording: the page is blocked except for the record button,
	 * which the user clicks again to stop.
	 */
	const recordSample = async () => {
		const titleElement = document.getElementById("main_title");
		titleElement.innerText = "Recording... click again when done speaking";
		const iconElement = document.getElementById("recordIcon");
		iconElement.innerHTML = 'pause_presentation';

		blockUI();
		// The record button has to stay usable, it is the only way to stop the recording
		document.getElementById("recordAudio").classList.remove('disabled');

		audioChunks = [];
		isRecording = true;
		mediaRecorder.start();
	}

	/**
	 * Switch the practice language and pick a speech-synthesis voice for it.
	 * The preferred named voice is used when installed; otherwise the first voice of the same language.
	 * `languageFound` stays false when no voice matches; `voice_synth` then keeps its previous value.
	 * @param {string} language - 'de' or 'en'; any other value selects no voice
	 * @param {boolean} generateNewSample - when true a new sentence is requested right away
	 */
	const changeLanguage = (language, generateNewSample = false) => {
		// Declared locally: as bare assignments `voices` and the loop index leaked into the global scope
		const voices = synth.getVoices();
		AILanguage = language;
		languageFound = false;

		let languageIdentifier;
		let languageName;
		switch (language) {
			case 'de':
				document.getElementById("languageBox").innerText = "German";
				languageIdentifier = 'de';
				languageName = 'Anna';
				break;

			case 'en':
				document.getElementById("languageBox").innerText = "English";
				languageIdentifier = 'en';
				languageName = 'Daniel';
				break;
		}

		const voicesOfLanguage = voices.filter((voice) => voice.lang.slice(0, 2) === languageIdentifier);
		// If specific voice not found, use anything with the same language
		const selectedVoice = voicesOfLanguage.find((voice) => voice.name === languageName) || voicesOfLanguage[0];
		if (selectedVoice !== undefined) {
			voice_synth = selectedVoice;
			languageFound = true;
		}

		if (generateNewSample) {
			getNextSample();
		}
	}

	//################### Speech-To-Score function ########################
	// Constraints handed to getUserMedia(): audio only, one channel, 48000 Hz requested
	const mediaStreamConstraints = {
		audio: { channelCount: 1, sampleRate: 48000 },
	};


	/**
	 * Build the clickable anchor of one word of the reference text, one coloured <span> per letter.
	 * The anchor is configured once, after its letters were added.
	 * @param {string} wordText - the word as shown to the user
	 * @param {number} wordIdx - position of the word inside the sentence
	 * @param {string} letterFlags - one character per letter; "1" paints the letter green, anything else red
	 * @returns {HTMLElement} the anchor, not yet attached to the page
	 */
	const buildColoredWordLink = (wordText, wordIdx, letterFlags) => {
		const wordLink = document.createElement("a");
		for (let letterIdx = 0; letterIdx < wordText.length; letterIdx++) {
			const containerLetter = document.createElement("span");
			containerLetter.style.color = letterFlags[letterIdx] === "1" ? 'green' : "red";
			containerLetter.innerText = wordText[letterIdx];
			wordLink.appendChild(containerLetter);
		}
		wordLink.ariaLabel = `word${wordIdx}${wordText}`.replace(/[^a-zA-Z0-9]/g, "");
		wordLink.style.whiteSpace = "nowrap";
		wordLink.style.textDecoration = "underline";
		wordLink.onclick = function () {
			generateWordModal(wordIdx.toString());
		};
		return wordLink;
	};

	/**
	 * Send a recording to the backend, then display the returned accuracy, the IPA
	 * transcriptions and the sentence coloured letter by letter.
	 * Any failure (network, malformed answer) is logged and ends in the error UI.
	 * @param {string} audioBase64 - the audio as produced by FileReader.readAsDataURL()
	 */
	async function sendAudioToGetAccuracyFromRecordedAudio(audioBase64) {
		try {
			// Get currentText from "original_script" div, in case user has change it
			let text = document.getElementById("original_script").innerHTML;
			// Remove html tags
			text = text.replace(/<[^>]*>?/gm, '');
			//Remove spaces on the beginning and end
			text = text.trim();
			// Remove double spaces
			text = text.replace(/\s\s+/g, ' ');
			currentText = [text];
			const useDTWValue = document.getElementById("checkbox-dtw").checked;
			console.log(`useDTWValue: '${typeof useDTWValue}', '${useDTWValue}'`)

			const response = await fetch(apiMainPathSTS + '/GetAccuracyFromRecordedAudio', {
				method: "post",
				body: JSON.stringify({
					"title": currentText[0], "base64Audio": audioBase64, "language": AILanguage, "useDTW": useDTWValue
				}),
				headers: { "X-Api-Key": STScoreAPIKey }
			});
			const data = await response.json();

			if (playAnswerSounds) {
				playSoundForAnswerAccuracy(parseFloat(data.pronunciation_accuracy));
			}

			document.getElementById("recorded_ipa_script").innerText = `/ ${data.ipa_transcript} /`;
			document.getElementById("recordAudio").classList.add('disabled');
			document.getElementById("main_title").innerText = page_title;
			document.getElementById("pronunciation_accuracy").innerText = `${data.pronunciation_accuracy}%`;
			document.getElementById("ipa_script").innerText = data.real_transcripts_ipa;

			lettersOfWordAreCorrect = data.is_letter_correct_all_words.split(" ");

			startTime = data.start_time;
			endTime = data.end_time;

			real_transcripts_ipa = data.real_transcripts_ipa.split(" ");
			matched_transcripts_ipa = data.matched_transcripts_ipa.split(" ");
			wordCategories = data.pair_accuracy_category.split(" ");

			const arrayColoredWords = document.getElementById("original_script");
			arrayColoredWords.textContent = "";

			currentText[0].split(" ").forEach((currentWordText, wordIdx) => {
				// A word the backend sent no flags for is painted entirely red instead of aborting the display
				const letterFlags = lettersOfWordAreCorrect[wordIdx] || "";
				arrayColoredWords.appendChild(buildColoredWordLink(currentWordText, wordIdx, letterFlags));

				const containerSpace = document.createElement("span");
				containerSpace.textContent = " ";
				arrayColoredWords.appendChild(containerSpace);
			});

			currentSoundRecorded = true;
			unblockUI();
			document.getElementById("playRecordedAudio").classList.remove('disabled');
		} catch (err) {
			console.log("sendAudioToGetAccuracyFromRecordedAudio::err:", err)
			UIError();
		}
	}

	/**
	 * Ask for the microphone and prepare the MediaRecorder used by recordSample() / stopRecording().
	 * When a recording stops, the audio is kept for playback and sent to the backend for scoring.
	 * A refused or failing microphone request is logged instead of becoming an unhandled rejection.
	 */
	const startMediaDevice = () => {
		navigator.mediaDevices.getUserMedia(mediaStreamConstraints).then(_stream => {
			stream = _stream;
			mediaRecorder = new MediaRecorder(stream);

			mediaRecorder.ondataavailable = (event) => {
				audioChunks.push(event.data);
			};

			mediaRecorder.onstop = async () => {
				document.getElementById("recordIcon").innerHTML = 'mic';
				blockUI();

				audioBlob = new Blob(audioChunks, { type: 'audio/ogg;' });

				const audioUrl = URL.createObjectURL(audioBlob);
				audioRecorded = new Audio(audioUrl);

				const audioBase64 = await convertBlobToBase64(audioBlob);

				// Only the part after the comma is audio: measuring the whole data URL would let the
				// "data:...;base64," prefix alone pass the check. Without a comma the whole string is measured.
				const audioPayload = audioBase64.slice(audioBase64.indexOf(",") + 1);
				const minimumAllowedLength = 6;
				if (audioPayload.length < minimumAllowedLength) {
					setTimeout(UIRecordingError, 50); // Make sure this function finished after get called again
					return;
				}
				await sendAudioToGetAccuracyFromRecordedAudio(audioBase64);
			};
		}).catch((error) => {
			console.log("startMediaDevice::error:", error)
		});
	};
	// Request the microphone and set up the recorder as soon as this script is evaluated
	startMediaDevice();

	// ################### Audio playback ##################
	/**
	 * Play the feedback sound matching an accuracy: "bad" below badScoreThreshold,
	 * "okay" below mediumScoreThreshold, "good" otherwise.
	 * @param {number} accuracy - pronunciation accuracy of the last answer
	 */
	const playSoundForAnswerAccuracy = async (accuracy) => {
		let selectedSound = soundFileGood;
		if (accuracy < mediumScoreThreshold) {
			selectedSound = accuracy < badScoreThreshold ? soundFileBad : soundFileOkay;
		}
		currentAudioForPlaying = selectedSound;
		playback();
	}

	/**
	 * Read the current sentence aloud with the speech synthesis.
	 * The title is updated right after the speech request was issued.
	 */
	const playAudio = async () => {
		const titleElement = () => document.getElementById("main_title");

		titleElement().innerText = "Generating sound...";
		playWithMozillaApi(currentText[0]);
		titleElement().innerText = "Current Sound was played";
	};

	/**
	 * Play the buffer stored in currentAudioForPlaying through the shared AudioContext, starting now.
	 */
	function playback() {
		const bufferSource = ctx.createBufferSource();
		bufferSource.buffer = currentAudioForPlaying;
		bufferSource.connect(ctx.destination);
		bufferSource.start(ctx.currentTime);
	}


	/**
	 * Play the recorded audio, completely or only the [start, end] excerpt (in seconds).
	 * The page is blocked during playback and unblocked when it ends.
	 * Any failure (no recording, playback refused) is logged and ends in UINotSupported().
	 * @param {?number} start - beginning of the excerpt; null plays the whole recording
	 * @param {?number} end - end of the excerpt; null plays the whole recording
	 */
	const playRecording = async (start = null, end = null) => {
		blockUI();

		try {
			if (start == null || end == null) {
				// `once` keeps listeners from piling up on the same Audio element across replays
				audioRecorded.addEventListener("ended", function () {
					audioRecorded.currentTime = 0;
					unblockUI();
					document.getElementById("main_title").innerText = "Recorded Sound was played";
				}, { once: true });
				await audioRecorded.play();
			}
			else {
				audioRecorded.currentTime = start;
				const excerptDurationInMs = Math.round((end - start) * 1000);
				// Awaited so that a refused playback reaches the catch below
				await audioRecorded.play();
				setTimeout(function () {
					unblockUI();
					audioRecorded.pause();
					audioRecorded.currentTime = 0;
					document.getElementById("main_title").innerText = "Recorded Sound was played";
				}, excerptDurationInMs);
			}
		}
		catch (err) {
			console.log("playRecording::err:", err)
			UINotSupported();
		}
	};

	/**
	 * Play one word, alternating on every call between the synthesized voice and the user's recording.
	 * @param {number} word_idx - position of the word inside the sentence
	 */
	const playNativeAndRecordedWord = async (word_idx) => {
		const playWord = isNativeSelectedForPlayback ? playCurrentWord : playRecordedWord;
		playWord(word_idx);

		isNativeSelectedForPlayback = !isNativeSelectedForPlayback;
	}

	/**
	 * Stop the running recording; the recorder's `onstop` handler takes over from there.
	 */
	const stopRecording = () => {
		isRecording = false;
		mediaRecorder.stop();
		const titleElement = document.getElementById("main_title");
		titleElement.innerText = "Processing audio...";
	}


	/**
	 * Read one word of the current sentence aloud with the speech synthesis.
	 * @param {number} word_idx - position of the word inside the sentence (split on single spaces)
	 */
	const playCurrentWord = async (word_idx) => {
		const titleElement = () => document.getElementById("main_title");

		titleElement().innerText = "Generating word...";
		const sentenceWords = currentText[0].split(' ');
		playWithMozillaApi(sentenceWords[word_idx]);
		titleElement().innerText = "Word was played";
	}

	// TODO: Check if fallback is correct
	/**
	 * Speak a text with the browser speech synthesis, at a slowed-down rate.
	 * The page is blocked while speaking and unblocked when the utterance ends.
	 * Without a voice for the current language the "unsupported" UI is shown instead.
	 * @param {string} text - what to say
	 */
	const playWithMozillaApi = (text) => {
		if (!languageFound) {
			UINotSupported();
			return;
		}

		blockUI();
		// No voice selected yet: let changeLanguage() pick one for the current language
		if (voice_synth == null) {
			changeLanguage(AILanguage);
		}

		const utterance = new SpeechSynthesisUtterance(text);
		utterance.voice = voice_synth;
		utterance.rate = 0.7;
		utterance.onend = () => {
			unblockUI();
		};
		synth.speak(utterance);
	}

	/**
	 * Play the excerpt of the user's recording that corresponds to one word.
	 * The word boundaries come from the space-separated `startTime` / `endTime` strings.
	 * @param {number} word_idx - position of the word inside the sentence
	 */
	const playRecordedWord = (word_idx) => {
		// Declared locally: as bare assignments these two values leaked into the global scope
		const wordStartTime = parseFloat(startTime.split(' ')[word_idx]);
		const wordEndTime = parseFloat(endTime.split(' ')[word_idx]);

		playRecording(wordStartTime, wordEndTime);
	}

	// ############# Utils #####################
	/**
	 * Encode a Blob; thin async wrapper around blobToBase64().
	 * @param {Blob} blob - data to encode
	 * @returns {Promise<*>} resolves with the value blobToBase64() resolves with
	 */
	const convertBlobToBase64 = async (blob) => {
		const encodedBlob = await blobToBase64(blob);
		return encodedBlob;
	}

	/**
	 * Read a Blob with a FileReader and resolve with the resulting data URL.
	 * @param {Blob} blob - data to read
	 * @returns {Promise<string>} resolves with `reader.result`, rejects with the reader's error event
	 */
	const blobToBase64 = (blob) => new Promise((resolve, reject) => {
		const fileReader = new FileReader();
		fileReader.onload = () => {
			resolve(fileReader.result);
		};
		fileReader.onerror = (error) => {
			reject(error);
		};
		fileReader.readAsDataURL(blob);
	});

	/**
	 * Turn one of the two single-word IPA anchors into a coloured link that plays the word.
	 * @param {string} word - IPA text to display
	 * @param {string|number} word_idx - position of the word inside the sentence
	 * @param {boolean} isSpokenWord - true for the user's pronunciation, false for the reference
	 * @param {string} word_color - CSS colour of the link
	 */
	const wrapWordForPlayingLink = (word, word_idx, isSpokenWord, word_color) => {
		// for some reason here the function is swapped
		let playFunctionName;
		let anchorId;
		if (isSpokenWord) {
			playFunctionName = "playRecordedWord";
			anchorId = "single_word_ipa_current";
		} else {
			playFunctionName = "playCurrentWord";
			anchorId = "single_word_ipa_reference_recorded";
		}

		const anchor = document.getElementById(anchorId);
		anchor.innerText = word;
		anchor.href = `javascript:${playFunctionName}(${word_idx.toString()})`;
		anchor.removeAttribute("disabled");
		anchor.style["color"] = word_color;
		anchor.style["whiteSpace"] = "nowrap";
	}

	// ########## Function to initialize server ###############
	// This is to try to avoid aws lambda cold start.
	// Best effort only: the answer is ignored and a failure must never break the page. The surrounding
	// try/catch only sees synchronous errors, so the rejection of the request is handled on the promise.
	try {
		fetch(apiMainPathSTS + '/GetAccuracyFromRecordedAudio', {
			method: "post",
			body: JSON.stringify({ "title": '', "base64Audio": '', "language": AILanguage }),
			headers: { "X-Api-Key": STScoreAPIKey }

		}).catch((error) => {
			console.log("serverWarmUp::error:", error)
		});
	}
	catch (error) {
		console.log("serverWarmUp::error:", error)
	}

	/**
	 * Read an uploaded audio file as a data URL.
	 * Side effect: the file also becomes the global `audioRecorded`, so it can be played back later.
	 * @param {Blob} audioFile - the uploaded file
	 * @returns {Promise<string>} resolves with the data URL, rejects with the reader's error event
	 */
	const audioToBase64 = async (audioFile) => {
		const readAsDataUrl = (resolve, reject) => {
			const fileReader = new FileReader();
			fileReader.onerror = reject;
			fileReader.onload = (loadEvent) => {
				resolve(loadEvent.target.result);
			};

			// custom: set the global variable 'audioRecorded' to play later the uploaded audio
			audioRecorded = new Audio(URL.createObjectURL(audioFile));

			fileReader.readAsDataURL(audioFile);
		};
		return new Promise(readAsDataUrl);
	}

	/**
	 * Score an uploaded audio file: encode it, then send it to the backend like a recording.
	 * @param {Blob} audioFile - the uploaded file
	 */
	const audioUpload = async (audioFile) => {
		console.log("starting uploading the file...")
		const encodedAudio = await audioToBase64(audioFile);

		console.log("file uploaded, starting making the request...")
		await sendAudioToGetAccuracyFromRecordedAudio(encodedAudio);

		console.log("request done!")
	}

	/**
	 * Wake the backend up with a dummy scoring request and wait for its answer.
	 * A failing request is retried; after the last allowed try `serverWorking` is set to false.
	 * Any answer counts as success (its content is not inspected) and sets `serverIsInitialized`.
	 * Also takes the clean backup of "#single_word_ipa_pair" used when a new sample is displayed.
	 */
	const initializeServer = async () => {
		document.getElementById("main_title").innerText = 'Initializing server, this may take up to 2 minutes...';
		$(document).ready(function() {
			// required to properly reset the #single_word_ipa_pair element
			SingleWordIpaPairBackup = $("#single_word_ipa_pair").clone();
		})

		const maximum_number_of_tries = 4;
		let number_of_tries = 0;
		let valid_response = false;

		while (!valid_response) {
			if (number_of_tries > maximum_number_of_tries) {
				serverWorking = false;
				break;
			}

			try {
				await fetch(apiMainPathSTS + '/GetAccuracyFromRecordedAudio', {
					method: "post",
					body: JSON.stringify({ "title": '', "base64Audio": '', "language": AILanguage }),
					headers: { "X-Api-Key": STScoreAPIKey }
				});
				// Only reached once the request was answered. Passing `valid_response = true` to
				// `.then()` ran the assignment immediately, which ended the loop even when the request failed.
				valid_response = true;
				serverIsInitialized = true;
			}
			catch (e)
			{
				number_of_tries += 1;
				console.log(`initializeServer::error: ${e}, retry n=${number_of_tries}.`)
			}
		}
	}