Spaces:
Running
Running
import { setVoice, textToWavAudio } from "./piper.js"; | |
// Base URL of the hosted voice repository. The "//" after the scheme is
// required: "https:host/path" is resolved as a *relative* URL when the page
// itself is served over https, which would send requests to the wrong origin.
const voiceRoot = "https://huggingface.co/rhasspy/piper-voices/resolve/main";

// Selections parsed from the URL hash by loadVoices(). Each one is consumed
// (and reset to null) when the matching dropdown is populated.
let languageToSelect = null;
let voiceToSelect = null;
let qualityToSelect = null;

// Voice catalogue loaded from voices.json, keyed by voice key.
let voices = {};

// Rank of each quality label, used to order the quality dropdown from
// lowest to highest.
const qualitySort = {
  x_low: 0,
  low: 1,
  medium: 2,
  high: 3,
};

// Currently selected voice, formatted as `${language}-${voiceName}-${quality}`.
let voiceId = "";
// URL of the directory that holds the selected voice's files.
let voiceUrl = "";
// Id of the voice most recently handed to setVoice(); "" when none is loaded.
let loadedVoiceId = "";
/**
 * Parses a numeric form field, treating blank or non-numeric text as unset.
 *
 * @param {string} rawValue - Text taken from an input element.
 * @returns {number|null} The parsed float, or null when it is not a number.
 */
function parseOptionalFloat(rawValue) {
  const parsed = Number.parseFloat(rawValue);
  return Number.isNaN(parsed) ? null : parsed;
}

/**
 * Wires up the page: starts loading the voice catalogue, installs the
 * "speak" click handler, enables the text box and button, and exposes the
 * dropdown callbacks on `window`.
 *
 * @returns {Promise<void>} Resolves once the handlers are installed.
 */
async function main() {
  loadVoices();

  const buttonSpeak = document.getElementById("buttonSpeak");
  const audioTTS = document.getElementById("audioTTS");
  const textInput = document.getElementById("textInput");
  const status = document.getElementById("status");
  const speakerSelect = document.getElementById("speaker");
  const inputLengthScale = document.getElementById("lengthScale");
  const inputNoiseScale = document.getElementById("noiseScale");
  const inputNoiseWScale = document.getElementById("noiseWScale");

  // Object URL of the clip currently assigned to the audio element; it is
  // released when the next clip replaces it so blobs do not accumulate.
  let currentAudioUrl = null;

  buttonSpeak.addEventListener("click", async () => {
    const text = textInput.value;
    if (!text) {
      return;
    }

    if (!voiceId) {
      console.log("ERROR: No voice id");
      return;
    }

    try {
      if (voiceId !== loadedVoiceId) {
        if (!voiceUrl) {
          console.log("ERROR: No voice URL");
          return;
        }

        // Snapshot the selection: voiceId is module state and may change
        // while the download is in flight.
        const requestedVoiceId = voiceId;
        const requestedVoiceUrl = voiceUrl;

        status.textContent = "Loading voice...";
        await setVoice(
          `${requestedVoiceUrl}/${requestedVoiceId}.onnx?download=true`,
          `${requestedVoiceUrl}/${requestedVoiceId}.onnx.json?download=true`,
        );
        loadedVoiceId = requestedVoiceId;
      }

      // Index 0 is not treated as a speaker choice; only later entries
      // carry a speaker id.
      let speakerId = null;
      if (speakerSelect.selectedIndex > 0) {
        speakerId = Number.parseInt(speakerSelect.value, 10);
      }

      const lengthScale = parseOptionalFloat(inputLengthScale.value);
      const noiseScale = parseOptionalFloat(inputNoiseScale.value);
      const noiseWScale = parseOptionalFloat(inputNoiseWScale.value);

      status.textContent = "Synthesizing audio...";
      const wavAudio = await textToWavAudio(
        text,
        speakerId,
        lengthScale,
        noiseScale,
        noiseWScale,
      );

      const audioURL = URL.createObjectURL(wavAudio);
      audioTTS.src = audioURL;
      if (currentAudioUrl) {
        URL.revokeObjectURL(currentAudioUrl);
      }
      currentAudioUrl = audioURL;

      await audioTTS.play();
      status.textContent = "Ready";
    } catch (error) {
      // Without this the status line would stay on "Loading voice..." or
      // "Synthesizing audio..." forever after a failure.
      console.error("ERROR: Speech synthesis failed", error);
      status.textContent = "Error (see console for details)";
    }
  });

  textInput.disabled = false;
  buttonSpeak.disabled = false;

  // Published on window so they can be called from outside this module.
  window.setLanguage = setLanguage;
  window.setVoiceName = setVoiceName;
  window.setQuality = setQuality;
  window.setSpeaker = setSpeaker;
}
// Start the app once the document has been parsed. main() is async, so a
// failure during start-up is reported here instead of becoming an unhandled
// promise rejection.
document.addEventListener("DOMContentLoaded", () => {
  main().catch((error) => {
    console.error("ERROR: Failed to initialize", error);
  });
});
// ---------------------------------------------------------------------------- | |
/**
 * Rebuilds the voice dropdown for the language currently chosen in the
 * "languages" dropdown, then selects a voice and cascades to setVoiceName().
 *
 * The voice requested through the URL hash (voiceToSelect) is used when it
 * exists for this language; otherwise the first listed voice is selected.
 * Does nothing when no language is chosen.
 */
export function setLanguage() {
  const language = document.getElementById("languages").value;
  if (language.length === 0) {
    return;
  }

  const voiceSelect = document.getElementById("voice");
  // The first option is left in place; every entry after it is rebuilt.
  while (voiceSelect.options.length > 1) {
    voiceSelect.remove(voiceSelect.options.length - 1);
  }

  const names = Array.from(
    new Set(
      Object.values(voices)
        .filter((voice) => voice.language.code === language)
        .map((voice) => voice.name),
    ),
  ).sort();

  for (const name of names) {
    const option = document.createElement("option");
    option.text = name;
    option.value = name;
    voiceSelect.add(option);
  }

  // The pending request is consumed either way so it cannot leak into a
  // later, unrelated language change.
  const requestedVoice = voiceToSelect;
  voiceToSelect = null;

  if (requestedVoice && names.includes(requestedVoice)) {
    voiceSelect.value = requestedVoice;
    setVoiceName();
  } else if (voiceSelect.options.length > 1) {
    // Select first voice. This also covers a requested voice that does not
    // exist: assigning an unknown value would deselect every option.
    voiceSelect.selectedIndex = 1;
    setVoiceName();
  }
}
/**
 * Rebuilds the quality dropdown for the chosen language and voice name,
 * then selects a quality and cascades to setQuality().
 *
 * The quality requested through the URL hash (qualityToSelect) is used when
 * it exists for this voice; otherwise the highest available quality is
 * selected. Does nothing when no voice name is chosen.
 */
function setVoiceName() {
  const language = document.getElementById("languages").value;
  const voiceName = document.getElementById("voice").value;
  if (voiceName.length === 0) {
    return;
  }

  const qualitySelect = document.getElementById("quality");
  // The first option is left in place; every entry after it is rebuilt.
  while (qualitySelect.options.length > 1) {
    qualitySelect.remove(qualitySelect.options.length - 1);
  }

  const qualities = Array.from(
    new Set(
      Object.values(voices)
        .filter(
          (voice) =>
            voice.language.code === language && voice.name === voiceName,
        )
        .map((voice) => voice.quality),
    ),
  ).sort((a, b) => qualitySort[a] - qualitySort[b]);

  for (const quality of qualities) {
    const option = document.createElement("option");
    option.text = quality;
    option.value = quality;
    qualitySelect.add(option);
  }

  // The pending request is consumed either way so it cannot leak into a
  // later, unrelated voice change.
  const requestedQuality = qualityToSelect;
  qualityToSelect = null;

  if (requestedQuality && qualities.includes(requestedQuality)) {
    qualitySelect.value = requestedQuality;
    setQuality();
  } else if (qualitySelect.options.length > 1) {
    // Select highest quality (the list is sorted lowest to highest). This
    // also covers a requested quality that this voice does not offer.
    qualitySelect.selectedIndex = qualitySelect.options.length - 1;
    setQuality();
  }
}
/**
 * Rebuilds the speaker dropdown for the chosen language, voice name and
 * quality, selects the first speaker, cascades to setSpeaker(), and then
 * records the selection in `voiceId` and in the URL hash.
 *
 * Does nothing when no quality is chosen.
 */
function setQuality() {
  const language = document.getElementById("languages").value;
  const voiceName = document.getElementById("voice").value;
  const quality = document.getElementById("quality").value;
  if (quality.length === 0) {
    return;
  }

  const speakerSelect = document.getElementById("speaker");
  // The first option is left in place; every entry after it is rebuilt.
  while (speakerSelect.options.length > 1) {
    speakerSelect.remove(speakerSelect.options.length - 1);
  }

  const selectedVoice = Object.values(voices).find(
    (voice) =>
      voice.language.code === language &&
      voice.name === voiceName &&
      voice.quality === quality,
  );

  let numSpeakers = 1;
  let speakerIdMap = {};
  if (selectedVoice) {
    numSpeakers = selectedVoice.num_speakers;
    // Tolerate catalogue entries that omit the map.
    speakerIdMap = selectedVoice.speaker_id_map || {};
  }

  const sortedSpeakers = Object.keys(speakerIdMap).sort(
    (a, b) => speakerIdMap[a] - speakerIdMap[b],
  );

  if (!(numSpeakers > 1) || sortedSpeakers.length === 0) {
    // Single speaker model
    const option = document.createElement("option");
    option.text = "default";
    option.value = "0";
    speakerSelect.add(option);
  } else {
    // Multi-speaker model. The option value is the id from speaker_id_map
    // rather than the position in the sorted list; the two only coincide
    // when the ids are exactly 0..n-1.
    for (const speaker of sortedSpeakers) {
      const speakerId = String(speakerIdMap[speaker]);
      const option = document.createElement("option");
      option.text = `${speaker} (${speakerId})`;
      option.value = speakerId;
      speakerSelect.add(option);
    }
  }

  if (speakerSelect.options.length > 1) {
    // Select first speaker
    speakerSelect.selectedIndex = 1;
    setSpeaker();
  }

  voiceId = `${language}-${voiceName}-${quality}`;
  window.location.hash = voiceId;
}
/**
 * Finalizes the current selection: computes `voiceUrl` for the chosen
 * language / voice name / quality, points the "key" link at it, and loads
 * the sample text for the language family into the text input.
 *
 * Does nothing when no speaker is chosen or when the selection does not
 * match any entry in `voices`.
 */
function setSpeaker() {
  const language = document.getElementById("languages").value;
  // "en_US" -> "en": voices and sample texts are grouped by this prefix.
  const languageFamily = language.split("_")[0];
  const voiceName = document.getElementById("voice").value;
  const quality = document.getElementById("quality").value;
  const speaker = document.getElementById("speaker").value;
  if (speaker.length === 0) {
    return;
  }

  const matchingEntry = Object.entries(voices).find(
    ([, voice]) =>
      voice.language.code === language &&
      voice.name === voiceName &&
      voice.quality === quality,
  );
  if (!matchingEntry) {
    return;
  }
  const key = matchingEntry[0];

  voiceUrl = `${voiceRoot}/${languageFamily}/${language}/${voiceName}/${quality}`;

  const aKey = document.getElementById("key");
  // The key comes from the downloaded catalogue, so it is inserted as text
  // rather than parsed as HTML.
  aKey.textContent = key;
  aKey.href = voiceUrl;

  fetch(`txt/${languageFamily}.txt`)
    .then((response) => {
      if (!response.ok) {
        throw new Error(
          `Sample text request failed with status ${response.status}`,
        );
      }
      return response.text();
    })
    .then((text) => {
      // Set as text content so the file is not interpreted as markup.
      document.getElementById("textInput").textContent = text;
    })
    .catch((error) => {
      // Sample text is optional; keep whatever is already in the box.
      console.error("ERROR: Could not load sample text", error);
    });
}
/**
 * Reads an optional `#language-voice-quality` selection from the URL hash,
 * downloads the voice catalogue, fills the language dropdown, and applies
 * the requested language (cascading to setLanguage()) when it exists in the
 * catalogue.
 *
 * @returns {Promise<void>} Settles once the catalogue has been processed.
 *   Failures are logged and never reject, so callers may ignore the result.
 */
function loadVoices() {
  const hash = window.location.hash;
  if (hash.length > 0) {
    const match = /^#([^-]+)-([^-]+)-([^-]+)$/.exec(hash);
    if (match) {
      languageToSelect = match[1];
      voiceToSelect = match[2];
      qualityToSelect = match[3];
    }
  }

  return fetch(`${voiceRoot}/voices.json?download=true`)
    .then((response) => {
      if (!response.ok) {
        throw new Error(
          `Voice list request failed with status ${response.status}`,
        );
      }
      return response.json();
    })
    .then((responseObj) => {
      voices = responseObj;

      // Display label per language code; when several voices share a code
      // the last one visited supplies the label.
      const languageNames = {};
      for (const voice of Object.values(voices)) {
        const voiceLanguage = voice.language;
        languageNames[voiceLanguage.code] =
          `${voiceLanguage.name_native} (${voiceLanguage.name_english}, ${voiceLanguage.country_english})`;
      }

      const languagesSelect = document.getElementById("languages");
      for (const language of Object.keys(languageNames).sort()) {
        const option = document.createElement("option");
        option.text = languageNames[language];
        option.value = language;
        languagesSelect.add(option);
      }

      const requestedLanguage = languageToSelect;
      languageToSelect = null;
      if (!requestedLanguage) {
        return;
      }

      const isKnownLanguage = Object.prototype.hasOwnProperty.call(
        languageNames,
        requestedLanguage,
      );
      if (isKnownLanguage) {
        languagesSelect.value = requestedLanguage;
        setLanguage();
      } else {
        // Assigning an unknown value would deselect every option. Drop the
        // rest of the hash request too, so it is not applied to whichever
        // language the user picks next.
        voiceToSelect = null;
        qualityToSelect = null;
      }
    })
    .catch((error) => {
      console.error("ERROR: Could not load voices", error);
    });
}