|
|
|
import TextLineStream from 'textlinestream'; |
|
import { Client } from '@gradio/client'; |
|
import * as lamejs from '@breezystack/lamejs'; |
|
|
|
|
|
import { asyncIterator } from '@sec-ant/readable-stream/ponyfill/asyncIterator'; |
|
import { CONFIG } from '../config'; |
|
import { uploadFiles } from '@huggingface/hub'; |
|
|
|
// True when running under Vite's development mode (`vite dev`).
export const isDev: boolean = import.meta.env.MODE === 'development';

// Token injected at build time via the VITE_TEST_TOKEN env variable.
// NOTE(review): typed `string` but will be `undefined` at runtime if the
// variable is unset — confirm callers handle that.
export const testToken: string = import.meta.env.VITE_TEST_TOKEN;

// "Blog mode" is toggled by the substring "blogmode" anywhere in the page URL.
export const isBlogMode: boolean = !!window.location.href.match(/blogmode/);
|
|
|
export const delay = (ms: number) => new Promise((res) => setTimeout(res, ms)); |
|
|
|
|
|
export const generateAudio = async ( |
|
content: string, |
|
voice: string, |
|
speed: number = 1.1 |
|
): Promise<string> => { |
|
const maxRetries = 3; |
|
for (let i = 0; i < maxRetries; i++) { |
|
try { |
|
const client = await Client.connect(CONFIG.ttsSpaceId); |
|
const result = await client.predict('/tts', { |
|
text: content, |
|
voice, |
|
speed, |
|
}); |
|
|
|
console.log(result.data); |
|
return (result.data as any)[0].url; |
|
} catch (e) { |
|
if (i === maxRetries - 1) { |
|
throw e; |
|
} |
|
console.error('Failed to generate audio, retrying...', e); |
|
} |
|
continue; |
|
} |
|
return ''; |
|
}; |
|
|
|
export const pickRand = <T>(arr: T[]): T => { |
|
return arr[Math.floor(Math.random() * arr.length)]; |
|
}; |
|
|
|
|
|
export async function* getSSEStreamAsync(fetchResponse: Response) { |
|
if (!fetchResponse.body) throw new Error('Response body is empty'); |
|
const lines: ReadableStream<string> = fetchResponse.body |
|
.pipeThrough(new TextDecoderStream()) |
|
.pipeThrough(new TextLineStream()); |
|
|
|
for await (const line of asyncIterator(lines)) { |
|
|
|
if (line.startsWith('data:') && !line.endsWith('[DONE]')) { |
|
const data = JSON.parse(line.slice(5)); |
|
yield data; |
|
} else if (line.startsWith('error:')) { |
|
const data = JSON.parse(line.slice(6)); |
|
throw new Error(data.message || 'Unknown error'); |
|
} |
|
} |
|
} |
|
|
|
export const uploadFileToHub = async ( |
|
buf: ArrayBuffer, |
|
filename: string, |
|
repoId: string, |
|
hfToken: string |
|
) => { |
|
await uploadFiles({ |
|
accessToken: hfToken, |
|
repo: repoId, |
|
files: [ |
|
{ |
|
path: filename, |
|
content: new Blob([buf], { type: 'audio/wav' }), |
|
}, |
|
], |
|
}); |
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
export const trimSilence = (audioBuffer: AudioBuffer): AudioBuffer => { |
|
const threshold = 0.01; |
|
const numChannels = audioBuffer.numberOfChannels; |
|
const totalSamples = audioBuffer.length; |
|
|
|
|
|
const isSilent = (index: number): boolean => { |
|
for (let channel = 0; channel < numChannels; channel++) { |
|
const channelData = audioBuffer.getChannelData(channel); |
|
if (Math.abs(channelData[index]) > threshold) { |
|
return false; |
|
} |
|
} |
|
return true; |
|
}; |
|
|
|
|
|
let startSample = 0; |
|
while (startSample < totalSamples && isSilent(startSample)) { |
|
startSample++; |
|
} |
|
|
|
|
|
let endSample = totalSamples - 1; |
|
while (endSample >= startSample && isSilent(endSample)) { |
|
endSample--; |
|
} |
|
|
|
|
|
if (startSample >= totalSamples || endSample < startSample) { |
|
return new AudioBuffer({ |
|
length: 1, |
|
numberOfChannels: numChannels, |
|
sampleRate: audioBuffer.sampleRate, |
|
}); |
|
} |
|
|
|
const newLength = endSample - startSample + 1; |
|
const newBuffer = new AudioBuffer({ |
|
length: newLength, |
|
numberOfChannels: numChannels, |
|
sampleRate: audioBuffer.sampleRate, |
|
}); |
|
|
|
|
|
for (let channel = 0; channel < numChannels; channel++) { |
|
const oldData = audioBuffer.getChannelData(channel); |
|
const newData = newBuffer.getChannelData(channel); |
|
for (let i = 0; i < newLength; i++) { |
|
newData[i] = oldData[startSample + i]; |
|
} |
|
} |
|
|
|
return newBuffer; |
|
}; |
|
|
|
export const joinAudio = ( |
|
audio1: AudioBuffer, |
|
audio2: AudioBuffer, |
|
gapMilisecs: number, |
|
overlap: 'none' | 'cross-fade' = 'none' |
|
): AudioBuffer => { |
|
const sampleRate = audio1.sampleRate; |
|
const numChannels = audio1.numberOfChannels; |
|
|
|
|
|
if (audio2.sampleRate !== sampleRate) { |
|
throw new Error('Audio buffers must have the same sample rate'); |
|
} |
|
if (audio2.numberOfChannels !== numChannels) { |
|
throw new Error('Audio buffers must have the same number of channels'); |
|
} |
|
|
|
const gapSeconds = gapMilisecs / 1000; |
|
let newLength: number; |
|
|
|
if (gapSeconds > 0) { |
|
|
|
const gapSamples = Math.round(gapSeconds * sampleRate); |
|
newLength = audio1.length + gapSamples + audio2.length; |
|
} else if (gapSeconds === 0) { |
|
|
|
newLength = audio1.length + audio2.length; |
|
} else { |
|
|
|
const overlapSamplesRequested = Math.round(-gapSeconds * sampleRate); |
|
|
|
const effectiveOverlap = Math.min( |
|
overlapSamplesRequested, |
|
audio1.length, |
|
audio2.length |
|
); |
|
newLength = audio1.length + audio2.length - effectiveOverlap; |
|
} |
|
|
|
|
|
const newBuffer = new AudioBuffer({ |
|
length: newLength, |
|
numberOfChannels: numChannels, |
|
sampleRate: sampleRate, |
|
}); |
|
|
|
|
|
for (let channel = 0; channel < numChannels; channel++) { |
|
const outputData = newBuffer.getChannelData(channel); |
|
const data1 = audio1.getChannelData(channel); |
|
const data2 = audio2.getChannelData(channel); |
|
let offset = 0; |
|
|
|
if (gapSeconds < 0) { |
|
|
|
const overlapSamplesRequested = Math.round(-gapSeconds * sampleRate); |
|
const effectiveOverlap = Math.min( |
|
overlapSamplesRequested, |
|
audio1.length, |
|
audio2.length |
|
); |
|
|
|
|
|
const nonOverlapLength = audio1.length - effectiveOverlap; |
|
outputData.set(data1.subarray(0, nonOverlapLength), offset); |
|
offset += nonOverlapLength; |
|
|
|
|
|
if (overlap === 'cross-fade') { |
|
for (let i = 0; i < effectiveOverlap; i++) { |
|
|
|
const fadeOut = 1 - i / effectiveOverlap; |
|
const fadeIn = i / effectiveOverlap; |
|
outputData[offset + i] = |
|
data1[nonOverlapLength + i] * fadeOut + data2[i] * fadeIn; |
|
} |
|
} else { |
|
for (let i = 0; i < effectiveOverlap; i++) { |
|
outputData[offset + i] = data1[nonOverlapLength + i] + data2[i]; |
|
} |
|
} |
|
offset += effectiveOverlap; |
|
|
|
|
|
outputData.set(data2.subarray(effectiveOverlap), offset); |
|
} else if (gapSeconds === 0) { |
|
|
|
outputData.set(data1, offset); |
|
offset += audio1.length; |
|
outputData.set(data2, offset); |
|
} else { |
|
|
|
const gapSamples = Math.round(gapSeconds * sampleRate); |
|
outputData.set(data1, offset); |
|
offset += audio1.length; |
|
|
|
|
|
offset += gapSamples; |
|
|
|
outputData.set(data2, offset); |
|
} |
|
} |
|
|
|
return newBuffer; |
|
}; |
|
|
|
export const addNoise = ( |
|
audioBuffer: AudioBuffer, |
|
magnitude: number |
|
): AudioBuffer => { |
|
const { numberOfChannels, sampleRate, length } = audioBuffer; |
|
const newBuffer = new AudioBuffer({ |
|
length, |
|
numberOfChannels, |
|
sampleRate, |
|
}); |
|
|
|
for (let channel = 0; channel < numberOfChannels; channel++) { |
|
const inputData = audioBuffer.getChannelData(channel); |
|
const outputData = newBuffer.getChannelData(channel); |
|
|
|
for (let i = 0; i < length; i++) { |
|
|
|
const noise = (Math.random() * 2 - 1) * magnitude; |
|
outputData[i] = inputData[i] + noise; |
|
} |
|
} |
|
|
|
return newBuffer; |
|
}; |
|
|
|
export const addSilence = ( |
|
audioBuffer: AudioBuffer, |
|
toBeginning: boolean, |
|
durationMilisecs: number |
|
): AudioBuffer => { |
|
|
|
const sampleRate = audioBuffer.sampleRate; |
|
const silenceSamples = Math.round((durationMilisecs / 1000) * sampleRate); |
|
const numChannels = audioBuffer.numberOfChannels; |
|
const originalLength = audioBuffer.length; |
|
const newLength = originalLength + silenceSamples; |
|
|
|
|
|
const newBuffer = new AudioBuffer({ |
|
length: newLength, |
|
numberOfChannels: numChannels, |
|
sampleRate: sampleRate, |
|
}); |
|
|
|
|
|
for (let channel = 0; channel < numChannels; channel++) { |
|
const originalData = audioBuffer.getChannelData(channel); |
|
const newData = newBuffer.getChannelData(channel); |
|
|
|
if (toBeginning) { |
|
|
|
newData.set(originalData, silenceSamples); |
|
} else { |
|
|
|
newData.set(originalData, 0); |
|
} |
|
} |
|
|
|
return newBuffer; |
|
}; |
|
|
|
|
|
|
|
|
|
export const loadWavAndDecode = async (url: string): Promise<AudioBuffer> => { |
|
const response = await fetch(url); |
|
const arrayBuffer = await response.arrayBuffer(); |
|
|
|
const AudioContext = window.AudioContext || window.webkitAudioContext; |
|
if (!AudioContext) { |
|
throw new Error('AudioContext is not supported on this browser'); |
|
} |
|
const audioCtx = new AudioContext(); |
|
let audioBuffer = await audioCtx.decodeAudioData(arrayBuffer); |
|
|
|
if (audioBuffer.numberOfChannels > 1) { |
|
const monoBuffer = new AudioContext().createBuffer( |
|
1, |
|
audioBuffer.length, |
|
audioBuffer.sampleRate |
|
); |
|
const monoData = monoBuffer.getChannelData(0); |
|
for (let i = 0; i < audioBuffer.length; i++) { |
|
let sum = 0; |
|
for (let channel = 0; channel < audioBuffer.numberOfChannels; channel++) { |
|
sum += audioBuffer.getChannelData(channel)[i]; |
|
} |
|
monoData[i] = sum / audioBuffer.numberOfChannels; |
|
} |
|
audioBuffer = monoBuffer; |
|
} |
|
return audioBuffer; |
|
}; |
|
|
|
/**
 * Serialize an AudioBuffer to a WAV (RIFF) file in memory.
 *
 * Samples are interleaved across channels. With `float32: true` the data is
 * written as 32-bit IEEE float (format code 3); otherwise as 16-bit PCM
 * (format code 1) with clamping to [-1, 1].
 *
 * NOTE(review): for format-3 (float) WAV, strict parsers expect a cbSize
 * field and a 'fact' chunk; this minimal 44-byte header omits both — most
 * decoders accept it, but confirm if targeting strict tooling.
 *
 * @param buffer  source audio
 * @param options float32 — write IEEE-float samples instead of 16-bit PCM
 * @returns the complete WAV file bytes
 */
export function audioBufferToWav(
  buffer: AudioBuffer,
  options: { float32?: boolean } = {}
): ArrayBuffer {
  const numChannels = buffer.numberOfChannels;
  const sampleRate = buffer.sampleRate;
  const format = options.float32 ? 3 : 1; // WAVE format code: 3 = IEEE float, 1 = PCM
  const bitDepth = options.float32 ? 32 : 16;

  const numSamples = buffer.length;
  const headerLength = 44; // fixed RIFF + fmt + data chunk headers
  const bytesPerSample = bitDepth / 8;
  const dataLength = numSamples * numChannels * bytesPerSample;
  const bufferLength = headerLength + dataLength;

  const arrayBuffer = new ArrayBuffer(bufferLength);
  const view = new DataView(arrayBuffer);
  let offset = 0;

  // Write an ASCII tag at the current offset, advancing it.
  function writeString(str: string) {
    for (let i = 0; i < str.length; i++) {
      view.setUint8(offset, str.charCodeAt(i));
      offset++;
    }
  }

  // ---- RIFF header ----
  writeString('RIFF');
  view.setUint32(offset, 36 + dataLength, true); // file size minus 8 bytes
  offset += 4;
  writeString('WAVE');
  // ---- fmt chunk ----
  writeString('fmt ');
  view.setUint32(offset, 16, true); // fmt chunk size
  offset += 4;
  view.setUint16(offset, format, true);
  offset += 2;
  view.setUint16(offset, numChannels, true);
  offset += 2;
  view.setUint32(offset, sampleRate, true);
  offset += 4;
  view.setUint32(offset, sampleRate * numChannels * bytesPerSample, true); // byte rate
  offset += 4;
  view.setUint16(offset, numChannels * bytesPerSample, true); // block align
  offset += 2;
  view.setUint16(offset, bitDepth, true);
  offset += 2;
  // ---- data chunk ----
  writeString('data');
  view.setUint32(offset, dataLength, true);
  offset += 4;

  // Grab each channel's samples once before interleaving.
  const channels: Float32Array[] = [];
  for (let i = 0; i < numChannels; i++) {
    channels.push(buffer.getChannelData(i));
  }

  // Interleave: for each frame, write one sample per channel.
  for (let i = 0; i < numSamples; i++) {
    for (let channel = 0; channel < numChannels; channel++) {
      let sample = channels[channel][i];
      // Clamp to the valid [-1, 1] range before encoding.
      sample = Math.max(-1, Math.min(1, sample));
      if (options.float32) {
        view.setFloat32(offset, sample, true);
        offset += 4;
      } else {
        // Asymmetric scaling: negatives map to [-32768, 0), positives to [0, 32767].
        const intSample = sample < 0 ? sample * 0x8000 : sample * 0x7fff;
        view.setInt16(offset, intSample, true);
        offset += 2;
      }
    }
  }

  return arrayBuffer;
}
|
|
|
export const blobFromAudioBuffer = (audioBuffer: AudioBuffer): Blob => { |
|
|
|
const wavArrayBuffer = audioBufferToWav(audioBuffer, { float32: false }); |
|
return new Blob([wavArrayBuffer], { type: 'audio/wav' }); |
|
}; |
|
|
|
export function audioBufferToMp3(buffer: AudioBuffer): ArrayBuffer { |
|
const numChannels = buffer.numberOfChannels; |
|
const sampleRate = buffer.sampleRate; |
|
const bitRate = 128; |
|
|
|
|
|
|
|
const mp3encoder = new lamejs.Mp3Encoder( |
|
numChannels >= 2 ? 2 : 1, |
|
sampleRate, |
|
bitRate |
|
); |
|
|
|
const samples = buffer.length; |
|
const chunkSize = 1152; |
|
|
|
|
|
const channels: Float32Array[] = []; |
|
for (let ch = 0; ch < numChannels; ch++) { |
|
channels.push(buffer.getChannelData(ch)); |
|
} |
|
|
|
const mp3Data: Uint8Array[] = []; |
|
|
|
|
|
if (numChannels === 1) { |
|
for (let i = 0; i < samples; i += chunkSize) { |
|
const sampleChunk = channels[0].subarray(i, i + chunkSize); |
|
const int16Buffer = floatTo16BitPCM(sampleChunk); |
|
const mp3buf = mp3encoder.encodeBuffer(int16Buffer); |
|
if (mp3buf.length > 0) { |
|
mp3Data.push(new Uint8Array(mp3buf)); |
|
} |
|
} |
|
} else { |
|
|
|
const left = channels[0]; |
|
const right = channels[1]; |
|
for (let i = 0; i < samples; i += chunkSize) { |
|
const leftChunk = left.subarray(i, i + chunkSize); |
|
const rightChunk = right.subarray(i, i + chunkSize); |
|
const leftInt16 = floatTo16BitPCM(leftChunk); |
|
const rightInt16 = floatTo16BitPCM(rightChunk); |
|
const mp3buf = mp3encoder.encodeBuffer(leftInt16, rightInt16); |
|
if (mp3buf.length > 0) { |
|
mp3Data.push(new Uint8Array(mp3buf)); |
|
} |
|
} |
|
} |
|
|
|
|
|
const endBuf = mp3encoder.flush(); |
|
if (endBuf.length > 0) { |
|
mp3Data.push(new Uint8Array(endBuf)); |
|
} |
|
|
|
|
|
const totalLength = mp3Data.reduce((acc, curr) => acc + curr.length, 0); |
|
const result = new Uint8Array(totalLength); |
|
let offset = 0; |
|
for (const chunk of mp3Data) { |
|
result.set(chunk, offset); |
|
offset += chunk.length; |
|
} |
|
|
|
return result.buffer; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
function floatTo16BitPCM(input: Float32Array): Int16Array { |
|
const output = new Int16Array(input.length); |
|
for (let i = 0; i < input.length; i++) { |
|
const s = Math.max(-1, Math.min(1, input[i])); |
|
output[i] = s < 0 ? s * 0x8000 : s * 0x7fff; |
|
} |
|
return output; |
|
} |
|
|
|
|
|
export const cleanupFilename = (name: string): string => { |
|
return name.replace(/[^a-zA-Z0-9-_]/g, '_'); |
|
}; |
|
|