|
|
|
import TextLineStream from 'textlinestream';
|
|
import { Client } from '@gradio/client';
|
|
import * as lamejs from '@breezystack/lamejs';
|
|
|
|
|
|
import { asyncIterator } from '@sec-ant/readable-stream/ponyfill/asyncIterator';
|
|
import { CONFIG } from '../config';
|
|
import { uploadFiles } from '@huggingface/hub';
|
|
import { denoiseBuffer } from 'audio-denoiser';
|
|
|
|
|
|
// True when the bundler runs in development mode (import.meta.env.MODE —
// presumably Vite; confirm against the build setup).
export const isDev: boolean = import.meta.env.MODE === 'development';

// Token injected at build time from the VITE_TEST_TOKEN environment variable.
// NOTE(review): assumed to be a test-only credential — verify it is never
// embedded in production builds.
export const testToken: string = import.meta.env.VITE_TEST_TOKEN;

// "Blog mode" is toggled simply by the current page URL containing "blogmode".
export const isBlogMode: boolean = !!window.location.href.match(/blogmode/);
|
|
|
|
|
|
export const generateAudio = async (
|
|
content: string,
|
|
voice: string,
|
|
speed: number = 1.1
|
|
): Promise<string> => {
|
|
const maxRetries = 3;
|
|
for (let i = 0; i < maxRetries; i++) {
|
|
try {
|
|
const client = await Client.connect(CONFIG.ttsSpaceId);
|
|
const result = await client.predict('/tts', {
|
|
text: content,
|
|
voice,
|
|
speed,
|
|
});
|
|
|
|
console.log(result.data);
|
|
return (result.data as any)[0].url;
|
|
} catch (e) {
|
|
if (i === maxRetries - 1) {
|
|
throw e;
|
|
}
|
|
console.error('Failed to generate audio, retrying...', e);
|
|
}
|
|
continue;
|
|
}
|
|
return '';
|
|
};
|
|
|
|
export const pickRand = <T>(arr: T[]): T => {
|
|
return arr[Math.floor(Math.random() * arr.length)];
|
|
};
|
|
|
|
|
|
export async function* getSSEStreamAsync(fetchResponse: Response) {
|
|
if (!fetchResponse.body) throw new Error('Response body is empty');
|
|
const lines: ReadableStream<string> = fetchResponse.body
|
|
.pipeThrough(new TextDecoderStream())
|
|
.pipeThrough(new TextLineStream());
|
|
|
|
for await (const line of asyncIterator(lines)) {
|
|
|
|
if (line.startsWith('data:') && !line.endsWith('[DONE]')) {
|
|
const data = JSON.parse(line.slice(5));
|
|
yield data;
|
|
} else if (line.startsWith('error:')) {
|
|
const data = JSON.parse(line.slice(6));
|
|
throw new Error(data.message || 'Unknown error');
|
|
}
|
|
}
|
|
}
|
|
|
|
export const uploadFileToHub = async (
|
|
buf: ArrayBuffer,
|
|
filename: string,
|
|
repoId: string,
|
|
hfToken: string
|
|
) => {
|
|
await uploadFiles({
|
|
accessToken: hfToken,
|
|
repo: repoId,
|
|
files: [
|
|
{
|
|
path: filename,
|
|
content: new Blob([buf], { type: 'audio/wav' }),
|
|
},
|
|
],
|
|
});
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
export const trimSilence = (audioBuffer: AudioBuffer): AudioBuffer => {
|
|
const threshold = 0.01;
|
|
const numChannels = audioBuffer.numberOfChannels;
|
|
const totalSamples = audioBuffer.length;
|
|
|
|
|
|
const isSilent = (index: number): boolean => {
|
|
for (let channel = 0; channel < numChannels; channel++) {
|
|
const channelData = audioBuffer.getChannelData(channel);
|
|
if (Math.abs(channelData[index]) > threshold) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
};
|
|
|
|
|
|
let startSample = 0;
|
|
while (startSample < totalSamples && isSilent(startSample)) {
|
|
startSample++;
|
|
}
|
|
|
|
|
|
let endSample = totalSamples - 1;
|
|
while (endSample >= startSample && isSilent(endSample)) {
|
|
endSample--;
|
|
}
|
|
|
|
|
|
if (startSample >= totalSamples || endSample < startSample) {
|
|
return new AudioBuffer({
|
|
length: 1,
|
|
numberOfChannels: numChannels,
|
|
sampleRate: audioBuffer.sampleRate,
|
|
});
|
|
}
|
|
|
|
const newLength = endSample - startSample + 1;
|
|
const newBuffer = new AudioBuffer({
|
|
length: newLength,
|
|
numberOfChannels: numChannels,
|
|
sampleRate: audioBuffer.sampleRate,
|
|
});
|
|
|
|
|
|
for (let channel = 0; channel < numChannels; channel++) {
|
|
const oldData = audioBuffer.getChannelData(channel);
|
|
const newData = newBuffer.getChannelData(channel);
|
|
for (let i = 0; i < newLength; i++) {
|
|
newData[i] = oldData[startSample + i];
|
|
}
|
|
}
|
|
|
|
return newBuffer;
|
|
};
|
|
|
|
export const joinAudio = (
|
|
audio1: AudioBuffer,
|
|
audio2: AudioBuffer,
|
|
gapMilisecs: number,
|
|
overlap: 'none' | 'cross-fade' = 'none'
|
|
): AudioBuffer => {
|
|
const sampleRate = audio1.sampleRate;
|
|
const numChannels = audio1.numberOfChannels;
|
|
|
|
|
|
if (audio2.sampleRate !== sampleRate) {
|
|
throw new Error('Audio buffers must have the same sample rate');
|
|
}
|
|
if (audio2.numberOfChannels !== numChannels) {
|
|
throw new Error('Audio buffers must have the same number of channels');
|
|
}
|
|
|
|
const gapSeconds = gapMilisecs / 1000;
|
|
let newLength: number;
|
|
|
|
if (gapSeconds > 0) {
|
|
|
|
const gapSamples = Math.round(gapSeconds * sampleRate);
|
|
newLength = audio1.length + gapSamples + audio2.length;
|
|
} else if (gapSeconds === 0) {
|
|
|
|
newLength = audio1.length + audio2.length;
|
|
} else {
|
|
|
|
const overlapSamplesRequested = Math.round(-gapSeconds * sampleRate);
|
|
|
|
const effectiveOverlap = Math.min(
|
|
overlapSamplesRequested,
|
|
audio1.length,
|
|
audio2.length
|
|
);
|
|
newLength = audio1.length + audio2.length - effectiveOverlap;
|
|
}
|
|
|
|
|
|
const newBuffer = new AudioBuffer({
|
|
length: newLength,
|
|
numberOfChannels: numChannels,
|
|
sampleRate: sampleRate,
|
|
});
|
|
|
|
|
|
for (let channel = 0; channel < numChannels; channel++) {
|
|
const outputData = newBuffer.getChannelData(channel);
|
|
const data1 = audio1.getChannelData(channel);
|
|
const data2 = audio2.getChannelData(channel);
|
|
let offset = 0;
|
|
|
|
if (gapSeconds < 0) {
|
|
|
|
const overlapSamplesRequested = Math.round(-gapSeconds * sampleRate);
|
|
const effectiveOverlap = Math.min(
|
|
overlapSamplesRequested,
|
|
audio1.length,
|
|
audio2.length
|
|
);
|
|
|
|
|
|
const nonOverlapLength = audio1.length - effectiveOverlap;
|
|
outputData.set(data1.subarray(0, nonOverlapLength), offset);
|
|
offset += nonOverlapLength;
|
|
|
|
|
|
if (overlap === 'cross-fade') {
|
|
for (let i = 0; i < effectiveOverlap; i++) {
|
|
|
|
const fadeOut = 1 - i / effectiveOverlap;
|
|
const fadeIn = i / effectiveOverlap;
|
|
outputData[offset + i] =
|
|
data1[nonOverlapLength + i] * fadeOut + data2[i] * fadeIn;
|
|
}
|
|
} else {
|
|
for (let i = 0; i < effectiveOverlap; i++) {
|
|
outputData[offset + i] = data1[nonOverlapLength + i] + data2[i];
|
|
}
|
|
}
|
|
offset += effectiveOverlap;
|
|
|
|
|
|
outputData.set(data2.subarray(effectiveOverlap), offset);
|
|
} else if (gapSeconds === 0) {
|
|
|
|
outputData.set(data1, offset);
|
|
offset += audio1.length;
|
|
outputData.set(data2, offset);
|
|
} else {
|
|
|
|
const gapSamples = Math.round(gapSeconds * sampleRate);
|
|
outputData.set(data1, offset);
|
|
offset += audio1.length;
|
|
|
|
|
|
offset += gapSamples;
|
|
|
|
outputData.set(data2, offset);
|
|
}
|
|
}
|
|
|
|
return newBuffer;
|
|
};
|
|
|
|
export const addNoise = (
|
|
audioBuffer: AudioBuffer,
|
|
magnitude: number
|
|
): AudioBuffer => {
|
|
const { numberOfChannels, sampleRate, length } = audioBuffer;
|
|
const newBuffer = new AudioBuffer({
|
|
length,
|
|
numberOfChannels,
|
|
sampleRate,
|
|
});
|
|
|
|
for (let channel = 0; channel < numberOfChannels; channel++) {
|
|
const inputData = audioBuffer.getChannelData(channel);
|
|
const outputData = newBuffer.getChannelData(channel);
|
|
|
|
for (let i = 0; i < length; i++) {
|
|
|
|
const noise = (Math.random() * 2 - 1) * magnitude;
|
|
outputData[i] = inputData[i] + noise;
|
|
}
|
|
}
|
|
|
|
return newBuffer;
|
|
};
|
|
|
|
export const addSilence = (
|
|
audioBuffer: AudioBuffer,
|
|
toBeginning: boolean,
|
|
durationMilisecs: number
|
|
): AudioBuffer => {
|
|
|
|
const sampleRate = audioBuffer.sampleRate;
|
|
const silenceSamples = Math.round((durationMilisecs / 1000) * sampleRate);
|
|
const numChannels = audioBuffer.numberOfChannels;
|
|
const originalLength = audioBuffer.length;
|
|
const newLength = originalLength + silenceSamples;
|
|
|
|
|
|
const newBuffer = new AudioBuffer({
|
|
length: newLength,
|
|
numberOfChannels: numChannels,
|
|
sampleRate: sampleRate,
|
|
});
|
|
|
|
|
|
for (let channel = 0; channel < numChannels; channel++) {
|
|
const originalData = audioBuffer.getChannelData(channel);
|
|
const newData = newBuffer.getChannelData(channel);
|
|
|
|
if (toBeginning) {
|
|
|
|
newData.set(originalData, silenceSamples);
|
|
} else {
|
|
|
|
newData.set(originalData, 0);
|
|
}
|
|
}
|
|
|
|
return newBuffer;
|
|
};
|
|
|
|
export const denoiseAudioBuffer = async (audioBuffer: AudioBuffer): Promise<AudioBuffer> => {
|
|
try {
|
|
console.log("Denoising audio...");
|
|
const denoisedBuffer = await denoiseBuffer(audioBuffer);
|
|
if (!denoisedBuffer) {
|
|
console.warn("Denoising returned null. Returning original buffer.");
|
|
return audioBuffer;
|
|
}
|
|
|
|
return denoisedBuffer;
|
|
} catch (error) {
|
|
console.error("Error during denoising:", error);
|
|
|
|
return audioBuffer;
|
|
}
|
|
};
|
|
|
|
|
|
|
|
|
|
export const loadWavAndDecode = async (url: string): Promise<AudioBuffer> => {
|
|
const response = await fetch(url);
|
|
const arrayBuffer = await response.arrayBuffer();
|
|
|
|
const AudioContext = window.AudioContext || window.webkitAudioContext;
|
|
if (!AudioContext) {
|
|
throw new Error('AudioContext is not supported on this browser');
|
|
}
|
|
const audioCtx = new AudioContext();
|
|
let audioBuffer = await audioCtx.decodeAudioData(arrayBuffer);
|
|
|
|
if (audioBuffer.numberOfChannels > 1) {
|
|
const monoBuffer = new AudioContext().createBuffer(
|
|
1,
|
|
audioBuffer.length,
|
|
audioBuffer.sampleRate
|
|
);
|
|
const monoData = monoBuffer.getChannelData(0);
|
|
for (let i = 0; i < audioBuffer.length; i++) {
|
|
let sum = 0;
|
|
for (let channel = 0; channel < audioBuffer.numberOfChannels; channel++) {
|
|
sum += audioBuffer.getChannelData(channel)[i];
|
|
}
|
|
monoData[i] = sum / audioBuffer.numberOfChannels;
|
|
}
|
|
audioBuffer = monoBuffer;
|
|
}
|
|
return audioBuffer;
|
|
};
|
|
|
|
export function audioBufferToWav(
|
|
buffer: AudioBuffer,
|
|
options: { float32?: boolean } = {}
|
|
): ArrayBuffer {
|
|
const numChannels = buffer.numberOfChannels;
|
|
const sampleRate = buffer.sampleRate;
|
|
const format = options.float32 ? 3 : 1;
|
|
const bitDepth = options.float32 ? 32 : 16;
|
|
|
|
const numSamples = buffer.length;
|
|
const headerLength = 44;
|
|
const bytesPerSample = bitDepth / 8;
|
|
const dataLength = numSamples * numChannels * bytesPerSample;
|
|
const bufferLength = headerLength + dataLength;
|
|
|
|
const arrayBuffer = new ArrayBuffer(bufferLength);
|
|
const view = new DataView(arrayBuffer);
|
|
let offset = 0;
|
|
|
|
function writeString(str: string) {
|
|
for (let i = 0; i < str.length; i++) {
|
|
view.setUint8(offset, str.charCodeAt(i));
|
|
offset++;
|
|
}
|
|
}
|
|
|
|
|
|
writeString('RIFF');
|
|
view.setUint32(offset, 36 + dataLength, true);
|
|
offset += 4;
|
|
writeString('WAVE');
|
|
writeString('fmt ');
|
|
view.setUint32(offset, 16, true);
|
|
offset += 4;
|
|
view.setUint16(offset, format, true);
|
|
offset += 2;
|
|
view.setUint16(offset, numChannels, true);
|
|
offset += 2;
|
|
view.setUint32(offset, sampleRate, true);
|
|
offset += 4;
|
|
view.setUint32(offset, sampleRate * numChannels * bytesPerSample, true);
|
|
offset += 4;
|
|
view.setUint16(offset, numChannels * bytesPerSample, true);
|
|
offset += 2;
|
|
view.setUint16(offset, bitDepth, true);
|
|
offset += 2;
|
|
writeString('data');
|
|
view.setUint32(offset, dataLength, true);
|
|
offset += 4;
|
|
|
|
|
|
const channels: Float32Array[] = [];
|
|
for (let i = 0; i < numChannels; i++) {
|
|
channels.push(buffer.getChannelData(i));
|
|
}
|
|
|
|
for (let i = 0; i < numSamples; i++) {
|
|
for (let channel = 0; channel < numChannels; channel++) {
|
|
let sample = channels[channel][i];
|
|
|
|
sample = Math.max(-1, Math.min(1, sample));
|
|
if (options.float32) {
|
|
view.setFloat32(offset, sample, true);
|
|
offset += 4;
|
|
} else {
|
|
|
|
const intSample = sample < 0 ? sample * 0x8000 : sample * 0x7fff;
|
|
view.setInt16(offset, intSample, true);
|
|
offset += 2;
|
|
}
|
|
}
|
|
}
|
|
|
|
return arrayBuffer;
|
|
}
|
|
|
|
export const blobFromAudioBuffer = (audioBuffer: AudioBuffer): Blob => {
|
|
|
|
const wavArrayBuffer = audioBufferToWav(audioBuffer, { float32: false });
|
|
return new Blob([wavArrayBuffer], { type: 'audio/wav' });
|
|
};
|
|
|
|
export function audioBufferToMp3(buffer: AudioBuffer): ArrayBuffer {
|
|
const numChannels = buffer.numberOfChannels;
|
|
const sampleRate = buffer.sampleRate;
|
|
const bitRate = 128;
|
|
|
|
|
|
|
|
const mp3encoder = new lamejs.Mp3Encoder(
|
|
numChannels >= 2 ? 2 : 1,
|
|
sampleRate,
|
|
bitRate
|
|
);
|
|
|
|
const samples = buffer.length;
|
|
const chunkSize = 1152;
|
|
|
|
|
|
const channels: Float32Array[] = [];
|
|
for (let ch = 0; ch < numChannels; ch++) {
|
|
channels.push(buffer.getChannelData(ch));
|
|
}
|
|
|
|
const mp3Data: Uint8Array[] = [];
|
|
|
|
|
|
if (numChannels === 1) {
|
|
for (let i = 0; i < samples; i += chunkSize) {
|
|
const sampleChunk = channels[0].subarray(i, i + chunkSize);
|
|
const int16Buffer = floatTo16BitPCM(sampleChunk);
|
|
const mp3buf = mp3encoder.encodeBuffer(int16Buffer);
|
|
if (mp3buf.length > 0) {
|
|
mp3Data.push(new Uint8Array(mp3buf));
|
|
}
|
|
}
|
|
} else {
|
|
|
|
const left = channels[0];
|
|
const right = channels[1];
|
|
for (let i = 0; i < samples; i += chunkSize) {
|
|
const leftChunk = left.subarray(i, i + chunkSize);
|
|
const rightChunk = right.subarray(i, i + chunkSize);
|
|
const leftInt16 = floatTo16BitPCM(leftChunk);
|
|
const rightInt16 = floatTo16BitPCM(rightChunk);
|
|
const mp3buf = mp3encoder.encodeBuffer(leftInt16, rightInt16);
|
|
if (mp3buf.length > 0) {
|
|
mp3Data.push(new Uint8Array(mp3buf));
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
const endBuf = mp3encoder.flush();
|
|
if (endBuf.length > 0) {
|
|
mp3Data.push(new Uint8Array(endBuf));
|
|
}
|
|
|
|
|
|
const totalLength = mp3Data.reduce((acc, curr) => acc + curr.length, 0);
|
|
const result = new Uint8Array(totalLength);
|
|
let offset = 0;
|
|
for (const chunk of mp3Data) {
|
|
result.set(chunk, offset);
|
|
offset += chunk.length;
|
|
}
|
|
|
|
return result.buffer;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
function floatTo16BitPCM(input: Float32Array): Int16Array {
|
|
const output = new Int16Array(input.length);
|
|
for (let i = 0; i < input.length; i++) {
|
|
const s = Math.max(-1, Math.min(1, input[i]));
|
|
output[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
|
|
}
|
|
return output;
|
|
} |