import TextLineStream from 'textlinestream';
import { Client } from '@gradio/client';
import decodeAudio from 'audio-decode';
import { asyncIterator } from '@sec-ant/readable-stream/ponyfill/asyncIterator';

import { CONFIG } from '../config';
|
/**
 * Generate speech for `content` with the given voice via the TTS Space
 * configured in CONFIG.ttsSpaceId, returning the URL of the audio file.
 */
export const generateAudio = async (
  content: string,
  voice: string,
  speed: number = 1.1
): Promise<string> => {
  const client = await Client.connect(CONFIG.ttsSpaceId);
  const result = await client.predict('/tts', {
    text: content,
    voice,
    speed,
  });
  return (result.data as any)[0].url;
};
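
// Usage sketch: generate a line of speech and decode it for further
// processing. The voice id below is only an illustrative placeholder;
// valid values depend on the Space behind CONFIG.ttsSpaceId.
//
//   const url = await generateAudio('Hello and welcome!', 'some-voice-id');
//   const speech = await loadWavAndDecode(url);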
|
/** Return a uniformly random element of a non-empty array. */
export const pickRand = <T>(arr: T[]): T => {
  return arr[Math.floor(Math.random() * arr.length)];
};
|
/**
 * Iterate over the JSON payloads of a server-sent-event (SSE) response.
 * Yields each parsed `data:` chunk, skips the terminating `data: [DONE]`
 * sentinel, and throws if the server emits an `error:` line.
 */
export async function* getSSEStreamAsync(fetchResponse: Response) {
  if (!fetchResponse.body) throw new Error('Response body is empty');
  const lines: ReadableStream<string> = fetchResponse.body
    .pipeThrough(new TextDecoderStream())
    .pipeThrough(new TextLineStream());
  for await (const line of asyncIterator(lines)) {
    if (line.startsWith('data:') && !line.endsWith('[DONE]')) {
      const data = JSON.parse(line.slice(5));
      yield data;
    } else if (line.startsWith('error:')) {
      const data = JSON.parse(line.slice(6));
      throw new Error(data.message || 'Unknown error');
    }
  }
}
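
// Usage sketch, assuming `response` comes from a fetch() against an
// SSE-streaming endpoint (the exact endpoint and chunk shape are
// application-specific):
//
//   for await (const chunk of getSSEStreamAsync(response)) {
//     console.log(chunk); // one parsed JSON payload per `data:` line
//   }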
|
/**
 * Strip leading and trailing silence (samples below a fixed amplitude
 * threshold on every channel) from an AudioBuffer.
 */
export const trimSilence = (audioBuffer: AudioBuffer): AudioBuffer => {
  const threshold = 0.01;
  const numChannels = audioBuffer.numberOfChannels;
  const totalSamples = audioBuffer.length;

  // A sample index counts as silent only if it is below the threshold
  // on every channel.
  const isSilent = (index: number): boolean => {
    for (let channel = 0; channel < numChannels; channel++) {
      const channelData = audioBuffer.getChannelData(channel);
      if (Math.abs(channelData[index]) > threshold) {
        return false;
      }
    }
    return true;
  };

  // Scan forward to the first non-silent sample...
  let startSample = 0;
  while (startSample < totalSamples && isSilent(startSample)) {
    startSample++;
  }

  // ...and backward to the last non-silent sample.
  let endSample = totalSamples - 1;
  while (endSample >= startSample && isSilent(endSample)) {
    endSample--;
  }

  // The whole buffer is silent: return a minimal one-sample buffer,
  // since an AudioBuffer cannot have zero length.
  if (startSample >= totalSamples || endSample < startSample) {
    return new AudioBuffer({
      length: 1,
      numberOfChannels: numChannels,
      sampleRate: audioBuffer.sampleRate,
    });
  }

  const newLength = endSample - startSample + 1;
  const newBuffer = new AudioBuffer({
    length: newLength,
    numberOfChannels: numChannels,
    sampleRate: audioBuffer.sampleRate,
  });

  // Copy the non-silent region into the new buffer, channel by channel.
  for (let channel = 0; channel < numChannels; channel++) {
    const oldData = audioBuffer.getChannelData(channel);
    const newData = newBuffer.getChannelData(channel);
    for (let i = 0; i < newLength; i++) {
      newData[i] = oldData[startSample + i];
    }
  }

  return newBuffer;
};
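
// Usage sketch: trim the padding a TTS engine typically leaves around
// an utterance before stitching clips together.
//
//   const tight = trimSilence(await loadWavAndDecode(url));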
|
/**
 * Concatenate two AudioBuffers. A positive gap inserts that many seconds
 * of silence between them; a negative gap overlaps the buffers and
 * crossfades linearly over the overlapped region.
 */
export const joinAudio = (
  audio1: AudioBuffer,
  audio2: AudioBuffer,
  gapSeconds: number
): AudioBuffer => {
  const sampleRate = audio1.sampleRate;
  const numChannels = audio1.numberOfChannels;

  if (audio2.sampleRate !== sampleRate) {
    throw new Error('Audio buffers must have the same sample rate');
  }
  if (audio2.numberOfChannels !== numChannels) {
    throw new Error('Audio buffers must have the same number of channels');
  }

  let newLength: number;
  if (gapSeconds > 0) {
    // Positive gap: leave silence between the two buffers.
    const gapSamples = Math.round(gapSeconds * sampleRate);
    newLength = audio1.length + gapSamples + audio2.length;
  } else if (gapSeconds === 0) {
    // No gap: simple concatenation.
    newLength = audio1.length + audio2.length;
  } else {
    // Negative gap: overlap the buffers, clamped so the overlap never
    // exceeds the shorter of the two.
    const overlapSamplesRequested = Math.round(-gapSeconds * sampleRate);
    const effectiveOverlap = Math.min(
      overlapSamplesRequested,
      audio1.length,
      audio2.length
    );
    newLength = audio1.length + audio2.length - effectiveOverlap;
  }

  const newBuffer = new AudioBuffer({
    length: newLength,
    numberOfChannels: numChannels,
    sampleRate: sampleRate,
  });

  for (let channel = 0; channel < numChannels; channel++) {
    const outputData = newBuffer.getChannelData(channel);
    const data1 = audio1.getChannelData(channel);
    const data2 = audio2.getChannelData(channel);
    let offset = 0;

    if (gapSeconds < 0) {
      const overlapSamplesRequested = Math.round(-gapSeconds * sampleRate);
      const effectiveOverlap = Math.min(
        overlapSamplesRequested,
        audio1.length,
        audio2.length
      );

      // Copy the part of audio1 that does not overlap audio2.
      const nonOverlapLength = audio1.length - effectiveOverlap;
      outputData.set(data1.subarray(0, nonOverlapLength), offset);
      offset += nonOverlapLength;

      // Crossfade: audio1 fades out linearly while audio2 fades in.
      for (let i = 0; i < effectiveOverlap; i++) {
        const fadeOut = 1 - i / effectiveOverlap;
        const fadeIn = i / effectiveOverlap;
        outputData[offset + i] =
          data1[nonOverlapLength + i] * fadeOut + data2[i] * fadeIn;
      }
      offset += effectiveOverlap;

      // Copy the remainder of audio2 after the overlapped region.
      outputData.set(data2.subarray(effectiveOverlap), offset);
    } else if (gapSeconds === 0) {
      outputData.set(data1, offset);
      offset += audio1.length;
      outputData.set(data2, offset);
    } else {
      // Skip gapSamples of silence; the new buffer is zero-initialized.
      const gapSamples = Math.round(gapSeconds * sampleRate);
      outputData.set(data1, offset);
      offset += audio1.length;
      offset += gapSamples;
      outputData.set(data2, offset);
    }
  }

  return newBuffer;
};
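
// Usage sketch: stitch two trimmed clips with a 300 ms pause, or
// crossfade them by passing a negative gap (here 100 ms of overlap):
//
//   const withPause = joinAudio(clipA, clipB, 0.3);
//   const crossfaded = joinAudio(clipA, clipB, -0.1);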
|
/** Fetch an audio file and decode it into an AudioBuffer. */
export const loadWavAndDecode = async (url: string): Promise<AudioBuffer> => {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch audio: HTTP ${response.status}`);
  }
  const arrayBuffer = await response.arrayBuffer();
  const audioBuffer = await decodeAudio(arrayBuffer);
  return audioBuffer;
};
|
/**
 * Encode an AudioBuffer as a WAV file: 16-bit PCM by default,
 * 32-bit IEEE float when options.float32 is set.
 */
export function audioBufferToWav(
  buffer: AudioBuffer,
  options: { float32?: boolean } = {}
): ArrayBuffer {
  const numChannels = buffer.numberOfChannels;
  const sampleRate = buffer.sampleRate;
  const format = options.float32 ? 3 : 1; // 1 = PCM, 3 = IEEE float
  const bitDepth = options.float32 ? 32 : 16;

  const numSamples = buffer.length;
  const headerLength = 44;
  const bytesPerSample = bitDepth / 8;
  const dataLength = numSamples * numChannels * bytesPerSample;
  const bufferLength = headerLength + dataLength;

  const arrayBuffer = new ArrayBuffer(bufferLength);
  const view = new DataView(arrayBuffer);
  let offset = 0;

  function writeString(str: string) {
    for (let i = 0; i < str.length; i++) {
      view.setUint8(offset, str.charCodeAt(i));
      offset++;
    }
  }

  // RIFF/WAVE header followed by the "fmt " and "data" chunks.
  writeString('RIFF');
  view.setUint32(offset, 36 + dataLength, true); // file size minus 8 bytes
  offset += 4;
  writeString('WAVE');
  writeString('fmt ');
  view.setUint32(offset, 16, true); // fmt chunk size
  offset += 4;
  view.setUint16(offset, format, true);
  offset += 2;
  view.setUint16(offset, numChannels, true);
  offset += 2;
  view.setUint32(offset, sampleRate, true);
  offset += 4;
  view.setUint32(offset, sampleRate * numChannels * bytesPerSample, true); // byte rate
  offset += 4;
  view.setUint16(offset, numChannels * bytesPerSample, true); // block align
  offset += 2;
  view.setUint16(offset, bitDepth, true);
  offset += 2;
  writeString('data');
  view.setUint32(offset, dataLength, true);
  offset += 4;

  // Interleave the channel data sample by sample.
  const channels: Float32Array[] = [];
  for (let i = 0; i < numChannels; i++) {
    channels.push(buffer.getChannelData(i));
  }

  for (let i = 0; i < numSamples; i++) {
    for (let channel = 0; channel < numChannels; channel++) {
      let sample = channels[channel][i];
      // Clamp to [-1, 1] before writing.
      sample = Math.max(-1, Math.min(1, sample));
      if (options.float32) {
        view.setFloat32(offset, sample, true);
        offset += 4;
      } else {
        // Scale to the 16-bit signed integer range.
        const intSample = sample < 0 ? sample * 0x8000 : sample * 0x7fff;
        view.setInt16(offset, intSample, true);
        offset += 2;
      }
    }
  }

  return arrayBuffer;
}
|
/** Wrap an AudioBuffer in a WAV Blob (16-bit PCM) for playback or download. */
export const blobFromAudioBuffer = (audioBuffer: AudioBuffer): Blob => {
  const wavArrayBuffer = audioBufferToWav(audioBuffer, { float32: false });
  return new Blob([wavArrayBuffer], { type: 'audio/wav' });
};
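
// Usage sketch (browser-only): turn a final mix into an object URL that
// an <audio> element can play or a download link can point at.
//
//   const blobUrl = URL.createObjectURL(blobFromAudioBuffer(finalMix));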