Add background noise for a more realistic listening experience
Browse files- front/src/components/PodcastGenerator.tsx +4 -0
- front/src/components/ScriptMaker.tsx +1 -0
- front/src/utils/prompts.ts +5 -3
- front/src/utils/utils.ts +25 -0
- index.html +92 -3
front/src/components/PodcastGenerator.tsx
CHANGED
@@ -3,11 +3,14 @@ import { AudioPlayer } from './AudioPlayer';
|
|
3 |
import { Podcast, PodcastTurn } from '../utils/types';
|
4 |
import { parse } from 'yaml';
|
5 |
import {
|
|
|
6 |
generateAudio,
|
7 |
joinAudio,
|
8 |
loadWavAndDecode,
|
9 |
pickRand,
|
10 |
} from '../utils/utils';
|
|
|
|
|
11 |
import openingSoundSrc from '../opening-sound.wav';
|
12 |
|
13 |
interface GenerationStep {
|
@@ -144,6 +147,7 @@ export const PodcastGenerator = ({
|
|
144 |
}
|
145 |
setNumStepsDone(i + 1);
|
146 |
}
|
|
|
147 |
setWav(outputWav! ?? null);
|
148 |
} catch (e) {
|
149 |
console.error(e);
|
|
|
3 |
import { Podcast, PodcastTurn } from '../utils/types';
|
4 |
import { parse } from 'yaml';
|
5 |
import {
|
6 |
+
addNoise,
|
7 |
generateAudio,
|
8 |
joinAudio,
|
9 |
loadWavAndDecode,
|
10 |
pickRand,
|
11 |
} from '../utils/utils';
|
12 |
+
|
13 |
+
// taken from https://freesound.org/people/artxmp1/sounds/660540
|
14 |
import openingSoundSrc from '../opening-sound.wav';
|
15 |
|
16 |
interface GenerationStep {
|
|
|
147 |
}
|
148 |
setNumStepsDone(i + 1);
|
149 |
}
|
150 |
+
outputWav = addNoise(outputWav!, 0.002);
|
151 |
setWav(outputWav! ?? null);
|
152 |
} catch (e) {
|
153 |
console.error(e);
|
front/src/components/ScriptMaker.tsx
CHANGED
@@ -164,6 +164,7 @@ export const ScriptMaker = ({
|
|
164 |
className="select select-bordered"
|
165 |
value={model}
|
166 |
onChange={(e) => setModel(e.target.value)}
|
|
|
167 |
>
|
168 |
{CONFIG.inferenceProviderModels.map((s) => (
|
169 |
<option key={s} value={s}>
|
|
|
164 |
className="select select-bordered"
|
165 |
value={model}
|
166 |
onChange={(e) => setModel(e.target.value)}
|
167 |
+
disabled={isGenerating || busy}
|
168 |
>
|
169 |
{CONFIG.inferenceProviderModels.map((s) => (
|
170 |
<option key={s} value={s}>
|
front/src/utils/prompts.ts
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
export const getPromptGeneratePodcastScript = (content: string, note: string) =>
|
2 |
`
|
3 |
|
4 |
-
You are a podcast script writter. You only output content in YAML format. Given a raw unstructured content, think about a plan, then think more detailed how words can be written as pronunciations then write the podcast script in YAML format. Please also take into account the note from the podcast producer.
|
5 |
|
6 |
Some rules:
|
7 |
- Must output YAML format, must be wrapped inside mardown code block.
|
@@ -39,10 +39,10 @@ turns:
|
|
39 |
[END OF EXAMPLE]
|
40 |
|
41 |
The example above is truncated at index 1, REMEMBER TO CREATE AT LEAST 20 TURNS.
|
42 |
-
The output text will be passed to TTS engine, make sure to be clean:
|
43 |
- Write NUMBER and abbreviations as WORDS, as they are pronounced
|
44 |
- For some less-common abbreviations, write the full words
|
45 |
-
- Use ... for pauses, " and ' and ! and ? for intonation
|
46 |
- IMPORTANT!! Write nicknames and names as they are pronounced. For example, "lora_rank=2" becomes "lora rank equals two", or "LoRA" becomes "Lo Ra", or "CrossEntropyLoss" becomes "Cross Entropy Loss", or "6GB" becomes "six gigabytes", "A6000" becomes "A six thousands"
|
47 |
|
48 |
Make it engaging and have fun!
|
@@ -57,4 +57,6 @@ ${content}
|
|
57 |
${note.length < 1 ? '(No note provided)' : note}
|
58 |
[END OF NOTE]
|
59 |
|
|
|
|
|
60 |
`.trim();
|
|
|
1 |
export const getPromptGeneratePodcastScript = (content: string, note: string) =>
|
2 |
`
|
3 |
|
4 |
+
You are a podcast script writter. You only output content in YAML format. Given a raw unstructured content, think about a detailed plan, then think more detailed how words can be written as pronunciations then write the podcast script in YAML format. Please also take into account the note from the podcast producer.
|
5 |
|
6 |
Some rules:
|
7 |
- Must output YAML format, must be wrapped inside mardown code block.
|
|
|
39 |
[END OF EXAMPLE]
|
40 |
|
41 |
The example above is truncated at index 1, REMEMBER TO CREATE AT LEAST 20 TURNS.
|
42 |
+
The output text will be passed to TTS engine, make sure to be clean and natural:
|
43 |
- Write NUMBER and abbreviations as WORDS, as they are pronounced
|
44 |
- For some less-common abbreviations, write the full words
|
45 |
+
- Use ... for pauses (IMPORTANT to add pauses), " and ' and ! and ? for intonation
|
46 |
- IMPORTANT!! Write nicknames and names as they are pronounced. For example, "lora_rank=2" becomes "lora rank equals two", or "LoRA" becomes "Lo Ra", or "CrossEntropyLoss" becomes "Cross Entropy Loss", or "6GB" becomes "six gigabytes", "A6000" becomes "A six thousands"
|
47 |
|
48 |
Make it engaging and have fun!
|
|
|
57 |
${note.length < 1 ? '(No note provided)' : note}
|
58 |
[END OF NOTE]
|
59 |
|
60 |
+
Now, think about a detailed plan.
|
61 |
+
|
62 |
`.trim();
|
front/src/utils/utils.ts
CHANGED
@@ -217,6 +217,31 @@ export const joinAudio = (
|
|
217 |
return newBuffer;
|
218 |
};
|
219 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
////////////////////////////////////////
|
221 |
// Audio formatting utils
|
222 |
|
|
|
217 |
return newBuffer;
|
218 |
};
|
219 |
|
220 |
+
/**
 * Returns a copy of `audioBuffer` with uniform white noise mixed into every
 * channel. The input buffer is never modified.
 *
 * @param audioBuffer - Source audio whose samples are copied into a new buffer.
 * @param magnitude - Peak noise amplitude: each sample is offset by a value
 *   drawn uniformly from [-magnitude, +magnitude). `0` yields an exact copy.
 * @returns A new `AudioBuffer` with the same length, channel count and sample
 *   rate as the input.
 * @throws RangeError if `magnitude` is NaN or infinite, which would otherwise
 *   silently turn every output sample into NaN/Infinity.
 */
export const addNoise = (
  audioBuffer: AudioBuffer,
  magnitude: number
): AudioBuffer => {
  if (!Number.isFinite(magnitude)) {
    throw new RangeError(
      `addNoise: magnitude must be a finite number, got ${magnitude}`
    );
  }

  const { numberOfChannels, sampleRate, length } = audioBuffer;
  const newBuffer = new AudioBuffer({
    length,
    numberOfChannels,
    sampleRate,
  });

  for (let channel = 0; channel < numberOfChannels; channel++) {
    const inputData = audioBuffer.getChannelData(channel);
    const outputData = newBuffer.getChannelData(channel);

    if (magnitude === 0) {
      // Nothing to mix in: plain copy, no per-sample random draws needed.
      outputData.set(inputData);
      continue;
    }

    for (let i = 0; i < length; i++) {
      // Generate white noise in the range [-magnitude, +magnitude].
      const noise = (Math.random() * 2 - 1) * magnitude;
      // NOTE(review): the sum is not clamped to [-1, 1]; presumably the WAV
      // encoder downstream clamps samples — confirm before raising magnitude.
      outputData[i] = inputData[i] + noise;
    }
  }

  return newBuffer;
};
|
244 |
+
|
245 |
////////////////////////////////////////
|
246 |
// Audio formatting utils
|
247 |
|
index.html
CHANGED
@@ -14792,6 +14792,23 @@ const joinAudio = (audio1, audio2, gapSeconds, overlap = "none") => {
|
|
14792 |
}
|
14793 |
return newBuffer;
|
14794 |
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14795 |
const loadWavAndDecode = async (url) => {
|
14796 |
const response = await fetch(url);
|
14797 |
const arrayBuffer = await response.arrayBuffer();
|
@@ -21020,6 +21037,7 @@ const PodcastGenerator = ({
|
|
21020 |
}
|
21021 |
setNumStepsDone(i + 1);
|
21022 |
}
|
|
|
21023 |
setWav(outputWav ?? null);
|
21024 |
} catch (e) {
|
21025 |
console.error(e);
|
@@ -21120,7 +21138,7 @@ const PodcastGenerator = ({
|
|
21120 |
};
|
21121 |
const getPromptGeneratePodcastScript = (content, note) => `
|
21122 |
|
21123 |
-
You are a podcast script writter. You only output content in YAML format. Given a raw unstructured content, think about a plan, then think more detailed how words can be written as pronunciations then write the podcast script in YAML format. Please also take into account the note from the podcast producer.
|
21124 |
|
21125 |
Some rules:
|
21126 |
- Must output YAML format, must be wrapped inside mardown code block.
|
@@ -21158,10 +21176,10 @@ turns:
|
|
21158 |
[END OF EXAMPLE]
|
21159 |
|
21160 |
The example above is truncated at index 1, REMEMBER TO CREATE AT LEAST 20 TURNS.
|
21161 |
-
The output text will be passed to TTS engine, make sure to be clean:
|
21162 |
- Write NUMBER and abbreviations as WORDS, as they are pronounced
|
21163 |
- For some less-common abbreviations, write the full words
|
21164 |
-
- Use ... for pauses, " and ' and ! and ? for intonation
|
21165 |
- IMPORTANT!! Write nicknames and names as they are pronounced. For example, "lora_rank=2" becomes "lora rank equals two", or "LoRA" becomes "Lo Ra", or "CrossEntropyLoss" becomes "Cross Entropy Loss", or "6GB" becomes "six gigabytes", "A6000" becomes "A six thousands"
|
21166 |
|
21167 |
Make it engaging and have fun!
|
@@ -21176,6 +21194,8 @@ ${content}
|
|
21176 |
${note.length < 1 ? "(No note provided)" : note}
|
21177 |
[END OF NOTE]
|
21178 |
|
|
|
|
|
21179 |
`.trim();
|
21180 |
const EXAMPLES = [
|
21181 |
{
|
@@ -22655,6 +22675,7 @@ const ScriptMaker = ({
|
|
22655 |
className: "select select-bordered",
|
22656 |
value: model,
|
22657 |
onChange: (e) => setModel(e.target.value),
|
|
|
22658 |
children: [
|
22659 |
CONFIG.inferenceProviderModels.map((s) => /* @__PURE__ */ jsxRuntimeExports.jsx("option", { value: s, children: s }, s)),
|
22660 |
/* @__PURE__ */ jsxRuntimeExports.jsx("option", { value: "custom", children: "Custom" })
|
@@ -27814,6 +27835,21 @@ html {
|
|
27814 |
border-radius: var(--rounded-box, 1rem);
|
27815 |
background-color: var(--fallback-bc,oklch(var(--bc)/0.2));
|
27816 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27817 |
.select {
|
27818 |
display: inline-flex;
|
27819 |
cursor: pointer;
|
@@ -28295,6 +28331,59 @@ html {
|
|
28295 |
0 0 0 4px var(--fallback-b1,oklch(var(--b1)/1)) inset;
|
28296 |
}
|
28297 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28298 |
@keyframes rating-pop {
|
28299 |
|
28300 |
0% {
|
|
|
14792 |
}
|
14793 |
return newBuffer;
|
14794 |
};
|
14795 |
+
/**
 * Returns a copy of `audioBuffer` with uniform white noise mixed into every
 * channel; the input buffer is never modified.
 *
 * Each sample is offset by a value drawn uniformly from
 * [-magnitude, +magnitude). A magnitude of 0 yields an exact copy.
 * Throws RangeError when `magnitude` is NaN or infinite, which would otherwise
 * silently turn every output sample into NaN/Infinity.
 */
const addNoise = (audioBuffer, magnitude) => {
  if (!Number.isFinite(magnitude)) {
    throw new RangeError(
      `addNoise: magnitude must be a finite number, got ${magnitude}`
    );
  }
  const { numberOfChannels, sampleRate, length } = audioBuffer;
  const newBuffer = new AudioBuffer({
    length,
    numberOfChannels,
    sampleRate
  });
  for (let channel = 0; channel < numberOfChannels; channel++) {
    const inputData = audioBuffer.getChannelData(channel);
    const outputData = newBuffer.getChannelData(channel);
    if (magnitude === 0) {
      // Nothing to mix in: plain copy, no per-sample random draws needed.
      outputData.set(inputData);
      continue;
    }
    for (let i = 0; i < length; i++) {
      const noise = (Math.random() * 2 - 1) * magnitude;
      outputData[i] = inputData[i] + noise;
    }
  }
  return newBuffer;
};
|
14812 |
const loadWavAndDecode = async (url) => {
|
14813 |
const response = await fetch(url);
|
14814 |
const arrayBuffer = await response.arrayBuffer();
|
|
|
21037 |
}
|
21038 |
setNumStepsDone(i + 1);
|
21039 |
}
|
21040 |
+
outputWav = addNoise(outputWav, 2e-3);
|
21041 |
setWav(outputWav ?? null);
|
21042 |
} catch (e) {
|
21043 |
console.error(e);
|
|
|
21138 |
};
|
21139 |
const getPromptGeneratePodcastScript = (content, note) => `
|
21140 |
|
21141 |
+
You are a podcast script writter. You only output content in YAML format. Given a raw unstructured content, think about a detailed plan, then think more detailed how words can be written as pronunciations then write the podcast script in YAML format. Please also take into account the note from the podcast producer.
|
21142 |
|
21143 |
Some rules:
|
21144 |
- Must output YAML format, must be wrapped inside mardown code block.
|
|
|
21176 |
[END OF EXAMPLE]
|
21177 |
|
21178 |
The example above is truncated at index 1, REMEMBER TO CREATE AT LEAST 20 TURNS.
|
21179 |
+
The output text will be passed to TTS engine, make sure to be clean and natural:
|
21180 |
- Write NUMBER and abbreviations as WORDS, as they are pronounced
|
21181 |
- For some less-common abbreviations, write the full words
|
21182 |
+
- Use ... for pauses (IMPORTANT to add pauses), " and ' and ! and ? for intonation
|
21183 |
- IMPORTANT!! Write nicknames and names as they are pronounced. For example, "lora_rank=2" becomes "lora rank equals two", or "LoRA" becomes "Lo Ra", or "CrossEntropyLoss" becomes "Cross Entropy Loss", or "6GB" becomes "six gigabytes", "A6000" becomes "A six thousands"
|
21184 |
|
21185 |
Make it engaging and have fun!
|
|
|
21194 |
${note.length < 1 ? "(No note provided)" : note}
|
21195 |
[END OF NOTE]
|
21196 |
|
21197 |
+
Now, think about a detailed plan.
|
21198 |
+
|
21199 |
`.trim();
|
21200 |
const EXAMPLES = [
|
21201 |
{
|
|
|
22675 |
className: "select select-bordered",
|
22676 |
value: model,
|
22677 |
onChange: (e) => setModel(e.target.value),
|
22678 |
+
disabled: isGenerating || busy,
|
22679 |
children: [
|
22680 |
CONFIG.inferenceProviderModels.map((s) => /* @__PURE__ */ jsxRuntimeExports.jsx("option", { value: s, children: s }, s)),
|
22681 |
/* @__PURE__ */ jsxRuntimeExports.jsx("option", { value: "custom", children: "Custom" })
|
|
|
27835 |
border-radius: var(--rounded-box, 1rem);
|
27836 |
background-color: var(--fallback-bc,oklch(var(--bc)/0.2));
|
27837 |
}
|
27838 |
+
.range {
|
27839 |
+
height: 1.5rem;
|
27840 |
+
width: 100%;
|
27841 |
+
cursor: pointer;
|
27842 |
+
-moz-appearance: none;
|
27843 |
+
appearance: none;
|
27844 |
+
-webkit-appearance: none;
|
27845 |
+
--range-shdw: var(--fallback-bc,oklch(var(--bc)/1));
|
27846 |
+
overflow: hidden;
|
27847 |
+
border-radius: var(--rounded-box, 1rem);
|
27848 |
+
background-color: transparent;
|
27849 |
+
}
|
27850 |
+
.range:focus {
|
27851 |
+
outline: none;
|
27852 |
+
}
|
27853 |
.select {
|
27854 |
display: inline-flex;
|
27855 |
cursor: pointer;
|
|
|
28331 |
0 0 0 4px var(--fallback-b1,oklch(var(--b1)/1)) inset;
|
28332 |
}
|
28333 |
}
|
28334 |
+
.range:focus-visible::-webkit-slider-thumb {
|
28335 |
+
--focus-shadow: 0 0 0 6px var(--fallback-b1,oklch(var(--b1)/1)) inset, 0 0 0 2rem var(--range-shdw) inset;
|
28336 |
+
}
|
28337 |
+
.range:focus-visible::-moz-range-thumb {
|
28338 |
+
--focus-shadow: 0 0 0 6px var(--fallback-b1,oklch(var(--b1)/1)) inset, 0 0 0 2rem var(--range-shdw) inset;
|
28339 |
+
}
|
28340 |
+
.range::-webkit-slider-runnable-track {
|
28341 |
+
height: 0.5rem;
|
28342 |
+
width: 100%;
|
28343 |
+
border-radius: var(--rounded-box, 1rem);
|
28344 |
+
background-color: var(--fallback-bc,oklch(var(--bc)/0.1));
|
28345 |
+
}
|
28346 |
+
.range::-moz-range-track {
|
28347 |
+
height: 0.5rem;
|
28348 |
+
width: 100%;
|
28349 |
+
border-radius: var(--rounded-box, 1rem);
|
28350 |
+
background-color: var(--fallback-bc,oklch(var(--bc)/0.1));
|
28351 |
+
}
|
28352 |
+
.range::-webkit-slider-thumb {
|
28353 |
+
position: relative;
|
28354 |
+
height: 1.5rem;
|
28355 |
+
width: 1.5rem;
|
28356 |
+
border-radius: var(--rounded-box, 1rem);
|
28357 |
+
border-style: none;
|
28358 |
+
--tw-bg-opacity: 1;
|
28359 |
+
background-color: var(--fallback-b1,oklch(var(--b1)/var(--tw-bg-opacity)));
|
28360 |
+
appearance: none;
|
28361 |
+
-webkit-appearance: none;
|
28362 |
+
top: 50%;
|
28363 |
+
color: var(--range-shdw);
|
28364 |
+
transform: translateY(-50%);
|
28365 |
+
--filler-size: 100rem;
|
28366 |
+
--filler-offset: 0.6rem;
|
28367 |
+
box-shadow: 0 0 0 3px var(--range-shdw) inset,
|
28368 |
+
var(--focus-shadow, 0 0),
|
28369 |
+
calc(var(--filler-size) * -1 - var(--filler-offset)) 0 0 var(--filler-size);
|
28370 |
+
}
|
28371 |
+
.range::-moz-range-thumb {
|
28372 |
+
position: relative;
|
28373 |
+
height: 1.5rem;
|
28374 |
+
width: 1.5rem;
|
28375 |
+
border-radius: var(--rounded-box, 1rem);
|
28376 |
+
border-style: none;
|
28377 |
+
--tw-bg-opacity: 1;
|
28378 |
+
background-color: var(--fallback-b1,oklch(var(--b1)/var(--tw-bg-opacity)));
|
28379 |
+
top: 50%;
|
28380 |
+
color: var(--range-shdw);
|
28381 |
+
--filler-size: 100rem;
|
28382 |
+
--filler-offset: 0.5rem;
|
28383 |
+
box-shadow: 0 0 0 3px var(--range-shdw) inset,
|
28384 |
+
var(--focus-shadow, 0 0),
|
28385 |
+
calc(var(--filler-size) * -1 - var(--filler-offset)) 0 0 var(--filler-size);
|
28386 |
+
}
|
28387 |
@keyframes rating-pop {
|
28388 |
|
28389 |
0% {
|