ngxson HF Staff committed on
Commit
16826f3
·
1 Parent(s): a414844

add noise for super real experience

Browse files
front/src/components/PodcastGenerator.tsx CHANGED
@@ -3,11 +3,14 @@ import { AudioPlayer } from './AudioPlayer';
3
  import { Podcast, PodcastTurn } from '../utils/types';
4
  import { parse } from 'yaml';
5
  import {
 
6
  generateAudio,
7
  joinAudio,
8
  loadWavAndDecode,
9
  pickRand,
10
  } from '../utils/utils';
 
 
11
  import openingSoundSrc from '../opening-sound.wav';
12
 
13
  interface GenerationStep {
@@ -144,6 +147,7 @@ export const PodcastGenerator = ({
144
  }
145
  setNumStepsDone(i + 1);
146
  }
 
147
  setWav(outputWav! ?? null);
148
  } catch (e) {
149
  console.error(e);
 
3
  import { Podcast, PodcastTurn } from '../utils/types';
4
  import { parse } from 'yaml';
5
  import {
6
+ addNoise,
7
  generateAudio,
8
  joinAudio,
9
  loadWavAndDecode,
10
  pickRand,
11
  } from '../utils/utils';
12
+
13
+ // taken from https://freesound.org/people/artxmp1/sounds/660540
14
  import openingSoundSrc from '../opening-sound.wav';
15
 
16
  interface GenerationStep {
 
147
  }
148
  setNumStepsDone(i + 1);
149
  }
150
+ outputWav = addNoise(outputWav!, 0.002);
151
  setWav(outputWav! ?? null);
152
  } catch (e) {
153
  console.error(e);
front/src/components/ScriptMaker.tsx CHANGED
@@ -164,6 +164,7 @@ export const ScriptMaker = ({
164
  className="select select-bordered"
165
  value={model}
166
  onChange={(e) => setModel(e.target.value)}
 
167
  >
168
  {CONFIG.inferenceProviderModels.map((s) => (
169
  <option key={s} value={s}>
 
164
  className="select select-bordered"
165
  value={model}
166
  onChange={(e) => setModel(e.target.value)}
167
+ disabled={isGenerating || busy}
168
  >
169
  {CONFIG.inferenceProviderModels.map((s) => (
170
  <option key={s} value={s}>
front/src/utils/prompts.ts CHANGED
@@ -1,7 +1,7 @@
1
  export const getPromptGeneratePodcastScript = (content: string, note: string) =>
2
  `
3
 
4
- You are a podcast script writter. You only output content in YAML format. Given a raw unstructured content, think about a plan, then think more detailed how words can be written as pronunciations then write the podcast script in YAML format. Please also take into account the note from the podcast producer.
5
 
6
  Some rules:
7
  - Must output YAML format, must be wrapped inside mardown code block.
@@ -39,10 +39,10 @@ turns:
39
  [END OF EXAMPLE]
40
 
41
  The example above is truncated at index 1, REMEMBER TO CREATE AT LEAST 20 TURNS.
42
- The output text will be passed to TTS engine, make sure to be clean:
43
  - Write NUMBER and abbreviations as WORDS, as they are pronounced
44
  - For some less-common abbreviations, write the full words
45
- - Use ... for pauses, " and ' and ! and ? for intonation
46
  - IMPORTANT!! Write nicknames and names as they are pronounced. For example, "lora_rank=2" becomes "lora rank equals two", or "LoRA" becomes "Lo Ra", or "CrossEntropyLoss" becomes "Cross Entropy Loss", or "6GB" becomes "six gigabytes", "A6000" becomes "A six thousands"
47
 
48
  Make it engaging and have fun!
@@ -57,4 +57,6 @@ ${content}
57
  ${note.length < 1 ? '(No note provided)' : note}
58
  [END OF NOTE]
59
 
 
 
60
  `.trim();
 
1
  export const getPromptGeneratePodcastScript = (content: string, note: string) =>
2
  `
3
 
4
+ You are a podcast script writter. You only output content in YAML format. Given a raw unstructured content, think about a detailed plan, then think more detailed how words can be written as pronunciations then write the podcast script in YAML format. Please also take into account the note from the podcast producer.
5
 
6
  Some rules:
7
  - Must output YAML format, must be wrapped inside mardown code block.
 
39
  [END OF EXAMPLE]
40
 
41
  The example above is truncated at index 1, REMEMBER TO CREATE AT LEAST 20 TURNS.
42
+ The output text will be passed to TTS engine, make sure to be clean and natural:
43
  - Write NUMBER and abbreviations as WORDS, as they are pronounced
44
  - For some less-common abbreviations, write the full words
45
+ - Use ... for pauses (IMPORTANT to add pauses), " and ' and ! and ? for intonation
46
  - IMPORTANT!! Write nicknames and names as they are pronounced. For example, "lora_rank=2" becomes "lora rank equals two", or "LoRA" becomes "Lo Ra", or "CrossEntropyLoss" becomes "Cross Entropy Loss", or "6GB" becomes "six gigabytes", "A6000" becomes "A six thousands"
47
 
48
  Make it engaging and have fun!
 
57
  ${note.length < 1 ? '(No note provided)' : note}
58
  [END OF NOTE]
59
 
60
+ Now, think about a detailed plan.
61
+
62
  `.trim();
front/src/utils/utils.ts CHANGED
@@ -217,6 +217,31 @@ export const joinAudio = (
217
  return newBuffer;
218
  };
219
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  ////////////////////////////////////////
221
  // Audio formatting utils
222
 
 
217
  return newBuffer;
218
  };
219
 
220
/**
 * Returns a copy of `audioBuffer` with uniform white noise mixed into every
 * channel. The input buffer is never modified.
 *
 * Each output sample is `input + noise`, where `noise` is drawn independently
 * per sample and per channel from the range [-|magnitude|, +|magnitude|].
 * The sum is then limited so that noise alone can never push a sample past
 * the nominal Web Audio range [-1, 1]: a sample that was already outside that
 * range is allowed to stay as loud as it was, but not louder.
 *
 * @param audioBuffer - Source audio; its length, channel count and sample
 *   rate are carried over to the returned buffer.
 * @param magnitude - Peak amplitude of the noise. `0` yields an exact copy.
 * @returns A newly allocated `AudioBuffer` holding the noisy audio.
 * @throws RangeError if `magnitude` is NaN or infinite, since that would
 *   otherwise silently turn every sample into NaN/Infinity.
 */
export const addNoise = (
  audioBuffer: AudioBuffer,
  magnitude: number
): AudioBuffer => {
  if (!Number.isFinite(magnitude)) {
    throw new RangeError(
      `addNoise: magnitude must be a finite number, got ${magnitude}`
    );
  }

  const { numberOfChannels, sampleRate, length } = audioBuffer;
  const newBuffer = new AudioBuffer({
    length,
    numberOfChannels,
    sampleRate,
  });

  for (let channel = 0; channel < numberOfChannels; channel++) {
    const inputData = audioBuffer.getChannelData(channel);
    const outputData = newBuffer.getChannelData(channel);

    for (let i = 0; i < length; i++) {
      const sample = inputData[i];
      // Generate white noise in the range [-magnitude, +magnitude].
      const noise = (Math.random() * 2 - 1) * magnitude;
      // Full scale is 1, unless the source sample itself was already hotter;
      // in that case keep its own peak as the ceiling instead of altering it.
      const limit = Math.max(1, Math.abs(sample));
      outputData[i] = Math.min(limit, Math.max(-limit, sample + noise));
    }
  }

  return newBuffer;
};
244
+
245
  ////////////////////////////////////////
246
  // Audio formatting utils
247
 
index.html CHANGED
@@ -14792,6 +14792,23 @@ const joinAudio = (audio1, audio2, gapSeconds, overlap = "none") => {
14792
  }
14793
  return newBuffer;
14794
  };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14795
  const loadWavAndDecode = async (url) => {
14796
  const response = await fetch(url);
14797
  const arrayBuffer = await response.arrayBuffer();
@@ -21020,6 +21037,7 @@ const PodcastGenerator = ({
21020
  }
21021
  setNumStepsDone(i + 1);
21022
  }
 
21023
  setWav(outputWav ?? null);
21024
  } catch (e) {
21025
  console.error(e);
@@ -21120,7 +21138,7 @@ const PodcastGenerator = ({
21120
  };
21121
  const getPromptGeneratePodcastScript = (content, note) => `
21122
 
21123
- You are a podcast script writter. You only output content in YAML format. Given a raw unstructured content, think about a plan, then think more detailed how words can be written as pronunciations then write the podcast script in YAML format. Please also take into account the note from the podcast producer.
21124
 
21125
  Some rules:
21126
  - Must output YAML format, must be wrapped inside mardown code block.
@@ -21158,10 +21176,10 @@ turns:
21158
  [END OF EXAMPLE]
21159
 
21160
  The example above is truncated at index 1, REMEMBER TO CREATE AT LEAST 20 TURNS.
21161
- The output text will be passed to TTS engine, make sure to be clean:
21162
  - Write NUMBER and abbreviations as WORDS, as they are pronounced
21163
  - For some less-common abbreviations, write the full words
21164
- - Use ... for pauses, " and ' and ! and ? for intonation
21165
  - IMPORTANT!! Write nicknames and names as they are pronounced. For example, "lora_rank=2" becomes "lora rank equals two", or "LoRA" becomes "Lo Ra", or "CrossEntropyLoss" becomes "Cross Entropy Loss", or "6GB" becomes "six gigabytes", "A6000" becomes "A six thousands"
21166
 
21167
  Make it engaging and have fun!
@@ -21176,6 +21194,8 @@ ${content}
21176
  ${note.length < 1 ? "(No note provided)" : note}
21177
  [END OF NOTE]
21178
 
 
 
21179
  `.trim();
21180
  const EXAMPLES = [
21181
  {
@@ -22655,6 +22675,7 @@ const ScriptMaker = ({
22655
  className: "select select-bordered",
22656
  value: model,
22657
  onChange: (e) => setModel(e.target.value),
 
22658
  children: [
22659
  CONFIG.inferenceProviderModels.map((s) => /* @__PURE__ */ jsxRuntimeExports.jsx("option", { value: s, children: s }, s)),
22660
  /* @__PURE__ */ jsxRuntimeExports.jsx("option", { value: "custom", children: "Custom" })
@@ -27814,6 +27835,21 @@ html {
27814
  border-radius: var(--rounded-box, 1rem);
27815
  background-color: var(--fallback-bc,oklch(var(--bc)/0.2));
27816
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27817
  .select {
27818
  display: inline-flex;
27819
  cursor: pointer;
@@ -28295,6 +28331,59 @@ html {
28295
  0 0 0 4px var(--fallback-b1,oklch(var(--b1)/1)) inset;
28296
  }
28297
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28298
  @keyframes rating-pop {
28299
 
28300
  0% {
 
14792
  }
14793
  return newBuffer;
14794
  };
14795
/**
 * Returns a copy of `audioBuffer` with uniform white noise mixed into every
 * channel. The input buffer is never modified.
 *
 * Each output sample is `input + noise`, with `noise` drawn independently per
 * sample from [-|magnitude|, +|magnitude|]. The sum is limited so that noise
 * alone can never push a sample past the nominal range [-1, 1]; a sample that
 * was already outside that range keeps its own peak as the ceiling.
 *
 * Throws a RangeError when `magnitude` is NaN or infinite, because that would
 * otherwise silently turn every sample into NaN/Infinity.
 */
const addNoise = (audioBuffer, magnitude) => {
  if (!Number.isFinite(magnitude)) {
    throw new RangeError(
      `addNoise: magnitude must be a finite number, got ${magnitude}`
    );
  }
  const { numberOfChannels, sampleRate, length } = audioBuffer;
  const newBuffer = new AudioBuffer({
    length,
    numberOfChannels,
    sampleRate
  });
  for (let channel = 0; channel < numberOfChannels; channel++) {
    const inputData = audioBuffer.getChannelData(channel);
    const outputData = newBuffer.getChannelData(channel);
    for (let i = 0; i < length; i++) {
      const sample = inputData[i];
      // White noise in the range [-magnitude, +magnitude].
      const noise = (Math.random() * 2 - 1) * magnitude;
      // Full scale is 1, unless the source sample itself was already hotter.
      const limit = Math.max(1, Math.abs(sample));
      outputData[i] = Math.min(limit, Math.max(-limit, sample + noise));
    }
  }
  return newBuffer;
};
14812
  const loadWavAndDecode = async (url) => {
14813
  const response = await fetch(url);
14814
  const arrayBuffer = await response.arrayBuffer();
 
21037
  }
21038
  setNumStepsDone(i + 1);
21039
  }
21040
+ outputWav = addNoise(outputWav, 2e-3);
21041
  setWav(outputWav ?? null);
21042
  } catch (e) {
21043
  console.error(e);
 
21138
  };
21139
  const getPromptGeneratePodcastScript = (content, note) => `
21140
 
21141
+ You are a podcast script writter. You only output content in YAML format. Given a raw unstructured content, think about a detailed plan, then think more detailed how words can be written as pronunciations then write the podcast script in YAML format. Please also take into account the note from the podcast producer.
21142
 
21143
  Some rules:
21144
  - Must output YAML format, must be wrapped inside mardown code block.
 
21176
  [END OF EXAMPLE]
21177
 
21178
  The example above is truncated at index 1, REMEMBER TO CREATE AT LEAST 20 TURNS.
21179
+ The output text will be passed to TTS engine, make sure to be clean and natural:
21180
  - Write NUMBER and abbreviations as WORDS, as they are pronounced
21181
  - For some less-common abbreviations, write the full words
21182
+ - Use ... for pauses (IMPORTANT to add pauses), " and ' and ! and ? for intonation
21183
  - IMPORTANT!! Write nicknames and names as they are pronounced. For example, "lora_rank=2" becomes "lora rank equals two", or "LoRA" becomes "Lo Ra", or "CrossEntropyLoss" becomes "Cross Entropy Loss", or "6GB" becomes "six gigabytes", "A6000" becomes "A six thousands"
21184
 
21185
  Make it engaging and have fun!
 
21194
  ${note.length < 1 ? "(No note provided)" : note}
21195
  [END OF NOTE]
21196
 
21197
+ Now, think about a detailed plan.
21198
+
21199
  `.trim();
21200
  const EXAMPLES = [
21201
  {
 
22675
  className: "select select-bordered",
22676
  value: model,
22677
  onChange: (e) => setModel(e.target.value),
22678
+ disabled: isGenerating || busy,
22679
  children: [
22680
  CONFIG.inferenceProviderModels.map((s) => /* @__PURE__ */ jsxRuntimeExports.jsx("option", { value: s, children: s }, s)),
22681
  /* @__PURE__ */ jsxRuntimeExports.jsx("option", { value: "custom", children: "Custom" })
 
27835
  border-radius: var(--rounded-box, 1rem);
27836
  background-color: var(--fallback-bc,oklch(var(--bc)/0.2));
27837
  }
27838
+ .range {
27839
+ height: 1.5rem;
27840
+ width: 100%;
27841
+ cursor: pointer;
27842
+ -moz-appearance: none;
27843
+ appearance: none;
27844
+ -webkit-appearance: none;
27845
+ --range-shdw: var(--fallback-bc,oklch(var(--bc)/1));
27846
+ overflow: hidden;
27847
+ border-radius: var(--rounded-box, 1rem);
27848
+ background-color: transparent;
27849
+ }
27850
+ .range:focus {
27851
+ outline: none;
27852
+ }
27853
  .select {
27854
  display: inline-flex;
27855
  cursor: pointer;
 
28331
  0 0 0 4px var(--fallback-b1,oklch(var(--b1)/1)) inset;
28332
  }
28333
  }
28334
+ .range:focus-visible::-webkit-slider-thumb {
28335
+ --focus-shadow: 0 0 0 6px var(--fallback-b1,oklch(var(--b1)/1)) inset, 0 0 0 2rem var(--range-shdw) inset;
28336
+ }
28337
+ .range:focus-visible::-moz-range-thumb {
28338
+ --focus-shadow: 0 0 0 6px var(--fallback-b1,oklch(var(--b1)/1)) inset, 0 0 0 2rem var(--range-shdw) inset;
28339
+ }
28340
+ .range::-webkit-slider-runnable-track {
28341
+ height: 0.5rem;
28342
+ width: 100%;
28343
+ border-radius: var(--rounded-box, 1rem);
28344
+ background-color: var(--fallback-bc,oklch(var(--bc)/0.1));
28345
+ }
28346
+ .range::-moz-range-track {
28347
+ height: 0.5rem;
28348
+ width: 100%;
28349
+ border-radius: var(--rounded-box, 1rem);
28350
+ background-color: var(--fallback-bc,oklch(var(--bc)/0.1));
28351
+ }
28352
+ .range::-webkit-slider-thumb {
28353
+ position: relative;
28354
+ height: 1.5rem;
28355
+ width: 1.5rem;
28356
+ border-radius: var(--rounded-box, 1rem);
28357
+ border-style: none;
28358
+ --tw-bg-opacity: 1;
28359
+ background-color: var(--fallback-b1,oklch(var(--b1)/var(--tw-bg-opacity)));
28360
+ appearance: none;
28361
+ -webkit-appearance: none;
28362
+ top: 50%;
28363
+ color: var(--range-shdw);
28364
+ transform: translateY(-50%);
28365
+ --filler-size: 100rem;
28366
+ --filler-offset: 0.6rem;
28367
+ box-shadow: 0 0 0 3px var(--range-shdw) inset,
28368
+ var(--focus-shadow, 0 0),
28369
+ calc(var(--filler-size) * -1 - var(--filler-offset)) 0 0 var(--filler-size);
28370
+ }
28371
+ .range::-moz-range-thumb {
28372
+ position: relative;
28373
+ height: 1.5rem;
28374
+ width: 1.5rem;
28375
+ border-radius: var(--rounded-box, 1rem);
28376
+ border-style: none;
28377
+ --tw-bg-opacity: 1;
28378
+ background-color: var(--fallback-b1,oklch(var(--b1)/var(--tw-bg-opacity)));
28379
+ top: 50%;
28380
+ color: var(--range-shdw);
28381
+ --filler-size: 100rem;
28382
+ --filler-offset: 0.5rem;
28383
+ box-shadow: 0 0 0 3px var(--range-shdw) inset,
28384
+ var(--focus-shadow, 0 0),
28385
+ calc(var(--filler-size) * -1 - var(--filler-offset)) 0 0 var(--filler-size);
28386
+ }
28387
  @keyframes rating-pop {
28388
 
28389
  0% {