Prof-Hunt committed on
Commit
34cbe36
·
verified ·
1 Parent(s): 16bffa4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -45
app.py CHANGED
@@ -11,7 +11,7 @@ import re
11
  import psutil
12
  from datetime import datetime
13
  import spaces
14
- from kokoro import KPipeline
15
  import soundfile as sf
16
 
17
  def clear_memory():
@@ -156,14 +156,15 @@ def analyze_image(image):
156
  def generate_story(image_description):
157
  clear_memory()
158
 
159
- story_prompt = f"""Write a short children's story (one chapter, about 500 words) based on this scene: {image_description}
160
 
161
  Requirements:
162
  1. Main character: An English bulldog named Champ
163
  2. Include these values: confidence, teamwork, caring, and hope
164
  3. Theme: "Doing the right thing is important"
165
  4. Keep it simple and engaging for young children
166
- 5. End with a simple moral lesson"""
 
167
 
168
  try:
169
  messages = [{"role": "user", "content": story_prompt}]
@@ -484,49 +485,39 @@ def overlay_text_on_image(image, text):
484
  print(f"Error in overlay_text_on_image: {e}")
485
  return None
486
 
487
- def generate_combined_audio_from_story(story_text, voice='af_heart', speed=1):
488
- print("Starting audio generation...")
489
- # Split story into paragraphs (reuse logic from generate_image_prompts)
490
- paragraphs = []
491
- current_paragraph = []
492
-
493
- for line in story_text.split('\n'):
494
- line = line.strip()
495
- if not line:
496
- if current_paragraph:
497
- paragraphs.append(' '.join(current_paragraph))
498
- current_paragraph = []
499
- else:
500
- current_paragraph.append(line)
501
-
502
- if current_paragraph:
503
- paragraphs.append(' '.join(current_paragraph))
504
-
505
- print(f"Found {len(paragraphs)} paragraphs")
506
-
507
- combined_audio = []
508
- for i, paragraph in enumerate(paragraphs):
509
- if not paragraph.strip():
510
- continue
511
 
512
- print(f"Processing paragraph {i+1}: {paragraph[:100]}...")
513
- generator = pipeline(
514
- paragraph,
515
- voice=voice,
516
- speed=speed,
517
- split_pattern=r'\n+'
518
- )
519
- print(f"Generator created for paragraph {i+1}")
520
- for batch_idx, metadata, audio in generator:
521
- print(f"Got audio batch {batch_idx}, length: {len(audio) if audio is not None else 0}")
522
- combined_audio.extend(audio)
523
-
524
- print("Converting to array...")
525
- combined_audio = np.array(combined_audio)
526
- filename = "combined_story.wav"
527
- sf.write(filename, combined_audio, 24000)
528
- clear_memory()
529
- return filename
 
 
 
 
 
 
 
 
 
 
 
 
530
 
531
  # Helper functions
532
  def clean_story_output(story):
 
11
  import psutil
12
  from datetime import datetime
13
  import spaces
14
+ from kokoro import KModel, KPipeline
15
  import soundfile as sf
16
 
17
  def clear_memory():
 
156
  def generate_story(image_description):
157
  clear_memory()
158
 
159
+ story_prompt = f"""Write a short children's story (about 500 words) based on this scene: {image_description}
160
 
161
  Requirements:
162
  1. Main character: An English bulldog named Champ
163
  2. Include these values: confidence, teamwork, caring, and hope
164
  3. Theme: "Doing the right thing is important"
165
  4. Keep it simple and engaging for young children
166
+ 5. End with a simple moral lesson
167
+ 6. The paragraphs are three sentences"""
168
 
169
  try:
170
  messages = [{"role": "user", "content": story_prompt}]
 
485
  print(f"Error in overlay_text_on_image: {e}")
486
  return None
487
 
488
+ models = {gpu: KModel().to('cuda' if gpu else 'cpu').eval() for gpu in [False] + ([True] if torch.cuda.is_available() else [])}
489
+ pipelines = {lang_code: KPipeline(lang_code=lang_code, model=False) for lang_code in 'ab'}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
 
491
+ @spaces.GPU(duration=30)
492
+ def forward_gpu(ps, ref_s, speed):
493
+ return models[True](ps, ref_s, speed)
494
+
495
+ def generate_combined_audio_from_story(text, voice='af_heart', speed=1):
496
+ pipeline = pipelines[voice[0]]
497
+ pack = pipeline.load_voice(voice)
498
+ use_gpu = torch.cuda.is_available()
499
+
500
+ combined_audio = []
501
+ for _, ps, _ in pipeline(text, voice, speed):
502
+ ref_s = pack[len(ps)-1]
503
+ try:
504
+ if use_gpu:
505
+ audio = forward_gpu(ps, ref_s, speed)
506
+ else:
507
+ audio = models[False](ps, ref_s, speed)
508
+ combined_audio.extend(audio.numpy())
509
+ except Exception as e:
510
+ print(f"Error: {e}")
511
+ if use_gpu:
512
+ print("Retrying with CPU")
513
+ audio = models[False](ps, ref_s, speed)
514
+ combined_audio.extend(audio.numpy())
515
+
516
+ if combined_audio:
517
+ filename = "combined_story.wav"
518
+ sf.write(filename, np.array(combined_audio), 24000)
519
+ return filename
520
+ return None
521
 
522
  # Helper functions
523
  def clean_story_output(story):