Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -11,7 +11,7 @@ import re
|
|
11 |
import psutil
|
12 |
from datetime import datetime
|
13 |
import spaces
|
14 |
-
from kokoro import KPipeline
|
15 |
import soundfile as sf
|
16 |
|
17 |
def clear_memory():
|
@@ -156,14 +156,15 @@ def analyze_image(image):
|
|
156 |
def generate_story(image_description):
|
157 |
clear_memory()
|
158 |
|
159 |
-
story_prompt = f"""Write a short children's story (
|
160 |
|
161 |
Requirements:
|
162 |
1. Main character: An English bulldog named Champ
|
163 |
2. Include these values: confidence, teamwork, caring, and hope
|
164 |
3. Theme: "Doing the right thing is important"
|
165 |
4. Keep it simple and engaging for young children
|
166 |
-
5. End with a simple moral lesson
|
|
|
167 |
|
168 |
try:
|
169 |
messages = [{"role": "user", "content": story_prompt}]
|
@@ -484,49 +485,39 @@ def overlay_text_on_image(image, text):
|
|
484 |
print(f"Error in overlay_text_on_image: {e}")
|
485 |
return None
|
486 |
|
487 |
-
|
488 |
-
|
489 |
-
# Split story into paragraphs (reuse logic from generate_image_prompts)
|
490 |
-
paragraphs = []
|
491 |
-
current_paragraph = []
|
492 |
-
|
493 |
-
for line in story_text.split('\n'):
|
494 |
-
line = line.strip()
|
495 |
-
if not line:
|
496 |
-
if current_paragraph:
|
497 |
-
paragraphs.append(' '.join(current_paragraph))
|
498 |
-
current_paragraph = []
|
499 |
-
else:
|
500 |
-
current_paragraph.append(line)
|
501 |
-
|
502 |
-
if current_paragraph:
|
503 |
-
paragraphs.append(' '.join(current_paragraph))
|
504 |
-
|
505 |
-
print(f"Found {len(paragraphs)} paragraphs")
|
506 |
-
|
507 |
-
combined_audio = []
|
508 |
-
for i, paragraph in enumerate(paragraphs):
|
509 |
-
if not paragraph.strip():
|
510 |
-
continue
|
511 |
|
512 |
-
|
513 |
-
|
514 |
-
|
515 |
-
|
516 |
-
|
517 |
-
|
518 |
-
|
519 |
-
|
520 |
-
|
521 |
-
|
522 |
-
|
523 |
-
|
524 |
-
|
525 |
-
|
526 |
-
|
527 |
-
|
528 |
-
|
529 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
530 |
|
531 |
# Helper functions
|
532 |
def clean_story_output(story):
|
|
|
11 |
import psutil
|
12 |
from datetime import datetime
|
13 |
import spaces
|
14 |
+
from kokoro import KModel, KPipeline
|
15 |
import soundfile as sf
|
16 |
|
17 |
def clear_memory():
|
|
|
156 |
def generate_story(image_description):
|
157 |
clear_memory()
|
158 |
|
159 |
+
story_prompt = f"""Write a short children's story (about 500 words) based on this scene: {image_description}
|
160 |
|
161 |
Requirements:
|
162 |
1. Main character: An English bulldog named Champ
|
163 |
2. Include these values: confidence, teamwork, caring, and hope
|
164 |
3. Theme: "Doing the right thing is important"
|
165 |
4. Keep it simple and engaging for young children
|
166 |
+
5. End with a simple moral lesson
|
167 |
+
6. The paragraphs are three sentences"""
|
168 |
|
169 |
try:
|
170 |
messages = [{"role": "user", "content": story_prompt}]
|
|
|
485 |
print(f"Error in overlay_text_on_image: {e}")
|
486 |
return None
|
487 |
|
488 |
+
# One KModel per device, keyed by a "runs on GPU" flag: False -> CPU copy,
# True -> CUDA copy. The True entry is only built when torch reports that CUDA
# is available. Every model is switched to eval() mode.
models = {
    on_gpu: KModel().to('cuda' if on_gpu else 'cpu').eval()
    for on_gpu in ([False, True] if torch.cuda.is_available() else [False])
}

# One KPipeline per language code, built with model=False.
# NOTE(review): 'a' / 'b' are presumably Kokoro's American / British English
# codes, and model=False presumably stops the pipeline from loading its own
# model (inference goes through `models` instead) -- confirm against kokoro.
pipelines = {
    code: KPipeline(lang_code=code, model=False)
    for code in ('a', 'b')
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
490 |
|
491 |
+
@spaces.GPU(duration=30)
def forward_gpu(ps, ref_s, speed):
    """Run a single synthesis call on the GPU-resident model.

    Looks up the model stored under the ``True`` key of the module-level
    ``models`` mapping and calls it with the arguments unchanged.

    NOTE(review): the ``spaces.GPU(duration=30)`` decorator presumably
    requests a ZeroGPU allocation of up to 30 seconds for the call --
    confirm against the ``spaces`` package documentation.

    Args:
        ps: Passed through to the model as its first positional argument.
        ref_s: Passed through to the model as its second positional argument.
        speed: Passed through to the model as its third positional argument.

    Returns:
        Whatever the GPU model returns for ``(ps, ref_s, speed)``.
    """
    gpu_model = models[True]
    return gpu_model(ps, ref_s, speed)
|
494 |
+
|
495 |
+
def generate_combined_audio_from_story(text, voice='af_heart', speed=1):
    """Synthesize ``text`` to speech and write it to a single WAV file.

    The pipeline selected by the first character of ``voice`` splits the text
    into segments; each segment is run through the GPU model when CUDA is
    available (falling back to the CPU model if the GPU call fails) or through
    the CPU model otherwise. All resulting audio chunks are joined and written
    to ``combined_story.wav`` with a sample rate of 24000.

    Args:
        text: The story text to narrate.
        voice: Voice identifier; its first character is the key used to pick
            the pipeline from ``pipelines``.
        speed: Speed value forwarded to both the pipeline and the model.

    Returns:
        The output filename (``"combined_story.wav"``) when at least one audio
        sample was produced, otherwise ``None``.

    Raises:
        KeyError: If ``voice[0]`` is not a key of ``pipelines``.
        Exception: Whatever the CPU retry raises after a failed GPU attempt;
            that second failure is not caught.
    """
    pipeline = pipelines[voice[0]]
    pack = pipeline.load_voice(voice)
    use_gpu = torch.cuda.is_available()

    # Keep one array per segment and join once at the end. Extending a Python
    # list sample-by-sample boxes every sample as a separate object, which is
    # slow and memory-hungry for minutes of 24 kHz audio.
    chunks = []
    for _, ps, _ in pipeline(text, voice, speed):
        # NOTE(review): presumably `ps` is the segment's phoneme string and the
        # voice pack is indexed by its length -- confirm against kokoro.
        ref_s = pack[len(ps) - 1]
        try:
            if use_gpu:
                audio = forward_gpu(ps, ref_s, speed)
            else:
                audio = models[False](ps, ref_s, speed)
            chunk = audio.numpy()
        except Exception as e:
            print(f"Error: {e}")
            if not use_gpu:
                # Best effort: a failing segment on the CPU path is skipped.
                continue
            print("Retrying with CPU")
            chunk = models[False](ps, ref_s, speed).numpy()
        chunks.append(chunk)

    if not chunks:
        return None

    waveform = np.concatenate(chunks)
    if waveform.size == 0:
        # Nothing audible was produced; do not write an empty file.
        return None

    filename = "combined_story.wav"
    sf.write(filename, waveform, 24000)
    return filename
|
521 |
|
522 |
# Helper functions
|
523 |
def clean_story_output(story):
|