Spaces:

Prof-Hunt
/

TECH_TALES

Runtime error

App Files Files Community

Prof-Hunt committed on Jan 31

Commit

34cbe36

verified ·

1 Parent(s): 16bffa4

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -45

app.py CHANGED Viewed

@@ -11,7 +11,7 @@ import re
 import psutil
 from datetime import datetime
 import spaces
-from kokoro import KPipeline
 import soundfile as sf
 def clear_memory():
@@ -156,14 +156,15 @@ def analyze_image(image):
 def generate_story(image_description):
     clear_memory()
-    story_prompt = f"""Write a short children's story (one chapter, about 500 words) based on this scene: {image_description}
     Requirements:
     1. Main character: An English bulldog named Champ
     2. Include these values: confidence, teamwork, caring, and hope
     3. Theme: "Doing the right thing is important"
     4. Keep it simple and engaging for young children
-    5. End with a simple moral lesson"""
     try:
         messages = [{"role": "user", "content": story_prompt}]
@@ -484,49 +485,39 @@ def overlay_text_on_image(image, text):
         print(f"Error in overlay_text_on_image: {e}")
         return None
-def generate_combined_audio_from_story(story_text, voice='af_heart', speed=1):
-    print("Starting audio generation...")
-    # Split story into paragraphs (reuse logic from generate_image_prompts)
-    paragraphs = []
-    current_paragraph = []
-    for line in story_text.split('\n'):
-        line = line.strip()
-        if not line:
-            if current_paragraph:
-                paragraphs.append(' '.join(current_paragraph))
-                current_paragraph = []
-        else:
-            current_paragraph.append(line)
-    if current_paragraph:
-        paragraphs.append(' '.join(current_paragraph))
-    print(f"Found {len(paragraphs)} paragraphs")
-    combined_audio = []
-    for i, paragraph in enumerate(paragraphs):
-        if not paragraph.strip():
-            continue
-        print(f"Processing paragraph {i+1}: {paragraph[:100]}...")
-        generator = pipeline(
-            paragraph,
-            voice=voice,
-            speed=speed,
-            split_pattern=r'\n+'
-        )
-        print(f"Generator created for paragraph {i+1}")
-        for batch_idx, metadata, audio in generator:
-            print(f"Got audio batch {batch_idx}, length: {len(audio) if audio is not None else 0}")
-            combined_audio.extend(audio)
-    print("Converting to array...")
-    combined_audio = np.array(combined_audio)
-    filename = "combined_story.wav"
-    sf.write(filename, combined_audio, 24000)
-    clear_memory()
-    return filename
 # Helper functions
 def clean_story_output(story):

 import psutil
 from datetime import datetime
 import spaces
+from kokoro import KModel, KPipeline
 import soundfile as sf
 def clear_memory():
 def generate_story(image_description):
     clear_memory()
+    story_prompt = f"""Write a short children's story (about 500 words) based on this scene: {image_description}
     Requirements:
     1. Main character: An English bulldog named Champ
     2. Include these values: confidence, teamwork, caring, and hope
     3. Theme: "Doing the right thing is important"
     4. Keep it simple and engaging for young children
+    5. End with a simple moral lesson
+    6. The paragraphs are three sentences"""
     try:
         messages = [{"role": "user", "content": story_prompt}]
         print(f"Error in overlay_text_on_image: {e}")
         return None
+models = {gpu: KModel().to('cuda' if gpu else 'cpu').eval() for gpu in [False] + ([True] if torch.cuda.is_available() else [])}
+pipelines = {lang_code: KPipeline(lang_code=lang_code, model=False) for lang_code in 'ab'}
+@spaces.GPU(duration=30)
+def forward_gpu(ps, ref_s, speed):
+    return models[True](ps, ref_s, speed)
+def generate_combined_audio_from_story(text, voice='af_heart', speed=1):
+    pipeline = pipelines[voice[0]]
+    pack = pipeline.load_voice(voice)
+    use_gpu = torch.cuda.is_available()
+    combined_audio = []
+    for _, ps, _ in pipeline(text, voice, speed):
+        ref_s = pack[len(ps)-1]
+        try:
+            if use_gpu:
+                audio = forward_gpu(ps, ref_s, speed)
+            else:
+                audio = models[False](ps, ref_s, speed)
+            combined_audio.extend(audio.numpy())
+        except Exception as e:
+            print(f"Error: {e}")
+            if use_gpu:
+                print("Retrying with CPU")
+                audio = models[False](ps, ref_s, speed)
+                combined_audio.extend(audio.numpy())
+    if combined_audio:
+        filename = "combined_story.wav"
+        sf.write(filename, np.array(combined_audio), 24000)
+        return filename
+    return None
 # Helper functions
 def clean_story_output(story):