Prof-Hunt committed on
Commit
13b2e6a
·
verified ·
1 Parent(s): 34cbe36

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -30
app.py CHANGED
@@ -485,39 +485,52 @@ def overlay_text_on_image(image, text):
485
  print(f"Error in overlay_text_on_image: {e}")
486
  return None
487
 
488
- models = {gpu: KModel().to('cuda' if gpu else 'cpu').eval() for gpu in [False] + ([True] if torch.cuda.is_available() else [])}
489
- pipelines = {lang_code: KPipeline(lang_code=lang_code, model=False) for lang_code in 'ab'}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
 
491
- @spaces.GPU(duration=30)
492
- def forward_gpu(ps, ref_s, speed):
493
- return models[True](ps, ref_s, speed)
 
 
 
494
 
495
- def generate_combined_audio_from_story(text, voice='af_heart', speed=1):
496
- pipeline = pipelines[voice[0]]
497
- pack = pipeline.load_voice(voice)
498
- use_gpu = torch.cuda.is_available()
 
 
499
 
500
- combined_audio = []
501
- for _, ps, _ in pipeline(text, voice, speed):
502
- ref_s = pack[len(ps)-1]
503
- try:
504
- if use_gpu:
505
- audio = forward_gpu(ps, ref_s, speed)
506
- else:
507
- audio = models[False](ps, ref_s, speed)
508
- combined_audio.extend(audio.numpy())
509
- except Exception as e:
510
- print(f"Error: {e}")
511
- if use_gpu:
512
- print("Retrying with CPU")
513
- audio = models[False](ps, ref_s, speed)
514
- combined_audio.extend(audio.numpy())
515
-
516
- if combined_audio:
517
- filename = "combined_story.wav"
518
- sf.write(filename, np.array(combined_audio), 24000)
519
- return filename
520
- return None
521
 
522
  # Helper functions
523
  def clean_story_output(story):
 
485
  print(f"Error in overlay_text_on_image: {e}")
486
  return None
487
 
488
@spaces.GPU(duration=60)
def generate_combined_audio_from_story(story_text, voice='af_heart', speed=1):
    """Generate a single audio file covering every paragraph of the story.

    Args:
        story_text: Full story text; blank lines mark paragraph breaks.
        voice: Voice identifier forwarded to the TTS pipeline.
        speed: Speech-rate multiplier forwarded to the TTS pipeline.

    Returns:
        Path to the combined WAV file, or None when the story produced
        no audio (e.g. empty or whitespace-only input).
    """
    # Split the story into paragraphs on blank lines, collapsing each
    # paragraph's internal line breaks into single spaces.
    paragraphs = []
    current_paragraph = []
    for line in story_text.split('\n'):
        line = line.strip()
        if not line:  # Empty line indicates a paragraph break
            if current_paragraph:
                paragraphs.append(' '.join(current_paragraph))
                current_paragraph = []
        else:
            current_paragraph.append(line)
    if current_paragraph:
        paragraphs.append(' '.join(current_paragraph))

    # Synthesize each paragraph and accumulate the raw samples.
    combined_audio = []
    for paragraph in paragraphs:
        if not paragraph.strip():
            continue  # Skip empty paragraphs

        # NOTE(review): `pipeline` is assumed to be a module-level KPipeline
        # instance defined elsewhere in app.py — confirm it exists after this
        # commit removed the old `pipelines` dict.
        generator = pipeline(
            paragraph,
            voice=voice,
            speed=speed,
            split_pattern=r'\n+'  # Split on newlines (paragraphs are single-line here)
        )

        for _, _, audio in generator:
            combined_audio.extend(audio)  # Append audio data

    # Bug fix: the previous revision returned None when no audio was
    # generated; this rewrite had lost that guard and would write a
    # zero-length WAV. Restore the None return so callers can detect
    # the "no audio" case.
    if not combined_audio:
        clear_memory()
        return None

    # Save the combined samples as a 24 kHz WAV in the temp directory.
    filename = "/tmp/combined_story.wav"
    sf.write(filename, np.array(combined_audio), 24000)

    clear_memory()
    return filename  # Returned for the Gradio File component
 
 
 
 
 
 
 
 
 
534
 
535
  # Helper functions
536
  def clean_story_output(story):