Spaces:

Prof-Hunt
/

TECH_TALES

Runtime error

App Files Files Community

Prof-Hunt committed on Jan 31

Commit

86b2b12

verified ·

1 Parent(s): 8231cd6

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -119

app.py CHANGED Viewed

@@ -485,129 +485,44 @@ def overlay_text_on_image(image, text):
         return None
 def generate_combined_audio_from_story(story_text, voice='af_heart', speed=1):
-    """Generate audio for the story with improved error handling and debugging"""
-    clear_memory()
-    if not story_text:
-        print("No story text provided")
-        return None
-    print(f"Generating audio for story of length: {len(story_text)}")
-    # Clean up text and split into manageable chunks
-    paragraphs = [p.strip() for p in story_text.split('\n\n') if p.strip()]
-    if not paragraphs:
-        print("No valid paragraphs found in story")
-        return None
-    print(f"Processing {len(paragraphs)} paragraphs")
     combined_audio = []
-    try:
-        for i, paragraph in enumerate(paragraphs):
-            if not paragraph.strip():
-                continue
-            print(f"Processing paragraph {i+1}/{len(paragraphs)}")
-            print(f"Paragraph length: {len(paragraph)}")
-            print(f"Paragraph text: {paragraph[:100]}...")
-            try:
-                sentences = [s.strip() for s in paragraph.split('.') if s.strip()]
-                print(f"Split into {len(sentences)} sentences")
-                for j, sentence in enumerate(sentences):
-                    print(f"Processing sentence {j+1}/{len(sentences)}")
-                    print(f"Sentence length: {len(sentence)}")
-                    try:
-                        generator = pipeline(
-                            sentence + '.',
-                            voice=voice,
-                            speed=speed,
-                            split_pattern=r'\n+'
-                        )
-                        if generator is None:
-                            print(f"Warning: Generator returned None for sentence: {sentence[:50]}...")
-                            continue
-                        for batch_idx, metadata, audio in generator:
-                            print(f"Batch {batch_idx}")
-                            print(f"Audio type: {type(audio)}")
-                            if audio is not None and len(audio) > 0:
-                                print(f"Audio shape/length: {getattr(audio, 'shape', len(audio))}")
-                                print(f"Audio dtype: {getattr(audio, 'dtype', type(audio[0]))}")
-                                print(f"First few values: {audio[:5]}")
-                                # Convert to float32 numpy array before extending
-                                if isinstance(audio, list):
-                                    audio = np.array(audio, dtype=np.float32)
-                                elif isinstance(audio, np.ndarray):
-                                    audio = audio.astype(np.float32)
-                                combined_audio.extend(audio.tolist())
-                            else:
-                                print(f"Warning: Empty audio for sentence: {sentence[:50]}...")
-                        # Add silence between sentences (as float32)
-                        combined_audio.extend(np.zeros(1000, dtype=np.float32).tolist())
-                    except Exception as e:
-                        print(f"Error processing sentence {j+1}: {str(e)}")
-                        import traceback
-                        print(traceback.format_exc())
-                        continue
-                # Add silence between paragraphs (as float32)
-                combined_audio.extend(np.zeros(2000, dtype=np.float32).tolist())
-            except Exception as e:
-                print(f"Error processing paragraph {i+1}: {str(e)}")
-                import traceback
-                print(traceback.format_exc())
-                continue
-        if not combined_audio:
-            print("No audio was generated")
-            return None
-        # Convert to numpy array and ensure float32
-        combined_audio = np.array(combined_audio, dtype=np.float32)
-        if len(combined_audio) > 0:
-            print(f"Final audio length: {len(combined_audio)}")
-            print(f"Final audio dtype: {combined_audio.dtype}")
-            print(f"Audio min/max values: {np.min(combined_audio)}/{np.max(combined_audio)}")
-            # Only normalize if we have non-zero values
-            if np.max(np.abs(combined_audio)) > 0:
-                combined_audio = combined_audio / np.max(np.abs(combined_audio)) * 0.9
-                print("Audio normalized successfully")
-            else:
-                print("Warning: Audio contains only zeros")
-            try:
-                filename = "combined_story.wav"
-                sf.write(filename, combined_audio, 24000)
-                print(f"Successfully saved audio to {filename}")
-                return filename
-            except Exception as e:
-                print(f"Error saving audio file: {str(e)}")
-                return None
-        else:
-            print("Error: Combined audio array is empty")
-            return None
-    except Exception as e:
-        print(f"Error generating audio: {str(e)}")
-        import traceback
-        print(traceback.format_exc())
-        clear_memory()
-        return None
-    finally:
-        clear_memory()
 # Helper functions
 def clean_story_output(story):

         return None
 def generate_combined_audio_from_story(story_text, voice='af_heart', speed=1):
     """Synthesize the whole story into a single WAV file.

     The story is split into paragraphs on blank lines; the lines inside a
     paragraph are stripped and joined with single spaces. Each paragraph is
     passed to the module-level ``pipeline`` and every audio chunk it yields
     is concatenated, in order, into one waveform that is written to disk.

     Args:
         story_text: Story text with paragraphs separated by blank lines.
         voice: Voice identifier forwarded to ``pipeline``.
         speed: Speed value forwarded to ``pipeline``.

     Returns:
         The path ``"combined_story.wav"`` on success, or ``None`` when the
         story is missing/whitespace-only or no audio was produced.
     """
     # Nothing to synthesize: bail out before touching the TTS pipeline.
     if not story_text or not story_text.strip():
         return None
     # Group consecutive non-blank lines into paragraphs; a blank line ends one.
     paragraphs = []
     pending_lines = []
     for raw_line in story_text.split('\n'):
         stripped = raw_line.strip()
         if stripped:
             pending_lines.append(stripped)
         elif pending_lines:
             paragraphs.append(' '.join(pending_lines))
             pending_lines = []
     if pending_lines:
         paragraphs.append(' '.join(pending_lines))
     chunks = []
     try:
         for paragraph in paragraphs:
             generator = pipeline(
                 paragraph,
                 voice=voice,
                 speed=speed,
                 split_pattern=r'\n+',
             )
             for _, _, audio in generator:
                 if audio is None:
                     continue
                 # Tensor-like output is moved to host memory first
                 # (assumes a torch-style detach/cpu/numpy API - TODO confirm).
                 if hasattr(audio, 'detach'):
                     audio = audio.detach().cpu().numpy()
                 # Convert each chunk once instead of extending a Python list
                 # sample by sample; assumes mono audio - TODO confirm.
                 samples = np.asarray(audio, dtype=np.float32).ravel()
                 if samples.size:
                     chunks.append(samples)
         if not chunks:
             # Do not write an empty file when nothing was synthesized.
             return None
         filename = "combined_story.wav"
         sf.write(filename, np.concatenate(chunks), 24000)
         return filename
     finally:
         # Release memory even when synthesis or saving fails.
         clear_memory()
 # Helper functions
 def clean_story_output(story):