Prof-Hunt committed on
Commit
86b2b12
·
verified ·
1 Parent(s): 8231cd6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -119
app.py CHANGED
@@ -485,129 +485,44 @@ def overlay_text_on_image(image, text):
485
  return None
486
 
487
  def generate_combined_audio_from_story(story_text, voice='af_heart', speed=1):
488
- """Generate audio for the story with improved error handling and debugging"""
489
- clear_memory()
490
-
491
- if not story_text:
492
- print("No story text provided")
493
- return None
494
 
495
- print(f"Generating audio for story of length: {len(story_text)}")
 
 
 
 
 
 
 
496
 
497
- # Clean up text and split into manageable chunks
498
- paragraphs = [p.strip() for p in story_text.split('\n\n') if p.strip()]
499
- if not paragraphs:
500
- print("No valid paragraphs found in story")
501
- return None
502
 
503
- print(f"Processing {len(paragraphs)} paragraphs")
504
  combined_audio = []
505
-
506
- try:
507
- for i, paragraph in enumerate(paragraphs):
508
- if not paragraph.strip():
509
- continue
510
-
511
- print(f"Processing paragraph {i+1}/{len(paragraphs)}")
512
- print(f"Paragraph length: {len(paragraph)}")
513
- print(f"Paragraph text: {paragraph[:100]}...")
514
-
515
- try:
516
- sentences = [s.strip() for s in paragraph.split('.') if s.strip()]
517
- print(f"Split into {len(sentences)} sentences")
518
-
519
- for j, sentence in enumerate(sentences):
520
- print(f"Processing sentence {j+1}/{len(sentences)}")
521
- print(f"Sentence length: {len(sentence)}")
522
-
523
- try:
524
- generator = pipeline(
525
- sentence + '.',
526
- voice=voice,
527
- speed=speed,
528
- split_pattern=r'\n+'
529
- )
530
-
531
- if generator is None:
532
- print(f"Warning: Generator returned None for sentence: {sentence[:50]}...")
533
- continue
534
-
535
- for batch_idx, metadata, audio in generator:
536
- print(f"Batch {batch_idx}")
537
- print(f"Audio type: {type(audio)}")
538
-
539
- if audio is not None and len(audio) > 0:
540
- print(f"Audio shape/length: {getattr(audio, 'shape', len(audio))}")
541
- print(f"Audio dtype: {getattr(audio, 'dtype', type(audio[0]))}")
542
- print(f"First few values: {audio[:5]}")
543
-
544
- # Convert to float32 numpy array before extending
545
- if isinstance(audio, list):
546
- audio = np.array(audio, dtype=np.float32)
547
- elif isinstance(audio, np.ndarray):
548
- audio = audio.astype(np.float32)
549
-
550
- combined_audio.extend(audio.tolist())
551
- else:
552
- print(f"Warning: Empty audio for sentence: {sentence[:50]}...")
553
-
554
- # Add silence between sentences (as float32)
555
- combined_audio.extend(np.zeros(1000, dtype=np.float32).tolist())
556
-
557
- except Exception as e:
558
- print(f"Error processing sentence {j+1}: {str(e)}")
559
- import traceback
560
- print(traceback.format_exc())
561
- continue
562
-
563
- # Add silence between paragraphs (as float32)
564
- combined_audio.extend(np.zeros(2000, dtype=np.float32).tolist())
565
-
566
- except Exception as e:
567
- print(f"Error processing paragraph {i+1}: {str(e)}")
568
- import traceback
569
- print(traceback.format_exc())
570
- continue
571
-
572
- if not combined_audio:
573
- print("No audio was generated")
574
- return None
575
-
576
- # Convert to numpy array and ensure float32
577
- combined_audio = np.array(combined_audio, dtype=np.float32)
578
- if len(combined_audio) > 0:
579
- print(f"Final audio length: {len(combined_audio)}")
580
- print(f"Final audio dtype: {combined_audio.dtype}")
581
- print(f"Audio min/max values: {np.min(combined_audio)}/{np.max(combined_audio)}")
582
-
583
- # Only normalize if we have non-zero values
584
- if np.max(np.abs(combined_audio)) > 0:
585
- combined_audio = combined_audio / np.max(np.abs(combined_audio)) * 0.9
586
- print("Audio normalized successfully")
587
- else:
588
- print("Warning: Audio contains only zeros")
589
-
590
- try:
591
- filename = "combined_story.wav"
592
- sf.write(filename, combined_audio, 24000)
593
- print(f"Successfully saved audio to {filename}")
594
- return filename
595
- except Exception as e:
596
- print(f"Error saving audio file: {str(e)}")
597
- return None
598
- else:
599
- print("Error: Combined audio array is empty")
600
- return None
601
-
602
- except Exception as e:
603
- print(f"Error generating audio: {str(e)}")
604
- import traceback
605
- print(traceback.format_exc())
606
- clear_memory()
607
- return None
608
-
609
- finally:
610
- clear_memory()
611
 
612
  # Helper functions
613
  def clean_story_output(story):
 
485
  return None
486
 
487
  def generate_combined_audio_from_story(story_text, voice='af_heart', speed=1):
488
+ """Generate a single audio file for all paragraphs in the story."""
489
+ # Split story into paragraphs (reuse logic from generate_image_prompts)
490
+ paragraphs = []
491
+ current_paragraph = []
 
 
492
 
493
+ for line in story_text.split('\n'):
494
+ line = line.strip()
495
+ if not line: # Empty line indicates paragraph break
496
+ if current_paragraph:
497
+ paragraphs.append(' '.join(current_paragraph))
498
+ current_paragraph = []
499
+ else:
500
+ current_paragraph.append(line)
501
 
502
+ if current_paragraph:
503
+ paragraphs.append(' '.join(current_paragraph))
 
 
 
504
 
505
+ # Combine audio for all paragraphs
506
  combined_audio = []
507
+ for paragraph in paragraphs:
508
+ if not paragraph.strip():
509
+ continue # Skip empty paragraphs
510
+
511
+ generator = pipeline(
512
+ paragraph,
513
+ voice=voice,
514
+ speed=speed,
515
+ split_pattern=r'\n+' # Split on newlines
516
+ )
517
+ for _, _, audio in generator:
518
+ combined_audio.extend(audio) # Append audio data
519
+
520
+ # Convert combined audio to NumPy array and save
521
+ combined_audio = np.array(combined_audio)
522
+ filename = "combined_story.wav"
523
+ sf.write(filename, combined_audio, 24000) # Save audio as .wav
524
+ clear_memory()
525
+ return filename
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
526
 
527
  # Helper functions
528
  def clean_story_output(story):