Prof-Hunt committed on
Commit
13b2e6a
·
verified ·
1 Parent(s): 34cbe36

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -30
app.py CHANGED
@@ -485,39 +485,52 @@ def overlay_text_on_image(image, text):
485
  print(f"Error in overlay_text_on_image: {e}")
486
  return None
487
 
488
- models = {gpu: KModel().to('cuda' if gpu else 'cpu').eval() for gpu in [False] + ([True] if torch.cuda.is_available() else [])}
489
- pipelines = {lang_code: KPipeline(lang_code=lang_code, model=False) for lang_code in 'ab'}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
 
491
- @spaces.GPU(duration=30)
492
- def forward_gpu(ps, ref_s, speed):
493
- return models[True](ps, ref_s, speed)
 
 
 
494
 
495
- def generate_combined_audio_from_story(text, voice='af_heart', speed=1):
496
- pipeline = pipelines[voice[0]]
497
- pack = pipeline.load_voice(voice)
498
- use_gpu = torch.cuda.is_available()
 
 
499
 
500
- combined_audio = []
501
- for _, ps, _ in pipeline(text, voice, speed):
502
- ref_s = pack[len(ps)-1]
503
- try:
504
- if use_gpu:
505
- audio = forward_gpu(ps, ref_s, speed)
506
- else:
507
- audio = models[False](ps, ref_s, speed)
508
- combined_audio.extend(audio.numpy())
509
- except Exception as e:
510
- print(f"Error: {e}")
511
- if use_gpu:
512
- print("Retrying with CPU")
513
- audio = models[False](ps, ref_s, speed)
514
- combined_audio.extend(audio.numpy())
515
-
516
- if combined_audio:
517
- filename = "combined_story.wav"
518
- sf.write(filename, np.array(combined_audio), 24000)
519
- return filename
520
- return None
521
 
522
  # Helper functions
523
  def clean_story_output(story):
 
485
  print(f"Error in overlay_text_on_image: {e}")
486
  return None
487
 
488
@spaces.GPU(duration=60)
def generate_combined_audio_from_story(story_text, voice='af_heart', speed=1):
    """Generate a single audio file covering every paragraph of the story.

    Args:
        story_text: Full story text; blank lines mark paragraph breaks.
        voice: Voice identifier forwarded to the TTS pipeline.
        speed: Speech-rate multiplier forwarded to the TTS pipeline.

    Returns:
        Path to the combined WAV file, or None when the story produced
        no audio (e.g. empty or whitespace-only input).
    """
    # Split the story into paragraphs on blank lines, collapsing each
    # paragraph's internal line breaks into single spaces.
    paragraphs = []
    current_paragraph = []
    for line in story_text.split('\n'):
        line = line.strip()
        if not line:  # Empty line indicates a paragraph break
            if current_paragraph:
                paragraphs.append(' '.join(current_paragraph))
                current_paragraph = []
        else:
            current_paragraph.append(line)
    if current_paragraph:
        paragraphs.append(' '.join(current_paragraph))

    # Synthesize each paragraph and accumulate the raw samples.
    combined_audio = []
    for paragraph in paragraphs:
        if not paragraph.strip():
            continue  # Skip empty paragraphs

        # NOTE(review): `pipeline` is assumed to be a module-level KPipeline
        # instance defined elsewhere in app.py — confirm it exists after this
        # commit removed the old `pipelines` dict.
        generator = pipeline(
            paragraph,
            voice=voice,
            speed=speed,
            split_pattern=r'\n+'  # Split on newlines (paragraphs are single-line here)
        )

        for _, _, audio in generator:
            combined_audio.extend(audio)  # Append audio data

    # Bug fix: the previous revision returned None when no audio was
    # generated; this rewrite had lost that guard and would write a
    # zero-length WAV. Restore the None return so callers can detect
    # the "no audio" case.
    if not combined_audio:
        clear_memory()
        return None

    # Save the combined samples as a 24 kHz WAV in the temp directory.
    filename = "/tmp/combined_story.wav"
    sf.write(filename, np.array(combined_audio), 24000)

    clear_memory()
    return filename  # Returned for the Gradio File component
 
 
 
 
 
 
 
 
 
534
 
535
  # Helper functions
536
  def clean_story_output(story):