Spaces: Running on Zero
Workaround to show subtitles
Browse files
app.py
CHANGED
@@ -6,7 +6,7 @@ from TTS.api import TTS
|
|
6 |
import moviepy.editor as mp
|
7 |
import numpy as np
|
8 |
import os
|
9 |
-
from PIL import Image
|
10 |
|
11 |
def estimate_chunk_durations(text, words_per_second=2.5, min_sec=5, max_sec=7):
|
12 |
words = text.split()
|
@@ -26,10 +26,7 @@ def estimate_chunk_durations(text, words_per_second=2.5, min_sec=5, max_sec=7):
|
|
26 |
return chunks
|
27 |
|
28 |
def generate_speech(text):
|
29 |
-
#tts = TTS("tts_models/en/ljspeech/tacotron2-DDC")
|
30 |
tts = TTS("tts_models/en/ljspeech/glow-tts")
|
31 |
-
|
32 |
-
#tts.synthesizer.model.decoder.max_decoder_steps = 30000 # Increase limit
|
33 |
wav_path = "speech.wav"
|
34 |
tts.tts_to_file(text=text, file_path=wav_path)
|
35 |
return wav_path
|
@@ -46,6 +43,13 @@ def generate_images(chunks, image_size=(640, 480), use_diffusion=True, num_steps
|
|
46 |
image = image.resize(image_size)
|
47 |
else:
|
48 |
image = Image.new("RGB", image_size, (0, 0, 0))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
img_path = f"image_{i}.png"
|
50 |
image.save(img_path)
|
51 |
image_paths.append(img_path)
|
@@ -53,10 +57,20 @@ def generate_images(chunks, image_size=(640, 480), use_diffusion=True, num_steps
|
|
53 |
|
54 |
def create_video(images, durations, speech_path, movie_title, add_subtitles, chunks, image_size=(640, 480)):
|
55 |
clips = []
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
for img, dur, chunk in zip(images, durations, chunks):
|
62 |
frame = np.array(Image.open(img).resize(image_size, Image.Resampling.LANCZOS))
|
|
|
6 |
import moviepy.editor as mp
|
7 |
import numpy as np
|
8 |
import os
|
9 |
+
from PIL import Image, ImageDraw, ImageFont
|
10 |
|
11 |
def estimate_chunk_durations(text, words_per_second=2.5, min_sec=5, max_sec=7):
|
12 |
words = text.split()
|
|
|
26 |
return chunks
|
27 |
|
28 |
def generate_speech(text, *, model_name="tts_models/en/ljspeech/glow-tts", wav_path="speech.wav"):
    """Synthesize ``text`` to an audio file and return that file's path.

    Args:
        text: The text to be spoken.
        model_name: Model identifier handed to the ``TTS`` class imported from
            ``TTS.api``. Defaults to the glow-tts model this function
            previously hard-coded.
        wav_path: Destination path passed to ``tts_to_file``. Defaults to
            ``"speech.wav"``, the path this function previously hard-coded.
            Callers that run more than one synthesis at a time can pass a
            distinct path so outputs do not overwrite each other.

    Returns:
        ``wav_path``, exactly as given.
    """
    # A fresh TTS object is constructed on every call, as in the original.
    # NOTE(review): presumably this reloads the model each time - consider
    # caching at the call site if that turns out to be costly.
    tts = TTS(model_name)
    tts.tts_to_file(text=text, file_path=wav_path)
    return wav_path
|
|
|
43 |
image = image.resize(image_size)
|
44 |
else:
|
45 |
image = Image.new("RGB", image_size, (0, 0, 0))
|
46 |
+
draw = ImageDraw.Draw(image)
|
47 |
+
try:
|
48 |
+
font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 30)
|
49 |
+
except IOError:
|
50 |
+
font = ImageFont.load_default()
|
51 |
+
draw.text((10, 10), chunk, font=font, fill="white")
|
52 |
+
|
53 |
img_path = f"image_{i}.png"
|
54 |
image.save(img_path)
|
55 |
image_paths.append(img_path)
|
|
|
57 |
|
58 |
def create_video(images, durations, speech_path, movie_title, add_subtitles, chunks, image_size=(640, 480)):
|
59 |
clips = []
|
60 |
+
|
61 |
+
# Title clip using PIL instead of ImageMagick
|
62 |
+
title_img = Image.new("RGB", image_size, (0, 0, 0))
|
63 |
+
draw = ImageDraw.Draw(title_img)
|
64 |
+
try:
|
65 |
+
font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 50)
|
66 |
+
except IOError:
|
67 |
+
font = ImageFont.load_default()
|
68 |
+
draw.text((50, 50), movie_title, font=font, fill="white")
|
69 |
+
title_img_path = "title.png"
|
70 |
+
title_img.save(title_img_path)
|
71 |
+
|
72 |
+
title_clip = mp.ImageClip(title_img_path).set_duration(2).set_position('center')
|
73 |
+
clips.append(title_clip)
|
74 |
|
75 |
for img, dur, chunk in zip(images, durations, chunks):
|
76 |
frame = np.array(Image.open(img).resize(image_size, Image.Resampling.LANCZOS))
|