Spaces:

nezihtopaloglu
/

text-to-video

Running on Zero

App Files Community

nezihtopaloglu committed on Feb 17

Commit

d22b9a3

verified ·

1 Parent(s): 38af13d

Workaround to show subtitles

Browse files

Files changed (1) hide show

app.py +22 -8

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ from TTS.api import TTS
 import moviepy.editor as mp
 import numpy as np
 import os
-from PIL import Image
 def estimate_chunk_durations(text, words_per_second=2.5, min_sec=5, max_sec=7):
     words = text.split()
@@ -26,10 +26,7 @@ def estimate_chunk_durations(text, words_per_second=2.5, min_sec=5, max_sec=7):
     return chunks
 def generate_speech(text):
-    #tts = TTS("tts_models/en/ljspeech/tacotron2-DDC")
     tts = TTS("tts_models/en/ljspeech/glow-tts")
-    #tts.synthesizer.model.decoder.max_decoder_steps = 30000  # Increase limit
     wav_path = "speech.wav"
     tts.tts_to_file(text=text, file_path=wav_path)
     return wav_path
@@ -46,6 +43,13 @@ def generate_images(chunks, image_size=(640, 480), use_diffusion=True, num_steps
             image = image.resize(image_size)
         else:
             image = Image.new("RGB", image_size, (0, 0, 0))
         img_path = f"image_{i}.png"
         image.save(img_path)
         image_paths.append(img_path)
@@ -53,10 +57,20 @@ def generate_images(chunks, image_size=(640, 480), use_diffusion=True, num_steps
 def create_video(images, durations, speech_path, movie_title, add_subtitles, chunks, image_size=(640, 480)):
     clips = []
-    title_clip = mp.TextClip(movie_title, fontsize=50, color='white', size=image_size)
-    title_clip = title_clip.set_duration(1).set_position('center')
-    black_start = mp.ColorClip(image_size, color=(0,0,0), duration=1).set_opacity(0.8)
-    clips.append(mp.CompositeVideoClip([black_start, title_clip]))
     for img, dur, chunk in zip(images, durations, chunks):
         frame = np.array(Image.open(img).resize(image_size, Image.Resampling.LANCZOS))

 import moviepy.editor as mp
 import numpy as np
 import os
+from PIL import Image, ImageDraw, ImageFont
 def estimate_chunk_durations(text, words_per_second=2.5, min_sec=5, max_sec=7):
     words = text.split()
     return chunks
 def generate_speech(text):
     tts = TTS("tts_models/en/ljspeech/glow-tts")
     wav_path = "speech.wav"
     tts.tts_to_file(text=text, file_path=wav_path)
     return wav_path
             image = image.resize(image_size)
         else:
             image = Image.new("RGB", image_size, (0, 0, 0))
+            draw = ImageDraw.Draw(image)
+            try:
+                font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 30)
+            except IOError:
+                font = ImageFont.load_default()
+            draw.text((10, 10), chunk, font=font, fill="white")
         img_path = f"image_{i}.png"
         image.save(img_path)
         image_paths.append(img_path)
 def create_video(images, durations, speech_path, movie_title, add_subtitles, chunks, image_size=(640, 480)):
     clips = []
+    # Title clip using PIL instead of ImageMagick
+    title_img = Image.new("RGB", image_size, (0, 0, 0))
+    draw = ImageDraw.Draw(title_img)
+    try:
+        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 50)
+    except IOError:
+        font = ImageFont.load_default()
+    draw.text((50, 50), movie_title, font=font, fill="white")
+    title_img_path = "title.png"
+    title_img.save(title_img_path)
+    title_clip = mp.ImageClip(title_img_path).set_duration(2).set_position('center')
+    clips.append(title_clip)
     for img, dur, chunk in zip(images, durations, chunks):
         frame = np.array(Image.open(img).resize(image_size, Image.Resampling.LANCZOS))