Spaces:
Running
on
Zero
Running
on
Zero
Removed subtitles feature and fixed long title placement problem
Browse files
app.py
CHANGED
@@ -8,6 +8,37 @@ import numpy as np
|
|
8 |
import os
|
9 |
from PIL import Image, ImageDraw, ImageFont
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
def estimate_chunk_durations(text, words_per_second=2.5, min_sec=5, max_sec=7):
|
12 |
words = text.split()
|
13 |
chunks = []
|
@@ -55,17 +86,12 @@ def generate_images(chunks, image_size=(640, 480), use_diffusion=True, num_steps
|
|
55 |
image_paths.append(img_path)
|
56 |
return image_paths
|
57 |
|
58 |
-
def create_video(images, durations, speech_path, movie_title,
|
59 |
clips = []
|
60 |
|
61 |
# Title clip using PIL instead of ImageMagick
|
62 |
-
title_img =
|
63 |
-
|
64 |
-
try:
|
65 |
-
font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 50)
|
66 |
-
except IOError:
|
67 |
-
font = ImageFont.load_default()
|
68 |
-
draw.text((50, 50), movie_title, font=font, fill="white")
|
69 |
title_img_path = "title.png"
|
70 |
title_img.save(title_img_path)
|
71 |
|
@@ -75,10 +101,7 @@ def create_video(images, durations, speech_path, movie_title, add_subtitles, chu
|
|
75 |
for img, dur, chunk in zip(images, durations, chunks):
|
76 |
frame = np.array(Image.open(img).resize(image_size, Image.Resampling.LANCZOS))
|
77 |
clip = mp.ImageClip(frame).set_duration(dur)
|
78 |
-
|
79 |
-
txt_clip = mp.TextClip(chunk, fontsize=30, color='white', size=(image_size[0] - 20, None), method='caption')
|
80 |
-
txt_clip = txt_clip.set_duration(dur).set_position(('center', 'bottom'))
|
81 |
-
clip = mp.CompositeVideoClip([clip, txt_clip])
|
82 |
clips.append(clip)
|
83 |
|
84 |
black_end = mp.ColorClip(image_size, color=(0,0,0), duration=2)
|
@@ -88,23 +111,22 @@ def create_video(images, durations, speech_path, movie_title, add_subtitles, chu
|
|
88 |
final_video.write_videofile("output.mp4", fps=24)
|
89 |
return "output.mp4"
|
90 |
|
91 |
-
def process_text(text, movie_title, image_size, use_diffusion, num_steps
|
92 |
chunks = estimate_chunk_durations(text)
|
93 |
speech_path = generate_speech(text)
|
94 |
image_paths = generate_images(chunks, image_size, use_diffusion, num_steps)
|
95 |
durations = [min(10, max(5, len(chunk.split()) / 2.5)) for chunk in chunks]
|
96 |
-
video_path = create_video(image_paths, durations, speech_path, movie_title,
|
97 |
return video_path
|
98 |
|
99 |
with gr.Blocks() as demo:
|
100 |
-
gr.Markdown("# Text-to-Video Generator using AI 🎥")
|
101 |
text_input = gr.Textbox(label="Enter your text")
|
102 |
movie_title_input = gr.Textbox(label="Movie Title", value="")
|
103 |
file_input = gr.File(label="Or upload a .txt file")
|
104 |
image_size_input = gr.Radio(choices=["640x480", "800x600", "1024x768"], label="Select Image Size", value="640x480")
|
105 |
use_diffusion_input = gr.Checkbox(label="Use Diffusion Images", value=True)
|
106 |
num_steps_input = gr.Slider(minimum=1, maximum=50, step=1, value=5, label="Diffusion Model Steps")
|
107 |
-
add_subtitles_input = gr.Checkbox(label="Add Subtitles", value=False)
|
108 |
process_btn = gr.Button("Generate Video")
|
109 |
output_video = gr.Video()
|
110 |
|
@@ -114,6 +136,6 @@ with gr.Blocks() as demo:
|
|
114 |
image_size_dict = {"640x480": (640, 480), "800x600": (800, 600), "1024x768": (1024, 768)}
|
115 |
return process_text(text, movie_title, image_size_dict[image_size], use_diffusion, num_steps, add_subtitles)
|
116 |
|
117 |
-
process_btn.click(handle_request, inputs=[text_input, movie_title_input, file_input, image_size_input, use_diffusion_input, num_steps_input
|
118 |
|
119 |
demo.launch()
|
|
|
8 |
import os
|
9 |
from PIL import Image, ImageDraw, ImageFont
|
10 |
|
11 |
+
def create_centered_title(image_size, text, max_font_size=50, min_font_size=10, padding=20):
    """Create a black title image with ``text`` drawn centered in white.

    The font starts at ``max_font_size`` and is reduced in steps of 2 (never
    below ``min_font_size``) until the rendered text fits inside the image
    minus ``padding`` on every side. If the text still does not fit at the
    minimum size it is drawn at that size anyway.

    Args:
        image_size: ``(width, height)`` of the image to create, in pixels.
        text: Title text to render; an empty string yields a plain black image.
        max_font_size: Font size to try first.
        min_font_size: Smallest font size the shrink loop will use.
        padding: Margin in pixels kept free on each side when checking the fit.

    Returns:
        The ``PIL.Image.Image`` containing the rendered title.
    """
    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"

    title_img = Image.new("RGB", image_size, (0, 0, 0))
    draw = ImageDraw.Draw(title_img)

    max_text_w = image_size[0] - 2 * padding
    max_text_h = image_size[1] - 2 * padding

    font_size = max_font_size
    while True:
        # Guard every load, not just the first one: when the TrueType file is
        # missing, each retry must fall back instead of raising.
        try:
            font = ImageFont.truetype(font_path, font_size)
        except IOError:
            font = ImageFont.load_default()
            scalable = False
        else:
            scalable = True

        # Always measure with the font that will actually be drawn, so the
        # centering below never uses stale (or undefined) dimensions.
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
        text_w = right - left
        text_h = bottom - top

        fits = text_w <= max_text_w and text_h <= max_text_h
        # Stop when the text fits, when shrinking cannot help (the fallback
        # font is loaded without a size), or when the minimum is reached.
        if fits or not scalable or font_size <= min_font_size:
            break

        font_size = max(min_font_size, font_size - 2)

    # The bounding box may not start at the drawing origin; subtract its
    # offset so the visible glyphs, not the text origin, end up centered.
    text_x = (image_size[0] - text_w) // 2 - left
    text_y = (image_size[1] - text_h) // 2 - top
    draw.text((text_x, text_y), text, font=font, fill="white")

    return title_img
|
41 |
+
|
42 |
def estimate_chunk_durations(text, words_per_second=2.5, min_sec=5, max_sec=7):
|
43 |
words = text.split()
|
44 |
chunks = []
|
|
|
86 |
image_paths.append(img_path)
|
87 |
return image_paths
|
88 |
|
89 |
+
def create_video(images, durations, speech_path, movie_title, chunks, image_size=(640, 480)):
|
90 |
clips = []
|
91 |
|
92 |
# Title clip using PIL instead of ImageMagick
|
93 |
+
title_img = create_centered_title(image_size, movie_title)
|
94 |
+
|
|
|
|
|
|
|
|
|
|
|
95 |
title_img_path = "title.png"
|
96 |
title_img.save(title_img_path)
|
97 |
|
|
|
101 |
for img, dur, chunk in zip(images, durations, chunks):
|
102 |
frame = np.array(Image.open(img).resize(image_size, Image.Resampling.LANCZOS))
|
103 |
clip = mp.ImageClip(frame).set_duration(dur)
|
104 |
+
|
|
|
|
|
|
|
105 |
clips.append(clip)
|
106 |
|
107 |
black_end = mp.ColorClip(image_size, color=(0,0,0), duration=2)
|
|
|
111 |
final_video.write_videofile("output.mp4", fps=24)
|
112 |
return "output.mp4"
|
113 |
|
114 |
+
def process_text(text, movie_title, image_size, use_diffusion, num_steps, add_subtitles=False):
    """Run the text-to-video pipeline and return the resulting video path.

    The text is split into chunks, speech is generated for the full text,
    images are generated for the chunks, and everything is handed to
    ``create_video`` together with a per-chunk display duration.

    Args:
        text: Source text to narrate and illustrate.
        movie_title: Title forwarded to ``create_video``.
        image_size: ``(width, height)`` tuple forwarded to ``generate_images``
            and ``create_video``.
        use_diffusion: Forwarded to ``generate_images``.
        num_steps: Forwarded to ``generate_images``.
        add_subtitles: Accepted for backward compatibility and ignored. The
            subtitles feature was removed, but an existing caller in this
            file still passes this sixth positional argument; without the
            parameter that call raises ``TypeError``.

    Returns:
        The value returned by ``create_video``.
    """
    chunks = estimate_chunk_durations(text)
    speech_path = generate_speech(text)
    image_paths = generate_images(chunks, image_size, use_diffusion, num_steps)
    # Show each image for its chunk's word count at 2.5 words per second,
    # clamped to the range 5-10 seconds.
    durations = [min(10, max(5, len(chunk.split()) / 2.5)) for chunk in chunks]
    video_path = create_video(image_paths, durations, speech_path, movie_title, chunks, image_size)
    return video_path
|
121 |
|
122 |
with gr.Blocks() as demo:
|
123 |
+
gr.Markdown("# Text-to-Video Generator for YouTubers using AI 🎥")
|
124 |
text_input = gr.Textbox(label="Enter your text")
|
125 |
movie_title_input = gr.Textbox(label="Movie Title", value="")
|
126 |
file_input = gr.File(label="Or upload a .txt file")
|
127 |
image_size_input = gr.Radio(choices=["640x480", "800x600", "1024x768"], label="Select Image Size", value="640x480")
|
128 |
use_diffusion_input = gr.Checkbox(label="Use Diffusion Images", value=True)
|
129 |
num_steps_input = gr.Slider(minimum=1, maximum=50, step=1, value=5, label="Diffusion Model Steps")
|
|
|
130 |
process_btn = gr.Button("Generate Video")
|
131 |
output_video = gr.Video()
|
132 |
|
|
|
136 |
image_size_dict = {"640x480": (640, 480), "800x600": (800, 600), "1024x768": (1024, 768)}
|
137 |
return process_text(text, movie_title, image_size_dict[image_size], use_diffusion, num_steps, add_subtitles)
|
138 |
|
139 |
+
process_btn.click(handle_request, inputs=[text_input, movie_title_input, file_input, image_size_input, use_diffusion_input, num_steps_input], outputs=output_video)
|
140 |
|
141 |
demo.launch()
|