nezihtopaloglu committed on
Commit
4683bc9
·
verified ·
1 Parent(s): d22b9a3

Removed subtitles feature and fixed long title placement problem

Browse files
Files changed (1) hide show
  1. app.py +39 -17
app.py CHANGED
@@ -8,6 +8,37 @@ import numpy as np
8
  import os
9
  from PIL import Image, ImageDraw, ImageFont
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def estimate_chunk_durations(text, words_per_second=2.5, min_sec=5, max_sec=7):
12
  words = text.split()
13
  chunks = []
@@ -55,17 +86,12 @@ def generate_images(chunks, image_size=(640, 480), use_diffusion=True, num_steps
55
  image_paths.append(img_path)
56
  return image_paths
57
 
58
- def create_video(images, durations, speech_path, movie_title, add_subtitles, chunks, image_size=(640, 480)):
59
  clips = []
60
 
61
  # Title clip using PIL instead of ImageMagick
62
- title_img = Image.new("RGB", image_size, (0, 0, 0))
63
- draw = ImageDraw.Draw(title_img)
64
- try:
65
- font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 50)
66
- except IOError:
67
- font = ImageFont.load_default()
68
- draw.text((50, 50), movie_title, font=font, fill="white")
69
  title_img_path = "title.png"
70
  title_img.save(title_img_path)
71
 
@@ -75,10 +101,7 @@ def create_video(images, durations, speech_path, movie_title, add_subtitles, chu
75
  for img, dur, chunk in zip(images, durations, chunks):
76
  frame = np.array(Image.open(img).resize(image_size, Image.Resampling.LANCZOS))
77
  clip = mp.ImageClip(frame).set_duration(dur)
78
- if add_subtitles:
79
- txt_clip = mp.TextClip(chunk, fontsize=30, color='white', size=(image_size[0] - 20, None), method='caption')
80
- txt_clip = txt_clip.set_duration(dur).set_position(('center', 'bottom'))
81
- clip = mp.CompositeVideoClip([clip, txt_clip])
82
  clips.append(clip)
83
 
84
  black_end = mp.ColorClip(image_size, color=(0,0,0), duration=2)
@@ -88,23 +111,22 @@ def create_video(images, durations, speech_path, movie_title, add_subtitles, chu
88
  final_video.write_videofile("output.mp4", fps=24)
89
  return "output.mp4"
90
 
91
- def process_text(text, movie_title, image_size, use_diffusion, num_steps, add_subtitles):
92
  chunks = estimate_chunk_durations(text)
93
  speech_path = generate_speech(text)
94
  image_paths = generate_images(chunks, image_size, use_diffusion, num_steps)
95
  durations = [min(10, max(5, len(chunk.split()) / 2.5)) for chunk in chunks]
96
- video_path = create_video(image_paths, durations, speech_path, movie_title, add_subtitles, chunks, image_size)
97
  return video_path
98
 
99
  with gr.Blocks() as demo:
100
- gr.Markdown("# Text-to-Video Generator using AI 🎥")
101
  text_input = gr.Textbox(label="Enter your text")
102
  movie_title_input = gr.Textbox(label="Movie Title", value="")
103
  file_input = gr.File(label="Or upload a .txt file")
104
  image_size_input = gr.Radio(choices=["640x480", "800x600", "1024x768"], label="Select Image Size", value="640x480")
105
  use_diffusion_input = gr.Checkbox(label="Use Diffusion Images", value=True)
106
  num_steps_input = gr.Slider(minimum=1, maximum=50, step=1, value=5, label="Diffusion Model Steps")
107
- add_subtitles_input = gr.Checkbox(label="Add Subtitles", value=False)
108
  process_btn = gr.Button("Generate Video")
109
  output_video = gr.Video()
110
 
@@ -114,6 +136,6 @@ with gr.Blocks() as demo:
114
  image_size_dict = {"640x480": (640, 480), "800x600": (800, 600), "1024x768": (1024, 768)}
115
  return process_text(text, movie_title, image_size_dict[image_size], use_diffusion, num_steps, add_subtitles)
116
 
117
- process_btn.click(handle_request, inputs=[text_input, movie_title_input, file_input, image_size_input, use_diffusion_input, num_steps_input, add_subtitles_input], outputs=output_video)
118
 
119
  demo.launch()
 
8
  import os
9
  from PIL import Image, ImageDraw, ImageFont
10
 
11
def create_centered_title(image_size, text, max_font_size=50, min_font_size=10, padding=20):
    """Create a black title card with *text* centered, shrinking the font until it fits.

    Args:
        image_size: ``(width, height)`` of the output image in pixels.
        text: Title string to render.
        max_font_size: Largest font size to try first.
        min_font_size: Smallest font size allowed before shrinking stops.
        padding: Minimum gap, in pixels, between the text and each image edge.

    Returns:
        PIL.Image.Image: The rendered title image.
    """
    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
    title_img = Image.new("RGB", image_size, (0, 0, 0))
    draw = ImageDraw.Draw(title_img)

    def _load_font(size):
        # Returns (font, scalable). Falls back to PIL's built-in bitmap font
        # when the TTF is unavailable; the bitmap font cannot be resized, so
        # the shrink loop must stop instead of calling truetype() again
        # (the original code crashed with IOError here on the fallback path).
        try:
            return ImageFont.truetype(font_path, size), True
        except IOError:
            return ImageFont.load_default(), False

    font_size = max_font_size
    font, scalable = _load_font(font_size)

    # Measure at least once so text_w/text_h are always defined, even when
    # max_font_size <= min_font_size (the original loop could skip and raise
    # NameError at the centering step).
    while True:
        text_bbox = draw.textbbox((0, 0), text, font=font)
        text_w = text_bbox[2] - text_bbox[0]
        text_h = text_bbox[3] - text_bbox[1]
        fits = (text_w <= image_size[0] - 2 * padding
                and text_h <= image_size[1] - 2 * padding)
        if fits or not scalable or font_size <= min_font_size:
            break
        font_size -= 2  # step down and re-measure
        font, scalable = _load_font(font_size)

    # Center the (possibly still-overflowing, if min size reached) text.
    text_x = (image_size[0] - text_w) // 2
    text_y = (image_size[1] - text_h) // 2
    draw.text((text_x, text_y), text, font=font, fill="white")

    return title_img
41
+
42
  def estimate_chunk_durations(text, words_per_second=2.5, min_sec=5, max_sec=7):
43
  words = text.split()
44
  chunks = []
 
86
  image_paths.append(img_path)
87
  return image_paths
88
 
89
+ def create_video(images, durations, speech_path, movie_title, chunks, image_size=(640, 480)):
90
  clips = []
91
 
92
  # Title clip using PIL instead of ImageMagick
93
+ title_img = create_centered_title(image_size, movie_title)
94
+
 
 
 
 
 
95
  title_img_path = "title.png"
96
  title_img.save(title_img_path)
97
 
 
101
  for img, dur, chunk in zip(images, durations, chunks):
102
  frame = np.array(Image.open(img).resize(image_size, Image.Resampling.LANCZOS))
103
  clip = mp.ImageClip(frame).set_duration(dur)
104
+
 
 
 
105
  clips.append(clip)
106
 
107
  black_end = mp.ColorClip(image_size, color=(0,0,0), duration=2)
 
111
  final_video.write_videofile("output.mp4", fps=24)
112
  return "output.mp4"
113
 
114
def process_text(text, movie_title, image_size, use_diffusion, num_steps):
    """Run the full text-to-video pipeline and return the output video path.

    Args:
        text: Source narration text.
        movie_title: Title rendered on the opening card.
        image_size: ``(width, height)`` tuple for generated frames.
        use_diffusion: Whether to generate images with the diffusion model.
        num_steps: Number of diffusion inference steps.

    Returns:
        Path to the rendered video file, as returned by ``create_video``.
    """
    chunks = estimate_chunk_durations(text)
    speech_path = generate_speech(text)
    image_paths = generate_images(chunks, image_size, use_diffusion, num_steps)
    # Clamp each chunk's display time to the 5-10 s range at ~2.5 words/sec.
    durations = []
    for chunk in chunks:
        word_count = len(chunk.split())
        durations.append(min(10, max(5, word_count / 2.5)))
    return create_video(image_paths, durations, speech_path, movie_title, chunks, image_size)
121
 
122
  with gr.Blocks() as demo:
123
+ gr.Markdown("# Text-to-Video Generator for YouTubers using AI 🎥")
124
  text_input = gr.Textbox(label="Enter your text")
125
  movie_title_input = gr.Textbox(label="Movie Title", value="")
126
  file_input = gr.File(label="Or upload a .txt file")
127
  image_size_input = gr.Radio(choices=["640x480", "800x600", "1024x768"], label="Select Image Size", value="640x480")
128
  use_diffusion_input = gr.Checkbox(label="Use Diffusion Images", value=True)
129
  num_steps_input = gr.Slider(minimum=1, maximum=50, step=1, value=5, label="Diffusion Model Steps")
 
130
  process_btn = gr.Button("Generate Video")
131
  output_video = gr.Video()
132
 
 
136
  image_size_dict = {"640x480": (640, 480), "800x600": (800, 600), "1024x768": (1024, 768)}
137
  return process_text(text, movie_title, image_size_dict[image_size], use_diffusion, num_steps, add_subtitles)
138
 
139
+ process_btn.click(handle_request, inputs=[text_input, movie_title_input, file_input, image_size_input, use_diffusion_input, num_steps_input], outputs=output_video)
140
 
141
  demo.launch()