testdeep123 committed on
Commit 372c71b · verified · 1 Parent(s): e82097c

Update app.py

Files changed (1):
  1. app.py +810 -161
app.py CHANGED
@@ -1,155 +1,412 @@
-
  import soundfile as sf
  import torch
  import os
- from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
- from PIL import Image
- import tempfile
- import random
- import cv2
- import math
- import time
- import re
- import requests
- from moviepy.editor import concatenate_videoclips, CompositeVideoClip, TextClip
  import moviepy.config as mpy_config
  from pydub import AudioSegment
  from bs4 import BeautifulSoup
  from urllib.parse import quote
  from gtts import gTTS
  import gradio as gr
  import shutil

- pipeline = KPipeline(lang_code='a')
  mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})

  PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
  OPENROUTER_API_KEY = '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'
  OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
  USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"

  TARGET_RESOLUTION = None
  TEMP_FOLDER = None

  def generate_script(user_input):
-     headers = {'Authorization': f'Bearer {OPENROUTER_API_KEY}', 'HTTP-Referer': 'https://your-domain.com', 'X-Title': 'AI Documentary Maker'}
-     prompt = f"Short Documentary Script Generator Instructions: {user_input}"
-     data = {'model': OPENROUTER_MODEL, 'messages': [{'role': 'user', 'content': prompt}], 'temperature': 0.4, 'max_tokens': 5000}
      try:
-         response = requests.post('https://openrouter.ai/api/v1/chat/completions', headers=headers, json=data, timeout=30)
-         if response.status_code == 200: return response.json()['choices'][0]['message']['content']
          return None
-     except: return None

  def parse_script(script_text):
      sections = {}
      current_title = None
      current_text = ""
-     for line in script_text.splitlines():
-         line = line.strip()
-         if line.startswith("[") and "]" in line:
-             bracket_start = line.find("[")
-             bracket_end = line.find("]", bracket_start)
-             if bracket_start != -1 and bracket_end != -1:
-                 if current_title: sections[current_title] = current_text.strip()
-                 current_title = line[bracket_start+1:bracket_end]
-                 current_text = line[bracket_end+1:].strip()
-         elif current_title: current_text += line + " "
-     if current_title: sections[current_title] = current_text.strip()
-     elements = []
-     for title, narration in sections.items():
-         if not title or not narration: continue
-         media_element = {"type": "media", "prompt": title, "effects": "fade-in"}
-         words = narration.split()
-         duration = max(3, len(words) * 0.5)
-         tts_element = {"type": "tts", "text": narration, "voice": "en", "duration": duration}
-         elements.append(media_element)
-         elements.append(tts_element)
-     return elements

  def search_pexels_videos(query, pexels_api_key):
      headers = {'Authorization': pexels_api_key}
      base_url = "https://api.pexels.com/videos/search"
      all_videos = []
-     for page in range(1, 4):
-         params = {"query": query, "per_page": 15, "page": page}
-         response = requests.get(base_url, headers=headers, params=params, timeout=10)
-         if response.status_code == 200:
-             for video in response.json().get("videos", []):
-                 for file in video.get("video_files", []):
-                     if file.get("quality") == "hd":
-                         all_videos.append(file.get("link"))
                          break
-     return random.choice(all_videos) if all_videos else None

  def search_pexels_images(query, pexels_api_key):
      headers = {'Authorization': pexels_api_key}
      url = "https://api.pexels.com/v1/search"
      params = {"query": query, "per_page": 5, "orientation": "landscape"}
-     response = requests.get(url, headers=headers, params=params, timeout=10)
-     if response.status_code == 200:
-         photos = response.json().get("photos", [])
-         if photos: return random.choice(photos[:5]).get("src", {}).get("original")
      return None

  def download_image(image_url, filename):
      try:
-         response = requests.get(image_url, stream=True, timeout=15)
          response.raise_for_status()
          with open(filename, 'wb') as f:
-             for chunk in response.iter_content(chunk_size=8192): f.write(chunk)
-         img = Image.open(filename)
-         if img.mode != 'RGB':
-             img = img.convert('RGB')
-             img.save(filename)
-         return filename
-     except: return None

  def download_video(video_url, filename):
      try:
          response = requests.get(video_url, stream=True, timeout=30)
          with open(filename, 'wb') as f:
-             for chunk in response.iter_content(chunk_size=8192): f.write(chunk)
          return filename
-     except: return None

- def generate_media(prompt):
      safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
-     video_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_video.mp4")
-     video_url = search_pexels_videos(prompt, PEXELS_API_KEY)
-     if video_url:
-         if download_video(video_url, video_file): return {"path": video_file, "asset_type": "video"}
-     image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}.jpg")
      image_url = search_pexels_images(prompt, PEXELS_API_KEY)
-     if image_url:
-         if download_image(image_url, image_file): return {"path": image_file, "asset_type": "image"}
      return None

- def generate_tts(text, voice):
-     file_path = os.path.join(TEMP_FOLDER, f"tts_{text[:10]}.wav")
      try:
-         generator = pipeline(text, voice='af_heart', speed=0.9, split_pattern=r'\n+')
-         audio_segments = [audio for _, _, audio in generator]
          full_audio = np.concatenate(audio_segments) if len(audio_segments) > 1 else audio_segments[0]
          sf.write(file_path, full_audio, 24000)
          return file_path
-     except:
          try:
              tts = gTTS(text=text, lang='en')
-             mp3_path = os.path.join(TEMP_FOLDER, f"tts_{text[:10]}.mp3")
              tts.save(mp3_path)
              audio = AudioSegment.from_mp3(mp3_path)
              audio.export(file_path, format="wav")
              os.remove(mp3_path)
              return file_path
-         except: return None

- def apply_kenburns_effect(clip, target_resolution):
      target_w, target_h = target_resolution
      clip_aspect = clip.w / clip.h
      target_aspect = target_w / target_h
-     if clip_aspect > target_aspect: new_height = target_h; new_width = int(new_height * clip_aspect)
-     else: new_width = target_w; new_height = int(new_width / clip_aspect)
      clip = clip.resize(newsize=(new_width, new_height))
      base_scale = 1.15
      new_width = int(new_width * base_scale)
@@ -157,16 +414,51 @@ def apply_kenburns_effect(clip, target_resolution):
      clip = clip.resize(newsize=(new_width, new_height))
      max_offset_x = new_width - target_w
      max_offset_y = new_height - target_h
-     effect_type = random.choice(["zoom-in", "zoom-out", "pan-left", "pan-right", "up-left"])
      def transform_frame(get_frame, t):
          frame = get_frame(t)
          ratio = t / clip.duration if clip.duration > 0 else 0
          ratio = 0.5 - 0.5 * math.cos(math.pi * ratio)
-         current_zoom = 0.9 + (1.1 - 0.9) * ratio
          crop_w = int(target_w / current_zoom)
          crop_h = int(target_h / current_zoom)
-         current_center_x = (new_width / 2) + (max_offset_x * ratio)
-         current_center_y = (new_height / 2) + (max_offset_y * ratio)
          cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (current_center_x, current_center_y))
          resized_frame = cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
          return resized_frame
@@ -186,92 +478,449 @@ def resize_to_fill(clip, target_resolution):
      clip = clip.crop(x1=0, x2=clip.w, y1=crop_amount, y2=clip.h - crop_amount)
      return clip

- def create_custom_clip(media_path, tts_path, text, text_color, bg_color, font_size, position, duration):
      try:
-         if media_path.endswith(('.mp4', '.mov')):
              clip = VideoFileClip(media_path)
-             clip = resize_to_fill(clip, TARGET_RESOLUTION)
-             if clip.duration < duration: clip = clip.loop(duration=duration)
-             else: clip = clip.subclip(0, duration)
          else:
-             clip = ImageClip(media_path).set_duration(duration)
-             clip = apply_kenburns_effect(clip, TARGET_RESOLUTION)
-         text_clip = TextClip(text, fontsize=font_size, font='Arial-Bold', color=text_color, bg_color=bg_color, size=(TARGET_RESOLUTION[0]*0.9, None))
-         y_pos = {"Bottom": TARGET_RESOLUTION[1]*0.85, "Middle": 'center', "Top": TARGET_RESOLUTION[1]*0.15}[position]
-         text_clip = text_clip.set_position(('center', y_pos)).set_duration(duration)
-         video_clip = CompositeVideoClip([clip, text_clip])
-         audio_clip = AudioFileClip(tts_path)
-         return video_clip.set_audio(audio_clip)
-     except: return None

- def generate_editable_script(script_text):
-     elements = parse_script(script_text)
      paired_elements = []
      for i in range(0, len(elements), 2):
          if i + 1 < len(elements):
-             paired_elements.append({"media_prompt": elements[i]['prompt'], "text": elements[i+1]['text'], "duration": elements[i+1]['duration']})
-     return paired_elements

- def process_initial_input(prompt, resolution, text_color, bg_color, font_size):
-     global TARGET_RESOLUTION, TEMP_FOLDER
-     TARGET_RESOLUTION = (1920, 1080) if resolution == "Full (16:9)" else (1080, 1920)
-     TEMP_FOLDER = tempfile.mkdtemp()
-     script = generate_script(prompt)
-     if not script: raise gr.Error("Failed to generate script")
-     clips_data = generate_editable_script(script)
-     for clip in clips_data: clip.update({"text_color": text_color, "bg_color": bg_color, "font_size": font_size, "position": "Bottom"})
-     return gr.Column.update(visible=False), gr.Column.update(visible=True), clips_data

- def generate_final_video(clips_data, bg_music=None, bg_volume=0.1):
-     try:
-         all_clips = []
-         for clip_info in clips_data:
-             media_path = clip_info.get('media')
-             if not media_path:
-                 media_asset = generate_media(clip_info['media_prompt'])
-                 media_path = media_asset['path'] if media_asset else None
-             if not media_path: continue
-             tts_path = generate_tts(clip_info['text'], 'en')
-             clip = create_custom_clip(media_path, tts_path, clip_info['text'], clip_info['text_color'], clip_info['bg_color'], clip_info['font_size'], clip_info['position'], clip_info['duration'])
-             if clip: all_clips.append(clip)
-         if not all_clips: raise gr.Error("Failed to create clips")
-         final_video = concatenate_videoclips(all_clips, method="compose")
-         if bg_music:
-             bg_audio = AudioFileClip(bg_music).volumex(bg_volume)
-             final_video = final_video.set_audio(CompositeAudioClip([final_video.audio, bg_audio]))
-         output_path = os.path.join(TEMP_FOLDER, "final_video.mp4")
-         final_video.write_videofile(output_path, codec='libx264', fps=24, preset='veryfast')
-         return output_path
-     except Exception as e: raise gr.Error(str(e))
-     finally:
-         if 'final_video' in locals(): final_video.close()
-         shutil.rmtree(TEMP_FOLDER, ignore_errors=True)

- with gr.Blocks(title="AI Documentary Maker Pro", theme=gr.themes.Soft()) as demo:
-     gr.Markdown("# 📽️ AI Documentary Maker Pro")
-     with gr.Column(visible=True, elem_id="input_section") as input_section:
          with gr.Row():
-             prompt = gr.Textbox(label="Documentary Topic")
-             resolution = gr.Radio(["Full (16:9)", "Short (9:16)"], label="Aspect Ratio", value="Full (16:9)")
-         with gr.Accordion("Advanced Styling", open=False):
-             with gr.Row():
-                 text_color = gr.ColorPicker(label="Text Color", value="#FFFFFF")
-                 bg_color = gr.ColorPicker(label="Background", value="#00000000")
-                 font_size = gr.Slider(20, 80, value=40, label="Font Size")
-         submit_btn = gr.Button("Generate Script & Preview", variant="primary")
-     with gr.Column(visible=False, elem_id="edit_section") as edit_section:
-         clips_ui = gr.State()
-         clips_group = gr.Column()
-         with gr.Accordion("Background Music", open=False):
-             bg_music = gr.Audio(label="Upload Music", type="filepath")
-             bg_volume = gr.Slider(0, 1, value=0.1, label="Volume")
-         generate_btn = gr.Button("Generate Final Video", variant="primary")
-         video_output = gr.Video(label="Final Video")
-         back_btn = gr.Button("Back to Start")
-
-     submit_btn.click(process_initial_input, [prompt, resolution, text_color, bg_color, font_size], [input_section, edit_section, clips_ui]).then(lambda x: [gr.File(label=f"Clip {i+1} Media") for i, _ in enumerate(x)], clips_ui, clips_group)
-     generate_btn.click(generate_final_video, [clips_ui, bg_music, bg_volume], video_output)
-     back_btn.click(lambda: [gr.Column.update(visible=True), gr.Column.update(visible=False)], outputs=[input_section, edit_section])

- if __name__ == "__main__":
-     demo.launch(share=True)
+ # Import necessary libraries
+ from kokoro import KPipeline
  import soundfile as sf
  import torch
  import os
+ from moviepy.editor import (
+     VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
+     CompositeVideoClip, CompositeAudioClip, TextClip, concatenate_audioclips
+ )
+ import moviepy.video.fx.all as vfx
  import moviepy.config as mpy_config
  from pydub import AudioSegment
+ from PIL import Image, ImageDraw, ImageFont
+ import numpy as np
  from bs4 import BeautifulSoup
+ import base64
  from urllib.parse import quote
+ import pysrt
  from gtts import gTTS
  import gradio as gr
+ import tempfile
+ import random
+ import cv2
+ import math
+ import requests
+ import time
+ import re
  import shutil

+ # Initialize Kokoro TTS pipeline (using American English)
+ pipeline = KPipeline(lang_code='a')  # Use voice 'af_heart' for American English
+ # Ensure ImageMagick binary is set
  mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})

+ # ---------------- Global Configuration ---------------- #
  PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
  OPENROUTER_API_KEY = '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'
  OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
+ OUTPUT_VIDEO_FILENAME = "final_video.mp4"
  USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"

+ # Global variables to be set per run
  TARGET_RESOLUTION = None
  TEMP_FOLDER = None

+ # ---------------- Helper Functions ---------------- #
  def generate_script(user_input):
+     headers = {
+         'Authorization': f'Bearer {OPENROUTER_API_KEY}',
+         'HTTP-Referer': 'https://your-domain.com',
+         'X-Title': 'AI Documentary Maker'
+     }
+     prompt = f"""Short Documentary Script Generator Instructions:
+ If I say "use this," just output the script exactly as I gave it.
+ If I only give topics, generate a script based on them.
+ If I provide a full script, rewrite it without any changes. Make everything short, simple, humorous, and funny; act serious but stay humorous. Don't say anything off topic. Always end with a funny statement to subscribe, based on the video topic. Use normal conversational text like a normal person talking; avoid AI phrasing and keep the statements humanized and conversational.
+ Always take the negative view; don't make positive statements. The full script will be one video about one topic, so keep that in mind.
+ Formatting Rules:
+ Title in Square Brackets:
+ Each section starts with a one-word title inside [ ] (max two words if necessary).
+ This title will be used as a search term for Pexels footage.
+ Casual & Funny Narration:
+ Each section has 5-10 words of narration.
+ Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
+ No Special Formatting:
+ No bold, italics, or special characters. You are an assistant AI whose task is to create the script. You aren't a chatbot, so don't write extra text.
+ Generalized Search Terms:
+ If a term is too specific, make it more general for Pexels search.
+ Scene-Specific Writing:
+ Each section describes only what should be shown in the video.
+ Output Only the Script, and also make it funny, humorous, and hilarious, and add a funny subscribe statement like subscribe now or .....
+ No extra text, just the script.
+ Example Output:
+ [North Korea]
+ Top 5 unknown facts about North Korea.
+ [Invisibility]
+ North Korea’s internet speed is so fast… it doesn’t exist.
+ [Leadership]
+ Kim Jong-un once won an election with 100% votes… against himself.
+ [Magic]
+ North Korea discovered time travel. That’s why their news is always from the past.
+ [Warning]
+ Subscribe now, or Kim Jong-un will send you a free one-way ticket… to North Korea.
+ [Freedom]
+ North Korean citizens can do anything… as long as it's government-approved.
+ Now here is the Topic/script: {user_input}
+ """
+     data = {
+         'model': OPENROUTER_MODEL,
+         'messages': [{'role': 'user', 'content': prompt}],
+         'temperature': 0.4,
+         'max_tokens': 5000
+     }
      try:
+         response = requests.post(
+             'https://openrouter.ai/api/v1/chat/completions',
+             headers=headers,
+             json=data,
+             timeout=30
+         )
+         if response.status_code == 200:
+             response_data = response.json()
+             if 'choices' in response_data and len(response_data['choices']) > 0:
+                 return response_data['choices'][0]['message']['content']
+             else:
+                 print("Unexpected response format:", response_data)
+                 return None
+         else:
+             print(f"API Error {response.status_code}: {response.text}")
+             return None
+     except Exception as e:
+         print(f"Request failed: {str(e)}")
          return None

  def parse_script(script_text):
      sections = {}
      current_title = None
      current_text = ""
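+     # Expected script format: a "[Title]" line opens each section and the text
+     # that follows is its narration; duration is estimated further down at
+     # 0.5s per word, with a 3s minimum.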
+     try:
+         for line in script_text.splitlines():
+             line = line.strip()
+             if line.startswith("[") and "]" in line:
+                 bracket_start = line.find("[")
+                 bracket_end = line.find("]", bracket_start)
+                 if bracket_start != -1 and bracket_end != -1:
+                     if current_title is not None:
+                         sections[current_title] = current_text.strip()
+                     current_title = line[bracket_start+1:bracket_end]
+                     current_text = line[bracket_end+1:].strip()
+             elif current_title:
+                 current_text += line + " "
+         if current_title:
+             sections[current_title] = current_text.strip()
+         elements = []
+         for title, narration in sections.items():
+             if not title or not narration:
+                 continue
+             media_element = {"type": "media", "prompt": title, "effects": "fade-in"}
+             words = narration.split()
+             duration = max(3, len(words) * 0.5)
+             tts_element = {"type": "tts", "text": narration, "voice": "en", "duration": duration}
+             elements.append(media_element)
+             elements.append(tts_element)
+         return elements
+     except Exception as e:
+         print(f"Error parsing script: {e}")
+         return []

  def search_pexels_videos(query, pexels_api_key):
      headers = {'Authorization': pexels_api_key}
      base_url = "https://api.pexels.com/videos/search"
+     num_pages = 3
+     videos_per_page = 15
+     max_retries = 3
+     retry_delay = 1
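+     # Each page is retried up to max_retries times; retry_delay doubles after
+     # every failed attempt (exponential backoff), so Pexels 429 rate-limit
+     # responses don't abort the whole search.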
+     search_query = query
      all_videos = []
+     for page in range(1, num_pages + 1):
+         for attempt in range(max_retries):
+             try:
+                 params = {"query": search_query, "per_page": videos_per_page, "page": page}
+                 response = requests.get(base_url, headers=headers, params=params, timeout=10)
+                 if response.status_code == 200:
+                     data = response.json()
+                     videos = data.get("videos", [])
+                     if not videos:
+                         print(f"No videos found on page {page}.")
                          break
+                     for video in videos:
+                         video_files = video.get("video_files", [])
+                         for file in video_files:
+                             if file.get("quality") == "hd":
+                                 all_videos.append(file.get("link"))
+                                 break
+                     break
+                 elif response.status_code == 429:
+                     print(f"Rate limit hit (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay} seconds...")
+                     time.sleep(retry_delay)
+                     retry_delay *= 2
+                 else:
+                     print(f"Error fetching videos: {response.status_code} {response.text}")
+                     if attempt < max_retries - 1:
+                         print(f"Retrying in {retry_delay} seconds...")
+                         time.sleep(retry_delay)
+                         retry_delay *= 2
+                     else:
+                         break
+             except requests.exceptions.RequestException as e:
+                 print(f"Request exception: {e}")
+                 if attempt < max_retries - 1:
+                     print(f"Retrying in {retry_delay} seconds...")
+                     time.sleep(retry_delay)
+                     retry_delay *= 2
+                 else:
+                     break
+     if all_videos:
+         random_video = random.choice(all_videos)
+         print(f"Selected random video from {len(all_videos)} HD videos")
+         return random_video
+     else:
+         print("No suitable videos found after searching all pages.")
+         return None

  def search_pexels_images(query, pexels_api_key):
      headers = {'Authorization': pexels_api_key}
      url = "https://api.pexels.com/v1/search"
      params = {"query": query, "per_page": 5, "orientation": "landscape"}
+     max_retries = 3
+     retry_delay = 1
+     for attempt in range(max_retries):
+         try:
+             response = requests.get(url, headers=headers, params=params, timeout=10)
+             if response.status_code == 200:
+                 data = response.json()
+                 photos = data.get("photos", [])
+                 if photos:
+                     photo = random.choice(photos[:min(5, len(photos))])
+                     img_url = photo.get("src", {}).get("original")
+                     return img_url
+                 else:
+                     print(f"No images found for query: {query}")
+                     return None
+             elif response.status_code == 429:
+                 print(f"Rate limit hit (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay} seconds...")
+                 time.sleep(retry_delay)
+                 retry_delay *= 2
+             else:
+                 print(f"Error fetching images: {response.status_code} {response.text}")
+                 if attempt < max_retries - 1:
+                     print(f"Retrying in {retry_delay} seconds...")
+                     time.sleep(retry_delay)
+                     retry_delay *= 2
+         except requests.exceptions.RequestException as e:
+             print(f"Request exception: {e}")
+             if attempt < max_retries - 1:
+                 print(f"Retrying in {retry_delay} seconds...")
+                 time.sleep(retry_delay)
+                 retry_delay *= 2
+     print(f"No Pexels images found for query: {query} after all attempts")
      return None

+ def search_google_images(query):
+     try:
+         search_url = f"https://www.google.com/search?q={quote(query)}&tbm=isch"
+         headers = {"User-Agent": USER_AGENT}
+         response = requests.get(search_url, headers=headers, timeout=10)
+         soup = BeautifulSoup(response.text, "html.parser")
+         img_tags = soup.find_all("img")
+         image_urls = []
+         for img in img_tags:
+             src = img.get("src", "")
+             if src.startswith("http") and "gstatic" not in src:
+                 image_urls.append(src)
+         if image_urls:
+             return random.choice(image_urls[:5]) if len(image_urls) >= 5 else image_urls[0]
+         else:
+             print(f"No Google Images found for query: {query}")
+             return None
+     except Exception as e:
+         print(f"Error in Google Images search: {e}")
+         return None
+
  def download_image(image_url, filename):
      try:
+         headers = {"User-Agent": USER_AGENT}
+         print(f"Downloading image from: {image_url} to {filename}")
+         response = requests.get(image_url, headers=headers, stream=True, timeout=15)
          response.raise_for_status()
          with open(filename, 'wb') as f:
+             for chunk in response.iter_content(chunk_size=8192):
+                 f.write(chunk)
+         print(f"Image downloaded successfully to: {filename}")
+         try:
+             img = Image.open(filename)
+             img.verify()
+             img = Image.open(filename)
+             if img.mode != 'RGB':
+                 img = img.convert('RGB')
+                 img.save(filename)
+             print(f"Image validated and processed: {filename}")
+             return filename
+         except Exception as e_validate:
+             print(f"Downloaded file is not a valid image: {e_validate}")
+             if os.path.exists(filename):
+                 os.remove(filename)
+             return None
+     except requests.exceptions.RequestException as e_download:
+         print(f"Image download error: {e_download}")
+         if os.path.exists(filename):
+             os.remove(filename)
+         return None
+     except Exception as e_general:
+         print(f"General error during image processing: {e_general}")
+         if os.path.exists(filename):
+             os.remove(filename)
+         return None

  def download_video(video_url, filename):
      try:
          response = requests.get(video_url, stream=True, timeout=30)
+         response.raise_for_status()
          with open(filename, 'wb') as f:
+             for chunk in response.iter_content(chunk_size=8192):
+                 f.write(chunk)
+         print(f"Video downloaded successfully to: {filename}")
          return filename
+     except Exception as e:
+         print(f"Video download error: {e}")
+         if os.path.exists(filename):
+             os.remove(filename)
+         return None

+ def generate_media(prompt, temp_folder, user_upload=None):
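+     # Asset priority: a user upload wins; "news" prompts go to Google Images;
+     # otherwise a Pexels video is tried 25% of the time, then a Pexels image,
+     # then a handful of generic fallback search terms.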
      safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
+     if user_upload:
+         file_ext = os.path.splitext(user_upload)[1].lower()
+         if file_ext in ['.mp4', '.mov', '.avi']:
+             shutil.copy(user_upload, os.path.join(temp_folder, f"{safe_prompt}{file_ext}"))
+             return {"path": os.path.join(temp_folder, f"{safe_prompt}{file_ext}"), "asset_type": "video"}
+         elif file_ext in ['.jpg', '.jpeg', '.png']:
+             shutil.copy(user_upload, os.path.join(temp_folder, f"{safe_prompt}.jpg"))
+             return {"path": os.path.join(temp_folder, f"{safe_prompt}.jpg"), "asset_type": "image"}
+     if "news" in prompt.lower():
+         print(f"News-related query detected: {prompt}. Using Google Images...")
+         image_file = os.path.join(temp_folder, f"{safe_prompt}_news.jpg")
+         image_url = search_google_images(prompt)
+         if image_url:
+             downloaded_image = download_image(image_url, image_file)
+             if downloaded_image:
+                 print(f"News image saved to {downloaded_image}")
+                 return {"path": downloaded_image, "asset_type": "image"}
+     if random.random() < 0.25:
+         video_file = os.path.join(temp_folder, f"{safe_prompt}_video.mp4")
+         video_url = search_pexels_videos(prompt, PEXELS_API_KEY)
+         if video_url:
+             downloaded_video = download_video(video_url, video_file)
+             if downloaded_video:
+                 print(f"Video asset saved to {downloaded_video}")
+                 return {"path": downloaded_video, "asset_type": "video"}
+     image_file = os.path.join(temp_folder, f"{safe_prompt}.jpg")
      image_url = search_pexels_images(prompt, PEXELS_API_KEY)
+     if image_url:
+         downloaded_image = download_image(image_url, image_file)
+         if downloaded_image:
+             print(f"Image asset saved to {downloaded_image}")
+             return {"path": downloaded_image, "asset_type": "image"}
+     fallback_terms = ["nature", "people", "landscape", "technology", "business"]
+     for term in fallback_terms:
+         print(f"Trying fallback image search with term: {term}")
+         fallback_file = os.path.join(temp_folder, f"fallback_{term}.jpg")
+         fallback_url = search_pexels_images(term, PEXELS_API_KEY)
+         if fallback_url:
+             downloaded_fallback = download_image(fallback_url, fallback_file)
+             if downloaded_fallback:
+                 print(f"Fallback image saved to {downloaded_fallback}")
+                 return {"path": downloaded_fallback, "asset_type": "image"}
+     print(f"Failed to generate visual asset for prompt: {prompt}")
      return None

+ def generate_tts(text, voice, temp_folder):
+     safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '_')
+     file_path = os.path.join(temp_folder, f"tts_{safe_text}.wav")
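+     # Note: the cache key is only the first 10 sanitized characters of the
+     # text, so two narrations sharing a prefix would reuse the same WAV.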
+     if os.path.exists(file_path):
+         print(f"Using cached TTS for text '{text[:10]}...'")
+         return file_path
      try:
+         kokoro_voice = 'af_heart' if voice == 'en' else voice
+         generator = pipeline(text, voice=kokoro_voice, speed=0.9, split_pattern=r'\n+')
+         audio_segments = []
+         for i, (gs, ps, audio) in enumerate(generator):
+             audio_segments.append(audio)
          full_audio = np.concatenate(audio_segments) if len(audio_segments) > 1 else audio_segments[0]
          sf.write(file_path, full_audio, 24000)
+         print(f"TTS audio saved to {file_path} (Kokoro)")
          return file_path
+     except Exception as e:
+         print(f"Error with Kokoro TTS: {e}")
          try:
+             print("Falling back to gTTS...")
              tts = gTTS(text=text, lang='en')
+             mp3_path = os.path.join(temp_folder, f"tts_{safe_text}.mp3")
              tts.save(mp3_path)
              audio = AudioSegment.from_mp3(mp3_path)
              audio.export(file_path, format="wav")
              os.remove(mp3_path)
+             print(f"Fallback TTS saved to {file_path} (gTTS)")
              return file_path
+         except Exception as fallback_error:
+             print(f"Both TTS methods failed: {fallback_error}")
+             return generate_silent_audio(max(3, len(text.split()) * 0.5), temp_folder)

+ def generate_silent_audio(duration, temp_folder, sample_rate=24000):
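+     # Last-resort fallback: if both Kokoro and gTTS fail, emit silence sized
+     # to the estimated narration length so clip assembly can still proceed.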
+     num_samples = int(duration * sample_rate)
+     silence = np.zeros(num_samples, dtype=np.float32)
+     silent_path = os.path.join(temp_folder, f"silent_{int(time.time())}.wav")
+     sf.write(silent_path, silence, sample_rate)
+     print(f"Silent audio generated: {silent_path}")
+     return silent_path
+
+ def apply_kenburns_effect(clip, target_resolution, effect_type=None):
      target_w, target_h = target_resolution
      clip_aspect = clip.w / clip.h
      target_aspect = target_w / target_h
+     if clip_aspect > target_aspect:
+         new_height = target_h
+         new_width = int(new_height * clip_aspect)
+     else:
+         new_width = target_w
+         new_height = int(new_width / clip_aspect)
      clip = clip.resize(newsize=(new_width, new_height))
      base_scale = 1.15
      new_width = int(new_width * base_scale)

      clip = clip.resize(newsize=(new_width, new_height))
      max_offset_x = new_width - target_w
      max_offset_y = new_height - target_h
+     available_effects = ["zoom-in", "zoom-out", "pan-left", "pan-right", "up-left"]
+     if effect_type is None or effect_type == "random":
+         effect_type = random.choice(available_effects)
+     if effect_type == "zoom-in":
+         start_zoom = 0.9
+         end_zoom = 1.1
+         start_center = (new_width / 2, new_height / 2)
+         end_center = start_center
+     elif effect_type == "zoom-out":
+         start_zoom = 1.1
+         end_zoom = 0.9
+         start_center = (new_width / 2, new_height / 2)
+         end_center = start_center
+     elif effect_type == "pan-left":
+         start_zoom = 1.0
+         end_zoom = 1.0
+         start_center = (max_offset_x + target_w / 2, (max_offset_y // 2) + target_h / 2)
+         end_center = (target_w / 2, (max_offset_y // 2) + target_h / 2)
+     elif effect_type == "pan-right":
+         start_zoom = 1.0
+         end_zoom = 1.0
+         start_center = (target_w / 2, (max_offset_y // 2) + target_h / 2)
+         end_center = (max_offset_x + target_w / 2, (max_offset_y // 2) + target_h / 2)
+     elif effect_type == "up-left":
+         start_zoom = 1.0
+         end_zoom = 1.0
+         start_center = (max_offset_x + target_w / 2, max_offset_y + target_h / 2)
+         end_center = (target_w / 2, target_h / 2)
+     else:
+         raise ValueError(f"Unsupported effect_type: {effect_type}")
      def transform_frame(get_frame, t):
          frame = get_frame(t)
          ratio = t / clip.duration if clip.duration > 0 else 0
          ratio = 0.5 - 0.5 * math.cos(math.pi * ratio)
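+         # Cosine easing: remaps the linear time ratio onto a smooth S-curve so
+         # the zoom/pan accelerates from rest and decelerates into the end pose.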
+         current_zoom = start_zoom + (end_zoom - start_zoom) * ratio
          crop_w = int(target_w / current_zoom)
          crop_h = int(target_h / current_zoom)
+         current_center_x = start_center[0] + (end_center[0] - start_center[0]) * ratio
+         current_center_y = start_center[1] + (end_center[1] - start_center[1]) * ratio
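+         # Clamp the crop center so the crop window stays inside the frame;
+         # cv2.getRectSubPix would otherwise sample beyond the image borders.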
+         min_center_x = crop_w / 2
+         max_center_x = new_width - crop_w / 2
+         min_center_y = crop_h / 2
+         max_center_y = new_height - crop_h / 2
+         current_center_x = max(min_center_x, min(current_center_x, max_center_x))
+         current_center_y = max(min_center_y, min(current_center_y, max_center_y))
          cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (current_center_x, current_center_y))
          resized_frame = cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
          return resized_frame

      clip = clip.crop(x1=0, x2=clip.w, y1=crop_amount, y2=clip.h - crop_amount)
      return clip

+ def find_mp3_files():
+     mp3_files = []
+     for root, dirs, files in os.walk('.'):
+         for file in files:
+             if file.endswith('.mp3'):
+                 mp3_path = os.path.join(root, file)
+                 mp3_files.append(mp3_path)
+                 print(f"Found MP3 file: {mp3_path}")
+     return mp3_files[0] if mp3_files else None
+
+ def add_background_music(final_video, bg_music_volume=0.08):
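+     # Looks for the first .mp3 anywhere under the working directory, loops it
+     # to cover the whole video, scales it to bg_music_volume, and mixes it
+     # under the narration track.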
+     try:
+         bg_music_path = find_mp3_files()
+         if bg_music_path and os.path.exists(bg_music_path):
+             print(f"Adding background music from: {bg_music_path}")
+             bg_music = AudioFileClip(bg_music_path)
+             if bg_music.duration < final_video.duration:
+                 loops_needed = math.ceil(final_video.duration / bg_music.duration)
+                 bg_segments = [bg_music] * loops_needed
+                 bg_music = concatenate_audioclips(bg_segments)
+             bg_music = bg_music.subclip(0, final_video.duration)
+             bg_music = bg_music.volumex(bg_music_volume)
+             video_audio = final_video.audio
+             mixed_audio = CompositeAudioClip([video_audio, bg_music])
+             final_video = final_video.set_audio(mixed_audio)
+             print("Background music added successfully")
+         else:
+             print("No MP3 files found, skipping background music")
+         return final_video
+     except Exception as e:
+         print(f"Error adding background music: {e}")
+         print("Continuing without background music")
+         return final_video
+
+ def create_clip(media_path, asset_type, tts_path, duration, customizations, target_resolution, temp_folder):
      try:
+         print(f"Creating clip with asset_type: {asset_type}, media_path: {media_path}")
+         if not os.path.exists(media_path) or not os.path.exists(tts_path):
+             print("Missing media or TTS file")
+             return None
+         audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
+         audio_duration = audio_clip.duration
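+         # Pad the visuals 0.2s past the narration so the audio fade-out
+         # finishes before the cut.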
+         target_duration = audio_duration + 0.2
+         if asset_type == "video":
              clip = VideoFileClip(media_path)
+             clip = resize_to_fill(clip, target_resolution)
+             if clip.duration < target_duration:
+                 clip = clip.loop(duration=target_duration)
+             else:
+                 clip = clip.subclip(0, target_duration)
+             if customizations.get('video_brightness', 1.0) != 1.0:
+                 clip = clip.fx(vfx.colorx, customizations['video_brightness'])
+             if customizations.get('video_contrast', 1.0) != 1.0:
+                 # vfx has no 'contrast' fx; lum_contrast(contrast=0) is the
+                 # identity, so map the 1.0-centered slider value onto it.
+                 clip = clip.fx(vfx.lum_contrast, contrast=customizations['video_contrast'] - 1.0)
+             if customizations.get('video_speed', 1.0) != 1.0:
+                 clip = clip.fx(vfx.speedx, customizations['video_speed'])
+         elif asset_type == "image":
+             img = Image.open(media_path)
+             if img.mode != 'RGB':
+                 with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False, dir=temp_folder) as temp:
+                     img.convert('RGB').save(temp.name)
+                     media_path = temp.name
+             img.close()
+             clip = ImageClip(media_path).set_duration(target_duration)
+             clip = apply_kenburns_effect(clip, target_resolution, customizations.get('kenburns_effect', 'random'))
+             clip = clip.fadein(customizations.get('fade_in_duration', 0.3)).fadeout(customizations.get('fade_out_duration', 0.3))
+             if customizations.get('image_brightness', 1.0) != 1.0:
+                 clip = clip.fx(vfx.colorx, customizations['image_brightness'])
+             if customizations.get('image_contrast', 1.0) != 1.0:
+                 clip = clip.fx(vfx.lum_contrast, contrast=customizations['image_contrast'] - 1.0)
          else:
+             return None
+         if customizations.get('text_color') != "transparent":
+             try:
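+                 # Split the narration into fixed-size word chunks and time them
+                 # evenly across the narration audio to get rolling subtitles.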
+                 words = customizations['text'].split()
+                 chunks = []
+                 current_chunk = []
+                 for word in words:
+                     current_chunk.append(word)
+                     if len(current_chunk) >= customizations.get('text_words_per_chunk', 5):
+                         chunks.append(' '.join(current_chunk))
+                         current_chunk = []
+                 if current_chunk:
+                     chunks.append(' '.join(current_chunk))
+                 chunk_duration = audio_duration / len(chunks) if chunks else audio_duration
+                 subtitle_clips = []
+                 subtitle_y = target_resolution[1] * customizations.get('text_position_y', 0.70)
+                 for i, chunk_text in enumerate(chunks):
+                     start_time = i * chunk_duration
+                     end_time = (i + 1) * chunk_duration
+                     txt_clip = TextClip(
+                         chunk_text,
+                         fontsize=customizations.get('text_size', 45),
+                         font=customizations.get('text_font', 'Arial-Bold'),
+                         color=customizations.get('text_color', 'white'),
+                         bg_color=customizations.get('bg_color', 'rgba(0, 0, 0, 0.25)'),
+                         method='caption',
+                         align=customizations.get('text_alignment', 'center'),
+                         stroke_width=customizations.get('text_stroke_width', 2),
+                         stroke_color=customizations.get('text_stroke_color', 'white'),
+                         size=(target_resolution[0] * customizations.get('text_width_ratio', 0.8), None)
+                     ).set_start(start_time).set_end(end_time)
+                     txt_clip = txt_clip.set_position((customizations.get('text_alignment', 'center'), subtitle_y))
+                     subtitle_clips.append(txt_clip)
+                 clip = CompositeVideoClip([clip] + subtitle_clips)
+             except Exception as sub_error:
+                 print(f"Subtitle error: {sub_error}")
+                 txt_clip = TextClip(
+                     customizations['text'],
+                     fontsize=customizations.get('text_size', 45),
+                     font=customizations.get('text_font', 'Arial-Bold'),
+                     color=customizations.get('text_color', 'white'),
+                     align=customizations.get('text_alignment', 'center'),
+                     size=(target_resolution[0] * customizations.get('text_width_ratio', 0.8), None)
+                 ).set_position((customizations.get('text_alignment', 'center'), int(target_resolution[1] / 3))).set_duration(clip.duration)
+                 clip = CompositeVideoClip([clip, txt_clip])
+         clip = clip.set_audio(audio_clip)
+         print(f"Clip created: {clip.duration:.1f}s")
+         return clip
+     except Exception as e:
+         print(f"Error in create_clip: {str(e)}")
+         return None
+
+ def fix_imagemagick_policy():
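+     # MoviePy's TextClip renders through ImageMagick, whose default security
+     # policy on many distros blocks the path/@-file operations it needs, so
+     # this relaxes the installed policy.xml in place (requires sudo).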
+     try:
+         print("Attempting to fix ImageMagick security policies...")
+         policy_paths = [
+             "/etc/ImageMagick-6/policy.xml",
+             "/etc/ImageMagick-7/policy.xml",
+             "/etc/ImageMagick/policy.xml",
+             "/usr/local/etc/ImageMagick-7/policy.xml"
+         ]
+         found_policy = next((path for path in policy_paths if os.path.exists(path)), None)
+         if not found_policy:
+             print("No policy.xml found. Using alternative subtitle method.")
+             return False
+         print(f"Modifying policy file at {found_policy}")
+         os.system(f"sudo cp {found_policy} {found_policy}.bak")
+         os.system(f"sudo sed -i 's/rights=\"none\"/rights=\"read|write\"/g' {found_policy}")
+         os.system(f"sudo sed -i 's/<policy domain=\"path\" pattern=\"@\*\"[^>]*>/<policy domain=\"path\" pattern=\"@*\" rights=\"read|write\"/g' {found_policy}")
+         os.system(f"sudo sed -i 's/<policy domain=\"coder\" rights=\"none\" pattern=\"PDF\"[^>]*>/<!-- <policy domain=\"coder\" rights=\"none\" pattern=\"PDF\"> -->/g' {found_policy}")
+         print("ImageMagick policies updated successfully.")
+         return True
+     except Exception as e:
+         print(f"Error fixing policies: {e}")
+         return False

+ # ---------------- Gradio Interface Logic ---------------- #
+ def generate_initial_clips(user_input, resolution):
+     global TARGET_RESOLUTION, TEMP_FOLDER
+     if resolution == "Full":
+         TARGET_RESOLUTION = (1920, 1080)
+     elif resolution == "Short":
+         TARGET_RESOLUTION = (1080, 1920)
+     else:
+         TARGET_RESOLUTION = (1920, 1080)
+     TEMP_FOLDER = tempfile.mkdtemp()
+     fix_imagemagick_policy()
+     print("Generating script from API...")
+     script = generate_script(user_input)
+     if not script:
+         print("Failed to generate script.")
+         shutil.rmtree(TEMP_FOLDER)
+         return None, [], ""
+     print("Generated Script:\n", script)
+     elements = parse_script(script)
+     if not elements:
+         print("Failed to parse script into elements.")
+         shutil.rmtree(TEMP_FOLDER)
+         return None, [], ""
+     print(f"Parsed {len(elements)//2} script segments.")
      paired_elements = []
      for i in range(0, len(elements), 2):
          if i + 1 < len(elements):
+             paired_elements.append((elements[i], elements[i + 1]))
+     if not paired_elements:
+         print("No valid script segments found.")
+         shutil.rmtree(TEMP_FOLDER)
+         return None, [], ""
+     clips_data = []
+     for idx, (media_elem, tts_elem) in enumerate(paired_elements):
+         print(f"\nProcessing segment {idx+1}/{len(paired_elements)} with prompt: '{media_elem['prompt']}'")
+         media_asset = generate_media(media_elem['prompt'], TEMP_FOLDER)
+         if not media_asset:
+             print(f"Skipping segment {idx+1} due to missing media asset.")
+             continue
+         tts_path = generate_tts(tts_elem['text'], tts_elem['voice'], TEMP_FOLDER)
+         if not tts_path:
+             print(f"Skipping segment {idx+1} due to TTS generation failure.")
+             continue
+         clips_data.append({
+             'media_path': media_asset['path'],
+             'asset_type': media_asset['asset_type'],
+             'tts_path': tts_path,
+             'text': tts_elem['text'],
+             'customizations': {
+                 'text_color': '#FFFFFF',
+                 'text_size': 45,
+                 'text_font': 'Arial-Bold',
+                 'text_alignment': 'center',
+                 'text_position_y': 0.70,
+                 'text_width_ratio': 0.8,
+                 'text_words_per_chunk': 5,
+                 'text_stroke_width': 2,
+                 'text_stroke_color': '#FFFFFF',
+                 'bg_color': 'rgba(0, 0, 0, 0.25)',
+                 'video_brightness': 1.0,
+                 'video_contrast': 1.0,
+                 'video_speed': 1.0,
+                 'image_brightness': 1.0,
+                 'image_contrast': 1.0,
+                 'kenburns_effect': 'random',
+                 'fade_in_duration': 0.3,
+                 'fade_out_duration': 0.3
+             }
+         })
+     if not clips_data:
+         print("No clips were successfully created.")
+         shutil.rmtree(TEMP_FOLDER)
+         return None, [], ""
+     return clips_data, [clip['text'] for clip in clips_data], script

+ def update_clips(clips_data, text_inputs, media_uploads, global_customizations, per_clip_customizations):
+     for i, (text, upload, per_clip_cust) in enumerate(zip(text_inputs, media_uploads, per_clip_customizations)):
+         if i < len(clips_data):
+             clips_data[i]['text'] = text
+             if upload:
+                 media_asset = generate_media(clips_data[i]['customizations']['prompt'] if 'prompt' in clips_data[i]['customizations'] else f"clip_{i}", TEMP_FOLDER, upload)
+                 if media_asset:
+                     clips_data[i]['media_path'] = media_asset['path']
+                     clips_data[i]['asset_type'] = media_asset['asset_type']
+             # Update customizations: per-clip overrides global
+             clips_data[i]['customizations'] = {**global_customizations, **per_clip_cust}
+             clips_data[i]['customizations']['text'] = text
+     return clips_data

+ def generate_final_video(clips_data):
+     clips = []
+     for clip_data in clips_data:
+         clip = create_clip(
+             media_path=clip_data['media_path'],
+             asset_type=clip_data['asset_type'],
+             tts_path=clip_data['tts_path'],
+             duration=AudioFileClip(clip_data['tts_path']).duration,
+             customizations=clip_data['customizations'],
+             target_resolution=TARGET_RESOLUTION,
+             temp_folder=TEMP_FOLDER
+         )
+         if clip:
+             clips.append(clip)
+         else:
+             print("Clip creation failed for a segment.")
+     if not clips:
+         print("No clips were successfully created.")
+         shutil.rmtree(TEMP_FOLDER)
+         return None
+     print("\nConcatenating clips...")
+     final_video = concatenate_videoclips(clips, method="compose")
+     final_video = add_background_music(final_video, bg_music_volume=0.08)
+     print(f"Exporting final video to {OUTPUT_VIDEO_FILENAME}...")
+     final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=24, preset='veryfast')
+     print(f"Final video saved as {OUTPUT_VIDEO_FILENAME}")
+     print("Cleaning up temporary files...")
+     shutil.rmtree(TEMP_FOLDER)
+     print("Temporary files removed.")
+     return OUTPUT_VIDEO_FILENAME

+ # ---------------- Gradio Interface ---------------- #
+ with gr.Blocks() as iface:
+     gr.Markdown("# Highly Customizable AI Video Generator")
+     gr.Markdown("Input a concept, edit AI-generated clips, customize extensively, and generate your video!")
+
+     # Step 1: Prompt Input and Script Generation
+     with gr.Row():
+         user_input = gr.Textbox(label="Video Concept", placeholder="Enter your video concept here...")
+         resolution = gr.Radio(["Full", "Short"], label="Resolution", value="Full")
+     generate_script_btn = gr.Button("Generate Script and Clips")
+
+     # Display Generated Script
+     script_output = gr.Textbox(label="Generated Script", interactive=False)
+
+     # Global Customization Options
+     with gr.Group():
+         gr.Markdown("## Global Customization Options")
          with gr.Row():
+             global_text_color = gr.ColorPicker(label="Text Color", value="#FFFFFF")
+             global_text_size = gr.Slider(20, 100, step=1, label="Text Size", value=45)
+             global_text_font = gr.Dropdown(["Arial-Bold", "Times-Roman", "Courier"], label="Text Font", value="Arial-Bold")
+         with gr.Row():
+             global_text_alignment = gr.Dropdown(["center", "left", "right"], label="Text Alignment", value="center")
+             global_text_position_y = gr.Slider(0.1, 0.9, step=0.05, label="Text Y Position", value=0.70)
+             global_text_width_ratio = gr.Slider(0.5, 1.0, step=0.05, label="Text Width Ratio", value=0.8)
+         with gr.Row():
+             global_text_words_per_chunk = gr.Slider(3, 10, step=1, label="Words per Subtitle Chunk", value=5)
+             global_text_stroke_width = gr.Slider(0, 5, step=1, label="Text Stroke Width", value=2)
+             global_text_stroke_color = gr.ColorPicker(label="Text Stroke Color", value="#FFFFFF")
+         with gr.Row():
+             global_bg_color = gr.ColorPicker(label="Background Color", value="rgba(0, 0, 0, 0.25)")
+             global_video_brightness = gr.Slider(0.5, 1.5, step=0.05, label="Video Brightness", value=1.0)
+             global_video_contrast = gr.Slider(0.5, 1.5, step=0.05, label="Video Contrast", value=1.0)
+         with gr.Row():
+             global_video_speed = gr.Slider(0.5, 2.0, step=0.1, label="Video Speed", value=1.0)
+             global_image_brightness = gr.Slider(0.5, 1.5, step=0.05, label="Image Brightness", value=1.0)
+             global_image_contrast = gr.Slider(0.5, 1.5, step=0.05, label="Image Contrast", value=1.0)
+         with gr.Row():
+             global_kenburns_effect = gr.Dropdown(["random", "zoom-in", "zoom-out", "pan-left", "pan-right", "up-left"], label="Ken Burns Effect", value="random")
+             global_fade_in_duration = gr.Slider(0.1, 1.0, step=0.1, label="Fade In Duration", value=0.3)
+             global_fade_out_duration = gr.Slider(0.1, 1.0, step=0.1, label="Fade Out Duration", value=0.3)
+
+     # Clip Editing Interface
+     clip_state = gr.State()
+     text_inputs = gr.State([])
+     with gr.Column():
+         gr.Markdown("## Edit Clips")
+         clip_editors = gr.Column()
+
+     def create_clip_editors(clips_data, text_list, script):
+         if not clips_data:
+             return gr.update(visible=False), clips_data, text_list
+         with clip_editors:
+             editors = []
+             text_inputs_list = []
+             media_uploads_list = []
+             per_clip_customizations = []
+             for i, clip in enumerate(clips_data):
+                 with gr.Group():
+                     gr.Markdown(f"### Clip {i+1}: {clip['customizations'].get('prompt', 'Untitled')}")
+                     text_input = gr.Textbox(label="Narration Text", value=clip['text'], interactive=True)
+                     media_upload = gr.File(label="Upload Image/Video (Optional)", type="filepath")
+                     with gr.Accordion("Advanced Customization", open=False):
+                         text_color = gr.ColorPicker(label="Text Color (Override)", value=clip['customizations']['text_color'])
+                         text_size = gr.Slider(20, 100, step=1, label="Text Size (Override)", value=clip['customizations']['text_size'])
+                         text_font = gr.Dropdown(["Arial-Bold", "Times-Roman", "Courier"], label="Text Font (Override)", value=clip['customizations']['text_font'])
+                         text_alignment = gr.Dropdown(["center", "left", "right"], label="Text Alignment (Override)", value=clip['customizations']['text_alignment'])
+                         text_position_y = gr.Slider(0.1, 0.9, step=0.05, label="Text Y Position (Override)", value=clip['customizations']['text_position_y'])
+                         text_width_ratio = gr.Slider(0.5, 1.0, step=0.05, label="Text Width Ratio (Override)", value=clip['customizations']['text_width_ratio'])
+                         text_words_per_chunk = gr.Slider(3, 10, step=1, label="Words per Chunk (Override)", value=clip['customizations']['text_words_per_chunk'])
+                         text_stroke_width = gr.Slider(0, 5, step=1, label="Text Stroke Width (Override)", value=clip['customizations']['text_stroke_width'])
+                         text_stroke_color = gr.ColorPicker(label="Text Stroke Color (Override)", value=clip['customizations']['text_stroke_color'])
+                         bg_color = gr.ColorPicker(label="Background Color (Override)", value=clip['customizations']['bg_color'])
+                         video_brightness = gr.Slider(0.5, 1.5, step=0.05, label="Video Brightness (Override)", value=clip['customizations']['video_brightness'])
+                         video_contrast = gr.Slider(0.5, 1.5, step=0.05, label="Video Contrast (Override)", value=clip['customizations']['video_contrast'])
+                         video_speed = gr.Slider(0.5, 2.0, step=0.1, label="Video Speed (Override)", value=clip['customizations']['video_speed'])
+                         image_brightness = gr.Slider(0.5, 1.5, step=0.05, label="Image Brightness (Override)", value=clip['customizations']['image_brightness'])
+                         image_contrast = gr.Slider(0.5, 1.5, step=0.05, label="Image Contrast (Override)", value=clip['customizations']['image_contrast'])
+                         kenburns_effect = gr.Dropdown(["random", "zoom-in", "zoom-out", "pan-left", "pan-right", "up-left"], label="Ken Burns Effect (Override)", value=clip['customizations']['kenburns_effect'])
+                         fade_in_duration = gr.Slider(0.1, 1.0, step=0.1, label="Fade In Duration (Override)", value=clip['customizations']['fade_in_duration'])
+                         fade_out_duration = gr.Slider(0.1, 1.0, step=0.1, label="Fade Out Duration (Override)", value=clip['customizations']['fade_out_duration'])
+                     editors.append([text_input, media_upload, text_color, text_size, text_font, text_alignment, text_position_y, text_width_ratio, text_words_per_chunk, text_stroke_width, text_stroke_color, bg_color, video_brightness, video_contrast, video_speed, image_brightness, image_contrast, kenburns_effect, fade_in_duration, fade_out_duration])
+                     text_inputs_list.append(text_input)
+                     media_uploads_list.append(media_upload)
+                     per_clip_customizations.append({
+                         'text_color': text_color,
+                         'text_size': text_size,
+                         'text_font': text_font,
+                         'text_alignment': text_alignment,
+                         'text_position_y': text_position_y,
+                         'text_width_ratio': text_width_ratio,
+                         'text_words_per_chunk': text_words_per_chunk,
+                         'text_stroke_width': text_stroke_width,
+                         'text_stroke_color': text_stroke_color,
+                         'bg_color': bg_color,
+                         'video_brightness': video_brightness,
+                         'video_contrast': video_contrast,
+                         'video_speed': video_speed,
+                         'image_brightness': image_brightness,
+                         'image_contrast': image_contrast,
+                         'kenburns_effect': kenburns_effect,
+                         'fade_in_duration': fade_in_duration,
+                         'fade_out_duration': fade_out_duration
+                     })
+             return gr.update(visible=True), clips_data, text_inputs_list
+         return gr.update(visible=False), clips_data, text_list
+
+     generate_script_btn.click(
+         fn=generate_initial_clips,
+         inputs=[user_input, resolution],
+         outputs=[clip_state, text_inputs, script_output]
+     ).then(
+         fn=create_clip_editors,
+         inputs=[clip_state, text_inputs, script_output],
+         outputs=[clip_editors, clip_state, text_inputs]
+     )
+
+     # Generate Video Button
+     generate_video_btn = gr.Button("Generate Final Video")
+     video_output = gr.Video(label="Generated Video")
+
+     def gather_inputs_and_generate(clips_data, *args):
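+         # *args arrives flat: n narration texts, then n media uploads, then 18
+         # per-clip override values for each clip, then the 18 global settings.
+         # The slices below rebuild the per-clip and global dicts from that layout.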
+         text_inputs = args[:len(clips_data)]
+         media_uploads = args[len(clips_data):2*len(clips_data)]
+         per_clip_cust_inputs = args[2*len(clips_data):2*len(clips_data) + 18*len(clips_data)]
+         global_cust_inputs = args[2*len(clips_data) + 18*len(clips_data):]
+         per_clip_customizations = []
+         for i in range(len(clips_data)):
+             start_idx = i * 18
+             per_clip_customizations.append({
+                 'text_color': per_clip_cust_inputs[start_idx],
+                 'text_size': per_clip_cust_inputs[start_idx + 1],
+                 'text_font': per_clip_cust_inputs[start_idx + 2],
+                 'text_alignment': per_clip_cust_inputs[start_idx + 3],
+                 'text_position_y': per_clip_cust_inputs[start_idx + 4],
+                 'text_width_ratio': per_clip_cust_inputs[start_idx + 5],
+                 'text_words_per_chunk': per_clip_cust_inputs[start_idx + 6],
+                 'text_stroke_width': per_clip_cust_inputs[start_idx + 7],
+                 'text_stroke_color': per_clip_cust_inputs[start_idx + 8],
+                 'bg_color': per_clip_cust_inputs[start_idx + 9],
+                 'video_brightness': per_clip_cust_inputs[start_idx + 10],
+                 'video_contrast': per_clip_cust_inputs[start_idx + 11],
+                 'video_speed': per_clip_cust_inputs[start_idx + 12],
+                 'image_brightness': per_clip_cust_inputs[start_idx + 13],
+                 'image_contrast': per_clip_cust_inputs[start_idx + 14],
+                 'kenburns_effect': per_clip_cust_inputs[start_idx + 15],
+                 'fade_in_duration': per_clip_cust_inputs[start_idx + 16],
+                 'fade_out_duration': per_clip_cust_inputs[start_idx + 17]
+             })
+         global_customizations = {
+             'text_color': global_cust_inputs[0],
+             'text_size': global_cust_inputs[1],
+             'text_font': global_cust_inputs[2],
+             'text_alignment': global_cust_inputs[3],
+             'text_position_y': global_cust_inputs[4],
+             'text_width_ratio': global_cust_inputs[5],
+             'text_words_per_chunk': global_cust_inputs[6],
+             'text_stroke_width': global_cust_inputs[7],
+             'text_stroke_color': global_cust_inputs[8],
+             'bg_color': global_cust_inputs[9],
+             'video_brightness': global_cust_inputs[10],
+             'video_contrast': global_cust_inputs[11],
+             'video_speed': global_cust_inputs[12],
+             'image_brightness': global_cust_inputs[13],
+             'image_contrast': global_cust_inputs[14],
+             'kenburns_effect': global_cust_inputs[15],
+             'fade_in_duration': global_cust_inputs[16],
+             'fade_out_duration': global_cust_inputs[17]
+         }
+         updated_clips = update_clips(clips_data, text_inputs, media_uploads, global_customizations, per_clip_customizations)
+         return generate_final_video(updated_clips)
+
+     generate_video_btn.click(
+         fn=gather_inputs_and_generate,
+         inputs=[clip_state] + text_inputs.value + [gr.File(type="filepath")] * len(text_inputs.value) + [gr.ColorPicker(), gr.Slider(), gr.Dropdown(), gr.Dropdown(), gr.Slider(), gr.Slider(), gr.Slider(), gr.Slider(), gr.ColorPicker(), gr.ColorPicker(), gr.Slider(), gr.Slider(), gr.Slider(), gr.Slider(), gr.Slider(), gr.Dropdown(), gr.Slider(), gr.Slider()] * len(text_inputs.value) + [global_text_color, global_text_size, global_text_font, global_text_alignment, global_text_position_y, global_text_width_ratio, global_text_words_per_chunk, global_text_stroke_width, global_text_stroke_color, global_bg_color, global_video_brightness, global_video_contrast, global_video_speed, global_image_brightness, global_image_contrast, global_kenburns_effect, global_fade_in_duration, global_fade_out_duration],
+         outputs=video_output
+     )

+ iface.launch(share=True)