Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -55,6 +55,28 @@ def error(message):
|
|
55 |
print(formatted_message)
|
56 |
return formatted_message
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
def choose_random_music():
|
59 |
"""Selects a random music file from the music directory."""
|
60 |
if not os.path.exists(MUSIC_DIR):
|
@@ -70,11 +92,13 @@ def choose_random_music():
|
|
70 |
|
71 |
class YouTube:
|
72 |
def __init__(self, niche: str, language: str,
|
73 |
-
text_gen="
|
74 |
-
image_gen="
|
75 |
-
tts_engine="
|
76 |
-
subtitle_font="
|
77 |
-
text_color="white", highlight_color="blue",
|
|
|
|
|
78 |
api_keys=None, progress=gr.Progress()) -> None:
|
79 |
|
80 |
"""Initialize the YouTube Shorts Generator."""
|
@@ -95,6 +119,10 @@ class YouTube:
|
|
95 |
self.font_size = font_size
|
96 |
self.text_color = text_color
|
97 |
self.highlight_color = highlight_color
|
|
|
|
|
|
|
|
|
98 |
self.api_keys = api_keys or {}
|
99 |
self.images = []
|
100 |
self.logs = []
|
@@ -120,6 +148,8 @@ class YouTube:
|
|
120 |
self.log(f"Text generator: {text_gen} - Model: {text_model}")
|
121 |
self.log(f"Image generator: {image_gen} - Model: {image_model}")
|
122 |
self.log(f"TTS engine: {tts_engine} - Voice: {tts_voice}")
|
|
|
|
|
123 |
|
124 |
def log(self, message):
|
125 |
"""Add a log message to the logs list."""
|
@@ -158,7 +188,7 @@ class YouTube:
|
|
158 |
elif self.text_gen == "g4f":
|
159 |
self.log("Using G4F for text generation")
|
160 |
import g4f
|
161 |
-
model_to_use = model if model else
|
162 |
self.log(f"Using G4F model: {model_to_use}")
|
163 |
response = g4f.ChatCompletion.create(
|
164 |
model=model_to_use,
|
@@ -687,6 +717,10 @@ class YouTube:
|
|
687 |
|
688 |
def generate_subtitles(self, audio_path):
|
689 |
"""Generate word-level subtitles for the video."""
|
|
|
|
|
|
|
|
|
690 |
self.progress(0.65, desc="Creating subtitles")
|
691 |
self.log("Starting subtitle generation process")
|
692 |
|
@@ -726,7 +760,7 @@ class YouTube:
|
|
726 |
FONT = self.subtitle_font
|
727 |
FONTSIZE = self.font_size
|
728 |
COLOR = self.text_color
|
729 |
-
BG_COLOR = self.highlight_color
|
730 |
FRAME_SIZE = (1080, 1920)
|
731 |
MAX_CHARS = 30
|
732 |
MAX_DURATION = 3.0
|
@@ -777,7 +811,15 @@ class YouTube:
|
|
777 |
self.log(success(f"Generated {len(subtitles)} subtitle lines"))
|
778 |
return {
|
779 |
"wordlevel": wordlevel_info,
|
780 |
-
"linelevel": subtitles
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
781 |
}
|
782 |
|
783 |
except Exception as e:
|
@@ -854,9 +896,20 @@ class YouTube:
|
|
854 |
|
855 |
self.log(success(f"Generated {len(wordlevel_info)} simulated word timings and {len(subtitles)} subtitle lines"))
|
856 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
857 |
return {
|
858 |
"wordlevel": wordlevel_info,
|
859 |
-
"linelevel": subtitles
|
|
|
860 |
}
|
861 |
|
862 |
def combine(self) -> str:
|
@@ -937,11 +990,16 @@ class YouTube:
|
|
937 |
final_clip = final_clip.set_fps(30)
|
938 |
|
939 |
# Add background music if available
|
940 |
-
|
941 |
-
if
|
942 |
-
|
|
|
|
|
|
|
|
|
|
|
943 |
try:
|
944 |
-
music_clip = AudioFileClip(
|
945 |
# Loop music if it's shorter than the video
|
946 |
if music_clip.duration < max_duration:
|
947 |
repeats = int(max_duration / music_clip.duration) + 1
|
@@ -964,58 +1022,86 @@ class YouTube:
|
|
964 |
# Set final duration
|
965 |
final_clip = final_clip.set_duration(tts_clip.duration)
|
966 |
|
967 |
-
# Generate subtitles if
|
968 |
subtitle_clips = []
|
969 |
-
|
970 |
-
|
971 |
-
if subtitles and 'wordlevel' in subtitles:
|
972 |
-
self.log("Adding word-level subtitles")
|
973 |
|
974 |
-
|
975 |
-
|
976 |
-
# Define subtitle styles
|
977 |
-
font = self.subtitle_font if os.path.exists(os.path.join(FONTS_DIR, f"{self.subtitle_font}.ttf")) else None
|
978 |
-
fontsize = self.font_size
|
979 |
-
color = self.text_color
|
980 |
-
bg_color = self.highlight_color
|
981 |
-
|
982 |
-
# Add subtitles as highlighted words
|
983 |
-
for subtitle in subtitles['linelevel']:
|
984 |
-
full_duration = subtitle['end'] - subtitle['start']
|
985 |
|
986 |
-
|
987 |
-
|
988 |
-
|
989 |
-
|
990 |
-
|
|
|
|
|
991 |
|
992 |
-
#
|
993 |
-
|
994 |
-
|
995 |
-
|
996 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
997 |
|
998 |
-
#
|
999 |
-
|
1000 |
-
|
1001 |
-
|
1002 |
-
|
1003 |
-
|
1004 |
-
|
1005 |
-
|
1006 |
-
|
1007 |
-
|
1008 |
-
|
1009 |
-
|
1010 |
-
|
1011 |
-
|
1012 |
-
|
1013 |
-
|
1014 |
-
|
1015 |
-
|
1016 |
-
|
1017 |
-
|
1018 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1019 |
|
1020 |
# Add subtitles to video if any were created
|
1021 |
if subtitle_clips:
|
@@ -1130,18 +1216,18 @@ def get_text_generator_models(generator):
|
|
1130 |
"gemini-1.5-pro"
|
1131 |
],
|
1132 |
"g4f": [
|
1133 |
-
"gpt-
|
1134 |
-
"gpt-4",
|
1135 |
"gpt-4o",
|
|
|
1136 |
"llama-3-70b-chat",
|
1137 |
"claude-3-opus-20240229",
|
1138 |
"claude-3-sonnet-20240229",
|
1139 |
"claude-3-haiku-20240307"
|
1140 |
],
|
1141 |
"openai": [
|
1142 |
-
"gpt-
|
1143 |
"gpt-4-turbo",
|
1144 |
-
"gpt-
|
1145 |
]
|
1146 |
}
|
1147 |
return models.get(generator, ["default"])
|
@@ -1163,6 +1249,7 @@ def get_image_generator_models(generator):
|
|
1163 |
"lexica"
|
1164 |
],
|
1165 |
"g4f": [
|
|
|
1166 |
"dall-e-3",
|
1167 |
"dall-e-2",
|
1168 |
"midjourney"
|
@@ -1223,16 +1310,25 @@ def get_tts_voices(engine):
|
|
1223 |
|
1224 |
# Create the Gradio interface
|
1225 |
def create_interface():
|
1226 |
-
with gr.Blocks(title="YouTube Shorts Generator") as demo:
|
1227 |
-
gr.Markdown("# YouTube Shorts Generator")
|
1228 |
-
gr.Markdown("Generate engaging YouTube Shorts videos with just a niche and language selection.")
|
1229 |
-
|
1230 |
with gr.Row():
|
1231 |
-
|
1232 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1233 |
with gr.Group():
|
1234 |
-
gr.Markdown("###
|
1235 |
-
niche = gr.Textbox(
|
|
|
|
|
|
|
|
|
1236 |
language = gr.Dropdown(
|
1237 |
choices=["English", "Spanish", "French", "German", "Italian", "Portuguese",
|
1238 |
"Russian", "Japanese", "Chinese", "Hindi"],
|
@@ -1240,8 +1336,78 @@ def create_interface():
|
|
1240 |
value="English"
|
1241 |
)
|
1242 |
|
1243 |
-
#
|
1244 |
-
with gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1245 |
gemini_api_key = gr.Textbox(
|
1246 |
label="Gemini API Key",
|
1247 |
type="password",
|
@@ -1268,78 +1434,22 @@ def create_interface():
|
|
1268 |
value=os.environ.get("OPENAI_API_KEY", "")
|
1269 |
)
|
1270 |
|
1271 |
-
# Model Selection
|
1272 |
-
with gr.Accordion("Model Selection", open=True):
|
1273 |
-
# Text Generator
|
1274 |
-
text_gen = gr.Dropdown(
|
1275 |
-
choices=["gemini", "g4f", "openai"],
|
1276 |
-
label="Text Generator",
|
1277 |
-
value="gemini"
|
1278 |
-
)
|
1279 |
-
text_model = gr.Dropdown(
|
1280 |
-
choices=get_text_generator_models("gemini"),
|
1281 |
-
label="Text Model",
|
1282 |
-
value="gemini-2.0-flash"
|
1283 |
-
)
|
1284 |
-
|
1285 |
-
# Image Generator
|
1286 |
-
image_gen = gr.Dropdown(
|
1287 |
-
choices=["prodia", "hercai", "g4f", "segmind", "pollinations"],
|
1288 |
-
label="Image Generator",
|
1289 |
-
value="prodia"
|
1290 |
-
)
|
1291 |
-
image_model = gr.Dropdown(
|
1292 |
-
choices=get_image_generator_models("prodia"),
|
1293 |
-
label="Image Model",
|
1294 |
-
value="sdxl"
|
1295 |
-
)
|
1296 |
-
|
1297 |
-
# TTS Engine
|
1298 |
-
tts_engine = gr.Dropdown(
|
1299 |
-
choices=["elevenlabs", "gtts", "openai", "edge"],
|
1300 |
-
label="Text-to-Speech Engine",
|
1301 |
-
value="elevenlabs"
|
1302 |
-
)
|
1303 |
-
tts_voice = gr.Dropdown(
|
1304 |
-
choices=get_tts_voices("elevenlabs"),
|
1305 |
-
label="TTS Voice",
|
1306 |
-
value="Sarah"
|
1307 |
-
)
|
1308 |
-
|
1309 |
-
# Subtitle Options
|
1310 |
-
with gr.Accordion("Subtitle Options", open=False):
|
1311 |
-
subtitle_font = gr.Dropdown(
|
1312 |
-
choices=["Helvetica-Bold", "Arial-Bold", "Impact", "Comic-Sans-MS"],
|
1313 |
-
label="Font",
|
1314 |
-
value="Helvetica-Bold"
|
1315 |
-
)
|
1316 |
-
font_size = gr.Slider(
|
1317 |
-
minimum=40,
|
1318 |
-
maximum=120,
|
1319 |
-
value=80,
|
1320 |
-
step=5,
|
1321 |
-
label="Font Size"
|
1322 |
-
)
|
1323 |
-
|
1324 |
-
with gr.Row():
|
1325 |
-
text_color = gr.ColorPicker(label="Text Color", value="#FFFFFF")
|
1326 |
-
highlight_color = gr.ColorPicker(label="Highlight Color", value="#0000FF")
|
1327 |
-
|
1328 |
# Generate button
|
1329 |
-
generate_btn = gr.Button("Generate Video", variant="primary")
|
1330 |
-
|
1331 |
-
|
1332 |
-
|
1333 |
-
video_output = gr.Video(label="Generated Video")
|
1334 |
-
|
1335 |
with gr.Tabs():
|
|
|
|
|
|
|
1336 |
with gr.TabItem("Metadata"):
|
1337 |
-
title_output = gr.Textbox(label="Title")
|
1338 |
-
description_output = gr.Textbox(label="Description", lines=
|
1339 |
-
script_output = gr.Textbox(label="Script", lines=
|
1340 |
-
|
1341 |
-
with gr.TabItem("
|
1342 |
-
log_output = gr.Textbox(label="Process Log", lines=
|
1343 |
|
1344 |
# Dynamic dropdown updates
|
1345 |
def update_text_models(generator):
|
@@ -1360,8 +1470,9 @@ def create_interface():
|
|
1360 |
def generate_youtube_short(niche, language, gemini_api_key, assemblyai_api_key,
|
1361 |
elevenlabs_api_key, segmind_api_key, openai_api_key,
|
1362 |
text_gen, text_model, image_gen, image_model,
|
1363 |
-
tts_engine, tts_voice,
|
1364 |
-
|
|
|
1365 |
|
1366 |
if not niche.strip():
|
1367 |
return {
|
@@ -1396,6 +1507,10 @@ def create_interface():
|
|
1396 |
font_size=font_size,
|
1397 |
text_color=text_color,
|
1398 |
highlight_color=highlight_color,
|
|
|
|
|
|
|
|
|
1399 |
api_keys=api_keys,
|
1400 |
progress=progress
|
1401 |
)
|
@@ -1438,11 +1553,25 @@ def create_interface():
|
|
1438 |
inputs=[
|
1439 |
niche, language, gemini_api_key, assemblyai_api_key, elevenlabs_api_key,
|
1440 |
segmind_api_key, openai_api_key, text_gen, text_model, image_gen, image_model,
|
1441 |
-
tts_engine, tts_voice,
|
|
|
1442 |
],
|
1443 |
outputs=[video_output, title_output, description_output, script_output, log_output]
|
1444 |
)
|
1445 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1446 |
return demo
|
1447 |
|
1448 |
# Create and launch the interface
|
|
|
55 |
print(formatted_message)
|
56 |
return formatted_message
|
57 |
|
58 |
+
def get_music_files():
|
59 |
+
"""Get list of available music files in the music directory."""
|
60 |
+
if not os.path.exists(MUSIC_DIR):
|
61 |
+
return ["none"]
|
62 |
+
|
63 |
+
music_files = [f for f in os.listdir(MUSIC_DIR) if f.endswith(('.mp3', '.wav'))]
|
64 |
+
if not music_files:
|
65 |
+
return ["none"]
|
66 |
+
|
67 |
+
return ["random"] + music_files
|
68 |
+
|
69 |
+
def get_font_files():
|
70 |
+
"""Get list of available font files in the fonts directory."""
|
71 |
+
if not os.path.exists(FONTS_DIR):
|
72 |
+
return ["default"]
|
73 |
+
|
74 |
+
font_files = [f.split('.')[0] for f in os.listdir(FONTS_DIR) if f.endswith(('.ttf', '.otf'))]
|
75 |
+
if not font_files:
|
76 |
+
return ["default"]
|
77 |
+
|
78 |
+
return ["default"] + font_files
|
79 |
+
|
80 |
def choose_random_music():
|
81 |
"""Selects a random music file from the music directory."""
|
82 |
if not os.path.exists(MUSIC_DIR):
|
|
|
92 |
|
93 |
class YouTube:
|
94 |
def __init__(self, niche: str, language: str,
|
95 |
+
text_gen="g4f", text_model="gpt-4",
|
96 |
+
image_gen="g4f", image_model="flux",
|
97 |
+
tts_engine="edge", tts_voice="en-US-AriaNeural",
|
98 |
+
subtitle_font="default", font_size=80,
|
99 |
+
text_color="white", highlight_color="blue",
|
100 |
+
subtitles_enabled=True, highlighting_enabled=True,
|
101 |
+
subtitle_position="bottom", music_file="random",
|
102 |
api_keys=None, progress=gr.Progress()) -> None:
|
103 |
|
104 |
"""Initialize the YouTube Shorts Generator."""
|
|
|
119 |
self.font_size = font_size
|
120 |
self.text_color = text_color
|
121 |
self.highlight_color = highlight_color
|
122 |
+
self.subtitles_enabled = subtitles_enabled
|
123 |
+
self.highlighting_enabled = highlighting_enabled
|
124 |
+
self.subtitle_position = subtitle_position
|
125 |
+
self.music_file = music_file
|
126 |
self.api_keys = api_keys or {}
|
127 |
self.images = []
|
128 |
self.logs = []
|
|
|
148 |
self.log(f"Text generator: {text_gen} - Model: {text_model}")
|
149 |
self.log(f"Image generator: {image_gen} - Model: {image_model}")
|
150 |
self.log(f"TTS engine: {tts_engine} - Voice: {tts_voice}")
|
151 |
+
self.log(f"Subtitles: {'Enabled' if subtitles_enabled else 'Disabled'} - Highlighting: {'Enabled' if highlighting_enabled else 'Disabled'}")
|
152 |
+
self.log(f"Music: {music_file}")
|
153 |
|
154 |
def log(self, message):
|
155 |
"""Add a log message to the logs list."""
|
|
|
188 |
elif self.text_gen == "g4f":
|
189 |
self.log("Using G4F for text generation")
|
190 |
import g4f
|
191 |
+
model_to_use = model if model else self.text_model
|
192 |
self.log(f"Using G4F model: {model_to_use}")
|
193 |
response = g4f.ChatCompletion.create(
|
194 |
model=model_to_use,
|
|
|
717 |
|
718 |
def generate_subtitles(self, audio_path):
|
719 |
"""Generate word-level subtitles for the video."""
|
720 |
+
if not self.subtitles_enabled:
|
721 |
+
self.log("Subtitles are disabled. Skipping subtitle generation.")
|
722 |
+
return None
|
723 |
+
|
724 |
self.progress(0.65, desc="Creating subtitles")
|
725 |
self.log("Starting subtitle generation process")
|
726 |
|
|
|
760 |
FONT = self.subtitle_font
|
761 |
FONTSIZE = self.font_size
|
762 |
COLOR = self.text_color
|
763 |
+
BG_COLOR = self.highlight_color if self.highlighting_enabled else None
|
764 |
FRAME_SIZE = (1080, 1920)
|
765 |
MAX_CHARS = 30
|
766 |
MAX_DURATION = 3.0
|
|
|
811 |
self.log(success(f"Generated {len(subtitles)} subtitle lines"))
|
812 |
return {
|
813 |
"wordlevel": wordlevel_info,
|
814 |
+
"linelevel": subtitles,
|
815 |
+
"settings": {
|
816 |
+
"font": FONT,
|
817 |
+
"fontsize": FONTSIZE,
|
818 |
+
"color": COLOR,
|
819 |
+
"bg_color": BG_COLOR,
|
820 |
+
"position": self.subtitle_position,
|
821 |
+
"highlighting_enabled": self.highlighting_enabled
|
822 |
+
}
|
823 |
}
|
824 |
|
825 |
except Exception as e:
|
|
|
896 |
|
897 |
self.log(success(f"Generated {len(wordlevel_info)} simulated word timings and {len(subtitles)} subtitle lines"))
|
898 |
|
899 |
+
# Define settings for subtitle display
|
900 |
+
settings = {
|
901 |
+
"font": self.subtitle_font,
|
902 |
+
"fontsize": self.font_size,
|
903 |
+
"color": self.text_color,
|
904 |
+
"bg_color": self.highlight_color if self.highlighting_enabled else None,
|
905 |
+
"position": self.subtitle_position,
|
906 |
+
"highlighting_enabled": self.highlighting_enabled
|
907 |
+
}
|
908 |
+
|
909 |
return {
|
910 |
"wordlevel": wordlevel_info,
|
911 |
+
"linelevel": subtitles,
|
912 |
+
"settings": settings
|
913 |
}
|
914 |
|
915 |
def combine(self) -> str:
|
|
|
990 |
final_clip = final_clip.set_fps(30)
|
991 |
|
992 |
# Add background music if available
|
993 |
+
music_path = None
|
994 |
+
if self.music_file == "random":
|
995 |
+
music_path = choose_random_music()
|
996 |
+
elif self.music_file != "none" and os.path.exists(os.path.join(MUSIC_DIR, self.music_file)):
|
997 |
+
music_path = os.path.join(MUSIC_DIR, self.music_file)
|
998 |
+
|
999 |
+
if music_path and os.path.exists(music_path):
|
1000 |
+
self.log(f"Adding background music: {music_path}")
|
1001 |
try:
|
1002 |
+
music_clip = AudioFileClip(music_path)
|
1003 |
# Loop music if it's shorter than the video
|
1004 |
if music_clip.duration < max_duration:
|
1005 |
repeats = int(max_duration / music_clip.duration) + 1
|
|
|
1022 |
# Set final duration
|
1023 |
final_clip = final_clip.set_duration(tts_clip.duration)
|
1024 |
|
1025 |
+
# Generate subtitles if enabled
|
1026 |
subtitle_clips = []
|
1027 |
+
if self.subtitles_enabled:
|
1028 |
+
subtitles = self.generate_subtitles(self.tts_path)
|
|
|
|
|
1029 |
|
1030 |
+
if subtitles and 'wordlevel' in subtitles:
|
1031 |
+
self.log("Adding word-level subtitles")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1032 |
|
1033 |
+
from moviepy.video.tools.subtitles import TextClip
|
1034 |
+
|
1035 |
+
# Define subtitle styles
|
1036 |
+
font = subtitles['settings']['font'] if subtitles['settings']['font'] != "default" and os.path.exists(os.path.join(FONTS_DIR, f"{subtitles['settings']['font']}.ttf")) else None
|
1037 |
+
fontsize = subtitles['settings']['fontsize']
|
1038 |
+
color = subtitles['settings']['color']
|
1039 |
+
bg_color = subtitles['settings']['bg_color'] if subtitles['settings']['highlighting_enabled'] else None
|
1040 |
|
1041 |
+
# Calculate position based on subtitle_position setting
|
1042 |
+
frame_width, frame_height = 1080, 1920
|
1043 |
+
if self.subtitle_position == "top":
|
1044 |
+
y_pos = frame_height * 0.1 # Position at 10% from top
|
1045 |
+
elif self.subtitle_position == "middle":
|
1046 |
+
y_pos = frame_height * 0.5 # Position at middle
|
1047 |
+
else: # bottom (default)
|
1048 |
+
y_pos = frame_height * 0.85 # Position at 85% from top
|
1049 |
+
|
1050 |
+
for subtitle in subtitles['linelevel']:
|
1051 |
+
full_duration = subtitle['end'] - subtitle['start']
|
1052 |
|
1053 |
+
# Initialize position for each subtitle line
|
1054 |
+
x_pos = 0
|
1055 |
+
x_buffer = frame_width * 1 / 10
|
1056 |
+
|
1057 |
+
# Handle word-level subtitles if highlighting is enabled
|
1058 |
+
if self.highlighting_enabled:
|
1059 |
+
# Add each word with proper timing and highlighting
|
1060 |
+
for word_data in subtitle['words']:
|
1061 |
+
word = word_data['word']
|
1062 |
+
start = word_data['start']
|
1063 |
+
end = word_data['end']
|
1064 |
+
|
1065 |
+
# Create text clip for word
|
1066 |
+
try:
|
1067 |
+
word_clip = TextClip(
|
1068 |
+
txt=word,
|
1069 |
+
font=font,
|
1070 |
+
fontsize=fontsize,
|
1071 |
+
color=color,
|
1072 |
+
bg_color=bg_color,
|
1073 |
+
stroke_color='black',
|
1074 |
+
stroke_width=1
|
1075 |
+
).set_position((x_pos + x_buffer, y_pos)).set_start(start).set_duration(end - start)
|
1076 |
+
|
1077 |
+
subtitle_clips.append(word_clip)
|
1078 |
+
x_pos += word_clip.w + 10 # Add spacing between words
|
1079 |
+
|
1080 |
+
# Wrap to next line if needed
|
1081 |
+
if x_pos + word_clip.w > frame_width - 2 * x_buffer:
|
1082 |
+
x_pos = 0
|
1083 |
+
y_pos += word_clip.h + 10
|
1084 |
+
except Exception as e:
|
1085 |
+
self.log(warning(f"Error creating subtitle for word '{word}': {str(e)}"))
|
1086 |
+
else:
|
1087 |
+
# Show entire line without word-level highlighting
|
1088 |
+
try:
|
1089 |
+
line_clip = TextClip(
|
1090 |
+
txt=subtitle['text'],
|
1091 |
+
font=font,
|
1092 |
+
fontsize=fontsize,
|
1093 |
+
color=color,
|
1094 |
+
bg_color=None,
|
1095 |
+
stroke_color='black',
|
1096 |
+
stroke_width=1,
|
1097 |
+
method='caption',
|
1098 |
+
size=(frame_width - 2 * x_buffer, None),
|
1099 |
+
align='center'
|
1100 |
+
).set_position(('center', y_pos)).set_start(subtitle['start']).set_duration(full_duration)
|
1101 |
+
|
1102 |
+
subtitle_clips.append(line_clip)
|
1103 |
+
except Exception as e:
|
1104 |
+
self.log(warning(f"Error creating subtitle line: {str(e)}"))
|
1105 |
|
1106 |
# Add subtitles to video if any were created
|
1107 |
if subtitle_clips:
|
|
|
1216 |
"gemini-1.5-pro"
|
1217 |
],
|
1218 |
"g4f": [
|
1219 |
+
"gpt-4",
|
|
|
1220 |
"gpt-4o",
|
1221 |
+
"gpt-3.5-turbo",
|
1222 |
"llama-3-70b-chat",
|
1223 |
"claude-3-opus-20240229",
|
1224 |
"claude-3-sonnet-20240229",
|
1225 |
"claude-3-haiku-20240307"
|
1226 |
],
|
1227 |
"openai": [
|
1228 |
+
"gpt-4o",
|
1229 |
"gpt-4-turbo",
|
1230 |
+
"gpt-3.5-turbo"
|
1231 |
]
|
1232 |
}
|
1233 |
return models.get(generator, ["default"])
|
|
|
1249 |
"lexica"
|
1250 |
],
|
1251 |
"g4f": [
|
1252 |
+
"flux",
|
1253 |
"dall-e-3",
|
1254 |
"dall-e-2",
|
1255 |
"midjourney"
|
|
|
1310 |
|
1311 |
# Create the Gradio interface
|
1312 |
def create_interface():
|
1313 |
+
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"), title="YouTube Shorts Generator") as demo:
|
|
|
|
|
|
|
1314 |
with gr.Row():
|
1315 |
+
gr.Markdown(
|
1316 |
+
"""
|
1317 |
+
# 📱 YouTube Shorts Generator
|
1318 |
+
Generate engaging YouTube Shorts videos with AI. Just provide a niche and language to get started!
|
1319 |
+
"""
|
1320 |
+
)
|
1321 |
+
|
1322 |
+
with gr.Row(equal_height=True):
|
1323 |
+
# Left panel: Content Settings
|
1324 |
+
with gr.Column(scale=1, min_width=400):
|
1325 |
with gr.Group():
|
1326 |
+
gr.Markdown("### 📝 Content")
|
1327 |
+
niche = gr.Textbox(
|
1328 |
+
label="Niche/Topic",
|
1329 |
+
placeholder="What's your video about?",
|
1330 |
+
value="Historical Facts"
|
1331 |
+
)
|
1332 |
language = gr.Dropdown(
|
1333 |
choices=["English", "Spanish", "French", "German", "Italian", "Portuguese",
|
1334 |
"Russian", "Japanese", "Chinese", "Hindi"],
|
|
|
1336 |
value="English"
|
1337 |
)
|
1338 |
|
1339 |
+
# Middle panel: Generator Settings
|
1340 |
+
with gr.Group():
|
1341 |
+
gr.Markdown("### 🔧 Generator Settings")
|
1342 |
+
with gr.Tabs():
|
1343 |
+
with gr.TabItem("Text"):
|
1344 |
+
text_gen = gr.Dropdown(
|
1345 |
+
choices=["g4f", "gemini", "openai"],
|
1346 |
+
label="Text Generator",
|
1347 |
+
value="g4f"
|
1348 |
+
)
|
1349 |
+
text_model = gr.Dropdown(
|
1350 |
+
choices=get_text_generator_models("g4f"),
|
1351 |
+
label="Text Model",
|
1352 |
+
value="gpt-4"
|
1353 |
+
)
|
1354 |
+
|
1355 |
+
with gr.TabItem("Image"):
|
1356 |
+
image_gen = gr.Dropdown(
|
1357 |
+
choices=["g4f", "prodia", "hercai", "segmind", "pollinations"],
|
1358 |
+
label="Image Generator",
|
1359 |
+
value="g4f"
|
1360 |
+
)
|
1361 |
+
image_model = gr.Dropdown(
|
1362 |
+
choices=get_image_generator_models("g4f"),
|
1363 |
+
label="Image Model",
|
1364 |
+
value="flux"
|
1365 |
+
)
|
1366 |
+
|
1367 |
+
with gr.TabItem("Audio"):
|
1368 |
+
tts_engine = gr.Dropdown(
|
1369 |
+
choices=["edge", "elevenlabs", "gtts", "openai"],
|
1370 |
+
label="Speech Engine",
|
1371 |
+
value="edge"
|
1372 |
+
)
|
1373 |
+
tts_voice = gr.Dropdown(
|
1374 |
+
choices=get_tts_voices("edge"),
|
1375 |
+
label="Voice",
|
1376 |
+
value="en-US-AriaNeural"
|
1377 |
+
)
|
1378 |
+
music_file = gr.Dropdown(
|
1379 |
+
choices=get_music_files(),
|
1380 |
+
label="Background Music",
|
1381 |
+
value="random"
|
1382 |
+
)
|
1383 |
+
|
1384 |
+
with gr.TabItem("Subtitles"):
|
1385 |
+
subtitles_enabled = gr.Checkbox(label="Enable Subtitles", value=True)
|
1386 |
+
highlighting_enabled = gr.Checkbox(label="Enable Word Highlighting", value=True)
|
1387 |
+
subtitle_font = gr.Dropdown(
|
1388 |
+
choices=get_font_files(),
|
1389 |
+
label="Font",
|
1390 |
+
value="default"
|
1391 |
+
)
|
1392 |
+
with gr.Row():
|
1393 |
+
font_size = gr.Slider(
|
1394 |
+
minimum=40,
|
1395 |
+
maximum=120,
|
1396 |
+
value=80,
|
1397 |
+
step=5,
|
1398 |
+
label="Font Size"
|
1399 |
+
)
|
1400 |
+
subtitle_position = gr.Dropdown(
|
1401 |
+
choices=["bottom", "middle", "top"],
|
1402 |
+
label="Position",
|
1403 |
+
value="bottom"
|
1404 |
+
)
|
1405 |
+
with gr.Row():
|
1406 |
+
text_color = gr.ColorPicker(label="Text Color", value="#FFFFFF")
|
1407 |
+
highlight_color = gr.ColorPicker(label="Highlight Color", value="#0000FF")
|
1408 |
+
|
1409 |
+
# API Keys section
|
1410 |
+
with gr.Accordion("🔑 API Keys", open=False):
|
1411 |
gemini_api_key = gr.Textbox(
|
1412 |
label="Gemini API Key",
|
1413 |
type="password",
|
|
|
1434 |
value=os.environ.get("OPENAI_API_KEY", "")
|
1435 |
)
|
1436 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1437 |
# Generate button
|
1438 |
+
generate_btn = gr.Button("🎬 Generate Video", variant="primary", size="lg")
|
1439 |
+
|
1440 |
+
# Right panel: Output display
|
1441 |
+
with gr.Column(scale=1, min_width=400):
|
|
|
|
|
1442 |
with gr.Tabs():
|
1443 |
+
with gr.TabItem("Video", selected=True):
|
1444 |
+
video_output = gr.Video(label="Generated Video", height=600)
|
1445 |
+
|
1446 |
with gr.TabItem("Metadata"):
|
1447 |
+
title_output = gr.Textbox(label="Title", lines=2)
|
1448 |
+
description_output = gr.Textbox(label="Description", lines=4)
|
1449 |
+
script_output = gr.Textbox(label="Script", lines=8)
|
1450 |
+
|
1451 |
+
with gr.TabItem("Log"):
|
1452 |
+
log_output = gr.Textbox(label="Process Log", lines=20, max_lines=100)
|
1453 |
|
1454 |
# Dynamic dropdown updates
|
1455 |
def update_text_models(generator):
|
|
|
1470 |
def generate_youtube_short(niche, language, gemini_api_key, assemblyai_api_key,
|
1471 |
elevenlabs_api_key, segmind_api_key, openai_api_key,
|
1472 |
text_gen, text_model, image_gen, image_model,
|
1473 |
+
tts_engine, tts_voice, subtitles_enabled, highlighting_enabled,
|
1474 |
+
subtitle_font, font_size, subtitle_position,
|
1475 |
+
text_color, highlight_color, music_file, progress=gr.Progress()):
|
1476 |
|
1477 |
if not niche.strip():
|
1478 |
return {
|
|
|
1507 |
font_size=font_size,
|
1508 |
text_color=text_color,
|
1509 |
highlight_color=highlight_color,
|
1510 |
+
subtitles_enabled=subtitles_enabled,
|
1511 |
+
highlighting_enabled=highlighting_enabled,
|
1512 |
+
subtitle_position=subtitle_position,
|
1513 |
+
music_file=music_file,
|
1514 |
api_keys=api_keys,
|
1515 |
progress=progress
|
1516 |
)
|
|
|
1553 |
inputs=[
|
1554 |
niche, language, gemini_api_key, assemblyai_api_key, elevenlabs_api_key,
|
1555 |
segmind_api_key, openai_api_key, text_gen, text_model, image_gen, image_model,
|
1556 |
+
tts_engine, tts_voice, subtitles_enabled, highlighting_enabled,
|
1557 |
+
subtitle_font, font_size, subtitle_position, text_color, highlight_color, music_file
|
1558 |
],
|
1559 |
outputs=[video_output, title_output, description_output, script_output, log_output]
|
1560 |
)
|
1561 |
|
1562 |
+
# Add examples
|
1563 |
+
gr.Examples(
|
1564 |
+
[
|
1565 |
+
["Historical Facts", "English", "g4f", "gpt-4", "g4f", "flux", "edge", "en-US-AriaNeural", True, True, "default", 80, "bottom", "#FFFFFF", "#0000FF", "random"],
|
1566 |
+
["Cooking Tips", "English", "g4f", "gpt-4", "g4f", "flux", "edge", "en-US-AriaNeural", True, True, "default", 80, "bottom", "#FFFFFF", "#FF0000", "random"],
|
1567 |
+
["Technology News", "English", "g4f", "gpt-4", "g4f", "flux", "edge", "en-US-GuyNeural", True, True, "default", 80, "bottom", "#FFFFFF", "#00FF00", "random"],
|
1568 |
+
],
|
1569 |
+
[niche, language, text_gen, text_model, image_gen, image_model, tts_engine, tts_voice,
|
1570 |
+
subtitles_enabled, highlighting_enabled, subtitle_font, font_size,
|
1571 |
+
subtitle_position, text_color, highlight_color, music_file],
|
1572 |
+
label="Quick Start Templates"
|
1573 |
+
)
|
1574 |
+
|
1575 |
return demo
|
1576 |
|
1577 |
# Create and launch the interface
|