AZILS committed on
Commit 0a665ec · verified · 1 Parent(s): da52b15

Update app.py

Files changed (1):
  1. app.py +579 -846
app.py CHANGED
@@ -1,882 +1,615 @@
- import os
- import re
- import uuid
- import json
- import time
- import random
- import shutil
- import requests
- import asyncio
- import gradio as gr
- from pathlib import Path
- import edge_tts
- from gtts import gTTS
- import numpy as np
- import g4f
- from g4f.client import Client
- import assemblyai as aai
- from moviepy.editor import *
- from moviepy.video.fx.all import crop
- from moviepy.audio.fx.all import volumex
- from concurrent.futures import ThreadPoolExecutor
-
- # Constants
- TEMP_DIR = Path("temp")
- OUTPUT_DIR = Path("output")
- MUSIC_DIR = Path("music")
-
- # Ensure directories exist
- TEMP_DIR.mkdir(exist_ok=True)
- OUTPUT_DIR.mkdir(exist_ok=True)
- MUSIC_DIR.mkdir(exist_ok=True)
-
- # Add a sample music file if none exists
- if not list(MUSIC_DIR.glob("*.mp3")):
-     # Create a simple silent audio file as placeholder
-     silent_clip = AudioClip(lambda t: 0, duration=10)
-     silent_clip.write_audiofile(MUSIC_DIR / "silence.mp3", fps=44100)
-
- # Utility functions
- def info(msg):
-     return gr.Info(msg)
-
- def warning(msg):
-     return gr.Warning(msg)
-
- def error(msg):
-     return gr.Error(msg)
-
- def generate_id():
-     return str(uuid.uuid4())
-
- def choose_random_music():
-     music_files = list(MUSIC_DIR.glob("*.mp3"))
-     if not music_files:
-         return MUSIC_DIR / "silence.mp3"
-     return random.choice(music_files)
-
- def parse_model(model_name):
-     """Parse model name for g4f"""
-     if model_name == "gpt-4":
-         return g4f.models.gpt_4
-     elif model_name == "gpt-3.5-turbo":
-         return g4f.models.gpt_35_turbo
-     else:
-         return model_name
-
- class YouTubeGenerator:
-     def __init__(self):
-         self.reset()
-
-     def reset(self):
-         """Reset all generation state"""
-         self.subject = ""
-         self.script = ""
-         self.metadata = {"title": "", "description": ""}
-         self.image_prompts = []
-         self.images = []
-         self.tts_path = ""
-         self.video_path = ""
-
-     def clean_temp_files(self, keep_video=False):
-         """Clean temporary files except final video if requested"""
-         for img in self.images:
-             if os.path.exists(img):
-                 os.remove(img)
-
-         if self.tts_path and os.path.exists(self.tts_path):
-             os.remove(self.tts_path)
-
-         if not keep_video and self.video_path and os.path.exists(self.video_path):
-             os.remove(self.video_path)
-
-     def generate_response(self, prompt, model="gpt-4"):
-         """Generate response using G4F"""
-         try:
-             response = g4f.ChatCompletion.create(
-                 model=parse_model(model),
-                 messages=[{"role": "user", "content": prompt}]
-             )
-             return response
-         except Exception as e:
-             error(f"Error generating response: {str(e)}")
-             return ""
-
-     def generate_topic(self, niche):
-         """Generate a topic based on the niche"""
-         prompt = f"Please generate a specific video idea that takes about the following topic: {niche}. Make it exactly one sentence. Only return the topic, nothing else."
-         completion = self.generate_response(prompt)
-
-         if not completion:
-             raise ValueError("Failed to generate topic")
-
-         self.subject = completion
-         return completion
-
-     def generate_script(self, subject, language):
-         """Generate video script based on subject"""
-         prompt = f"""
-         Generate a script for youtube shorts video, depending on the subject of the video.
-
-         The script is to be returned as a string with the specified number of paragraphs.
-
-         Here is an example of a string:
-         "This is an example string."
-
-         Do not under any circumstance reference this prompt in your response.
-
-         Get straight to the point, don't start with unnecessary things like, "welcome to this video".
-
-         Obviously, the script should be related to the subject of the video.
-
-         YOU MUST NOT INCLUDE ANY TYPE OF MARKDOWN OR FORMATTING IN THE SCRIPT, NEVER USE A TITLE.
-         YOU MUST WRITE THE SCRIPT IN THE LANGUAGE SPECIFIED IN [LANGUAGE].
-         ONLY RETURN THE RAW CONTENT OF THE SCRIPT. DO NOT INCLUDE "VOICEOVER", "NARRATOR" OR SIMILAR INDICATORS OF WHAT SHOULD BE SPOKEN AT THE BEGINNING OF EACH PARAGRAPH OR LINE. YOU MUST NOT MENTION THE PROMPT, OR ANYTHING ABOUT THE SCRIPT ITSELF. ALSO, NEVER TALK ABOUT THE AMOUNT OF PARAGRAPHS OR LINES. JUST WRITE THE SCRIPT
-
-         Subject: {subject}
-         Language: {language}
-         """
-         completion = self.generate_response(prompt)
-
-         # Apply regex to remove *
-         completion = re.sub(r"\*", "", completion)
-
-         if not completion:
-             raise ValueError("The generated script is empty")
-
-         if len(completion) > 5000:
-             raise ValueError("Generated script is too long (>5000 chars)")
-
-         self.script = completion
-         return completion
-
-     def generate_metadata(self, subject):
-         """Generate title and description"""
-         title_prompt = f"Please generate a YouTube Video Title for the following subject, including hashtags: {subject}. Only return the title, nothing else. Limit the title under 100 characters."
-         title = self.generate_response(title_prompt)
-
-         if len(title) > 100:
-             title = title[:97] + "..."
-
-         desc_prompt = f"Please generate a YouTube Video Description for the following script: {self.script}. Only return the description, nothing else."
-         description = self.generate_response(desc_prompt)
-
-         self.metadata = {
-             "title": title,
-             "description": description
-         }
-
-         return self.metadata
-
-     def generate_prompts(self, subject, script):
-         """Generate image prompts for the script"""
-         n_prompts = 5
-         prompt = f"""
-         Generate {n_prompts} Image Prompts for AI Image Generation,
-         depending on the subject of a video.
-         Subject: {subject}
-
-         The image prompts are to be returned as
-         a JSON-Array of strings.
-
-         Each search term should consist of a full sentence,
-         always add the main subject of the video.
-
-         Be emotional and use interesting adjectives to make the
-         Image Prompt as detailed as possible.
-
-         YOU MUST ONLY RETURN THE JSON-ARRAY OF STRINGS.
-         YOU MUST NOT RETURN ANYTHING ELSE.
-         YOU MUST NOT RETURN THE SCRIPT.
-
-         The search terms must be related to the subject of the video.
-         Here is an example of a JSON-Array of strings:
-         ["image prompt 1", "image prompt 2", "image prompt 3"]
-
-         For context, here is the full text:
-         {script}
-         """
-
-         completion = str(self.generate_response(prompt, model="gpt-4"))
-         completion = completion.replace("```json", "").replace("```", "")
-
-         image_prompts = []
-
-         try:
-             # First try to parse as JSON directly
-             image_prompts = json.loads(completion)
-             if isinstance(image_prompts, dict) and "image_prompts" in image_prompts:
-                 image_prompts = image_prompts["image_prompts"]
-         except Exception:
-             # If that fails, try to extract array from the text
-             try:
-                 # Get everything between [ and ], and turn it into a list
-                 r = re.compile(r"\[.*\]", re.DOTALL)
-                 matches = r.findall(completion)
-                 if matches:
-                     image_prompts = json.loads(matches[0])
-             except Exception as e:
-                 raise ValueError(f"Failed to parse image prompts: {str(e)}")
-
-         # Ensure we have prompts and they're in the right format
-         if not image_prompts or not isinstance(image_prompts, list):
-             raise ValueError("No valid image prompts were generated")
-
-         # Limit to 5 prompts
-         image_prompts = image_prompts[:5]
-
-         self.image_prompts = image_prompts
-         return image_prompts
-
-     def generate_image(self, prompt, provider="g4f", model="sdxl-turbo"):
-         """Generate an image using the specified provider"""
-         try:
-             if provider == "g4f":
-                 client = Client()
-                 response = client.images.generate(
-                     model=model,
-                     prompt=prompt,
-                     response_format="url"
-                 )
-
-                 if response and response.data and len(response.data) > 0:
-                     image_url = response.data[0].url
-                     image_response = requests.get(image_url)
-
-                     if image_response.status_code == 200:
-                         image_path = str(TEMP_DIR / f"{generate_id()}.png")
-                         with open(image_path, "wb") as f:
-                             f.write(image_response.content)
-                         self.images.append(image_path)
-                         return image_path
-
-                 raise ValueError(f"Failed to download image from {image_url}")
-
-             elif provider == "prodia":
-                 s = requests.Session()
-                 headers = {
-                     "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
-                 }
-
-                 # Generate job
-                 resp = s.get(
-                     "https://api.prodia.com/generate",
-                     params={
-                         "new": "true",
-                         "prompt": prompt,
-                         "model": model,
-                         "negative_prompt": "verybadimagenegative_v1.3",
-                         "steps": "20",
-                         "cfg": "7",
-                         "seed": random.randint(1, 10000),
-                         "sample": "DPM++ 2M Karras",
-                         "aspect_ratio": "square"
-                     },
-                     headers=headers
-                 )
-
-                 job_id = resp.json().get('job')
-                 if not job_id:
-                     raise ValueError("Failed to get job ID from Prodia")
-
-                 # Wait for job to complete
-                 for _ in range(30):  # Timeout after 30 attempts (150 seconds)
-                     time.sleep(5)
-                     status = s.get(f"https://api.prodia.com/job/{job_id}", headers=headers).json()
-                     if status.get("status") == "succeeded":
-                         img_data = s.get(f"https://images.prodia.xyz/{job_id}.png?download=1", headers=headers).content
-                         image_path = str(TEMP_DIR / f"{generate_id()}.png")
-                         with open(image_path, "wb") as f:
-                             f.write(img_data)
-                         self.images.append(image_path)
-                         return image_path
-
-                 raise ValueError("Prodia image generation timed out")
-
-             elif provider == "pollinations":
-                 response = requests.get(f"https://image.pollinations.ai/prompt/{prompt}{random.randint(1,10000)}")
-                 if response.status_code == 200:
-                     image_path = str(TEMP_DIR / f"{generate_id()}.png")
-                     with open(image_path, "wb") as f:
-                         f.write(response.content)
-                     self.images.append(image_path)
-                     return image_path
-                 else:
-                     raise ValueError(f"Pollinations request failed: {response.status_code}")
-
-             else:
-                 raise ValueError(f"Unsupported image provider: {provider}")
-
-         except Exception as e:
-             raise ValueError(f"Image generation failed: {str(e)}")
-
-     async def generate_edge_tts(self, text, voice):
-         """Generate speech using Edge TTS"""
-         try:
-             audio_path = str(TEMP_DIR / f"{generate_id()}.mp3")
-             communicate = edge_tts.Communicate(text, voice)
-             await communicate.save(audio_path)
-             self.tts_path = audio_path
-             return audio_path
-         except Exception as e:
-             # Try an alternative voice if the specified one fails
-             try:
-                 fallback_voice = "en-US-ChristopherNeural"
-                 communicate = edge_tts.Communicate(text, fallback_voice)
-                 audio_path = str(TEMP_DIR / f"{generate_id()}.mp3")
-                 await communicate.save(audio_path)
-                 self.tts_path = audio_path
-                 return audio_path
-             except Exception as nested_e:
-                 raise ValueError(f"Edge TTS failed with both voices: {str(e)}, fallback: {str(nested_e)}")
-
-     def generate_gtts(self, text, lang='en'):
-         """Generate speech using Google TTS"""
-         try:
-             audio_path = str(TEMP_DIR / f"{generate_id()}.mp3")
-             tts = gTTS(text=text, lang=lang, slow=False)
-             tts.save(audio_path)
-             self.tts_path = audio_path
-             return audio_path
-         except Exception as e:
-             raise ValueError(f"Google TTS failed: {str(e)}")
-
-     def generate_speech(self, text, engine="edge", voice="en-US-ChristopherNeural"):
-         """Generate speech from text using selected engine"""
-         # Clean text
-         text = re.sub(r'[^\w\s.?!]', '', text)
-
-         if engine == "edge":
-             # Edge TTS is async, so we need to run it in an event loop
-             return asyncio.run(self.generate_edge_tts(text, voice))
-         elif engine == "gtts":
-             return self.generate_gtts(text, lang=voice)
-         else:
-             raise ValueError(f"Unsupported TTS engine: {engine}")
-
-     def generate_subtitles(self, audio_path, api_key):
-         """Generate word-highlighted subtitles"""
-         try:
-             # Set API key
-             aai.settings.api_key = api_key
-
-             # Configure transcription
-             config = aai.TranscriptionConfig(
-                 speaker_labels=False,
-                 word_boost=[],
-                 format_text=True
-             )
-
-             # Create transcriber and transcribe audio
-             transcriber = aai.Transcriber(config=config)
-             transcript = transcriber.transcribe(audio_path)
-
-             # Process word-level information
-             wordlevel_info = []
-             for word in transcript.words:
-                 word_data = {
-                     "word": word.text.strip(),
-                     "start": word.start / 1000.0,
-                     "end": word.end / 1000.0
-                 }
-                 wordlevel_info.append(word_data)
-
-             # Split text into lines based on character count and duration
-             MAX_CHARS = 30
-             MAX_DURATION = 3.0
-             MAX_GAP = 2.5
-
-             subtitles = []
-             line = []
-             line_duration = 0
-
-             for idx, word_data in enumerate(wordlevel_info):
-                 word = word_data["word"]
-                 start = word_data["start"]
-                 end = word_data["end"]
-
-                 line.append(word_data)
-                 line_duration += end - start
-                 temp = " ".join(item["word"] for item in line)
-                 new_line_chars = len(temp)
-                 duration_exceeded = line_duration > MAX_DURATION
-                 chars_exceeded = new_line_chars > MAX_CHARS
-
-                 if idx > 0:
-                     gap = word_data['start'] - wordlevel_info[idx - 1]['end']
-                     maxgap_exceeded = gap > MAX_GAP
-                 else:
-                     maxgap_exceeded = False
-
-                 # Check if any condition is exceeded to finalize the current line
-                 if duration_exceeded or chars_exceeded or maxgap_exceeded:
-                     if line:
-                         subtitle_line = {
-                             "text": " ".join(item["word"] for item in line),
-                             "start": line[0]["start"],
-                             "end": line[-1]["end"],
-                             "words": line
-                         }
-                         subtitles.append(subtitle_line)
-                         line = []
-                         line_duration = 0
-
-             # Add the remaining words as the last subtitle line if any
-             if line:
-                 subtitle_line = {
-                     "text": " ".join(item["word"] for item in line),
-                     "start": line[0]["start"],
-                     "end": line[-1]["end"],
-                     "words": line
-                 }
-                 subtitles.append(subtitle_line)
-
-             # Create subtitle clips
-             all_subtitle_clips = []
-
-             # Define formatting constants
-             FONT = 'Helvetica-Bold'
-             FONTSIZE = 80
-             COLOR = 'white'
-             BG_COLOR = 'blue'
-             FRAME_SIZE = (1080, 1920)
-
-             for subtitle in subtitles:
-                 full_duration = subtitle['end'] - subtitle['start']
-
-                 word_clips = []
-                 xy_textclips_positions = []
-
-                 # Dynamic vertical positioning (moved to bottom)
-                 frame_width, frame_height = FRAME_SIZE
-                 x_pos = 0
-                 y_pos = frame_height * 0.85  # Position at 85% of frame height
-                 x_buffer = frame_width * 1 / 10
-                 y_buffer = 10  # Small vertical buffer
-
-                 line_height = 0
-                 current_line_height = 0
-
-                 for index, wordJSON in enumerate(subtitle['words']):
-                     duration = wordJSON['end'] - wordJSON['start']
-                     word_clip = TextClip(wordJSON['word'], font=FONT, fontsize=FONTSIZE, color=COLOR).set_start(subtitle['start']).set_duration(full_duration)
-                     word_width, word_height = word_clip.size
-
-                     # Track line height for multi-line support
-                     line_height = max(line_height, word_height)
-
-                     # Check if the current word exceeds the frame width, move to the next line
-                     if x_pos + word_width > frame_width - 2 * x_buffer:
-                         x_pos = 0
-                         y_pos += line_height + y_buffer
-                         current_line_height += line_height + y_buffer
-
-                     # Store the position and other details for highlighting
-                     xy_textclips_positions.append({
-                         "x_pos": x_pos + x_buffer,
-                         "y_pos": y_pos + y_buffer,
-                         "width": word_width,
-                         "height": word_height,
-                         "word": wordJSON['word'],
-                         "start": wordJSON['start'],
-                         "end": wordJSON['end'],
-                         "duration": duration
-                     })
-
-                     # Set the position of the word clip
-                     word_clip = word_clip.set_position((x_pos + x_buffer, y_pos + y_buffer))
-                     word_clips.append(word_clip)
-                     x_pos = x_pos + word_width + 10
-
-                 # Create highlighted word clips
-                 for highlight_word in xy_textclips_positions:
-                     word_clip_highlight = TextClip(highlight_word['word'], font=FONT, fontsize=FONTSIZE, color=COLOR, bg_color=BG_COLOR).set_start(highlight_word['start']).set_duration(highlight_word['duration'])
-                     word_clip_highlight = word_clip_highlight.set_position((highlight_word['x_pos'], highlight_word['y_pos']))
-                     word_clips.append(word_clip_highlight)
-
-                 # Add all word clips to the list of subtitle clips
-                 all_subtitle_clips.extend(word_clips)
-
-             return all_subtitle_clips
-         except Exception as e:
-             print(f"Subtitle generation error: {str(e)}")
-             return []  # Return empty list if subtitles fail
-
-     def combine_video(self, include_subtitles=True, subtitles_api_key=""):
-         """Combine all elements into final video"""
-         try:
-             output_path = str(TEMP_DIR / f"{generate_id()}.mp4")
-
-             # Load audio
-             tts_clip = AudioFileClip(self.tts_path)
-             max_duration = tts_clip.duration
-
-             # Calculate duration per image
-             req_dur = max_duration / len(self.images)
-
-             # Create video clips from images
-             clips = []
-             tot_dur = 0
-
-             while tot_dur < max_duration:
-                 for image_path in self.images:
-                     clip = ImageClip(image_path)
-                     clip.duration = req_dur
-                     clip = clip.set_fps(30)
-
-                     # Intelligent cropping for different aspect ratios
-                     aspect_ratio = 9/16  # Standard vertical video ratio
-                     if clip.w / clip.h < aspect_ratio:
-                         clip = crop(
-                             clip,
-                             width=clip.w,
-                             height=round(clip.w / aspect_ratio),
-                             x_center=clip.w / 2,
-                             y_center=clip.h / 2
-                         )
-                     else:
-                         clip = crop(
-                             clip,
-                             width=round(aspect_ratio * clip.h),
-                             height=clip.h,
-                             x_center=clip.w / 2,
-                             y_center=clip.h / 2
-                         )
-
-                     clip = clip.resize((1080, 1920))  # Hardcoded frame size for shorts
-
-                     clips.append(clip)
-                     tot_dur += clip.duration
-
-                     # Break if we have enough duration
-                     if tot_dur >= max_duration:
-                         break
-
-             # Concatenate all clips
-             final_clip = concatenate_videoclips(clips)
-             final_clip = final_clip.set_fps(30)
-
-             # Add background music
-             random_music = choose_random_music()
-             random_music_clip = AudioFileClip(str(random_music))
-             random_music_clip = random_music_clip.fx(volumex, 0.1)
-             random_music_clip = random_music_clip.set_duration(max_duration)
-
-             # Generate subtitles if requested
-             word_highlighted_clips = []
-             if include_subtitles and subtitles_api_key:
-                 word_highlighted_clips = self.generate_subtitles(self.tts_path, subtitles_api_key)
-
-             # Combine audio
-             comp_audio = CompositeAudioClip([
-                 tts_clip,
-                 random_music_clip
-             ])
-
-             # Set audio and duration
-             final_clip = final_clip.set_audio(comp_audio)
-             final_clip = final_clip.set_duration(tts_clip.duration)
-
-             # Add subtitles if we have them
-             if word_highlighted_clips:
-                 final_clip = CompositeVideoClip([final_clip] + word_highlighted_clips)
-
-             # Write video file
-             final_clip.write_videofile(output_path, threads=4)
-
-             # Save to output directory with a more descriptive name
-             safe_title = re.sub(r'[^\w\s-]', '', self.metadata["title"])
-             safe_title = re.sub(r'[-\s]+', '-', safe_title).strip('-_')
-             final_output = str(OUTPUT_DIR / f"{safe_title}_{int(time.time())}.mp4")
-             shutil.copy2(output_path, final_output)
-
-             self.video_path = final_output
-             return final_output
-         except Exception as e:
-             raise ValueError(f"Video generation failed: {str(e)}")
-
- def generate_full_pipeline(
-     niche,
-     language,
-     img_provider,
-     img_model,
-     tts_engine,
-     tts_voice,
-     include_subtitles,
-     assemblyai_key,
-     progress=gr.Progress()
- ):
-     progress(0, desc="Initializing")
-     generator = YouTubeGenerator()
-
-     try:
-         # Step 1: Generate topic
-         progress(0.05, desc="Generating topic")
-         topic = generator.generate_topic(niche)
-
-         # Step 2: Generate script
-         progress(0.1, desc="Generating script")
-         script = generator.generate_script(topic, language)
-
-         # Step 3: Generate metadata
-         progress(0.15, desc="Generating metadata")
-         metadata = generator.generate_metadata(topic)
-
-         # Step 4: Generate image prompts
-         progress(0.2, desc="Generating image prompts")
-         image_prompts = generator.generate_prompts(topic, script)
-
-         # Step 5: Generate images
-         progress(0.3, desc="Generating images")
-         images = []
-         total_images = len(image_prompts)
-
-         for i, prompt in enumerate(image_prompts):
-             progress(0.3 + (0.3 * i/total_images), desc=f"Generating image {i+1}/{total_images}")
-             img_path = generator.generate_image(prompt, provider=img_provider, model=img_model)
-             images.append(img_path)
-
-         # Step 6: Generate speech
-         progress(0.6, desc="Generating speech")
-         tts_path = generator.generate_speech(script, engine=tts_engine, voice=tts_voice)
-
-         # Step 7: Combine into video
-         progress(0.8, desc="Creating video")
-         video_path = generator.combine_video(
-             include_subtitles=include_subtitles,
-             subtitles_api_key=assemblyai_key if include_subtitles else ""
-         )
-
-         progress(1.0, desc="Complete!")
-
-         return {
-             "topic": topic,
-             "script": script,
-             "title": metadata["title"],
-             "description": metadata["description"],
-             "image_prompts": "\n\n".join([f"{i+1}. {prompt}" for i, prompt in enumerate(image_prompts)]),
-             "images": images,
-             "audio": tts_path,
-             "video": video_path,
-             "video_html": f'
-             '
-         }
-
-     except Exception as e:
-         generator.clean_temp_files()
-         error_msg = str(e)
-         return {
-             "topic": f"ERROR: {error_msg}",
-             "script": "",
-             "title": "",
-             "description": "",
-             "image_prompts": "",
-             "images": [],
-             "audio": None,
-             "video": None,
-             "video_html": f'
-             Video generation failed: {error_msg}
-             '
-         }
-
- def ui_component():
-     # Define UI components
-     with gr.Blocks(theme=gr.themes.Soft()) as app:
-         gr.Markdown("# YouTube Shorts Generator")
-         gr.Markdown("Generate complete YouTube Shorts videos with AI")
-
-         with gr.Tabs():
-             with gr.TabItem("Generate Video"):
-                 with gr.Row():
-                     with gr.Column():
-                         # Input parameters
-                         niche_input = gr.Textbox(label="Video Niche/Topic", placeholder="Historical Facts", value="Historical Facts")
-                         language_input = gr.Dropdown(
-                             label="Language",
-                             choices=["English", "Spanish", "French", "German", "Italian", "Portuguese", "Russian", "Japanese", "Chinese", "Korean", "Arabic"],
-                             value="English"
-                         )
-
-                         with gr.Accordion("Image Generation Settings", open=False):
-                             img_provider = gr.Dropdown(
-                                 label="Image Provider",
-                                 choices=["g4f", "prodia", "pollinations"],
-                                 value="g4f"
-                             )
-
-                             img_model = gr.Dropdown(
-                                 label="Image Model",
-                                 choices=["sdxl-turbo", "sdxl", "stable-diffusion-3", "kandinsky-2.2", "midjourney"],
-                                 value="sdxl-turbo"
-                             )
-
-                         with gr.Accordion("TTS Settings", open=False):
-                             tts_engine = gr.Dropdown(
-                                 label="TTS Engine",
-                                 choices=["edge", "gtts"],
-                                 value="edge"
-                             )
-
-                             tts_voice = gr.Dropdown(
-                                 label="TTS Voice",
-                                 choices=["en-US-ChristopherNeural", "en-US-GuyNeural", "en-US-JennyNeural", "en-US-AriaNeural", "en-GB-SoniaNeural", "en"],
-                                 value="en-US-ChristopherNeural"
-                             )
-
-                         with gr.Accordion("Subtitle Settings", open=False):
-                             include_subtitles = gr.Checkbox(label="Generate Word-Level Subtitles", value=False)
-                             assemblyai_key = gr.Textbox(
-                                 label="AssemblyAI API Key (required for subtitles)",
-                                 placeholder="Your AssemblyAI API Key",
-                                 type="password"
-                             )
-
-                         generate_btn = gr.Button("Generate Video", variant="primary")
-
-                     with gr.Column():
-                         # Output display
-                         with gr.Accordion("Generated Content", open=True):
-                             topic_output = gr.Textbox(label="Generated Topic")
-                             script_output = gr.Textbox(label="Generated Script", lines=10)
-                             title_output = gr.Textbox(label="Video Title")
-                             description_output = gr.Textbox(label="Video Description", lines=5)
-
-                         with gr.Accordion("Generated Assets", open=True):
-                             prompts_output = gr.Textbox(label="Image Prompts", lines=5)
-                             gallery_output = gr.Gallery(label="Generated Images")
-                             audio_output = gr.Audio(label="Generated Speech")
-                             video_output = gr.Video(label="Final Video")
-
-                         video_html = gr.HTML(label="Video Player")
-
-                 # Connect button to function
-                 generate_btn.click(
-                     generate_full_pipeline,
-                     inputs=[
-                         niche_input,
-                         language_input,
-                         img_provider,
-                         img_model,
-                         tts_engine,
-                         tts_voice,
-                         include_subtitles,
-                         assemblyai_key
-                     ],
-                     outputs={
-                         "topic": topic_output,
-                         "script": script_output,
-                         "title": title_output,
-                         "description": description_output,
-                         "image_prompts": prompts_output,
-                         "images": gallery_output,
-                         "audio": audio_output,
-                         "video": video_output,
-                         "video_html": video_html
-                     }
-                 )
-
-             with gr.TabItem("Settings"):
-                 gr.Markdown("## Advanced Settings")
-
-                 with gr.Accordion("Storage Settings", open=True):
-                     temp_dir = gr.Textbox(label="Temporary Directory", value=str(TEMP_DIR))
-                     output_dir = gr.Textbox(label="Output Directory", value=str(OUTPUT_DIR))
-                     music_dir = gr.Textbox(label="Music Directory", value=str(MUSIC_DIR))
-
-                     def update_dirs(temp, output, music):
-                         global TEMP_DIR, OUTPUT_DIR, MUSIC_DIR
-                         TEMP_DIR = Path(temp)
-                         OUTPUT_DIR = Path(output)
-                         MUSIC_DIR = Path(music)
-
-                         # Ensure directories exist
-                         TEMP_DIR.mkdir(exist_ok=True)
-                         OUTPUT_DIR.mkdir(exist_ok=True)
-                         MUSIC_DIR.mkdir(exist_ok=True)
-
-                         return f"Directories updated: {temp}, {output}, {music}"
-
-                     update_dirs_btn = gr.Button("Update Directories")
-                     dirs_status = gr.Textbox(label="Status")
-
-                     update_dirs_btn.click(
-                         update_dirs,
-                         inputs=[temp_dir, output_dir, music_dir],
-                         outputs=dirs_status
-                     )
-
-                 with gr.Accordion("Add Background Music", open=True):
-                     music_file = gr.File(label="Upload Music File (MP3)")
-
-                     def add_music(file):
-                         if file is None:
-                             return "No file selected"
-
-                         try:
-                             filename = os.path.basename(file.name)
-                             dest_path = MUSIC_DIR / filename
-                             shutil.copy2(file.name, dest_path)
-                             return f"Music file added: {filename}"
-                         except Exception as e:
-                             return f"Error adding music: {str(e)}"
-
-                     add_music_btn = gr.Button("Add Music")
-                     music_status = gr.Textbox(label="Status")
-
-                     add_music_btn.click(
-                         add_music,
-                         inputs=music_file,
-                         outputs=music_status
-                     )
-
-                 with gr.Accordion("Cleanup", open=True):
-                     def clean_temp():
-                         try:
-                             for file in TEMP_DIR.glob("*"):
-                                 file.unlink()
-                             return "Temporary files cleaned successfully"
-                         except Exception as e:
-                             return f"Error cleaning files: {str(e)}"
-
-                     clean_temp_btn = gr.Button("Clean Temporary Files")
-                     clean_status = gr.Textbox(label="Status")
-
-                     clean_temp_btn.click(
-                         clean_temp,
-                         outputs=clean_status
-                     )
-
-             with gr.TabItem("Help"):
-                 gr.Markdown("""
-                 # Help & Troubleshooting
-
-                 ## Common Issues
-
-                 ### Image Generation
-
-                 - **G4F Issues**: If image generation with G4F fails, try switching to "prodia" or "pollinations" provider
-                 - **Model Compatibility**: Not all models work with all providers. If you get errors, try a different model/provider combination
-
-                 ### TTS Issues
-
-                 - **Edge TTS Voice Error**: If you get "No audio was received" error, try a different voice like "en-US-ChristopherNeural"
-                 - **Google TTS**: For simplicity, use Google TTS with language code "en" if Edge TTS isn't working
-
-                 ### Video Generation
-
-                 - **Video Creation Errors**: Make sure all images were successfully generated before creating the video
-                 - **Subtitle Issues**: Subtitles require an AssemblyAI API key. You can get a free one at [AssemblyAI](https://www.assemblyai.com/)
-
-                 ## Tips for Best Results
-
-                 1. **Topics**: Be specific with your niche/topic for better results
-                 2. **Images**: SDXL Turbo is fastest, but other models may give better quality
-                 3. **TTS**: Edge TTS generally gives the best quality voice with "en-US-ChristopherNeural"
-                 4. **Background Music**: Add your own music files for better video quality
-                 """)
-
-     return app
-
- if __name__ == "__main__":
-     app = ui_component()
-     app.launch()
+ <!DOCTYPE html>
+ <html lang="en">
+ <head>
+     <meta charset="UTF-8">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>YouTube Shorts Generator</title>
+     <script src="https://cdn.tailwindcss.com"></script>
+     <script>
+         tailwind.config = {
+             darkMode: 'class',
+             theme: {
+                 extend: {
+                     colors: {
+                         primary: '#5D5CDE',
+                     }
+                 }
+             }
+         };
+     </script>
+     <style>
+         .loading-spinner {
+             border: 4px solid rgba(0, 0, 0, 0.1);
+             border-left-color: #5D5CDE;
+             border-radius: 50%;
+             width: 50px;
+             height: 50px;
+             animation: spin 1s linear infinite;
+         }
+
+         @keyframes spin {
+             to { transform: rotate(360deg); }
+         }
+
+         .dark .loading-spinner {
+             border-color: rgba(255, 255, 255, 0.1);
+             border-left-color: #5D5CDE;
+         }
+     </style>
+ </head>
+ <body class="bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200 min-h-screen">
+     <div class="container mx-auto px-4 py-8 max-w-4xl">
+         <h1 class="text-3xl font-bold mb-4 text-center text-primary">YouTube Shorts Generator</h1>
+
+         <div class="mb-8 bg-gray-100 dark:bg-gray-800 p-6 rounded-lg shadow-md">
+             <div class="mb-4">
+                 <label for="niche" class="block text-sm font-medium mb-1">Niche/Topic</label>
+                 <input type="text" id="niche" placeholder="E.g., Fitness tips, Technology facts, Travel destinations" class="w-full px-4 py-2 rounded-md border border-gray-300 dark:border-gray-700 bg-white dark:bg-gray-700 text-base">
+             </div>
+
+             <div class="mb-4">
+                 <label for="language" class="block text-sm font-medium mb-1">Language</label>
+                 <select id="language" class="w-full px-4 py-2 rounded-md border border-gray-300 dark:border-gray-700 bg-white dark:bg-gray-700 text-base">
+                     <option value="English">English</option>
+                     <option value="Spanish">Spanish</option>
+                     <option value="French">French</option>
+                     <option value="German">German</option>
+                     <option value="Italian">Italian</option>
+                     <option value="Portuguese">Portuguese</option>
+                     <option value="Russian">Russian</option>
+                     <option value="Japanese">Japanese</option>
+                     <option value="Chinese">Chinese</option>
+                     <option value="Hindi">Hindi</option>
+                 </select>
+             </div>
+
+             <div class="grid grid-cols-1 md:grid-cols-3 gap-4 mb-6">
+                 <div>
+                     <label for="text-generator" class="block text-sm font-medium mb-1">Text Generator</label>
+                     <select id="text-generator" class="w-full px-4 py-2 rounded-md border border-gray-300 dark:border-gray-700 bg-white dark:bg-gray-700 text-base">
+                         <option value="Claude-3.7-Sonnet">Claude-3.7-Sonnet</option>
+                         <option value="GPT-4o">GPT-4o</option>
+                         <option value="GPT-4o-mini">GPT-4o-mini</option>
+                         <option value="Gemini-2.0-Flash">Gemini-2.0-Flash</option>
+                     </select>
+                 </div>
+
+                 <div>
+                     <label for="image-generator" class="block text-sm font-medium mb-1">Image Generator</label>
+                     <select id="image-generator" class="w-full px-4 py-2 rounded-md border border-gray-300 dark:border-gray-700 bg-white dark:bg-gray-700 text-base">
+                         <option value="FLUX-pro-1.1">FLUX-pro-1.1</option>
+                         <option value="FLUX-schnell">FLUX-schnell</option>
+                         <option value="Dall-E-3">Dall-E-3</option>
+                     </select>
+                 </div>
+
+                 <div>
+                     <label for="voice-generator" class="block text-sm font-medium mb-1">Voice Generator</label>
+                     <select id="voice-generator" class="w-full px-4 py-2 rounded-md border border-gray-300 dark:border-gray-700 bg-white dark:bg-gray-700 text-base">
+                         <option value="ElevenLabs">ElevenLabs</option>
+                         <option value="PlayAI-Dialog">PlayAI-Dialog</option>
+                     </select>
+                 </div>
+             </div>
+
+             <div>
+                 <label for="voice-name" class="block text-sm font-medium mb-1">Voice Name (Optional)</label>
+                 <input type="text" id="voice-name" placeholder="E.g., Sarah, Brian, Lily, Monika Sogam" class="w-full px-4 py-2 rounded-md border border-gray-300 dark:border-gray-700 bg-white dark:bg-gray-700 text-base">
+             </div>
+
+             <button id="generate-btn" class="mt-6 w-full bg-primary hover:bg-opacity-90 text-white py-3 px-4 rounded-md font-medium transition duration-200">
+                 Generate Video
+             </button>
+         </div>
+
+         <div id="loading-container" class="hidden flex-col items-center justify-center py-8">
+             <div class="loading-spinner mb-4"></div>
+             <div id="status-message" class="text-lg font-medium">Generating content...</div>
+             <div id="progress-detail" class="text-sm text-gray-500 dark:text-gray-400 mt-2"></div>
+         </div>
+
+         <div id="results-container" class="hidden bg-gray-100 dark:bg-gray-800 p-6 rounded-lg shadow-md">
+             <h2 class="text-xl font-bold mb-3">Generated Video</h2>
+
+             <div id="video-player-container" class="mb-6 relative pt-[56.25%]">
+                 <video id="video-player" controls class="absolute top-0 left-0 w-full h-full rounded-lg">
+                     Your browser does not support the video tag.
+                 </video>
+             </div>
+
+             <div class="grid grid-cols-1 md:grid-cols-2 gap-4">
+                 <div>
+                     <h3 class="font-medium mb-2">Title</h3>
+                     <p id="video-title" class="bg-white dark:bg-gray-700 p-3 rounded-md"></p>
+                 </div>
+                 <div>
+                     <h3 class="font-medium mb-2">Description</h3>
+                     <p id="video-description" class="bg-white dark:bg-gray-700 p-3 rounded-md h-24 overflow-y-auto"></p>
+                 </div>
+             </div>
+         </div>
+     </div>
+
+     <script>
+         // Initialize dark mode based on user preference
+         if (window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches) {
+             document.documentElement.classList.add('dark');
+         }
+
+         window.matchMedia('(prefers-color-scheme: dark)').addEventListener('change', event => {
+             if (event.matches) {
+                 document.documentElement.classList.add('dark');
+             } else {
+                 document.documentElement.classList.remove('dark');
+             }
+         });
+
+         // Handler for generating videos
+         document.getElementById('generate-btn').addEventListener('click', async function() {
+             // Get input values
+             const niche = document.getElementById('niche').value.trim();
+             const language = document.getElementById('language').value;
+             const textGenerator = document.getElementById('text-generator').value;
+             const imageGenerator = document.getElementById('image-generator').value;
+             const voiceGenerator = document.getElementById('voice-generator').value;
+             const voiceName = document.getElementById('voice-name').value.trim();
+
+             // Validation
+             if (!niche) {
+                 alert('Please enter a niche/topic');
+                 return;
+             }
+
+             // Show loading state
+             document.getElementById('loading-container').classList.remove('hidden');
+             document.getElementById('loading-container').classList.add('flex');
+             document.getElementById('results-container').classList.add('hidden');
+
+             try {
+                 // Updates for the loading message
+                 updateProgress('Generating topic...');
+                 await new Promise(resolve => setTimeout(resolve, 1000));
+
+                 // Step 1: Generate topic
+                 const topic = await generateTopic(niche, textGenerator);
+
+                 // Step 2: Generate script
+                 updateProgress('Creating script...');
+                 const script = await generateScript(topic, language, textGenerator);
+
+                 // Step 3: Generate metadata
+                 updateProgress('Creating title and description...');
+                 const metadata = await generateMetadata(topic, script, textGenerator);
+
+                 // Step 4: Generate image prompts
+                 updateProgress('Creating image prompts...');
+                 const imagePrompts = await generateImagePrompts(topic, script, textGenerator);
+
+                 // Step 5: Generate images
+                 updateProgress('Generating images...');
+                 const imageUrls = await generateImages(imagePrompts, imageGenerator);
+
+                 // Step 6: Generate speech
+                 updateProgress('Creating voiceover...');
+                 const audioUrl = await generateSpeech(script, language, voiceGenerator, voiceName);
+
+                 // Step 7: Generate video
+                 updateProgress('Creating final video...');
+                 const videoUrl = await generateVideo(imageUrls, audioUrl, script);
+
+                 // Display results
+                 displayResults(videoUrl, metadata.title, metadata.description);
+
+             } catch (error) {
+                 console.error('Error:', error);
+                 document.getElementById('status-message').textContent = 'Error generating video';
+                 document.getElementById('progress-detail').textContent = error.message || 'An unexpected error occurred';
+             }
+         });
+
+         function updateProgress(message) {
+             document.getElementById('progress-detail').textContent = message;
+         }
+
+         // Function to generate topic based on niche
+         async function generateTopic(niche, textGeneratorModel) {
+             try {
+                 const prompt = `Please generate a specific video idea that takes about the following topic: ${niche}. Make it exactly one sentence. Only return the topic, nothing else.`;
+
+                 // Use Poe API to send user message
+                 const handlerId = 'topic-generation-handler';
+                 let topicResult = '';
+
+                 // Register handler for response
+                 window.Poe.registerHandler(handlerId, (result) => {
+                     if (result.responses.length > 0) {
+                         const response = result.responses[0];
+                         if (response.status === 'complete') {
+                             topicResult = response.content.trim();
+                         }
+                     }
+                 });
+
+                 // Send request to generate topic
+                 await window.Poe.sendUserMessage(`@${textGeneratorModel} ${prompt}`, {
+                     handler: handlerId,
+                     stream: false,
+                     openChat: false
+                 });
+
+                 // Wait for response to be complete
+                 while (!topicResult) {
+                     await new Promise(resolve => setTimeout(resolve, 100));
+                 }
+
+                 return topicResult;
+             } catch (error) {
+                 console.error('Error generating topic:', error);
+                 throw new Error('Failed to generate topic');
+             }
+         }
+
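Every generator function below repeats the register-handler / send-message / busy-wait pattern shown above. As an illustrative sketch only (not part of the committed file), the whole pattern could be wrapped in a single Promise-based helper; the helper name askModel and the 'error' status branch are assumptions, since the committed code only ever checks for a 'complete' status:

function askModel(model, prompt, handlerId) {
    // Hypothetical helper: resolves once the Poe handler reports a complete
    // response, so callers can await it instead of polling in a loop.
    return new Promise((resolve, reject) => {
        window.Poe.registerHandler(handlerId, (result) => {
            if (result.responses.length > 0) {
                const response = result.responses[0];
                if (response.status === 'complete') {
                    resolve(response.content.trim());
                } else if (response.status === 'error') {
                    // Assumed status value; the committed code handles only 'complete'.
                    reject(new Error('Model call failed'));
                }
            }
        });
        // Same sendUserMessage call the committed functions use.
        window.Poe.sendUserMessage(`@${model} ${prompt}`, {
            handler: handlerId,
            stream: false,
            openChat: false
        }).catch(reject);
    });
}

With such a helper, generateTopic above would reduce to a single await askModel(textGeneratorModel, prompt, 'topic-generation-handler') call and the busy-wait loops would disappear.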
+         // Function to generate script based on topic
+         async function generateScript(topic, language, textGeneratorModel) {
+             try {
+                 const prompt = `
+                 Generate a script for youtube shorts video, depending on the subject of the video.
+
+                 The script is to be returned as a string with several paragraphs.
+
+                 Get straight to the point, don't start with unnecessary things like, "welcome to this video".
+
+                 Obviously, the script should be related to the subject of the video.
+
+                 YOU MUST NOT INCLUDE ANY TYPE OF MARKDOWN OR FORMATTING IN THE SCRIPT, NEVER USE A TITLE.
+                 YOU MUST WRITE THE SCRIPT IN THE LANGUAGE SPECIFIED IN [LANGUAGE].
+                 ONLY RETURN THE RAW CONTENT OF THE SCRIPT. DO NOT INCLUDE "VOICEOVER", "NARRATOR" OR SIMILAR INDICATORS.
+
+                 Subject: ${topic}
+                 Language: ${language}
+                 `;
+
+                 // Use Poe API to send user message
+                 const handlerId = 'script-generation-handler';
+                 let scriptResult = '';
+
+                 // Register handler for response
+                 window.Poe.registerHandler(handlerId, (result) => {
+                     if (result.responses.length > 0) {
+                         const response = result.responses[0];
+                         if (response.status === 'complete') {
+                             scriptResult = response.content.trim();
+                         }
+                     }
+                 });
+
+                 // Send request to generate script
+                 await window.Poe.sendUserMessage(`@${textGeneratorModel} ${prompt}`, {
+                     handler: handlerId,
+                     stream: false,
+                     openChat: false
+                 });
+
+                 // Wait for response to be complete
+                 while (!scriptResult) {
+                     await new Promise(resolve => setTimeout(resolve, 100));
+                 }
+
+                 return scriptResult;
+             } catch (error) {
+                 console.error('Error generating script:', error);
+                 throw new Error('Failed to generate script');
+             }
+         }
+
+         // Function to generate metadata (title and description)
+         async function generateMetadata(topic, script, textGeneratorModel) {
+             try {
+                 const titlePrompt = `Please generate a YouTube Video Title for the following subject, including hashtags: ${topic}. Only return the title, nothing else. Limit the title under 100 characters.`;
+
+                 // Use Poe API to send user message for title
+                 const titleHandlerId = 'title-generation-handler';
+                 let titleResult = '';
+
+                 // Register handler for title response
+                 window.Poe.registerHandler(titleHandlerId, (result) => {
+                     if (result.responses.length > 0) {
+                         const response = result.responses[0];
+                         if (response.status === 'complete') {
+                             titleResult = response.content.trim();
+                         }
+                     }
+                 });
+
+                 // Send request to generate title
+                 await window.Poe.sendUserMessage(`@${textGeneratorModel} ${titlePrompt}`, {
+                     handler: titleHandlerId,
+                     stream: false,
+                     openChat: false
+                 });
+
+                 // Wait for title response to be complete
+                 while (!titleResult) {
+                     await new Promise(resolve => setTimeout(resolve, 100));
+                 }
+
+                 // Now generate description
+                 const descPrompt = `Please generate a YouTube Video Description for the following script: ${script}. Only return the description, nothing else.`;
+
+                 // Use Poe API to send user message for description
+                 const descHandlerId = 'desc-generation-handler';
+                 let descResult = '';
+
+                 // Register handler for description response
+                 window.Poe.registerHandler(descHandlerId, (result) => {
+                     if (result.responses.length > 0) {
+                         const response = result.responses[0];
+                         if (response.status === 'complete') {
+                             descResult = response.content.trim();
+                         }
+                     }
+                 });
+
+                 // Send request to generate description
+                 await window.Poe.sendUserMessage(`@${textGeneratorModel} ${descPrompt}`, {
+                     handler: descHandlerId,
+                     stream: false,
+                     openChat: false
+                 });
+
+                 // Wait for description response to be complete
+                 while (!descResult) {
+                     await new Promise(resolve => setTimeout(resolve, 100));
+                 }
+
+                 return {
+                     title: titleResult,
+                     description: descResult
+                 };
+             } catch (error) {
+                 console.error('Error generating metadata:', error);
+                 throw new Error('Failed to generate title and description');
+             }
+         }
+
+         // Function to generate image prompts
+         async function generateImagePrompts(topic, script, textGeneratorModel) {
+             try {
+                 const prompt = `
+                 Generate 5 Image Prompts for AI Image Generation,
+                 depending on the subject of a video.
+                 Subject: ${topic}
+
+                 The image prompts are to be returned as
+                 a JSON-Array of strings.
+
+                 Each search term should consist of a full sentence,
+                 always add the main subject of the video.
+
+                 Be emotional and use interesting adjectives to make the
+                 Image Prompt as detailed as possible.
+
+                 YOU MUST ONLY RETURN THE JSON-ARRAY OF STRINGS.
+                 YOU MUST NOT RETURN ANYTHING ELSE.
+
+                 For context, here is the full text:
+                 ${script}
+                 `;
+
+                 // Use Poe API to send user message
+                 const handlerId = 'image-prompts-handler';
+                 let promptsResult = '';
+
+                 // Register handler for response
+                 window.Poe.registerHandler(handlerId, (result) => {
+                     if (result.responses.length > 0) {
+                         const response = result.responses[0];
+                         if (response.status === 'complete') {
+                             promptsResult = response.content.trim();
+                         }
+                     }
+                 });
+
+                 // Send request to generate image prompts
+                 await window.Poe.sendUserMessage(`@${textGeneratorModel} ${prompt}`, {
+                     handler: handlerId,
+                     stream: false,
+                     openChat: false
+                 });
+
+                 // Wait for response to be complete
+                 while (!promptsResult) {
+                     await new Promise(resolve => setTimeout(resolve, 100));
+                 }
+
+                 // Clean and parse the JSON response
+                 const cleanedResponse = promptsResult
+                     .replace(/```json/g, '')
+                     .replace(/```/g, '')
+                     .trim();
+
+                 try {
+                     return JSON.parse(cleanedResponse);
+                 } catch (parseError) {
+                     // If parsing fails, try to extract the array from the text
+                     const arrayMatch = cleanedResponse.match(/\[.*\]/s);
+                     if (arrayMatch) {
+                         return JSON.parse(arrayMatch[0]);
+                     }
+                     throw new Error('Failed to parse image prompts');
+                 }
+             } catch (error) {
+                 console.error('Error generating image prompts:', error);
+                 throw new Error('Failed to generate image prompts');
+             }
+         }
+
+         // Function to generate images based on prompts
+         async function generateImages(imagePrompts, imageGeneratorModel) {
+             try {
+                 const imageUrls = [];
+
+                 for (let i = 0; i < imagePrompts.length; i++) {
+                     updateProgress(`Generating image ${i+1}/${imagePrompts.length}...`);
+
+                     // Use Poe API to send user message
+                     const handlerId = `image-generation-handler-${i}`;
+
+                     // Register handler for response
+                     window.Poe.registerHandler(handlerId, (result) => {
+                         if (result.responses.length > 0) {
+                             const response = result.responses[0];
+                             if (response.status === 'complete' && response.attachments && response.attachments.length > 0) {
+                                 imageUrls.push(response.attachments[0].url);
+                             }
+                         }
+                     });
+
+                     // Send request to generate image
+                     await window.Poe.sendUserMessage(`@${imageGeneratorModel} ${imagePrompts[i]}`, {
+                         handler: handlerId,
+                         stream: false,
+                         openChat: false
+                     });
+
+                     // Wait for a short time to ensure the handler has time to receive the response
+                     await new Promise(resolve => setTimeout(resolve, 3000));
+                 }
+
+                 // Ensure we have at least one image
+                 if (imageUrls.length === 0) {
+                     throw new Error('Failed to generate any images');
+                 }
+
+                 return imageUrls;
+             } catch (error) {
+                 console.error('Error generating images:', error);
+                 throw new Error('Failed to generate images');
+             }
+         }
+
+         // Function to generate speech from script
+         async function generateSpeech(script, language, voiceGeneratorModel, voiceName) {
+             try {
+                 // Use Poe API to send user message
+                 const handlerId = 'speech-generation-handler';
+                 let audioUrl = null;
+
+                 // Register handler for response
+                 window.Poe.registerHandler(handlerId, (result) => {
+                     if (result.responses.length > 0) {
+                         const response = result.responses[0];
+                         if (response.status === 'complete' && response.attachments && response.attachments.length > 0) {
+                             audioUrl = response.attachments[0].url;
+                         }
+                     }
+                 });
+
+                 // Prepare the prompt
+                 let prompt = script;
+                 if (voiceName) {
+                     prompt += ` --voice ${voiceName}`;
+                 }
+
+                 // Send request to generate speech
+                 await window.Poe.sendUserMessage(`@${voiceGeneratorModel} ${prompt}`, {
+                     handler: handlerId,
+                     stream: false,
+                     openChat: false
+                 });
+
+                 // Wait for audio URL to be available
+                 let attempts = 0;
+                 while (!audioUrl && attempts < 30) {
+                     await new Promise(resolve => setTimeout(resolve, 1000));
+                     attempts++;
+                 }
+
+                 if (!audioUrl) {
+                     throw new Error('Failed to generate speech audio');
+                 }
+
+                 return audioUrl;
+             } catch (error) {
+                 console.error('Error generating speech:', error);
+                 throw new Error('Failed to generate speech');
+             }
+         }
+
+         // Function to generate video by combining images and audio
+         async function generateVideo(imageUrls, audioUrl, script) {
+             // Here we would normally combine everything into a video.
+             // Since we can't easily do video processing in the browser,
+             // we simulate it with an image slideshow plus the audio track.
+
+             try {
+                 // This is a simple mockup: for the demo we just return the audio URL
+                 // and use the first image as a placeholder in the video player.
+
+                 // In a real implementation, this is where you would call an external
+                 // video processing service or use a server-side component.
+
+                 // Simulate processing time
+                 await new Promise(resolve => setTimeout(resolve, 3000));
+
+                 // Return a mock video payload (just the audio URL and images for this demo)
+                 return {
+                     audioUrl: audioUrl,
+                     imageUrls: imageUrls
+                 };
+             } catch (error) {
+                 console.error('Error generating video:', error);
+                 throw new Error('Failed to generate video');
+             }
+         }
+
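The comments above describe what a real implementation would do. For reference, a purely in-browser version is also possible with standard web APIs; the following is a minimal sketch, assuming MediaRecorder and canvas.captureStream() support and CORS-accessible image and audio URLs (the function name stitchSlideshow is hypothetical and not part of this commit):

async function stitchSlideshow(imageUrls, audioUrl, secondsPerImage = 3) {
    // Hypothetical sketch: stitch the generated images and audio into a WebM
    // clip entirely in the browser, instead of simulating a video.
    const canvas = document.createElement('canvas');
    canvas.width = 1080;   // Same 9:16 frame the old Python pipeline used
    canvas.height = 1920;
    const ctx = canvas.getContext('2d');

    // Route the audio element through an AudioContext so its track can be recorded.
    const audio = new Audio(audioUrl);
    audio.crossOrigin = 'anonymous';
    const audioCtx = new AudioContext();
    const dest = audioCtx.createMediaStreamDestination();
    audioCtx.createMediaElementSource(audio).connect(dest);

    // Combine the canvas video track with the audio track into one stream.
    const stream = new MediaStream([
        ...canvas.captureStream(30).getVideoTracks(),
        ...dest.stream.getAudioTracks()
    ]);
    const recorder = new MediaRecorder(stream, { mimeType: 'video/webm' });
    const chunks = [];
    recorder.ondataavailable = e => chunks.push(e.data);
    const stopped = new Promise(resolve => { recorder.onstop = resolve; });

    recorder.start();
    await audio.play();

    // Draw each image onto the canvas for a fixed duration.
    for (const url of imageUrls) {
        const img = new Image();
        img.crossOrigin = 'anonymous';
        await new Promise((res, rej) => { img.onload = res; img.onerror = rej; img.src = url; });
        ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
        await new Promise(res => setTimeout(res, secondsPerImage * 1000));
    }

    recorder.stop();
    await stopped;
    return URL.createObjectURL(new Blob(chunks, { type: 'video/webm' }));
}

The object URL this returns could be assigned directly to the video player's src in displayResults, in place of the audio-only source used below.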
+         // Function to display results
+         function displayResults(videoData, title, description) {
+             // Hide loading container
+             document.getElementById('loading-container').classList.add('hidden');
+             document.getElementById('loading-container').classList.remove('flex');
+
+             // Show results container
+             document.getElementById('results-container').classList.remove('hidden');
+
+             // Set title and description
+             document.getElementById('video-title').textContent = title;
+             document.getElementById('video-description').textContent = description;
+
+             // Set up video player with image slideshow and audio
+             const videoPlayer = document.getElementById('video-player');
+
+             // Create a simple slideshow with the first image and audio
+             if (videoData.imageUrls && videoData.imageUrls.length > 0) {
+                 // Set poster to first image
+                 videoPlayer.setAttribute('poster', videoData.imageUrls[0]);
+             }
+
+             // Set audio source
+             videoPlayer.innerHTML = '';
+             const audioSource = document.createElement('source');
+             audioSource.src = videoData.audioUrl;
+             audioSource.type = 'audio/mpeg';
+             videoPlayer.appendChild(audioSource);
+
+             // Add text explaining this is a simulation
+             const textOverlay = document.createElement('div');
+             textOverlay.innerHTML = `
+                 <div class="absolute inset-0 flex items-center justify-center bg-black bg-opacity-50 text-white p-4 text-center">
+                     <div>
+                         <p class="font-bold mb-2">Audio Preview</p>
+                         <p class="text-sm">This is an audio preview. In a full implementation, this would be a video combining the generated images and audio.</p>
+                     </div>
+                 </div>
+             `;
+             videoPlayer.parentNode.appendChild(textOverlay);
+
+             // Load and play
+             videoPlayer.load();
+         }
+     </script>
+ </body>
+ </html>