AZILS committed (verified)
Commit 554a992 · 1 Parent(s): 889ab84

Update app.py

Files changed (1)
  1. app.py +699 -488
app.py CHANGED
@@ -31,14 +31,13 @@ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
31
  STATIC_DIR = os.path.join(BASE_DIR, "static")
32
  MUSIC_DIR = os.path.join(STATIC_DIR, "music")
33
  FONTS_DIR = os.path.join(STATIC_DIR, "fonts")
34
- # Use temp directory for faster file operations
35
- CACHE_DIR = os.path.join(tempfile.gettempdir(), "yt_shorts_generator")
36
 
37
  # Create necessary directories
38
  os.makedirs(STATIC_DIR, exist_ok=True)
39
  os.makedirs(MUSIC_DIR, exist_ok=True)
40
  os.makedirs(FONTS_DIR, exist_ok=True)
41
- os.makedirs(CACHE_DIR, exist_ok=True)
42
 
43
  # Helper functions for logging
44
  def info(message):
@@ -425,184 +424,166 @@ class YouTube:
425
  """Generate an image using the selected image generation model."""
426
  self.log(f"Generating image for prompt: {prompt[:50]}...")
427
 
428
- # Use simpler file naming for speed
429
- image_path = os.path.join(CACHE_DIR, f"img_{len(self.images)}_{int(time.time())}.png")
430
 
431
- try:
432
- if self.image_gen == "prodia":
433
- self.log("Using Prodia provider for image generation")
434
- s = requests.Session()
435
- headers = {
436
- "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
437
- }
438
-
439
- # Generate job
440
- self.log("Sending generation request to Prodia API")
441
- resp = s.get(
442
- "https://api.prodia.com/generate",
443
- params={
444
- "new": "true",
445
- "prompt": prompt,
446
- "model": self.image_model,
447
- "negative_prompt": "verybadimagenegative_v1.3",
448
- "steps": "20",
449
- "cfg": "7",
450
- "seed": random.randint(1, 10000),
451
- "sample": "DPM++ 2M Karras",
452
- "aspect_ratio": "square"
453
- },
454
- headers=headers
455
- )
456
-
457
- if resp.status_code != 200:
458
- raise Exception(f"Prodia API error: {resp.text}")
459
-
460
- job_id = resp.json()['job']
461
- self.log(f"Job created with ID: {job_id}")
462
-
463
- # Wait for generation to complete
464
- max_attempts = 30
465
- attempts = 0
466
- while attempts < max_attempts:
467
- attempts += 1
468
- time.sleep(2)
469
- status = s.get(f"https://api.prodia.com/job/{job_id}", headers=headers).json()
470
-
471
- if status["status"] == "succeeded":
472
- self.log("Image generation successful, downloading result")
473
- img_data = s.get(f"https://images.prodia.xyz/{job_id}.png?download=1", headers=headers).content
474
- with open(image_path, "wb") as f:
475
- f.write(img_data)
476
- self.images.append(image_path)
477
- self.log(success(f"Image saved to: {image_path}"))
478
- return image_path
479
-
480
- elif status["status"] == "failed":
481
- raise Exception(f"Prodia job failed: {status.get('error', 'Unknown error')}")
482
-
483
- # Still processing
484
- self.log(f"Still processing, attempt {attempts}/{max_attempts}...")
485
-
486
- raise Exception("Prodia job timed out")
487
 
488
- elif self.image_gen == "hercai":
489
- self.log("Using Hercai provider for image generation")
490
- url = f"https://hercai.onrender.com/{self.image_model}/text2image?prompt={prompt}"
491
- r = requests.get(url)
492
-
493
- if r.status_code != 200:
494
- raise Exception(f"Hercai API error: {r.text}")
495
 
496
- parsed = r.json()
497
- if "url" in parsed and parsed["url"]:
498
- self.log("Image URL received from Hercai")
499
- image_url = parsed["url"]
500
- img_data = requests.get(image_url).content
501
  with open(image_path, "wb") as f:
502
  f.write(img_data)
503
  self.images.append(image_path)
504
  self.log(success(f"Image saved to: {image_path}"))
505
  return image_path
506
- else:
507
- raise Exception("No image URL in Hercai response")
508
-
509
- elif self.image_gen == "g4f":
510
- self.log("Using G4F provider for image generation")
511
- from g4f.client import Client
512
- client = Client()
513
- response = client.images.generate(
514
- model=self.image_model,
515
- prompt=prompt,
516
- response_format="url"
517
- )
518
-
519
- if response and response.data and len(response.data) > 0:
520
- image_url = response.data[0].url
521
- image_response = requests.get(image_url)
522
-
523
- if image_response.status_code == 200:
524
- with open(image_path, "wb") as f:
525
- f.write(image_response.content)
526
- self.images.append(image_path)
527
- self.log(success(f"Image saved to: {image_path}"))
528
- return image_path
529
- else:
530
- raise Exception(f"Failed to download image from {image_url}")
531
- else:
532
- raise Exception("No image URL received from G4F")
533
-
534
- elif self.image_gen == "segmind":
535
- self.log("Using Segmind provider for image generation")
536
- api_key = os.environ.get("SEGMIND_API_KEY", "")
537
- if not api_key:
538
- raise ValueError("Segmind API key is not set. Please provide a valid API key.")
539
 
540
- headers = {
541
- "x-api-key": api_key,
542
- "Content-Type": "application/json"
543
- }
544
 
545
- response = requests.post(
546
- "https://api.segmind.com/v1/sdxl-turbo",
547
- json={
548
- "prompt": prompt,
549
- "negative_prompt": "blurry, low quality, distorted face, text, watermark",
550
- "samples": 1,
551
- "size": "1024x1024",
552
- "guidance_scale": 1.0
553
- },
554
- headers=headers
555
- )
556
-
557
- if response.status_code == 200:
558
- with open(image_path, "wb") as f:
559
- f.write(response.content)
560
- self.images.append(image_path)
561
- self.log(success(f"Image saved to: {image_path}"))
562
- return image_path
563
- else:
564
- raise Exception(f"Segmind request failed: {response.status_code} {response.text}")
565
 
566
- elif self.image_gen == "pollinations":
567
- self.log("Using Pollinations provider for image generation")
568
- response = requests.get(f"https://image.pollinations.ai/prompt/{prompt}{random.randint(1,10000)}")
569
 
570
- if response.status_code == 200:
571
- self.log("Image received from Pollinations")
572
  with open(image_path, "wb") as f:
573
- f.write(response.content)
574
  self.images.append(image_path)
575
  self.log(success(f"Image saved to: {image_path}"))
576
  return image_path
577
  else:
578
- raise Exception(f"Pollinations request failed with status code: {response.status_code}")
579
-
580
  else:
581
- # Create a fallback colored placeholder image instead of throwing an error
582
- self.log(f"Unknown provider '{self.image_gen}'. Generating placeholder image.")
583
- img = Image.new('RGB', (800, 800), color=(random.randint(0, 255),
584
- random.randint(0, 255),
585
- random.randint(0, 255)))
586
- img.save(image_path)
587
  self.images.append(image_path)
588
- self.log(warning(f"Created placeholder image at: {image_path}"))
589
  return image_path
590
-
591
- except Exception as e:
592
- error_msg = f"Image generation failed: {str(e)}"
593
- self.log(error(error_msg))
594
-
595
- # Create a fallback image instead of failing completely
596
- try:
597
- img = Image.new('RGB', (800, 800), color=(200, 200, 200))
598
- image_path = os.path.join(CACHE_DIR, f"error_img_{len(self.images)}_{int(time.time())}.png")
599
- img.save(image_path)
 
600
  self.images.append(image_path)
601
- self.log(warning(f"Created error placeholder image at: {image_path}"))
602
  return image_path
603
- except:
604
- # If all else fails, return None and handle it gracefully
605
- return None
606
 
607
  def generate_speech(self, text, output_format='mp3') -> str:
608
  """Generate speech from text using the selected TTS engine."""
@@ -614,144 +595,122 @@ class YouTube:
614
 
615
  self.log(f"Using TTS Engine: {self.tts_engine}, Voice: {self.tts_voice}")
616
 
617
- # Use simpler file naming for speed
618
- audio_path = os.path.join(CACHE_DIR, f"speech_{int(time.time())}.{output_format}")
619
 
620
- try:
621
- if self.tts_engine == "elevenlabs":
622
- self.log("Using ElevenLabs provider for speech generation")
623
- elevenlabs_api_key = os.environ.get("ELEVENLABS_API_KEY", "")
624
- if not elevenlabs_api_key:
625
- raise ValueError("ElevenLabs API key is not set. Please provide a valid API key.")
626
-
627
- headers = {
628
- "Accept": "audio/mpeg",
629
- "Content-Type": "application/json",
630
- "xi-api-key": elevenlabs_api_key
631
- }
632
-
633
- # Simplified payload to prevent "unusual activity" errors
634
- payload = {
635
- "text": text,
636
- "model_id": "eleven_monolingual_v1", # Use more stable model
637
- "voice_settings": {
638
- "stability": 0.5,
639
- "similarity_boost": 0.5
640
- }
641
- }
642
-
643
- # Map voice names to ElevenLabs voice IDs
644
- voice_id_mapping = {
645
- "Sarah": "21m00Tcm4TlvDq8ikWAM",
646
- "Brian": "hxppwzoRmvxK7YkDrjhQ",
647
- "Lily": "p7TAj7L6QVq1fE6XGyjR",
648
- "Monika Sogam": "Fc3XhIu9tfgOPOsU1hMr",
649
- "George": "o7lPjDgzlF8ZAeSpqmaN",
650
- "River": "f0k5evLkhJxrIRJXQJvy",
651
- "Matilda": "XrExE9yKIg1WjnnlVkGX",
652
- "Will": "pvKWM1B1sNRNTlEYYAEZ",
653
- "Jessica": "A5EAMYWMCSsLNL1wYxOv",
654
- "default": "21m00Tcm4TlvDq8ikWAM" # Default to Sarah
655
- }
656
-
657
- # Get the voice ID from mapping or use the voice name as ID if not found
658
- voice_id = voice_id_mapping.get(self.tts_voice, self.tts_voice)
659
-
660
- self.log(f"Using ElevenLabs voice: {self.tts_voice} (ID: {voice_id})")
661
-
662
- response = requests.post(
663
- url=f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}",
664
- json=payload,
665
- headers=headers
666
- )
667
-
668
- if response.status_code == 200:
669
- with open(audio_path, 'wb') as f:
670
- f.write(response.content)
671
- self.log(success(f"Speech generated successfully using ElevenLabs at {audio_path}"))
672
- else:
673
- try:
674
- error_data = response.json()
675
- error_message = error_data.get('detail', {}).get('message', response.text)
676
- error_status = error_data.get('status', 'error')
677
- raise Exception(f"ElevenLabs API error ({response.status_code}, {error_status}): {error_message}")
678
- except ValueError:
679
- # If JSON parsing fails, use the raw response
680
- raise Exception(f"ElevenLabs API error ({response.status_code}): {response.text}")
681
-
682
- elif self.tts_engine == "gtts":
683
- self.log("Using Google TTS provider for speech generation")
684
- from gtts import gTTS
685
- tts = gTTS(text=text, lang=self.language[:2].lower(), slow=False)
686
- tts.save(audio_path)
687
-
688
- elif self.tts_engine == "openai":
689
- self.log("Using OpenAI provider for speech generation")
690
- openai_api_key = os.environ.get("OPENAI_API_KEY", "")
691
- if not openai_api_key:
692
- raise ValueError("OpenAI API key is not set. Please provide a valid API key.")
693
-
694
- from openai import OpenAI
695
- client = OpenAI(api_key=openai_api_key)
696
-
697
- voice = self.tts_voice if self.tts_voice else "alloy"
698
- response = client.audio.speech.create(
699
- model="tts-1",
700
- voice=voice,
701
- input=text
702
- )
703
- response.stream_to_file(audio_path)
704
-
705
- elif self.tts_engine == "edge":
706
- self.log("Using Edge TTS provider for speech generation")
707
- import edge_tts
708
- import asyncio
709
-
710
- voice = self.tts_voice if self.tts_voice else "en-US-AriaNeural"
711
-
712
- async def generate():
713
- communicate = edge_tts.Communicate(text, voice)
714
- await communicate.save(audio_path)
715
-
716
- asyncio.run(generate())
717
 
718
  else:
719
- # Default to edge TTS if other methods aren't available
720
- self.log(f"Using default Edge TTS as fallback")
721
- import edge_tts
722
- import asyncio
723
-
724
- voice = "en-US-AriaNeural"
725
-
726
- async def generate():
727
- communicate = edge_tts.Communicate(text, voice)
728
- await communicate.save(audio_path)
729
-
730
- asyncio.run(generate())
731
 
732
- self.log(success(f"Speech generated and saved to: {audio_path}"))
733
- self.tts_path = audio_path
734
- return audio_path
 
735
 
736
- except Exception as e:
737
- error_msg = f"Speech generation failed: {str(e)}"
738
- self.log(error(error_msg))
739
 
740
- # Create a silent audio file as fallback
741
- try:
742
- from pydub import AudioSegment
743
- from pydub.generators import Sine
744
-
745
- # Generate 30 seconds of silence
746
- silence = AudioSegment.silent(duration=30000)
747
- silence.export(audio_path, format=output_format)
748
-
749
- self.log(warning(f"Created silent audio fallback at: {audio_path}"))
750
- self.tts_path = audio_path
751
- return audio_path
752
- except:
753
- self.log(error("Failed to create silent audio fallback"))
754
- return None
755
 
756
  def generate_subtitles(self, audio_path: str) -> dict:
757
  """Generate subtitles from audio using AssemblyAI."""
@@ -854,17 +813,24 @@ class YouTube:
854
 
855
  self.log(success(f"Generated {len(subtitles)} subtitle lines"))
856
 
857
  # Return the subtitle data and settings
858
  return {
859
  "wordlevel": wordlevel_info,
860
  "linelevel": subtitles,
 
861
  "settings": {
862
  "font": FONT,
863
  "fontsize": FONTSIZE,
864
  "color": COLOR,
865
  "bg_color": BG_COLOR,
866
  "position": self.subtitle_position,
867
- "highlighting_enabled": self.highlighting_enabled
 
868
  }
869
  }
870
 
@@ -872,9 +838,99 @@ class YouTube:
872
  error_msg = f"Error generating subtitles: {str(e)}"
873
  self.log(error(error_msg))
874
  raise Exception(error_msg)
875
 
876
  def create_subtitle_clip(self, subtitle_data, frame_size):
877
  """Create subtitle clips for a line of text with word-level highlighting."""
878
  settings = subtitle_data["settings"]
879
  font_name = settings["font"]
880
  fontsize = settings["fontsize"]
@@ -882,69 +938,39 @@ class YouTube:
882
  bg_color = settings["bg_color"]
883
  highlighting_enabled = settings["highlighting_enabled"]
884
 
885
- def create_text_clip(text, font_size, color, bg_color=None):
886
- try:
887
- # Try to use the specified font, fallback to default
888
- try:
889
- # Check if font is a path or just a name
890
- font_path = os.path.join(FONTS_DIR, f"{font_name}.ttf")
891
- if os.path.exists(font_path):
892
- pil_font = ImageFont.truetype(font_path, font_size)
893
- else:
894
- self.log(warning(f"Font {font_name} not found, using default"))
895
- pil_font = ImageFont.load_default()
896
- except Exception as e:
897
- self.log(warning(f"Error loading font: {str(e)}"))
898
- pil_font = ImageFont.load_default()
899
-
900
- # Get text size
901
- text_width, text_height = pil_font.getbbox(text)[2:4]
902
-
903
- # Add padding
904
- padding = 10
905
- img_width = text_width + padding * 2
906
- img_height = text_height + padding * 2
907
-
908
- # Create image with background color or transparent
909
- if bg_color:
910
- if bg_color.startswith('#'):
911
- bg_color_rgb = tuple(int(bg_color.lstrip('#')[i:i+2], 16) for i in (0, 2, 4))
912
- else:
913
- bg_color_rgb = (0, 0, 255) # Default blue
914
- img = Image.new('RGB', (img_width, img_height), color=bg_color_rgb)
915
- else:
916
- img = Image.new('RGBA', (img_width, img_height), color=(0, 0, 0, 0))
917
-
918
- # Draw text
919
- draw = ImageDraw.Draw(img)
920
- if color.startswith('#'):
921
- text_color_rgb = tuple(int(color.lstrip('#')[i:i+2], 16) for i in (0, 2, 4))
922
- else:
923
- text_color_rgb = (255, 255, 255) # Default white
924
-
925
- draw.text((padding, padding), text, font=pil_font, fill=text_color_rgb)
926
-
927
- # Convert to numpy array for MoviePy
928
- img_array = np.array(img)
929
- clip = ImageClip(img_array)
930
- return clip, img_width, img_height
931
-
932
- except Exception as e:
933
- self.log(warning(f"Error creating text clip: {str(e)}"))
934
- # Create a simple colored rectangle as fallback
935
- img = Image.new('RGB', (100, 50), color=(100, 100, 100))
936
- img_array = np.array(img)
937
- clip = ImageClip(img_array)
938
- return clip, 100, 50
939
 
940
  subtitle_clips = []
941
 
942
- for line in subtitle_data["linelevel"]:
943
- x_pos = 0
944
- y_pos = 0
945
- word_positions = []
946
 
947
- # Calculate vertical position based on subtitle position setting
948
  if settings["position"] == "top":
949
  y_buffer = frame_size[1] * 0.1 # 10% from top
950
  elif settings["position"] == "middle":
@@ -952,70 +978,213 @@ class YouTube:
952
  else: # bottom
953
  y_buffer = frame_size[1] * 0.7 # 70% from top
954
 
955
- x_buffer = frame_size[0] * 0.1 # 10% from left
956
  space_width = 20
957
 
958
- # Create clips for each word in the line
959
- for word_data in line["words"]:
960
- word = word_data["word"]
961
- start_time = word_data["start"]
962
- end_time = word_data["end"]
963
- duration = end_time - start_time
964
-
965
- # Create word clip
966
- word_clip, word_width, word_height = create_text_clip(word, fontsize, color)
967
-
968
- # Check if word fits on current line
969
- if x_pos + word_width + space_width > frame_size[0] - 2 * x_buffer:
970
- x_pos = 0
971
- y_pos += word_height + 20
972
-
973
- # Store word position info
974
- word_positions.append({
975
- "word": word,
976
- "x_pos": x_pos + x_buffer,
977
- "y_pos": y_pos + y_buffer,
978
- "width": word_width,
979
- "height": word_height,
980
- "start": start_time,
981
- "end": end_time
982
- })
983
-
984
- # Set position and timing for word clip
985
- word_clip = word_clip.set_position((x_pos + x_buffer, y_pos + y_buffer))
986
- word_clip = word_clip.set_start(line["start"]).set_duration(line["end"] - line["start"])
987
- subtitle_clips.append(word_clip)
988
-
989
- # Add space after word
990
- space_clip, _, _ = create_text_clip(" ", fontsize, color)
991
- space_clip = space_clip.set_position((x_pos + word_width + x_buffer, y_pos + y_buffer))
992
- space_clip = space_clip.set_start(line["start"]).set_duration(line["end"] - line["start"])
993
- subtitle_clips.append(space_clip)
994
 
995
- x_pos += word_width + space_width
996
-
997
- # Add highlighted words if enabled
998
- if highlighting_enabled and bg_color:
999
- for word_pos in word_positions:
1000
- highlight_clip, _, _ = create_text_clip(
1001
- word_pos["word"],
1002
- fontsize,
1003
- color,
1004
- bg_color
1005
- )
1006
- highlight_clip = highlight_clip.set_position((word_pos["x_pos"], word_pos["y_pos"]))
1007
- highlight_clip = highlight_clip.set_start(word_pos["start"]).set_duration(word_pos["end"] - word_pos["start"])
1008
- subtitle_clips.append(highlight_clip)
1009
-
1010
- return subtitle_clips
1011
 
1012
  def combine(self) -> str:
1013
  """Combine images, audio, and subtitles into a final video."""
1014
  self.progress(0.8, desc="Creating final video")
1015
  self.log("Combining images and audio into final video")
1016
  try:
1017
- # Use simple file naming for faster processing
1018
- output_path = os.path.join(CACHE_DIR, f"output_{int(time.time())}.mp4")
1019
 
1020
  # Check for required files
1021
  if not self.images:
@@ -1032,64 +1201,75 @@ class YouTube:
1032
  num_images = len(self.images)
1033
  req_dur = max_duration / num_images
1034
 
1035
- # Create video clips from images
 
1036
  clips = []
1037
  tot_dur = 0
1038
 
1039
- # Loop through images, repeating if necessary to fill audio duration
1040
- while tot_dur < max_duration:
1041
- for image_path in self.images:
1042
- # Check if image exists and is valid
1043
- if not os.path.exists(image_path):
1044
- self.log(warning(f"Image not found: {image_path}, skipping"))
1045
- continue
1046
 
1047
- try:
1048
- clip = ImageClip(image_path)
1049
- clip = clip.set_duration(req_dur)
1050
- clip = clip.set_fps(30)
1051
-
1052
- # Handle aspect ratio (vertical video for shorts)
1053
- aspect_ratio = 9/16 # Standard vertical video ratio
1054
- if clip.w / clip.h < aspect_ratio:
1055
- # Image is too tall, crop height
1056
- clip = crop(
1057
- clip,
1058
- width=clip.w,
1059
- height=round(clip.w / aspect_ratio),
1060
- x_center=clip.w / 2,
1061
- y_center=clip.h / 2
1062
- )
1063
- else:
1064
- # Image is too wide, crop width
1065
- clip = crop(
1066
- clip,
1067
- width=round(aspect_ratio * clip.h),
1068
- height=clip.h,
1069
- x_center=clip.w / 2,
1070
- y_center=clip.h / 2
1071
- )
1072
-
1073
- # Resize to standard size for shorts
1074
- clip = clip.resize((1080, 1920))
1075
- clips.append(clip)
1076
- tot_dur += clip.duration
1077
-
1078
- # If we've exceeded the duration, break
1079
- if tot_dur >= max_duration:
1080
- break
1081
- except Exception as e:
1082
- self.log(warning(f"Error processing image {image_path}: {str(e)}"))
1083
 
1084
  # Create video from clips
1085
  self.log(f"Creating video from {len(clips)} clips")
1086
  final_clip = concatenate_videoclips(clips)
1087
  final_clip = final_clip.set_fps(30)
1088
 
1089
- # Add subtitles if enabled
 
1090
  if self.subtitles_enabled and hasattr(self, 'subtitle_data'):
1091
- subtitle_clips = self.create_subtitle_clip(self.subtitle_data, (1080, 1920))
1092
- final_clip = CompositeVideoClip([final_clip] + subtitle_clips)
 
 
1093
 
1094
  # Add background music if available
1095
  music_path = None
@@ -1121,7 +1301,7 @@ class YouTube:
1121
  # Set final audio
1122
  final_clip = final_clip.set_audio(final_audio)
1123
 
1124
- # Write final video - use faster encoding settings
1125
  self.log("Writing final video file")
1126
  final_clip.write_videofile(
1127
  output_path,
@@ -1129,7 +1309,7 @@ class YouTube:
1129
  codec="libx264",
1130
  audio_codec="aac",
1131
  threads=4,
1132
- # Remove preset parameter for faster encoding
1133
  )
1134
 
1135
  self.log(success(f"Video saved to: {output_path}"))
@@ -1138,34 +1318,33 @@ class YouTube:
1138
  except Exception as e:
1139
  error_msg = f"Error combining video: {str(e)}"
1140
  self.log(error(error_msg))
1141
-
1142
- # Create a minimal fallback video if possible
1143
- try:
1144
- # Try to create a simple video with just the first image and audio
1145
- fallback_path = os.path.join(CACHE_DIR, f"fallback_{int(time.time())}.mp4")
1146
-
1147
- if self.images and os.path.exists(self.images[0]) and hasattr(self, 'tts_path') and os.path.exists(self.tts_path):
1148
- img_clip = ImageClip(self.images[0]).set_duration(10)
1149
- img_clip = img_clip.resize((1080, 1920))
1150
- audio_clip = AudioFileClip(self.tts_path).subclip(0, min(10, AudioFileClip(self.tts_path).duration))
1151
- video_clip = img_clip.set_audio(audio_clip)
1152
- video_clip.write_videofile(fallback_path, threads=2, codec='libx264', audio_codec='aac')
1153
-
1154
- self.log(warning(f"Created fallback video at: {fallback_path}"))
1155
- return fallback_path
1156
- else:
1157
- raise Exception("Cannot create fallback video: missing images or audio")
1158
- except Exception as fallback_error:
1159
- self.log(error(f"Failed to create fallback video: {str(fallback_error)}"))
1160
- return None
1161
 
1162
  def generate_video(self) -> dict:
1163
  """Generate complete video with all components."""
1164
  try:
1165
  self.log("Starting video generation process")
1166
 
1167
- # Create a simple generation directory - avoid complex numbering schemes
1168
- self.generation_folder = os.path.join(CACHE_DIR, f"gen_{int(time.time())}")
1169
  os.makedirs(self.generation_folder, exist_ok=True)
1170
  self.log(f"Created generation folder: {self.generation_folder}")
1171
 
@@ -1206,8 +1385,46 @@ class YouTube:
1206
  self.progress(0.7, desc="Generating subtitles")
1207
  if self.subtitles_enabled and hasattr(self, 'tts_path') and os.path.exists(self.tts_path):
1208
  self.subtitle_data = self.generate_subtitles(self.tts_path)
1209
 
1210
- # Step 8: Combine all elements into final video
1211
  self.progress(0.8, desc="Creating final video")
1212
  self.log("Combining all elements into final video")
1213
  path = self.combine()
@@ -1229,13 +1446,7 @@ class YouTube:
1229
  except Exception as e:
1230
  error_msg = f"Error during video generation: {str(e)}"
1231
  self.log(error(error_msg))
1232
-
1233
- # Return basic data even on error
1234
- return {
1235
- 'video_path': getattr(self, 'video_path', None),
1236
- 'error': str(e),
1237
- 'logs': self.logs
1238
- }
1239
 
1240
  # Data for dynamic dropdowns
1241
  def get_text_generator_models(generator):
@@ -1377,12 +1588,12 @@ def create_interface():
1377
  text_gen = gr.Dropdown(
1378
  choices=["g4f", "gemini", "openai"],
1379
  label="Text Generator",
1380
- value="g4f"
1381
  )
1382
  text_model = gr.Dropdown(
1383
  choices=get_text_generator_models("g4f"),
1384
  label="Text Model",
1385
- value="gpt-4"
1386
  )
1387
 
1388
  with gr.TabItem("Image"):
@@ -1621,7 +1832,7 @@ if __name__ == "__main__":
1621
  os.makedirs(STATIC_DIR, exist_ok=True)
1622
  os.makedirs(MUSIC_DIR, exist_ok=True)
1623
  os.makedirs(FONTS_DIR, exist_ok=True)
1624
- os.makedirs(CACHE_DIR, exist_ok=True)
1625
 
1626
  # Launch the app
1627
  demo = create_interface()
 
31
  STATIC_DIR = os.path.join(BASE_DIR, "static")
32
  MUSIC_DIR = os.path.join(STATIC_DIR, "music")
33
  FONTS_DIR = os.path.join(STATIC_DIR, "fonts")
34
+ STORAGE_DIR = os.path.join(BASE_DIR, "storage")
 
35
 
36
  # Create necessary directories
37
  os.makedirs(STATIC_DIR, exist_ok=True)
38
  os.makedirs(MUSIC_DIR, exist_ok=True)
39
  os.makedirs(FONTS_DIR, exist_ok=True)
40
+ os.makedirs(STORAGE_DIR, exist_ok=True)
41
 
42
  # Helper functions for logging
43
  def info(message):
 
424
  """Generate an image using the selected image generation model."""
425
  self.log(f"Generating image for prompt: {prompt[:50]}...")
426
 
427
+ # Always save images directly to the generation folder when it exists
428
+ if hasattr(self, 'generation_folder') and os.path.exists(self.generation_folder):
429
+ image_path = os.path.join(self.generation_folder, f"img_{uuid.uuid4()}_{int(time.time())}.png")
430
+ else:
431
+ # Use STORAGE_DIR if no generation folder
432
+ image_path = os.path.join(STORAGE_DIR, f"img_{uuid.uuid4()}_{int(time.time())}.png")
433
 
434
+ if self.image_gen == "prodia":
435
+ self.log("Using Prodia provider for image generation")
436
+ s = requests.Session()
437
+ headers = {
438
+ "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
439
+ }
440
 
441
+ # Generate job
442
+ self.log("Sending generation request to Prodia API")
443
+ resp = s.get(
444
+ "https://api.prodia.com/generate",
445
+ params={
446
+ "new": "true",
447
+ "prompt": prompt,
448
+ "model": self.image_model,
449
+ "negative_prompt": "verybadimagenegative_v1.3",
450
+ "steps": "20",
451
+ "cfg": "7",
452
+ "seed": random.randint(1, 10000),
453
+ "sample": "DPM++ 2M Karras",
454
+ "aspect_ratio": "square"
455
+ },
456
+ headers=headers
457
+ )
458
+
459
+ if resp.status_code != 200:
460
+ raise Exception(f"Prodia API error: {resp.text}")
461
+
462
+ job_id = resp.json()['job']
463
+ self.log(f"Job created with ID: {job_id}")
464
+
465
+ # Wait for generation to complete
466
+ max_attempts = 30
467
+ attempts = 0
468
+ while attempts < max_attempts:
469
+ attempts += 1
470
+ time.sleep(2)
471
+ status = s.get(f"https://api.prodia.com/job/{job_id}", headers=headers).json()
472
 
473
+ if status["status"] == "succeeded":
474
+ self.log("Image generation successful, downloading result")
475
+ img_data = s.get(f"https://images.prodia.xyz/{job_id}.png?download=1", headers=headers).content
476
  with open(image_path, "wb") as f:
477
  f.write(img_data)
478
  self.images.append(image_path)
479
  self.log(success(f"Image saved to: {image_path}"))
480
  return image_path
481
 
482
+ elif status["status"] == "failed":
483
+ raise Exception(f"Prodia job failed: {status.get('error', 'Unknown error')}")
484
 
485
+ # Still processing
486
+ self.log(f"Still processing, attempt {attempts}/{max_attempts}...")
487
+
488
+ raise Exception("Prodia job timed out")
489
+
490
+ elif self.image_gen == "hercai":
491
+ self.log("Using Hercai provider for image generation")
492
+ url = f"https://hercai.onrender.com/{self.image_model}/text2image?prompt={prompt}"
493
+ r = requests.get(url)
494
+
495
+ if r.status_code != 200:
496
+ raise Exception(f"Hercai API error: {r.text}")
497
+
498
+ parsed = r.json()
499
+ if "url" in parsed and parsed["url"]:
500
+ self.log("Image URL received from Hercai")
501
+ image_url = parsed["url"]
502
+ img_data = requests.get(image_url).content
503
+ with open(image_path, "wb") as f:
504
+ f.write(img_data)
505
+ self.images.append(image_path)
506
+ self.log(success(f"Image saved to: {image_path}"))
507
+ return image_path
508
+ else:
509
+ raise Exception("No image URL in Hercai response")
510
+
511
+ elif self.image_gen == "g4f":
512
+ self.log("Using G4F provider for image generation")
513
+ from g4f.client import Client
514
+ client = Client()
515
+ response = client.images.generate(
516
+ model=self.image_model,
517
+ prompt=prompt,
518
+ response_format="url"
519
+ )
520
 
521
+ if response and response.data and len(response.data) > 0:
522
+ image_url = response.data[0].url
523
+ image_response = requests.get(image_url)
524
 
525
+ if image_response.status_code == 200:
 
526
  with open(image_path, "wb") as f:
527
+ f.write(image_response.content)
528
  self.images.append(image_path)
529
  self.log(success(f"Image saved to: {image_path}"))
530
  return image_path
531
  else:
532
+ raise Exception(f"Failed to download image from {image_url}")
 
533
  else:
534
+ raise Exception("No image URL received from G4F")
535
+
536
+ elif self.image_gen == "segmind":
537
+ self.log("Using Segmind provider for image generation")
538
+ api_key = os.environ.get("SEGMIND_API_KEY", "")
539
+ if not api_key:
540
+ raise ValueError("Segmind API key is not set. Please provide a valid API key.")
541
+
542
+ headers = {
543
+ "x-api-key": api_key,
544
+ "Content-Type": "application/json"
545
+ }
546
+
547
+ response = requests.post(
548
+ "https://api.segmind.com/v1/sdxl-turbo",
549
+ json={
550
+ "prompt": prompt,
551
+ "negative_prompt": "blurry, low quality, distorted face, text, watermark",
552
+ "samples": 1,
553
+ "size": "1024x1024",
554
+ "guidance_scale": 1.0
555
+ },
556
+ headers=headers
557
+ )
558
+
559
+ if response.status_code == 200:
560
+ with open(image_path, "wb") as f:
561
+ f.write(response.content)
562
  self.images.append(image_path)
563
+ self.log(success(f"Image saved to: {image_path}"))
564
  return image_path
565
+ else:
566
+ raise Exception(f"Segmind request failed: {response.status_code} {response.text}")
567
+
568
+ elif self.image_gen == "pollinations":
569
+ self.log("Using Pollinations provider for image generation")
570
+ response = requests.get(f"https://image.pollinations.ai/prompt/{prompt}{random.randint(1,10000)}")
571
+
572
+ if response.status_code == 200:
573
+ self.log("Image received from Pollinations")
574
+ with open(image_path, "wb") as f:
575
+ f.write(response.content)
576
  self.images.append(image_path)
577
+ self.log(success(f"Image saved to: {image_path}"))
578
  return image_path
579
+ else:
580
+ raise Exception(f"Pollinations request failed with status code: {response.status_code}")
581
+
582
+ else:
583
+ # No fallback, raise an exception for unsupported image generator
584
+ error_msg = f"Unsupported image generator: {self.image_gen}"
585
+ self.log(error(error_msg))
586
+ raise ValueError(error_msg)
587
 
588
  def generate_speech(self, text, output_format='mp3') -> str:
589
  """Generate speech from text using the selected TTS engine."""
 
595
 
596
  self.log(f"Using TTS Engine: {self.tts_engine}, Voice: {self.tts_voice}")
597
 
598
+ # Always save to the generation folder when available
599
+ if hasattr(self, 'generation_folder') and os.path.exists(self.generation_folder):
600
+ audio_path = os.path.join(self.generation_folder, f"speech_{uuid.uuid4()}_{int(time.time())}.{output_format}")
601
+ else:
602
+ # Use STORAGE_DIR if no generation folder
603
+ audio_path = os.path.join(STORAGE_DIR, f"speech_{uuid.uuid4()}_{int(time.time())}.{output_format}")
604
 
605
+ if self.tts_engine == "elevenlabs":
606
+ self.log("Using ElevenLabs provider for speech generation")
607
+ elevenlabs_api_key = os.environ.get("ELEVENLABS_API_KEY", "")
608
+ if not elevenlabs_api_key:
609
+ raise ValueError("ElevenLabs API key is not set. Please provide a valid API key.")
610
+
611
+ headers = {
612
+ "Accept": "audio/mpeg",
613
+ "Content-Type": "application/json",
614
+ "xi-api-key": elevenlabs_api_key
615
+ }
616
+
617
+ payload = {
618
+ "text": text,
619
+ "model_id": "eleven_turbo_v2", # Using latest and most capable model
620
+ "voice_settings": {
621
+ "stability": 0.5,
622
+ "similarity_boost": 0.5,
623
+ "style": 0.0,
624
+ "use_speaker_boost": True
625
+ },
626
+ "output_format": "mp3_44100_128", # Higher quality audio (44.1kHz, 128kbps)
627
+ "optimize_streaming_latency": 0 # Optimize for quality over latency
628
+ }
629
+
630
+ # Map voice names to ElevenLabs voice IDs
631
+ voice_id_mapping = {
632
+ "Sarah": "21m00Tcm4TlvDq8ikWAM",
633
+ "Brian": "hxppwzoRmvxK7YkDrjhQ",
634
+ "Lily": "p7TAj7L6QVq1fE6XGyjR",
635
+ "Monika Sogam": "Fc3XhIu9tfgOPOsU1hMr",
636
+ "George": "o7lPjDgzlF8ZAeSpqmaN",
637
+ "River": "f0k5evLkhJxrIRJXQJvy",
638
+ "Matilda": "XrExE9yKIg1WjnnlVkGX",
639
+ "Will": "pvKWM1B1sNRNTlEYYAEZ",
640
+ "Jessica": "A5EAMYWMCSsLNL1wYxOv",
641
+ "default": "21m00Tcm4TlvDq8ikWAM" # Default to Sarah
642
+ }
643
+
644
+ # Get the voice ID from mapping or use the voice name as ID if not found
645
+ voice_id = voice_id_mapping.get(self.tts_voice, self.tts_voice)
646
+
647
+ self.log(f"Using ElevenLabs voice: {self.tts_voice} (ID: {voice_id})")
648
 
649
+ response = requests.post(
650
+ url=f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}",
651
+ json=payload,
652
+ headers=headers
653
+ )
654
+
655
+ if response.status_code == 200:
656
+ with open(audio_path, 'wb') as f:
657
+ f.write(response.content)
658
+ self.log(success(f"Speech generated successfully using ElevenLabs at {audio_path}"))
659
  else:
660
+ try:
661
+ error_data = response.json()
662
+ error_message = error_data.get('detail', {}).get('message', response.text)
663
+ error_status = error_data.get('status', 'error')
664
+ raise Exception(f"ElevenLabs API error ({response.status_code}, {error_status}): {error_message}")
665
+ except ValueError:
666
+ # If JSON parsing fails, use the raw response
667
+ raise Exception(f"ElevenLabs API error ({response.status_code}): {response.text}")
668
+
669
+ elif self.tts_engine == "gtts":
670
+ self.log("Using Google TTS provider for speech generation")
671
+ from gtts import gTTS
672
+ tts = gTTS(text=text, lang=self.language[:2].lower(), slow=False)
673
+ tts.save(audio_path)
674
+
675
+ elif self.tts_engine == "openai":
676
+ self.log("Using OpenAI provider for speech generation")
677
+ openai_api_key = os.environ.get("OPENAI_API_KEY", "")
678
+ if not openai_api_key:
679
+ raise ValueError("OpenAI API key is not set. Please provide a valid API key.")
680
+
681
+ from openai import OpenAI
682
+ client = OpenAI(api_key=openai_api_key)
683
+
684
+ voice = self.tts_voice if self.tts_voice else "alloy"
685
+ response = client.audio.speech.create(
686
+ model="tts-1",
687
+ voice=voice,
688
+ input=text
689
+ )
690
+ response.stream_to_file(audio_path)
691
 
692
+ elif self.tts_engine == "edge":
693
+ self.log("Using Edge TTS provider for speech generation")
694
+ import edge_tts
695
+ import asyncio
696
 
697
+ voice = self.tts_voice if self.tts_voice else "en-US-AriaNeural"
698
 
699
+ async def generate():
700
+ communicate = edge_tts.Communicate(text, voice)
701
+ await communicate.save(audio_path)
702
+
703
+ asyncio.run(generate())
704
+
705
+ else:
706
+ # No fallback, raise an exception for unsupported TTS engine
707
+ error_msg = f"Unsupported TTS engine: {self.tts_engine}"
708
+ self.log(error(error_msg))
709
+ raise ValueError(error_msg)
710
+
711
+ self.log(success(f"Speech generated and saved to: {audio_path}"))
712
+ self.tts_path = audio_path
713
+ return audio_path
714
 
715
  def generate_subtitles(self, audio_path: str) -> dict:
716
  """Generate subtitles from audio using AssemblyAI."""
 
813
 
814
  self.log(success(f"Generated {len(subtitles)} subtitle lines"))
815
 
816
+ # Pre-wrap subtitle lines for more efficient rendering
817
+ self.log("Pre-calculating subtitle line wrapping...")
818
+ wrapped_subtitles = self._pre_wrap_subtitle_lines(subtitles, FRAME_SIZE, FONT, FONTSIZE)
819
+ self.log(success(f"Pre-wrapped {len(wrapped_subtitles)} subtitle lines"))
820
+
821
  # Return the subtitle data and settings
822
  return {
823
  "wordlevel": wordlevel_info,
824
  "linelevel": subtitles,
825
+ "wrappedlines": wrapped_subtitles,
826
  "settings": {
827
  "font": FONT,
828
  "fontsize": FONTSIZE,
829
  "color": COLOR,
830
  "bg_color": BG_COLOR,
831
  "position": self.subtitle_position,
832
+ "highlighting_enabled": self.highlighting_enabled,
833
+ "subtitles_enabled": self.subtitles_enabled
834
  }
835
  }
836
 
 
838
  error_msg = f"Error generating subtitles: {str(e)}"
839
  self.log(error(error_msg))
840
  raise Exception(error_msg)
841
+
842
+ def _pre_wrap_subtitle_lines(self, subtitles, frame_size, font_name, font_size):
843
+ """Pre-calculate line wrapping for subtitles based on video dimensions."""
844
+ self.log("Pre-calculating subtitle line wrapping")
845
+
846
+ # Load the font once
847
+ try:
848
+ font_path = os.path.join(FONTS_DIR, f"{font_name}.ttf")
849
+ if os.path.exists(font_path):
850
+ pil_font = ImageFont.truetype(font_path, font_size)
851
+ else:
852
+ self.log(warning(f"Font {font_name} not found, using default"))
853
+ pil_font = ImageFont.load_default()
854
+ except Exception as e:
855
+ self.log(warning(f"Error loading font: {str(e)}"))
856
+ pil_font = ImageFont.load_default()
857
+
858
+ # Calculate max width for text (80% of frame width)
859
+ max_width = frame_size[0] * 0.8
860
+ x_buffer = frame_size[0] * 0.1 # 10% buffer on each side
861
+ space_width = 20 # Approximate space width
862
+
863
+ wrapped_subtitles = []
864
+
865
+ for line in subtitles:
866
+ # Process the line into visual lines with exact positions
867
+ visual_lines = []
868
+ current_line = []
869
+ current_x = 0
870
+ line_number = 0
871
+
872
+ # Break points for natural text wrapping
873
+ break_points = {'.', ',', '!', '?', ';', ':', '-', '—'}
874
+
875
+ for word_data in line["words"]:
876
+ word = word_data["word"]
877
+ # Get word width including space
878
+ try:
879
+ word_width = pil_font.getbbox(word)[2] + space_width
880
+ except:
881
+ # Fallback if getbbox fails
882
+ word_width = len(word) * (font_size // 2) + space_width
883
+
884
+ # Check if word contains a break point
885
+ has_break = any(char in break_points for char in word)
886
+
887
+ # If this word would overflow or has a break point, start a new visual line
888
+ if (current_x + word_width > max_width and current_line) or (has_break and current_line and current_x > max_width * 0.7):
889
+ # Store this completed visual line
890
+ visual_line_text = " ".join(w["word"] for w in current_line)
891
+ visual_lines.append({
892
+ "line_number": line_number,
893
+ "text": visual_line_text,
894
+ "words": current_line.copy()
895
+ })
896
+ current_line = []
897
+ current_x = 0
898
+ line_number += 1
899
+
900
+ # Add word position information
901
+ positioned_word = word_data.copy()
902
+ positioned_word["x_offset"] = current_x
903
+ positioned_word["y_line"] = line_number
904
+ positioned_word["width"] = word_width
905
+
906
+ current_line.append(positioned_word)
907
+ current_x += word_width
908
+
909
+ # Add the last line if it exists
910
+ if current_line:
911
+ visual_line_text = " ".join(w["word"] for w in current_line)
912
+ visual_lines.append({
913
+ "line_number": line_number,
914
+ "text": visual_line_text,
915
+ "words": current_line
916
+ })
917
+
918
+ # Return the wrapped line with visual formatting
919
+ wrapped_subtitles.append({
920
+ "original_text": line["text"],
921
+ "start": line["start"],
922
+ "end": line["end"],
923
+ "visual_lines": visual_lines
924
+ })
925
+
926
+ return wrapped_subtitles
927
 
928
  def create_subtitle_clip(self, subtitle_data, frame_size):
929
  """Create subtitle clips for a line of text with word-level highlighting."""
930
+ # Early return if subtitles are disabled
931
+ if not self.subtitles_enabled:
932
+ return []
933
+
934
  settings = subtitle_data["settings"]
935
  font_name = settings["font"]
936
  fontsize = settings["fontsize"]
 
938
  bg_color = settings["bg_color"]
939
  highlighting_enabled = settings["highlighting_enabled"]
940
 
941
+ # Pre-calculate text and background colors once
942
+ if color.startswith('#'):
943
+ text_color_rgb = tuple(int(color.lstrip('#')[i:i+2], 16) for i in (0, 2, 4))
944
+ else:
945
+ text_color_rgb = (255, 255, 255) # Default white
946
+
947
+ if bg_color and bg_color.startswith('#'):
948
+ bg_color_rgb = tuple(int(bg_color.lstrip('#')[i:i+2], 16) for i in (0, 2, 4))
949
+ else:
950
+ bg_color_rgb = (0, 0, 255) # Default blue
951
 
952
+ # Load font only once
953
+ try:
954
+ font_path = os.path.join(FONTS_DIR, f"{font_name}.ttf")
955
+ if os.path.exists(font_path):
956
+ pil_font = ImageFont.truetype(font_path, fontsize)
957
+ else:
958
+ self.log(warning(f"Font {font_name} not found, using default"))
959
+ pil_font = ImageFont.load_default()
960
+ except Exception as e:
961
+ self.log(warning(f"Error loading font: {str(e)}"))
962
+ pil_font = ImageFont.load_default()
963
+
964
+ # Pre-calculate common values
965
+ padding = 10
966
  subtitle_clips = []
967
 
968
+ # Check if we have pre-wrapped lines (faster method)
969
+ if "wrappedlines" in subtitle_data and subtitle_data["wrappedlines"]:
970
+ self.log("Using pre-wrapped subtitle lines for faster rendering")
971
+ wrapped_subtitles = subtitle_data["wrappedlines"]
972
 
973
+ # Calculate vertical position offset based on subtitle position setting
974
  if settings["position"] == "top":
975
  y_buffer = frame_size[1] * 0.1 # 10% from top
976
  elif settings["position"] == "middle":
 
978
  else: # bottom
979
  y_buffer = frame_size[1] * 0.7 # 70% from top
980
 
981
+ # Create optimized text clip function that reuses font and color calculations
982
+ def create_text_clip(text, bg_color=None):
983
+ try:
984
+ # Get text size
985
+ text_width, text_height = pil_font.getbbox(text)[2:4]
986
+
987
+ # Add padding
988
+ img_width = text_width + padding * 2
989
+ img_height = text_height + padding * 2
990
+
991
+ # Create image with background color or transparent
992
+ if bg_color:
993
+ img = Image.new('RGB', (img_width, img_height), color=bg_color_rgb)
994
+ else:
995
+ img = Image.new('RGBA', (img_width, img_height), color=(0, 0, 0, 0))
996
+
997
+ # Draw text
998
+ draw = ImageDraw.Draw(img)
999
+ draw.text((padding, padding), text, font=pil_font, fill=text_color_rgb)
1000
+
1001
+ # Convert to numpy array for MoviePy
1002
+ img_array = np.array(img)
1003
+ clip = ImageClip(img_array)
1004
+ return clip, img_width, img_height
1005
+
1006
+ except Exception as e:
1007
+ self.log(warning(f"Error creating text clip: {str(e)}"))
1008
+ # Create a simple colored rectangle as fallback
1009
+ img = Image.new('RGB', (100, 50), color=(100, 100, 100))
1010
+ img_array = np.array(img)
1011
+ clip = ImageClip(img_array)
1012
+ return clip, 100, 50
1013
+
1014
+ # Process each pre-wrapped line
1015
+ for wrapped_line in wrapped_subtitles:
1016
+ line_start = wrapped_line["start"]
1017
+ line_end = wrapped_line["end"]
1018
+ line_duration = line_end - line_start
1019
+
1020
+ # Process each visual line separately
1021
+ for visual_line in wrapped_line["visual_lines"]:
1022
+ line_number = visual_line["line_number"]
1023
+ line_text = visual_line["text"]
1024
+
1025
+ # Calculate vertical position including line number offset
1026
+ line_y = y_buffer + (line_number * (fontsize + 20))
1027
+
1028
+ # Create the line clip
1029
+ line_clip, line_width, _ = create_text_clip(line_text)
1030
+ line_clip = line_clip.set_position(('center', line_y))
1031
+ line_clip = line_clip.set_start(line_start).set_duration(line_duration)
1032
+ subtitle_clips.append(line_clip)
1033
+
1034
+ # Add word highlights if enabled
1035
+ if highlighting_enabled and bg_color:
1036
+ # Calculate center offset for word positioning
1037
+ center_offset = (frame_size[0] - line_width) / 2
1038
+
1039
+ for word_data in visual_line["words"]:
1040
+ word = word_data["word"]
1041
+ word_start = word_data["start"]
1042
+ word_end = word_data["end"]
1043
+ x_offset = word_data["x_offset"]
1044
+
1045
+ # Create highlight clip
1046
+ highlight_clip, _, _ = create_text_clip(word, bg_color)
1047
+ highlight_clip = highlight_clip.set_position((center_offset + x_offset, line_y))
1048
+ highlight_clip = highlight_clip.set_start(word_start).set_duration(word_end - word_start)
1049
+ subtitle_clips.append(highlight_clip)
1050
+
1051
+ return subtitle_clips
1052
+
1053
+ # Fallback to old method if pre-wrapped lines aren't available
1054
+ else:
1055
+ self.log("Using standard subtitle rendering method")
1056
+
1057
+ # Legacy code for compatibility (should not normally be used)
1058
+ # (existing code from current create_subtitle_clip method)
1059
  space_width = 20
1060
 
1061
+ # Process each line
1062
+ for line in subtitle_data["linelevel"]:
1063
+ # Calculate vertical position once per line
1064
+ if settings["position"] == "top":
1065
+ y_buffer = frame_size[1] * 0.1 # 10% from top
1066
+ elif settings["position"] == "middle":
1067
+ y_buffer = frame_size[1] * 0.4 # 40% from top
1068
+ else: # bottom
1069
+ y_buffer = frame_size[1] * 0.7 # 70% from top
1070
+
1071
+ x_buffer = frame_size[0] * 0.1 # 10% from left
1072
+
1073
+ # Process line in batches where possible
1074
+ x_pos = 0
1075
+ y_pos = 0
1076
+ word_positions = []
1077
+ line_duration = line["end"] - line["start"]
1078
+
1079
+ # Pre-calculate word metrics to avoid redundant calculations
1080
+ word_metrics = []
1081
+ for word_data in line["words"]:
1082
+ word = word_data["word"]
1083
+ # Get word width including space
1084
+ try:
1085
+ word_width = pil_font.getbbox(word)[2] + space_width
1086
+ except:
1087
+ # Fallback if getbbox fails
1088
+ word_width = len(word) * (fontsize // 2) + space_width
1089
+
1090
+ word_metrics.append({
1091
+ "word": word,
1092
+ "width": word_width,
1093
+ "height": fontsize,
1094
+ "start": word_data["start"],
1095
+ "end": word_data["end"]
1096
+ })
1097
+
1098
+ # Create optimized text clip function
1099
+ def create_text_clip(text, bg_color=None):
1100
+ try:
1101
+ # Get text size
1102
+ text_width, text_height = pil_font.getbbox(text)[2:4]
1103
+
1104
+ # Add padding
1105
+ img_width = text_width + padding * 2
1106
+ img_height = text_height + padding * 2
1107
+
1108
+ # Create image with background color or transparent
1109
+ if bg_color:
1110
+ img = Image.new('RGB', (img_width, img_height), color=bg_color_rgb)
1111
+ else:
1112
+ img = Image.new('RGBA', (img_width, img_height), color=(0, 0, 0, 0))
1113
+
1114
+ # Draw text
1115
+ draw = ImageDraw.Draw(img)
1116
+ draw.text((padding, padding), text, font=pil_font, fill=text_color_rgb)
1117
+
1118
+ # Convert to numpy array for MoviePy
1119
+ img_array = np.array(img)
1120
+ clip = ImageClip(img_array)
1121
+ return clip, img_width, img_height
1122
+
1123
+ except Exception as e:
1124
+ self.log(warning(f"Error creating text clip: {str(e)}"))
1125
+ # Create a simple colored rectangle as fallback
1126
+ img = Image.new('RGB', (100, 50), color=(100, 100, 100))
1127
+ img_array = np.array(img)
1128
+ clip = ImageClip(img_array)
1129
+ return clip, 100, 50
1130
+
1131
+ # First, create and position all the regular words at once
1132
+ for i, metric in enumerate(word_metrics):
1133
+ word = metric["word"]
1134
+ word_width = metric["width"]
1135
+ word_height = metric["height"]
1136
+
1137
+ # Check if word fits on current line
1138
+ if x_pos + word_width > frame_size[0] - 2 * x_buffer:
1139
+ x_pos = 0
1140
+ y_pos += word_height + 20
1141
+
1142
+ # Store position info for highlighting
1143
+ word_positions.append({
1144
+ "word": word,
1145
+ "x_pos": x_pos + x_buffer,
1146
+ "y_pos": y_pos + y_buffer,
1147
+ "width": word_width,
1148
+ "height": word_height,
1149
+ "start": metric["start"],
1150
+ "end": metric["end"]
1151
+ })
1152
+
1153
+ # Create the word clip
1154
+ word_clip, _, _ = create_text_clip(word)
1155
+ word_clip = word_clip.set_position((x_pos + x_buffer, y_pos + y_buffer))
1156
+ word_clip = word_clip.set_start(line["start"]).set_duration(line_duration)
1157
+ subtitle_clips.append(word_clip)
1158
+
1159
+ # Add space after word (except for last word)
1160
+ if i < len(word_metrics) - 1:
1161
+ space_clip, _, _ = create_text_clip(" ")
1162
+ space_clip = space_clip.set_position((x_pos + word_width + x_buffer - space_width, y_pos + y_buffer))
1163
+ space_clip = space_clip.set_start(line["start"]).set_duration(line_duration)
1164
+ subtitle_clips.append(space_clip)
1165
+
1166
+ x_pos += word_width
1167
 
1168
+ # Only add highlighted words if highlighting is enabled
1169
+ if highlighting_enabled and bg_color:
1170
+ for word_pos in word_positions:
1171
+ highlight_clip, _, _ = create_text_clip(word_pos["word"], bg_color)
1172
+ highlight_clip = highlight_clip.set_position((word_pos["x_pos"], word_pos["y_pos"]))
1173
+ highlight_clip = highlight_clip.set_start(word_pos["start"]).set_duration(word_pos["end"] - word_pos["start"])
1174
+ subtitle_clips.append(highlight_clip)
1175
+
1176
+ return subtitle_clips
1177
 
1178
  def combine(self) -> str:
1179
  """Combine images, audio, and subtitles into a final video."""
1180
  self.progress(0.8, desc="Creating final video")
1181
  self.log("Combining images and audio into final video")
1182
  try:
1183
+ # Always save to the generation folder when available
1184
+ if hasattr(self, 'generation_folder') and os.path.exists(self.generation_folder):
1185
+ output_path = os.path.join(self.generation_folder, f"output_{int(time.time())}.mp4")
1186
+ else:
1187
+ output_path = os.path.join(STORAGE_DIR, f"output_{int(time.time())}.mp4")
1188
 
1189
  # Check for required files
1190
  if not self.images:
 
1201
  num_images = len(self.images)
1202
  req_dur = max_duration / num_images
1203
 
1204
+ # Create video clips from images more efficiently
1205
+ self.log("Processing images for video")
1206
  clips = []
1207
  tot_dur = 0
1208
 
1209
+ # Pre-compute standard size and aspect ratio
1210
+ target_size = (1080, 1920)
1211
+ aspect_ratio = 9/16
1212
+
1213
+ # Process all images at once
1214
+ for image_path in self.images:
1215
+ # Check if image exists and is valid
1216
+ if not os.path.exists(image_path):
1217
+ self.log(warning(f"Image not found: {image_path}, skipping"))
1218
+ continue
1219
+
1220
+ # Calculate remaining duration
1221
+ duration = min(req_dur, max_duration - tot_dur)
1222
+ if duration <= 0:
1223
+ break
1224
 
1225
+ try:
1226
+ clip = ImageClip(image_path)
1227
+ clip = clip.set_duration(duration)
1228
+ clip = clip.set_fps(30)
1229
+
1230
+ # Handle aspect ratio (vertical video for shorts)
1231
+ if clip.w / clip.h < aspect_ratio:
1232
+ # Image is too tall, crop height
1233
+ clip = crop(
1234
+ clip,
1235
+ width=clip.w,
1236
+ height=round(clip.w / aspect_ratio),
1237
+ x_center=clip.w / 2,
1238
+ y_center=clip.h / 2
1239
+ )
1240
+ else:
1241
+ # Image is too wide, crop width
1242
+ clip = crop(
1243
+ clip,
1244
+ width=round(aspect_ratio * clip.h),
1245
+ height=clip.h,
1246
+ x_center=clip.w / 2,
1247
+ y_center=clip.h / 2
1248
+ )
1249
+
1250
+ # Resize to standard size for shorts
1251
+ clip = clip.resize(target_size)
1252
+ clips.append(clip)
1253
+ tot_dur += duration
1254
+
1255
+ # If we've exceeded the duration, break
1256
+ if tot_dur >= max_duration:
1257
+ break
1258
+ except Exception as e:
1259
+ self.log(warning(f"Error processing image {image_path}: {str(e)}"))
 
1260
 
1261
  # Create video from clips
1262
  self.log(f"Creating video from {len(clips)} clips")
1263
  final_clip = concatenate_videoclips(clips)
1264
  final_clip = final_clip.set_fps(30)
1265
 
1266
+ # Add subtitles if enabled - skip entirely if disabled
1267
+ subtitle_clips = []
1268
  if self.subtitles_enabled and hasattr(self, 'subtitle_data'):
1269
+ self.log("Generating subtitle clips")
1270
+ subtitle_clips = self.create_subtitle_clip(self.subtitle_data, target_size)
1271
+ if subtitle_clips:
1272
+ final_clip = CompositeVideoClip([final_clip] + subtitle_clips)
1273
 
1274
  # Add background music if available
1275
  music_path = None
 
1301
  # Set final audio
1302
  final_clip = final_clip.set_audio(final_audio)
1303
 
1304
+ # Write final video - use faster preset
1305
  self.log("Writing final video file")
1306
  final_clip.write_videofile(
1307
  output_path,
 
1309
  codec="libx264",
1310
  audio_codec="aac",
1311
  threads=4,
1312
+ preset="ultrafast" # Changed from "medium" to "ultrafast" for faster rendering
1313
  )
1314
 
1315
  self.log(success(f"Video saved to: {output_path}"))
 
1318
  except Exception as e:
1319
  error_msg = f"Error combining video: {str(e)}"
1320
  self.log(error(error_msg))
1321
+ raise Exception(error_msg)
1322
 
1323
  def generate_video(self) -> dict:
1324
  """Generate complete video with all components."""
1325
  try:
1326
  self.log("Starting video generation process")
1327
 
1328
+ # Create a unique folder with sequential numbering
1329
+ folder_num = 1
1330
+ # Check existing folders to find the latest number
1331
+ if os.path.exists(STORAGE_DIR):
1332
+ existing_folders = [d for d in os.listdir(STORAGE_DIR) if os.path.isdir(os.path.join(STORAGE_DIR, d))]
1333
+ numbered_folders = []
1334
+ for folder in existing_folders:
1335
+ try:
1336
+ # Extract folder number from format "N_UUID"
1337
+ if "_" in folder:
1338
+ num = int(folder.split("_")[0])
1339
+ numbered_folders.append(num)
1340
+ except (ValueError, IndexError):
1341
+ continue
1342
+
1343
+ if numbered_folders:
1344
+ folder_num = max(numbered_folders) + 1
1345
+
1346
+ folder_id = f"{folder_num}_{str(uuid.uuid4())}"
1347
+ self.generation_folder = os.path.join(STORAGE_DIR, folder_id)
1348
  os.makedirs(self.generation_folder, exist_ok=True)
1349
  self.log(f"Created generation folder: {self.generation_folder}")
1350
 
 
1385
  self.progress(0.7, desc="Generating subtitles")
1386
  if self.subtitles_enabled and hasattr(self, 'tts_path') and os.path.exists(self.tts_path):
1387
  self.subtitle_data = self.generate_subtitles(self.tts_path)
1388
+ # Save subtitles to generation folder
1389
+ if self.subtitle_data:
1390
+ try:
1391
+ # Save word-level subtitles
1392
+ if 'wordlevel' in self.subtitle_data:
1393
+ word_subtitles_path = os.path.join(self.generation_folder, "word_subtitles.json")
1394
+ with open(word_subtitles_path, 'w') as f:
1395
+ json.dump(self.subtitle_data['wordlevel'], f, indent=2)
1396
+ self.log(f"Saved word-level subtitles to: {word_subtitles_path}")
1397
+
1398
+ # Save line-level subtitles
1399
+ if 'linelevel' in self.subtitle_data:
1400
+ line_subtitles_path = os.path.join(self.generation_folder, "line_subtitles.json")
1401
+ with open(line_subtitles_path, 'w') as f:
1402
+ json.dump(self.subtitle_data['linelevel'], f, indent=2)
1403
+ self.log(f"Saved line-level subtitles to: {line_subtitles_path}")
1404
+ except Exception as e:
1405
+ self.log(warning(f"Error saving subtitles to generation folder: {str(e)}"))
1406
+
1407
+ # Step 8: Save content.txt with all metadata and generation info
1408
+ self.progress(0.75, desc="Saving generation data")
1409
+ try:
1410
+ content_path = os.path.join(self.generation_folder, "content.txt")
1411
+ with open(content_path, 'w', encoding='utf-8') as f:
1412
+ f.write(f"NICHE: {self.niche}\n\n")
1413
+ f.write(f"LANGUAGE: {self.language}\n\n")
1414
+ f.write(f"GENERATED TOPIC: {self.subject}\n\n")
1415
+ f.write(f"GENERATED SCRIPT:\n{self.script}\n\n")
1416
+ f.write(f"GENERATED PROMPTS:\n")
1417
+ for i, prompt in enumerate(self.image_prompts, 1):
1418
+ f.write(f"{i}. {prompt}\n")
1419
+ f.write("\n")
1420
+ f.write(f"GENERATED METADATA:\n")
1421
+ for key, value in self.metadata.items():
1422
+ f.write(f"{key}: {value}\n")
1423
+ self.log(f"Saved content.txt to: {content_path}")
1424
+ except Exception as e:
1425
+ self.log(warning(f"Error saving content.txt: {str(e)}"))
1426
 
1427
+ # Step 9: Combine all elements into final video
1428
  self.progress(0.8, desc="Creating final video")
1429
  self.log("Combining all elements into final video")
1430
  path = self.combine()
 
1446
  except Exception as e:
1447
  error_msg = f"Error during video generation: {str(e)}"
1448
  self.log(error(error_msg))
1449
+ raise Exception(error_msg)
 
1450
 
1451
  # Data for dynamic dropdowns
1452
  def get_text_generator_models(generator):
 
1588
  text_gen = gr.Dropdown(
1589
  choices=["g4f", "gemini", "openai"],
1590
  label="Text Generator",
1591
+ value="gemini"
1592
  )
1593
  text_model = gr.Dropdown(
1594
  choices=get_text_generator_models("g4f"),
1595
  label="Text Model",
1596
+ value="gemini-2.0-flash"
1597
  )
1598
 
1599
  with gr.TabItem("Image"):
 
1832
  os.makedirs(STATIC_DIR, exist_ok=True)
1833
  os.makedirs(MUSIC_DIR, exist_ok=True)
1834
  os.makedirs(FONTS_DIR, exist_ok=True)
1835
+ os.makedirs(STORAGE_DIR, exist_ok=True)
1836
 
1837
  # Launch the app
1838
  demo = create_interface()