testdeep123 committed on
Commit
369793b
·
verified ·
1 Parent(s): 06b8655

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -138
app.py CHANGED
@@ -1,12 +1,7 @@
1
-
2
-
3
  # Import necessary libraries
4
  from kokoro import KPipeline
5
-
6
  import soundfile as sf
7
  import torch
8
-
9
- import soundfile as sf
10
  import os
11
  from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
12
  from PIL import Image
@@ -14,67 +9,46 @@ import tempfile
14
  import random
15
  import cv2
16
  import math
17
- import os, requests, io, time, re, random
 
 
18
  from moviepy.editor import (
19
  VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
20
  CompositeVideoClip, TextClip, CompositeAudioClip
21
  )
22
  import gradio as gr
23
  import shutil
24
- import os
25
- import moviepy.video.fx.all as vfx
26
  import moviepy.config as mpy_config
27
  from pydub import AudioSegment
28
- from pydub.generators import Sine
29
-
30
- from PIL import Image, ImageDraw, ImageFont
31
- import numpy as np
32
  from bs4 import BeautifulSoup
33
- import base64
34
  from urllib.parse import quote
35
- import pysrt
36
  from gtts import gTTS
37
- import gradio as gr # Import Gradio
38
 
39
  # Initialize Kokoro TTS pipeline (using American English)
40
- pipeline = KPipeline(lang_code='a') # Use voice 'af_heart' for American English
41
  # Ensure ImageMagick binary is set
42
  mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
43
 
44
  # ---------------- Global Configuration ---------------- #
45
  PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
46
  OPENROUTER_API_KEY = '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'
47
- OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
48
  OUTPUT_VIDEO_FILENAME = "final_video.mp4"
49
  USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
50
 
51
-
52
-
53
- # Additional global variables needed for the Gradio interface
54
- selected_voice = 'af_heart' # Default voice
55
- voice_speed = 0.9 # Default voice speed
56
- font_size = 45 # Default font size
57
- video_clip_probability = 0.25 # Default probability for video clips
58
- bg_music_volume = 0.08 # Default background music volume
59
- fps = 30 # Default FPS
60
- preset = "veryfast" # Default preset
61
  TARGET_RESOLUTION = None
62
  CAPTION_COLOR = None
63
  TEMP_FOLDER = None
64
 
65
-
66
  # ---------------- Helper Functions ---------------- #
67
- # (Your existing helper functions remain unchanged: generate_script, parse_script,
68
- # search_pexels_videos, search_pexels_images, search_google_images, download_image,
69
- # download_video, generate_media, generate_tts, apply_kenburns_effect,
70
- # resize_to_fill, find_mp3_files, add_background_music, create_clip,
71
- # fix_imagemagick_policy)
72
-
73
- # Define these globally as they were in your original code but will be set per run
74
- TARGET_RESOLUTION = None
75
- CAPTION_COLOR = None
76
- TEMP_FOLDER = None
77
-
78
  def generate_script(user_input):
79
  """Generate documentary script with proper OpenRouter handling."""
80
  headers = {
@@ -91,51 +65,32 @@ If I provide a full script, rewrite it without any changes. Make everything shor
91
  And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
92
  Formatting Rules:
93
 
94
-
95
  Title in Square Brackets:
96
 
97
-
98
  Each section starts with a one-word title inside [ ] (max two words if necessary).
99
  This title will be used as a search term for Pexels footage.
100
 
101
-
102
-
103
  Casual & Funny Narration:
104
 
105
-
106
  Each section has 5-10 words of narration.
107
  Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
108
 
109
-
110
-
111
  No Special Formatting:
112
 
113
-
114
  No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
115
 
116
-
117
-
118
  Generalized Search Terms:
119
 
120
-
121
  If a term is too specific, make it more general for Pexels search.
122
 
123
-
124
-
125
  Scene-Specific Writing:
126
 
127
-
128
  Each section describes only what should be shown in the video.
129
 
130
-
131
-
132
  Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
133
 
134
-
135
  No extra text, just the script.
136
 
137
-
138
-
139
  Example Output:
140
  [North Korea]
141
 
@@ -143,23 +98,23 @@ Top 5 unknown facts about North Korea.
143
 
144
  [Invisibility]
145
 
146
- North Koreas internet speed is so fast it doesnt exist.
147
 
148
  [Leadership]
149
 
150
- Kim Jong-un once won an election with 100% votes against himself.
151
 
152
  [Magic]
153
 
154
- North Korea discovered time travel. Thats why their news is always from the past.
155
 
156
  [Warning]
157
 
158
- Subscribe now, or Kim Jong-un will send you a free one-way ticket to North Korea.
159
 
160
  [Freedom]
161
 
162
- North Korean citizens can do anything�� as long as it's government-approved.
163
  Now here is the Topic/scrip: {user_input}
164
  """
165
 
@@ -194,12 +149,7 @@ Now here is the Topic/scrip: {user_input}
194
  return None
195
 
196
  def parse_script(script_text):
197
- """
198
- Parse the generated script into a list of elements.
199
- For each section, create two elements:
200
- - A 'media' element using the section title as the visual prompt.
201
- - A 'tts' element with the narration text, voice info, and computed duration.
202
- """
203
  sections = {}
204
  current_title = None
205
  current_text = ""
@@ -696,40 +646,20 @@ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, n
696
 
697
  if narration_text and CAPTION_COLOR != "transparent":
698
  try:
699
- words = narration_text.split()
700
- chunks = []
701
- current_chunk = []
702
- for word in words:
703
- current_chunk.append(word)
704
- if len(current_chunk) >= 5:
705
- chunks.append(' '.join(current_chunk))
706
- current_chunk = []
707
- if current_chunk:
708
- chunks.append(' '.join(current_chunk))
709
-
710
- chunk_duration = audio_duration / len(chunks)
711
- subtitle_clips = []
712
- subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.70)
713
-
714
- for i, chunk_text in enumerate(chunks):
715
- start_time = i * chunk_duration
716
- end_time = (i + 1) * chunk_duration
717
- txt_clip = TextClip(
718
- chunk_text,
719
- fontsize=45,
720
- font='Arial-Bold',
721
- color=CAPTION_COLOR,
722
- bg_color='rgba(0, 0, 0, 0.25)',
723
- method='caption',
724
- align='center',
725
- stroke_width=2,
726
- stroke_color=CAPTION_COLOR,
727
- size=(TARGET_RESOLUTION[0] * 0.8, None)
728
- ).set_start(start_time).set_end(end_time)
729
- txt_clip = txt_clip.set_position(('center', subtitle_y_position))
730
- subtitle_clips.append(txt_clip)
731
-
732
- clip = CompositeVideoClip([clip] + subtitle_clips)
733
  except Exception as sub_error:
734
  print(f"Subtitle error: {sub_error}")
735
  txt_clip = TextClip(
@@ -737,8 +667,8 @@ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, n
737
  fontsize=font_size,
738
  color=CAPTION_COLOR,
739
  align='center',
740
- size=(TARGET_RESOLUTION[0] * 0.7, None)
741
- ).set_position(('center', int(TARGET_RESOLUTION[1] / 3))).set_duration(clip.duration)
742
  clip = CompositeVideoClip([clip, txt_clip])
743
 
744
  clip = clip.set_audio(audio_clip)
@@ -773,32 +703,6 @@ def fix_imagemagick_policy():
773
  print(f"Error fixing policies: {e}")
774
  return False
775
 
776
-
777
-
778
-
779
-
780
-
781
-
782
-
783
-
784
-
785
-
786
-
787
-
788
-
789
-
790
-
791
-
792
-
793
-
794
-
795
-
796
-
797
-
798
-
799
-
800
-
801
-
802
  # ---------------- Main Video Generation Function ---------------- #
803
  def generate_video(user_input, resolution, caption_option):
804
  """Generate a video based on user input via Gradio."""
@@ -931,7 +835,7 @@ def generate_video_with_options(user_input, resolution, caption_option, music_fi
931
  selected_voice = VOICE_CHOICES[voice]
932
  voice_speed = v_speed
933
  font_size = caption_size
934
- video_clip_probability = vclip_prob / 100 # Convert from percentage to decimal
935
  bg_music_volume = bg_vol
936
  fps = video_fps
937
  preset = video_preset
@@ -942,8 +846,15 @@ def generate_video_with_options(user_input, resolution, caption_option, music_fi
942
  shutil.copy(music_file.name, target_path)
943
  print(f"Uploaded music saved as: {target_path}")
944
 
945
- # Generate the video
946
- return generate_video(user_input, resolution, caption_option)
 
 
 
 
 
 
 
947
 
948
  # Create the Gradio interface
949
  iface = gr.Interface(
@@ -957,14 +868,21 @@ iface = gr.Interface(
957
  gr.Slider(0, 100, value=25, step=1, label="Video Clip Usage Probability (%)"),
958
  gr.Slider(0.0, 1.0, value=0.08, step=0.01, label="Background Music Volume"),
959
  gr.Slider(10, 60, value=30, step=1, label="Video FPS"),
960
- gr.Dropdown(choices=["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow"],
961
- value="veryfast", label="Export Preset"),
 
 
 
962
  gr.Slider(0.5, 1.5, value=0.9, step=0.05, label="Voice Speed"),
963
  gr.Slider(20, 100, value=45, step=1, label="Caption Font Size")
964
  ],
965
- outputs=gr.Video(label="Generated Video"),
 
 
 
966
  title="AI Documentary Video Generator",
967
- description="Create short documentary videos with AI. Upload music, choose voice, and customize settings."
 
968
  )
969
 
970
  # Launch the interface
 
 
 
1
  # Import necessary libraries
2
  from kokoro import KPipeline
 
3
  import soundfile as sf
4
  import torch
 
 
5
  import os
6
  from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
7
  from PIL import Image
 
9
  import random
10
  import cv2
11
  import math
12
+ import requests
13
+ import time
14
+ import re
15
  from moviepy.editor import (
16
  VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
17
  CompositeVideoClip, TextClip, CompositeAudioClip
18
  )
19
  import gradio as gr
20
  import shutil
 
 
21
  import moviepy.config as mpy_config
22
  from pydub import AudioSegment
 
 
 
 
23
  from bs4 import BeautifulSoup
 
24
  from urllib.parse import quote
 
25
  from gtts import gTTS
 
26
 
27
  # Initialize Kokoro TTS pipeline (using American English)
28
+ pipeline = KPipeline(lang_code='a')
29
  # Ensure ImageMagick binary is set
30
  mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
31
 
32
  # ---------------- Global Configuration ---------------- #
33
  PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
34
  OPENROUTER_API_KEY = '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'
35
+ OPENROUTER_MODEL = "google/gemini-2.0-flash-exp:free"
36
  OUTPUT_VIDEO_FILENAME = "final_video.mp4"
37
  USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
38
 
39
+ # Additional global variables
40
+ selected_voice = 'af_heart'
41
+ voice_speed = 0.9
42
+ font_size = 45
43
+ video_clip_probability = 0.25
44
+ bg_music_volume = 0.08
45
+ fps = 30
46
+ preset = "veryfast"
 
 
47
  TARGET_RESOLUTION = None
48
  CAPTION_COLOR = None
49
  TEMP_FOLDER = None
50
 
 
51
  # ---------------- Helper Functions ---------------- #
 
 
 
 
 
 
 
 
 
 
 
52
  def generate_script(user_input):
53
  """Generate documentary script with proper OpenRouter handling."""
54
  headers = {
 
65
  And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
66
  Formatting Rules:
67
 
 
68
  Title in Square Brackets:
69
 
 
70
  Each section starts with a one-word title inside [ ] (max two words if necessary).
71
  This title will be used as a search term for Pexels footage.
72
 
 
 
73
  Casual & Funny Narration:
74
 
 
75
  Each section has 5-10 words of narration.
76
  Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
77
 
 
 
78
  No Special Formatting:
79
 
 
80
  No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
81
 
 
 
82
  Generalized Search Terms:
83
 
 
84
  If a term is too specific, make it more general for Pexels search.
85
 
 
 
86
  Scene-Specific Writing:
87
 
 
88
  Each section describes only what should be shown in the video.
89
 
 
 
90
  Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
91
 
 
92
  No extra text, just the script.
93
 
 
 
94
  Example Output:
95
  [North Korea]
96
 
 
98
 
99
  [Invisibility]
100
 
101
+ North Korea's internet speed is so fast... it doesn't exist.
102
 
103
  [Leadership]
104
 
105
+ Kim Jong-un once won an election with 100% votes... against himself.
106
 
107
  [Magic]
108
 
109
+ North Korea discovered time travel. That's why their news is always from the past.
110
 
111
  [Warning]
112
 
113
+ Subscribe now, or Kim Jong-un will send you a free one-way ticket... to North Korea.
114
 
115
  [Freedom]
116
 
117
+ North Korean citizens can do anything... as long as it's government-approved.
118
  Now here is the Topic/scrip: {user_input}
119
  """
120
 
 
149
  return None
150
 
151
  def parse_script(script_text):
152
+ """Parse the generated script into a list of elements."""
 
 
 
 
 
153
  sections = {}
154
  current_title = None
155
  current_text = ""
 
646
 
647
  if narration_text and CAPTION_COLOR != "transparent":
648
  try:
649
+ txt_clip = TextClip(
650
+ narration_text,
651
+ fontsize=font_size,
652
+ color=CAPTION_COLOR,
653
+ font='Arial-Bold',
654
+ bg_color='rgba(0,0,0,0.5)',
655
+ method='caption',
656
+ align='center',
657
+ size=(TARGET_RESOLUTION[0]*0.9, None),
658
+ stroke_color='black',
659
+ stroke_width=1
660
+ ).set_duration(clip.duration)
661
+ txt_clip = txt_clip.set_position(('center', TARGET_RESOLUTION[1]*0.8))
662
+ clip = CompositeVideoClip([clip, txt_clip])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
663
  except Exception as sub_error:
664
  print(f"Subtitle error: {sub_error}")
665
  txt_clip = TextClip(
 
667
  fontsize=font_size,
668
  color=CAPTION_COLOR,
669
  align='center',
670
+ size=(TARGET_RESOLUTION[0]*0.8, None)
671
+ ).set_position(('center', TARGET_RESOLUTION[1]*0.8)).set_duration(clip.duration)
672
  clip = CompositeVideoClip([clip, txt_clip])
673
 
674
  clip = clip.set_audio(audio_clip)
 
703
  print(f"Error fixing policies: {e}")
704
  return False
705
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
706
  # ---------------- Main Video Generation Function ---------------- #
707
  def generate_video(user_input, resolution, caption_option):
708
  """Generate a video based on user input via Gradio."""
 
835
  selected_voice = VOICE_CHOICES[voice]
836
  voice_speed = v_speed
837
  font_size = caption_size
838
+ video_clip_probability = vclip_prob / 100
839
  bg_music_volume = bg_vol
840
  fps = video_fps
841
  preset = video_preset
 
846
  shutil.copy(music_file.name, target_path)
847
  print(f"Uploaded music saved as: {target_path}")
848
 
849
+ # First generate and return the script
850
+ print("Generating script from API...")
851
+ script = generate_script(user_input)
852
+ if not script:
853
+ return None, "Failed to generate script. Please try again."
854
+
855
+ # Then generate the video
856
+ video_path = generate_video(user_input, resolution, caption_option)
857
+ return video_path, script
858
 
859
  # Create the Gradio interface
860
  iface = gr.Interface(
 
868
  gr.Slider(0, 100, value=25, step=1, label="Video Clip Usage Probability (%)"),
869
  gr.Slider(0.0, 1.0, value=0.08, step=0.01, label="Background Music Volume"),
870
  gr.Slider(10, 60, value=30, step=1, label="Video FPS"),
871
+ gr.Dropdown(
872
+ choices=["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow"],
873
+ value="veryfast",
874
+ label="Export Preset"
875
+ ),
876
  gr.Slider(0.5, 1.5, value=0.9, step=0.05, label="Voice Speed"),
877
  gr.Slider(20, 100, value=45, step=1, label="Caption Font Size")
878
  ],
879
+ outputs=[
880
+ gr.Video(label="Generated Video"),
881
+ gr.Textbox(label="Generated Script", lines=10, interactive=False)
882
+ ],
883
  title="AI Documentary Video Generator",
884
+ description="Create short documentary videos with AI. Upload music, choose voice, and customize settings.",
885
+ allow_flagging="never"
886
  )
887
 
888
  # Launch the interface