Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,12 +1,7 @@
|
|
1 |
-
|
2 |
-
|
3 |
# Import necessary libraries
|
4 |
from kokoro import KPipeline
|
5 |
-
|
6 |
import soundfile as sf
|
7 |
import torch
|
8 |
-
|
9 |
-
import soundfile as sf
|
10 |
import os
|
11 |
from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
|
12 |
from PIL import Image
|
@@ -14,67 +9,46 @@ import tempfile
|
|
14 |
import random
|
15 |
import cv2
|
16 |
import math
|
17 |
-
import
|
|
|
|
|
18 |
from moviepy.editor import (
|
19 |
VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
|
20 |
CompositeVideoClip, TextClip, CompositeAudioClip
|
21 |
)
|
22 |
import gradio as gr
|
23 |
import shutil
|
24 |
-
import os
|
25 |
-
import moviepy.video.fx.all as vfx
|
26 |
import moviepy.config as mpy_config
|
27 |
from pydub import AudioSegment
|
28 |
-
from pydub.generators import Sine
|
29 |
-
|
30 |
-
from PIL import Image, ImageDraw, ImageFont
|
31 |
-
import numpy as np
|
32 |
from bs4 import BeautifulSoup
|
33 |
-
import base64
|
34 |
from urllib.parse import quote
|
35 |
-
import pysrt
|
36 |
from gtts import gTTS
|
37 |
-
import gradio as gr # Import Gradio
|
38 |
|
39 |
# Initialize Kokoro TTS pipeline (using American English)
|
40 |
-
pipeline = KPipeline(lang_code='a')
|
41 |
# Ensure ImageMagick binary is set
|
42 |
mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
|
43 |
|
44 |
# ---------------- Global Configuration ---------------- #
|
45 |
PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
|
46 |
OPENROUTER_API_KEY = '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'
|
47 |
-
OPENROUTER_MODEL = "
|
48 |
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
|
49 |
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
50 |
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
fps = 30 # Default FPS
|
60 |
-
preset = "veryfast" # Default preset
|
61 |
TARGET_RESOLUTION = None
|
62 |
CAPTION_COLOR = None
|
63 |
TEMP_FOLDER = None
|
64 |
|
65 |
-
|
66 |
# ---------------- Helper Functions ---------------- #
|
67 |
-
# (Your existing helper functions remain unchanged: generate_script, parse_script,
|
68 |
-
# search_pexels_videos, search_pexels_images, search_google_images, download_image,
|
69 |
-
# download_video, generate_media, generate_tts, apply_kenburns_effect,
|
70 |
-
# resize_to_fill, find_mp3_files, add_background_music, create_clip,
|
71 |
-
# fix_imagemagick_policy)
|
72 |
-
|
73 |
-
# Define these globally as they were in your original code but will be set per run
|
74 |
-
TARGET_RESOLUTION = None
|
75 |
-
CAPTION_COLOR = None
|
76 |
-
TEMP_FOLDER = None
|
77 |
-
|
78 |
def generate_script(user_input):
|
79 |
"""Generate documentary script with proper OpenRouter handling."""
|
80 |
headers = {
|
@@ -91,51 +65,32 @@ If I provide a full script, rewrite it without any changes. Make everything shor
|
|
91 |
And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
|
92 |
Formatting Rules:
|
93 |
|
94 |
-
|
95 |
Title in Square Brackets:
|
96 |
|
97 |
-
|
98 |
Each section starts with a one-word title inside [ ] (max two words if necessary).
|
99 |
This title will be used as a search term for Pexels footage.
|
100 |
|
101 |
-
|
102 |
-
|
103 |
Casual & Funny Narration:
|
104 |
|
105 |
-
|
106 |
Each section has 5-10 words of narration.
|
107 |
Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
|
108 |
|
109 |
-
|
110 |
-
|
111 |
No Special Formatting:
|
112 |
|
113 |
-
|
114 |
No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
|
115 |
|
116 |
-
|
117 |
-
|
118 |
Generalized Search Terms:
|
119 |
|
120 |
-
|
121 |
If a term is too specific, make it more general for Pexels search.
|
122 |
|
123 |
-
|
124 |
-
|
125 |
Scene-Specific Writing:
|
126 |
|
127 |
-
|
128 |
Each section describes only what should be shown in the video.
|
129 |
|
130 |
-
|
131 |
-
|
132 |
Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
|
133 |
|
134 |
-
|
135 |
No extra text, just the script.
|
136 |
|
137 |
-
|
138 |
-
|
139 |
Example Output:
|
140 |
[North Korea]
|
141 |
|
@@ -143,23 +98,23 @@ Top 5 unknown facts about North Korea.
|
|
143 |
|
144 |
[Invisibility]
|
145 |
|
146 |
-
North Korea
|
147 |
|
148 |
[Leadership]
|
149 |
|
150 |
-
Kim Jong-un once won an election with 100% votes
|
151 |
|
152 |
[Magic]
|
153 |
|
154 |
-
North Korea discovered time travel. That
|
155 |
|
156 |
[Warning]
|
157 |
|
158 |
-
Subscribe now, or Kim Jong-un will send you a free one-way ticket
|
159 |
|
160 |
[Freedom]
|
161 |
|
162 |
-
North Korean citizens can do anything
|
163 |
Now here is the Topic/scrip: {user_input}
|
164 |
"""
|
165 |
|
@@ -194,12 +149,7 @@ Now here is the Topic/scrip: {user_input}
|
|
194 |
return None
|
195 |
|
196 |
def parse_script(script_text):
|
197 |
-
"""
|
198 |
-
Parse the generated script into a list of elements.
|
199 |
-
For each section, create two elements:
|
200 |
-
- A 'media' element using the section title as the visual prompt.
|
201 |
-
- A 'tts' element with the narration text, voice info, and computed duration.
|
202 |
-
"""
|
203 |
sections = {}
|
204 |
current_title = None
|
205 |
current_text = ""
|
@@ -696,40 +646,20 @@ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, n
|
|
696 |
|
697 |
if narration_text and CAPTION_COLOR != "transparent":
|
698 |
try:
|
699 |
-
|
700 |
-
|
701 |
-
|
702 |
-
|
703 |
-
|
704 |
-
|
705 |
-
|
706 |
-
|
707 |
-
|
708 |
-
|
709 |
-
|
710 |
-
|
711 |
-
|
712 |
-
|
713 |
-
|
714 |
-
for i, chunk_text in enumerate(chunks):
|
715 |
-
start_time = i * chunk_duration
|
716 |
-
end_time = (i + 1) * chunk_duration
|
717 |
-
txt_clip = TextClip(
|
718 |
-
chunk_text,
|
719 |
-
fontsize=45,
|
720 |
-
font='Arial-Bold',
|
721 |
-
color=CAPTION_COLOR,
|
722 |
-
bg_color='rgba(0, 0, 0, 0.25)',
|
723 |
-
method='caption',
|
724 |
-
align='center',
|
725 |
-
stroke_width=2,
|
726 |
-
stroke_color=CAPTION_COLOR,
|
727 |
-
size=(TARGET_RESOLUTION[0] * 0.8, None)
|
728 |
-
).set_start(start_time).set_end(end_time)
|
729 |
-
txt_clip = txt_clip.set_position(('center', subtitle_y_position))
|
730 |
-
subtitle_clips.append(txt_clip)
|
731 |
-
|
732 |
-
clip = CompositeVideoClip([clip] + subtitle_clips)
|
733 |
except Exception as sub_error:
|
734 |
print(f"Subtitle error: {sub_error}")
|
735 |
txt_clip = TextClip(
|
@@ -737,8 +667,8 @@ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, n
|
|
737 |
fontsize=font_size,
|
738 |
color=CAPTION_COLOR,
|
739 |
align='center',
|
740 |
-
size=(TARGET_RESOLUTION[0]
|
741 |
-
).set_position(('center',
|
742 |
clip = CompositeVideoClip([clip, txt_clip])
|
743 |
|
744 |
clip = clip.set_audio(audio_clip)
|
@@ -773,32 +703,6 @@ def fix_imagemagick_policy():
|
|
773 |
print(f"Error fixing policies: {e}")
|
774 |
return False
|
775 |
|
776 |
-
|
777 |
-
|
778 |
-
|
779 |
-
|
780 |
-
|
781 |
-
|
782 |
-
|
783 |
-
|
784 |
-
|
785 |
-
|
786 |
-
|
787 |
-
|
788 |
-
|
789 |
-
|
790 |
-
|
791 |
-
|
792 |
-
|
793 |
-
|
794 |
-
|
795 |
-
|
796 |
-
|
797 |
-
|
798 |
-
|
799 |
-
|
800 |
-
|
801 |
-
|
802 |
# ---------------- Main Video Generation Function ---------------- #
|
803 |
def generate_video(user_input, resolution, caption_option):
|
804 |
"""Generate a video based on user input via Gradio."""
|
@@ -931,7 +835,7 @@ def generate_video_with_options(user_input, resolution, caption_option, music_fi
|
|
931 |
selected_voice = VOICE_CHOICES[voice]
|
932 |
voice_speed = v_speed
|
933 |
font_size = caption_size
|
934 |
-
video_clip_probability = vclip_prob / 100
|
935 |
bg_music_volume = bg_vol
|
936 |
fps = video_fps
|
937 |
preset = video_preset
|
@@ -942,8 +846,15 @@ def generate_video_with_options(user_input, resolution, caption_option, music_fi
|
|
942 |
shutil.copy(music_file.name, target_path)
|
943 |
print(f"Uploaded music saved as: {target_path}")
|
944 |
|
945 |
-
#
|
946 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
947 |
|
948 |
# Create the Gradio interface
|
949 |
iface = gr.Interface(
|
@@ -957,14 +868,21 @@ iface = gr.Interface(
|
|
957 |
gr.Slider(0, 100, value=25, step=1, label="Video Clip Usage Probability (%)"),
|
958 |
gr.Slider(0.0, 1.0, value=0.08, step=0.01, label="Background Music Volume"),
|
959 |
gr.Slider(10, 60, value=30, step=1, label="Video FPS"),
|
960 |
-
gr.Dropdown(
|
961 |
-
|
|
|
|
|
|
|
962 |
gr.Slider(0.5, 1.5, value=0.9, step=0.05, label="Voice Speed"),
|
963 |
gr.Slider(20, 100, value=45, step=1, label="Caption Font Size")
|
964 |
],
|
965 |
-
outputs=
|
|
|
|
|
|
|
966 |
title="AI Documentary Video Generator",
|
967 |
-
description="Create short documentary videos with AI. Upload music, choose voice, and customize settings."
|
|
|
968 |
)
|
969 |
|
970 |
# Launch the interface
|
|
|
|
|
|
|
1 |
# Import necessary libraries
|
2 |
from kokoro import KPipeline
|
|
|
3 |
import soundfile as sf
|
4 |
import torch
|
|
|
|
|
5 |
import os
|
6 |
from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
|
7 |
from PIL import Image
|
|
|
9 |
import random
|
10 |
import cv2
|
11 |
import math
|
12 |
+
import requests
|
13 |
+
import time
|
14 |
+
import re
|
15 |
from moviepy.editor import (
|
16 |
VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
|
17 |
CompositeVideoClip, TextClip, CompositeAudioClip
|
18 |
)
|
19 |
import gradio as gr
|
20 |
import shutil
|
|
|
|
|
21 |
import moviepy.config as mpy_config
|
22 |
from pydub import AudioSegment
|
|
|
|
|
|
|
|
|
23 |
from bs4 import BeautifulSoup
|
|
|
24 |
from urllib.parse import quote
|
|
|
25 |
from gtts import gTTS
|
|
|
26 |
|
27 |
# Initialize Kokoro TTS pipeline (using American English)
|
28 |
+
pipeline = KPipeline(lang_code='a')
|
29 |
# Ensure ImageMagick binary is set
|
30 |
mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
|
31 |
|
32 |
# ---------------- Global Configuration ---------------- #
|
33 |
PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
|
34 |
OPENROUTER_API_KEY = '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'
|
35 |
+
OPENROUTER_MODEL = "google/gemini-2.0-flash-exp:free"
|
36 |
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
|
37 |
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
38 |
|
39 |
+
# Additional global variables
|
40 |
+
selected_voice = 'af_heart'
|
41 |
+
voice_speed = 0.9
|
42 |
+
font_size = 45
|
43 |
+
video_clip_probability = 0.25
|
44 |
+
bg_music_volume = 0.08
|
45 |
+
fps = 30
|
46 |
+
preset = "veryfast"
|
|
|
|
|
47 |
TARGET_RESOLUTION = None
|
48 |
CAPTION_COLOR = None
|
49 |
TEMP_FOLDER = None
|
50 |
|
|
|
51 |
# ---------------- Helper Functions ---------------- #
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
def generate_script(user_input):
|
53 |
"""Generate documentary script with proper OpenRouter handling."""
|
54 |
headers = {
|
|
|
65 |
And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
|
66 |
Formatting Rules:
|
67 |
|
|
|
68 |
Title in Square Brackets:
|
69 |
|
|
|
70 |
Each section starts with a one-word title inside [ ] (max two words if necessary).
|
71 |
This title will be used as a search term for Pexels footage.
|
72 |
|
|
|
|
|
73 |
Casual & Funny Narration:
|
74 |
|
|
|
75 |
Each section has 5-10 words of narration.
|
76 |
Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
|
77 |
|
|
|
|
|
78 |
No Special Formatting:
|
79 |
|
|
|
80 |
No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
|
81 |
|
|
|
|
|
82 |
Generalized Search Terms:
|
83 |
|
|
|
84 |
If a term is too specific, make it more general for Pexels search.
|
85 |
|
|
|
|
|
86 |
Scene-Specific Writing:
|
87 |
|
|
|
88 |
Each section describes only what should be shown in the video.
|
89 |
|
|
|
|
|
90 |
Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
|
91 |
|
|
|
92 |
No extra text, just the script.
|
93 |
|
|
|
|
|
94 |
Example Output:
|
95 |
[North Korea]
|
96 |
|
|
|
98 |
|
99 |
[Invisibility]
|
100 |
|
101 |
+
North Korea's internet speed is so fast... it doesn't exist.
|
102 |
|
103 |
[Leadership]
|
104 |
|
105 |
+
Kim Jong-un once won an election with 100% votes... against himself.
|
106 |
|
107 |
[Magic]
|
108 |
|
109 |
+
North Korea discovered time travel. That's why their news is always from the past.
|
110 |
|
111 |
[Warning]
|
112 |
|
113 |
+
Subscribe now, or Kim Jong-un will send you a free one-way ticket... to North Korea.
|
114 |
|
115 |
[Freedom]
|
116 |
|
117 |
+
North Korean citizens can do anything... as long as it's government-approved.
|
118 |
Now here is the Topic/scrip: {user_input}
|
119 |
"""
|
120 |
|
|
|
149 |
return None
|
150 |
|
151 |
def parse_script(script_text):
|
152 |
+
"""Parse the generated script into a list of elements."""
|
|
|
|
|
|
|
|
|
|
|
153 |
sections = {}
|
154 |
current_title = None
|
155 |
current_text = ""
|
|
|
646 |
|
647 |
if narration_text and CAPTION_COLOR != "transparent":
|
648 |
try:
|
649 |
+
txt_clip = TextClip(
|
650 |
+
narration_text,
|
651 |
+
fontsize=font_size,
|
652 |
+
color=CAPTION_COLOR,
|
653 |
+
font='Arial-Bold',
|
654 |
+
bg_color='rgba(0,0,0,0.5)',
|
655 |
+
method='caption',
|
656 |
+
align='center',
|
657 |
+
size=(TARGET_RESOLUTION[0]*0.9, None),
|
658 |
+
stroke_color='black',
|
659 |
+
stroke_width=1
|
660 |
+
).set_duration(clip.duration)
|
661 |
+
txt_clip = txt_clip.set_position(('center', TARGET_RESOLUTION[1]*0.8))
|
662 |
+
clip = CompositeVideoClip([clip, txt_clip])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
663 |
except Exception as sub_error:
|
664 |
print(f"Subtitle error: {sub_error}")
|
665 |
txt_clip = TextClip(
|
|
|
667 |
fontsize=font_size,
|
668 |
color=CAPTION_COLOR,
|
669 |
align='center',
|
670 |
+
size=(TARGET_RESOLUTION[0]*0.8, None)
|
671 |
+
).set_position(('center', TARGET_RESOLUTION[1]*0.8)).set_duration(clip.duration)
|
672 |
clip = CompositeVideoClip([clip, txt_clip])
|
673 |
|
674 |
clip = clip.set_audio(audio_clip)
|
|
|
703 |
print(f"Error fixing policies: {e}")
|
704 |
return False
|
705 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
706 |
# ---------------- Main Video Generation Function ---------------- #
|
707 |
def generate_video(user_input, resolution, caption_option):
|
708 |
"""Generate a video based on user input via Gradio."""
|
|
|
835 |
selected_voice = VOICE_CHOICES[voice]
|
836 |
voice_speed = v_speed
|
837 |
font_size = caption_size
|
838 |
+
video_clip_probability = vclip_prob / 100
|
839 |
bg_music_volume = bg_vol
|
840 |
fps = video_fps
|
841 |
preset = video_preset
|
|
|
846 |
shutil.copy(music_file.name, target_path)
|
847 |
print(f"Uploaded music saved as: {target_path}")
|
848 |
|
849 |
+
# First generate and return the script
|
850 |
+
print("Generating script from API...")
|
851 |
+
script = generate_script(user_input)
|
852 |
+
if not script:
|
853 |
+
return None, "Failed to generate script. Please try again."
|
854 |
+
|
855 |
+
# Then generate the video
|
856 |
+
video_path = generate_video(user_input, resolution, caption_option)
|
857 |
+
return video_path, script
|
858 |
|
859 |
# Create the Gradio interface
|
860 |
iface = gr.Interface(
|
|
|
868 |
gr.Slider(0, 100, value=25, step=1, label="Video Clip Usage Probability (%)"),
|
869 |
gr.Slider(0.0, 1.0, value=0.08, step=0.01, label="Background Music Volume"),
|
870 |
gr.Slider(10, 60, value=30, step=1, label="Video FPS"),
|
871 |
+
gr.Dropdown(
|
872 |
+
choices=["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow"],
|
873 |
+
value="veryfast",
|
874 |
+
label="Export Preset"
|
875 |
+
),
|
876 |
gr.Slider(0.5, 1.5, value=0.9, step=0.05, label="Voice Speed"),
|
877 |
gr.Slider(20, 100, value=45, step=1, label="Caption Font Size")
|
878 |
],
|
879 |
+
outputs=[
|
880 |
+
gr.Video(label="Generated Video"),
|
881 |
+
gr.Textbox(label="Generated Script", lines=10, interactive=False)
|
882 |
+
],
|
883 |
title="AI Documentary Video Generator",
|
884 |
+
description="Create short documentary videos with AI. Upload music, choose voice, and customize settings.",
|
885 |
+
allow_flagging="never"
|
886 |
)
|
887 |
|
888 |
# Launch the interface
|