Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,12 @@
|
|
|
|
|
|
1 |
# Import necessary libraries
|
2 |
from kokoro import KPipeline
|
|
|
3 |
import soundfile as sf
|
4 |
import torch
|
|
|
|
|
5 |
import os
|
6 |
from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
|
7 |
from PIL import Image
|
@@ -9,46 +14,67 @@ import tempfile
|
|
9 |
import random
|
10 |
import cv2
|
11 |
import math
|
12 |
-
import requests
|
13 |
-
import time
|
14 |
-
import re
|
15 |
from moviepy.editor import (
|
16 |
VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
|
17 |
CompositeVideoClip, TextClip, CompositeAudioClip
|
18 |
)
|
19 |
import gradio as gr
|
20 |
import shutil
|
|
|
|
|
21 |
import moviepy.config as mpy_config
|
22 |
from pydub import AudioSegment
|
|
|
|
|
|
|
|
|
23 |
from bs4 import BeautifulSoup
|
|
|
24 |
from urllib.parse import quote
|
|
|
25 |
from gtts import gTTS
|
|
|
26 |
|
27 |
# Initialize Kokoro TTS pipeline (using American English)
|
28 |
-
pipeline = KPipeline(lang_code='a')
|
29 |
# Ensure ImageMagick binary is set
|
30 |
mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
|
31 |
|
32 |
# ---------------- Global Configuration ---------------- #
|
33 |
PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
|
34 |
OPENROUTER_API_KEY = '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'
|
35 |
-
OPENROUTER_MODEL = "
|
36 |
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
|
37 |
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
38 |
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
|
|
|
|
47 |
TARGET_RESOLUTION = None
|
48 |
CAPTION_COLOR = None
|
49 |
TEMP_FOLDER = None
|
50 |
|
|
|
51 |
# ---------------- Helper Functions ---------------- #
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
def generate_script(user_input):
|
53 |
"""Generate documentary script with proper OpenRouter handling."""
|
54 |
headers = {
|
@@ -65,32 +91,51 @@ If I provide a full script, rewrite it without any changes. Make everything shor
|
|
65 |
And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
|
66 |
Formatting Rules:
|
67 |
|
|
|
68 |
Title in Square Brackets:
|
69 |
|
|
|
70 |
Each section starts with a one-word title inside [ ] (max two words if necessary).
|
71 |
This title will be used as a search term for Pexels footage.
|
72 |
|
|
|
|
|
73 |
Casual & Funny Narration:
|
74 |
|
|
|
75 |
Each section has 5-10 words of narration.
|
76 |
Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
|
77 |
|
|
|
|
|
78 |
No Special Formatting:
|
79 |
|
|
|
80 |
No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
|
81 |
|
|
|
|
|
82 |
Generalized Search Terms:
|
83 |
|
|
|
84 |
If a term is too specific, make it more general for Pexels search.
|
85 |
|
|
|
|
|
86 |
Scene-Specific Writing:
|
87 |
|
|
|
88 |
Each section describes only what should be shown in the video.
|
89 |
|
|
|
|
|
90 |
Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
|
91 |
|
|
|
92 |
No extra text, just the script.
|
93 |
|
|
|
|
|
94 |
Example Output:
|
95 |
[North Korea]
|
96 |
|
@@ -98,23 +143,23 @@ Top 5 unknown facts about North Korea.
|
|
98 |
|
99 |
[Invisibility]
|
100 |
|
101 |
-
North Korea
|
102 |
|
103 |
[Leadership]
|
104 |
|
105 |
-
Kim Jong-un once won an election with 100% votes
|
106 |
|
107 |
[Magic]
|
108 |
|
109 |
-
North Korea discovered time travel. That
|
110 |
|
111 |
[Warning]
|
112 |
|
113 |
-
Subscribe now, or Kim Jong-un will send you a free one-way ticket
|
114 |
|
115 |
[Freedom]
|
116 |
|
117 |
-
North Korean citizens can do anything
|
118 |
Now here is the Topic/scrip: {user_input}
|
119 |
"""
|
120 |
|
@@ -149,7 +194,12 @@ Now here is the Topic/scrip: {user_input}
|
|
149 |
return None
|
150 |
|
151 |
def parse_script(script_text):
|
152 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
153 |
sections = {}
|
154 |
current_title = None
|
155 |
current_text = ""
|
@@ -612,6 +662,19 @@ def add_background_music(final_video, bg_music_volume=0.10):
|
|
612 |
print("Continuing without background music")
|
613 |
return final_video
|
614 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
615 |
def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
|
616 |
"""Create a video clip with synchronized subtitles and narration."""
|
617 |
try:
|
@@ -644,33 +707,52 @@ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, n
|
|
644 |
else:
|
645 |
return None
|
646 |
|
|
|
647 |
if narration_text and CAPTION_COLOR != "transparent":
|
648 |
try:
|
649 |
-
|
650 |
-
|
651 |
-
|
652 |
-
|
653 |
-
|
654 |
-
|
655 |
-
|
656 |
-
|
657 |
-
|
658 |
-
|
659 |
-
|
660 |
-
|
661 |
-
|
662 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
663 |
except Exception as sub_error:
|
664 |
print(f"Subtitle error: {sub_error}")
|
665 |
txt_clip = TextClip(
|
666 |
narration_text,
|
667 |
fontsize=font_size,
|
|
|
668 |
color=CAPTION_COLOR,
|
669 |
-
|
670 |
-
|
671 |
-
).set_position(('center', TARGET_RESOLUTION[1]
|
672 |
clip = CompositeVideoClip([clip, txt_clip])
|
673 |
|
|
|
674 |
clip = clip.set_audio(audio_clip)
|
675 |
print(f"Clip created: {clip.duration:.1f}s")
|
676 |
return clip
|
@@ -678,30 +760,24 @@ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, n
|
|
678 |
print(f"Error in create_clip: {str(e)}")
|
679 |
return None
|
680 |
|
681 |
-
|
682 |
-
|
683 |
-
|
684 |
-
|
685 |
-
|
686 |
-
|
687 |
-
|
688 |
-
|
689 |
-
|
690 |
-
|
691 |
-
|
692 |
-
|
693 |
-
|
694 |
-
|
695 |
-
|
696 |
-
|
697 |
-
|
698 |
-
|
699 |
-
os.system(f"sudo sed -i 's/<policy domain=\"coder\" rights=\"none\" pattern=\"PDF\"[^>]*>/<!-- <policy domain=\"coder\" rights=\"none\" pattern=\"PDF\"> -->/g' {found_policy}")
|
700 |
-
print("ImageMagick policies updated successfully.")
|
701 |
-
return True
|
702 |
-
except Exception as e:
|
703 |
-
print(f"Error fixing policies: {e}")
|
704 |
-
return False
|
705 |
|
706 |
# ---------------- Main Video Generation Function ---------------- #
|
707 |
def generate_video(user_input, resolution, caption_option):
|
@@ -835,7 +911,7 @@ def generate_video_with_options(user_input, resolution, caption_option, music_fi
|
|
835 |
selected_voice = VOICE_CHOICES[voice]
|
836 |
voice_speed = v_speed
|
837 |
font_size = caption_size
|
838 |
-
video_clip_probability = vclip_prob / 100
|
839 |
bg_music_volume = bg_vol
|
840 |
fps = video_fps
|
841 |
preset = video_preset
|
@@ -846,15 +922,8 @@ def generate_video_with_options(user_input, resolution, caption_option, music_fi
|
|
846 |
shutil.copy(music_file.name, target_path)
|
847 |
print(f"Uploaded music saved as: {target_path}")
|
848 |
|
849 |
-
#
|
850 |
-
|
851 |
-
script = generate_script(user_input)
|
852 |
-
if not script:
|
853 |
-
return None, "Failed to generate script. Please try again."
|
854 |
-
|
855 |
-
# Then generate the video
|
856 |
-
video_path = generate_video(user_input, resolution, caption_option)
|
857 |
-
return video_path, script
|
858 |
|
859 |
# Create the Gradio interface
|
860 |
iface = gr.Interface(
|
@@ -868,21 +937,14 @@ iface = gr.Interface(
|
|
868 |
gr.Slider(0, 100, value=25, step=1, label="Video Clip Usage Probability (%)"),
|
869 |
gr.Slider(0.0, 1.0, value=0.08, step=0.01, label="Background Music Volume"),
|
870 |
gr.Slider(10, 60, value=30, step=1, label="Video FPS"),
|
871 |
-
gr.Dropdown(
|
872 |
-
|
873 |
-
value="veryfast",
|
874 |
-
label="Export Preset"
|
875 |
-
),
|
876 |
gr.Slider(0.5, 1.5, value=0.9, step=0.05, label="Voice Speed"),
|
877 |
gr.Slider(20, 100, value=45, step=1, label="Caption Font Size")
|
878 |
],
|
879 |
-
outputs=
|
880 |
-
gr.Video(label="Generated Video"),
|
881 |
-
gr.Textbox(label="Generated Script", lines=10, interactive=False)
|
882 |
-
],
|
883 |
title="AI Documentary Video Generator",
|
884 |
-
description="Create short documentary videos with AI. Upload music, choose voice, and customize settings."
|
885 |
-
allow_flagging="never"
|
886 |
)
|
887 |
|
888 |
# Launch the interface
|
|
|
1 |
+
|
2 |
+
|
3 |
# Import necessary libraries
|
4 |
from kokoro import KPipeline
|
5 |
+
|
6 |
import soundfile as sf
|
7 |
import torch
|
8 |
+
|
9 |
+
import soundfile as sf
|
10 |
import os
|
11 |
from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
|
12 |
from PIL import Image
|
|
|
14 |
import random
|
15 |
import cv2
|
16 |
import math
|
17 |
+
import os, requests, io, time, re, random
|
|
|
|
|
18 |
from moviepy.editor import (
|
19 |
VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
|
20 |
CompositeVideoClip, TextClip, CompositeAudioClip
|
21 |
)
|
22 |
import gradio as gr
|
23 |
import shutil
|
24 |
+
import os
|
25 |
+
import moviepy.video.fx.all as vfx
|
26 |
import moviepy.config as mpy_config
|
27 |
from pydub import AudioSegment
|
28 |
+
from pydub.generators import Sine
|
29 |
+
|
30 |
+
from PIL import Image, ImageDraw, ImageFont
|
31 |
+
import numpy as np
|
32 |
from bs4 import BeautifulSoup
|
33 |
+
import base64
|
34 |
from urllib.parse import quote
|
35 |
+
import pysrt
|
36 |
from gtts import gTTS
|
37 |
+
import gradio as gr # Import Gradio
|
38 |
|
39 |
# Initialize Kokoro TTS pipeline (using American English)
|
40 |
+
pipeline = KPipeline(lang_code='a')  # lang_code 'a' selects American English; the voice is set separately via selected_voice (default 'af_heart')
|
41 |
# Ensure ImageMagick binary is set
|
42 |
mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
|
43 |
|
44 |
# ---------------- Global Configuration ---------------- #
|
45 |
PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
|
46 |
OPENROUTER_API_KEY = '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'
|
47 |
+
OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
|
48 |
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
|
49 |
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
50 |
|
51 |
+
|
52 |
+
|
53 |
+
# Additional global variables needed for the Gradio interface
|
54 |
+
selected_voice = 'af_heart' # Default voice
|
55 |
+
voice_speed = 0.9 # Default voice speed
|
56 |
+
font_size = 45 # Default font size
|
57 |
+
video_clip_probability = 0.25 # Default probability for video clips
|
58 |
+
bg_music_volume = 0.08 # Default background music volume
|
59 |
+
fps = 30 # Default FPS
|
60 |
+
preset = "veryfast" # Default preset
|
61 |
TARGET_RESOLUTION = None
|
62 |
CAPTION_COLOR = None
|
63 |
TEMP_FOLDER = None
|
64 |
|
65 |
+
|
66 |
# ---------------- Helper Functions ---------------- #
|
67 |
+
# (Your existing helper functions remain unchanged: generate_script, parse_script,
|
68 |
+
# search_pexels_videos, search_pexels_images, search_google_images, download_image,
|
69 |
+
# download_video, generate_media, generate_tts, apply_kenburns_effect,
|
70 |
+
# resize_to_fill, find_mp3_files, add_background_music, create_clip,
|
71 |
+
# fix_imagemagick_policy)
|
72 |
+
|
73 |
+
# Module-level placeholders, kept as in the original code; their values are assigned on each run
|
74 |
+
TARGET_RESOLUTION = None
|
75 |
+
CAPTION_COLOR = None
|
76 |
+
TEMP_FOLDER = None
|
77 |
+
|
78 |
def generate_script(user_input):
|
79 |
"""Generate documentary script with proper OpenRouter handling."""
|
80 |
headers = {
|
|
|
91 |
And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
|
92 |
Formatting Rules:
|
93 |
|
94 |
+
|
95 |
Title in Square Brackets:
|
96 |
|
97 |
+
|
98 |
Each section starts with a one-word title inside [ ] (max two words if necessary).
|
99 |
This title will be used as a search term for Pexels footage.
|
100 |
|
101 |
+
|
102 |
+
|
103 |
Casual & Funny Narration:
|
104 |
|
105 |
+
|
106 |
Each section has 5-10 words of narration.
|
107 |
Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
|
108 |
|
109 |
+
|
110 |
+
|
111 |
No Special Formatting:
|
112 |
|
113 |
+
|
114 |
No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
|
115 |
|
116 |
+
|
117 |
+
|
118 |
Generalized Search Terms:
|
119 |
|
120 |
+
|
121 |
If a term is too specific, make it more general for Pexels search.
|
122 |
|
123 |
+
|
124 |
+
|
125 |
Scene-Specific Writing:
|
126 |
|
127 |
+
|
128 |
Each section describes only what should be shown in the video.
|
129 |
|
130 |
+
|
131 |
+
|
132 |
Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
|
133 |
|
134 |
+
|
135 |
No extra text, just the script.
|
136 |
|
137 |
+
|
138 |
+
|
139 |
Example Output:
|
140 |
[North Korea]
|
141 |
|
|
|
143 |
|
144 |
[Invisibility]
|
145 |
|
146 |
+
North Korea’s internet speed is so fast… it doesn’t exist.
|
147 |
|
148 |
[Leadership]
|
149 |
|
150 |
+
Kim Jong-un once won an election with 100% votes… against himself.
|
151 |
|
152 |
[Magic]
|
153 |
|
154 |
+
North Korea discovered time travel. That’s why their news is always from the past.
|
155 |
|
156 |
[Warning]
|
157 |
|
158 |
+
Subscribe now, or Kim Jong-un will send you a free one-way ticket… to North Korea.
|
159 |
|
160 |
[Freedom]
|
161 |
|
162 |
+
North Korean citizens can do anything… as long as it's government-approved.
|
163 |
Now here is the Topic/scrip: {user_input}
|
164 |
"""
|
165 |
|
|
|
194 |
return None
|
195 |
|
196 |
def parse_script(script_text):
|
197 |
+
"""
|
198 |
+
Parse the generated script into a list of elements.
|
199 |
+
For each section, create two elements:
|
200 |
+
- A 'media' element using the section title as the visual prompt.
|
201 |
+
- A 'tts' element with the narration text, voice info, and computed duration.
|
202 |
+
"""
|
203 |
sections = {}
|
204 |
current_title = None
|
205 |
current_text = ""
|
|
|
662 |
print("Continuing without background music")
|
663 |
return final_video
|
664 |
|
665 |
+
|
666 |
+
|
667 |
+
|
668 |
+
|
669 |
+
|
670 |
+
|
671 |
+
|
672 |
+
|
673 |
+
|
674 |
+
|
675 |
+
|
676 |
+
|
677 |
+
|
678 |
def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
|
679 |
"""Create a video clip with synchronized subtitles and narration."""
|
680 |
try:
|
|
|
707 |
else:
|
708 |
return None
|
709 |
|
710 |
+
# Subtitle rendering
|
711 |
if narration_text and CAPTION_COLOR != "transparent":
|
712 |
try:
|
713 |
+
words = narration_text.split()
|
714 |
+
chunks = []
|
715 |
+
current_chunk = []
|
716 |
+
for word in words:
|
717 |
+
current_chunk.append(word)
|
718 |
+
if len(current_chunk) >= 5:
|
719 |
+
chunks.append(' '.join(current_chunk))
|
720 |
+
current_chunk = []
|
721 |
+
if current_chunk:
|
722 |
+
chunks.append(' '.join(current_chunk))
|
723 |
+
|
724 |
+
chunk_duration = audio_duration / len(chunks)
|
725 |
+
subtitle_clips = []
|
726 |
+
subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.70)
|
727 |
+
#
|
728 |
+
for i, chunk_text in enumerate(chunks):
|
729 |
+
start_time = i * chunk_duration
|
730 |
+
end_time = (i + 1) * chunk_duration
|
731 |
+
txt_clip = TextClip(
|
732 |
+
chunk_text,
|
733 |
+
fontsize=font_size,
|
734 |
+
font='DejaVu-Sans',
|
735 |
+
color=CAPTION_COLOR,
|
736 |
+
stroke_width=2,
|
737 |
+
stroke_color='black'
|
738 |
+
).set_start(start_time).set_end(end_time)
|
739 |
+
txt_clip = txt_clip.set_position(('center', subtitle_y_position))
|
740 |
+
subtitle_clips.append(txt_clip)
|
741 |
+
|
742 |
+
clip = CompositeVideoClip([clip] + subtitle_clips)
|
743 |
except Exception as sub_error:
|
744 |
print(f"Subtitle error: {sub_error}")
|
745 |
txt_clip = TextClip(
|
746 |
narration_text,
|
747 |
fontsize=font_size,
|
748 |
+
font='DejaVu-Sans',
|
749 |
color=CAPTION_COLOR,
|
750 |
+
stroke_width=2,
|
751 |
+
stroke_color='black'
|
752 |
+
).set_position(('center', int(TARGET_RESOLUTION[1] / 3))).set_duration(clip.duration)
|
753 |
clip = CompositeVideoClip([clip, txt_clip])
|
754 |
|
755 |
+
|
756 |
clip = clip.set_audio(audio_clip)
|
757 |
print(f"Clip created: {clip.duration:.1f}s")
|
758 |
return clip
|
|
|
760 |
print(f"Error in create_clip: {str(e)}")
|
761 |
return None
|
762 |
|
763 |
+
|
764 |
+
|
765 |
+
|
766 |
+
|
767 |
+
|
768 |
+
|
769 |
+
|
770 |
+
|
771 |
+
|
772 |
+
|
773 |
+
|
774 |
+
|
775 |
+
|
776 |
+
|
777 |
+
|
778 |
+
|
779 |
+
|
780 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
781 |
|
782 |
# ---------------- Main Video Generation Function ---------------- #
|
783 |
def generate_video(user_input, resolution, caption_option):
|
|
|
911 |
selected_voice = VOICE_CHOICES[voice]
|
912 |
voice_speed = v_speed
|
913 |
font_size = caption_size
|
914 |
+
video_clip_probability = vclip_prob / 100  # Convert the 0-100 percentage from the UI into a 0-1 probability
|
915 |
bg_music_volume = bg_vol
|
916 |
fps = video_fps
|
917 |
preset = video_preset
|
|
|
922 |
shutil.copy(music_file.name, target_path)
|
923 |
print(f"Uploaded music saved as: {target_path}")
|
924 |
|
925 |
+
# Generate the video
|
926 |
+
return generate_video(user_input, resolution, caption_option)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
927 |
|
928 |
# Create the Gradio interface
|
929 |
iface = gr.Interface(
|
|
|
937 |
gr.Slider(0, 100, value=25, step=1, label="Video Clip Usage Probability (%)"),
|
938 |
gr.Slider(0.0, 1.0, value=0.08, step=0.01, label="Background Music Volume"),
|
939 |
gr.Slider(10, 60, value=30, step=1, label="Video FPS"),
|
940 |
+
gr.Dropdown(choices=["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow"],
|
941 |
+
value="veryfast", label="Export Preset"),
|
|
|
|
|
|
|
942 |
gr.Slider(0.5, 1.5, value=0.9, step=0.05, label="Voice Speed"),
|
943 |
gr.Slider(20, 100, value=45, step=1, label="Caption Font Size")
|
944 |
],
|
945 |
+
outputs=gr.Video(label="Generated Video"),
|
|
|
|
|
|
|
946 |
title="AI Documentary Video Generator",
|
947 |
+
description="Create short documentary videos with AI. Upload music, choose voice, and customize settings."
|
|
|
948 |
)
|
949 |
|
950 |
# Launch the interface
|