VeoFlux

Running

App Files Files Community

testdeep123 committed 11 days ago

Commit

1ac7903

verified ·

1 Parent(s): 369793b

Update app.py

Browse files

Files changed (1) hide show

app.py +143 -81

app.py CHANGED Viewed

@@ -1,7 +1,12 @@
 # Import necessary libraries
 from kokoro import KPipeline
 import soundfile as sf
 import torch
 import os
 from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
 from PIL import Image
@@ -9,46 +14,67 @@ import tempfile
 import random
 import cv2
 import math
-import requests
-import time
-import re
 from moviepy.editor import (
     VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
     CompositeVideoClip, TextClip, CompositeAudioClip
 )
 import gradio as gr
 import shutil
 import moviepy.config as mpy_config
 from pydub import AudioSegment
 from bs4 import BeautifulSoup
 from urllib.parse import quote
 from gtts import gTTS
 # Initialize Kokoro TTS pipeline (using American English)
-pipeline = KPipeline(lang_code='a')
 # Ensure ImageMagick binary is set
 mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
 # ---------------- Global Configuration ---------------- #
 PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
 OPENROUTER_API_KEY = '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'
-OPENROUTER_MODEL = "google/gemini-2.0-flash-exp:free"
 OUTPUT_VIDEO_FILENAME = "final_video.mp4"
 USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
-# Additional global variables
-selected_voice = 'af_heart'
-voice_speed = 0.9
-font_size = 45
-video_clip_probability = 0.25
-bg_music_volume = 0.08
-fps = 30
-preset = "veryfast"
 TARGET_RESOLUTION = None
 CAPTION_COLOR = None
 TEMP_FOLDER = None
 # ---------------- Helper Functions ---------------- #
 def generate_script(user_input):
     """Generate documentary script with proper OpenRouter handling."""
     headers = {
@@ -65,32 +91,51 @@ If I provide a full script, rewrite it without any changes. Make everything shor
 And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
 Formatting Rules:
 Title in Square Brackets:
 Each section starts with a one-word title inside [ ] (max two words if necessary).
 This title will be used as a search term for Pexels footage.
 Casual & Funny Narration:
 Each section has 5-10 words of narration.
 Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
 No Special Formatting:
 No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
 Generalized Search Terms:
 If a term is too specific, make it more general for Pexels search.
 Scene-Specific Writing:
 Each section describes only what should be shown in the video.
 Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
 No extra text, just the script.
 Example Output:
 [North Korea]
@@ -98,23 +143,23 @@ Top 5 unknown facts about North Korea.
 [Invisibility]
-North Korea's internet speed is so fast... it doesn't exist.
 [Leadership]
-Kim Jong-un once won an election with 100% votes... against himself.
 [Magic]
-North Korea discovered time travel. That's why their news is always from the past.
 [Warning]
-Subscribe now, or Kim Jong-un will send you a free one-way ticket... to North Korea.
 [Freedom]
-North Korean citizens can do anything... as long as it's government-approved.
 Now here is the Topic/scrip: {user_input}
 """
@@ -149,7 +194,12 @@ Now here is the Topic/scrip: {user_input}
         return None
 def parse_script(script_text):
-    """Parse the generated script into a list of elements."""
     sections = {}
     current_title = None
     current_text = ""
@@ -612,6 +662,19 @@ def add_background_music(final_video, bg_music_volume=0.10):
         print("Continuing without background music")
         return final_video
 def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
     """Create a video clip with synchronized subtitles and narration."""
     try:
@@ -644,33 +707,52 @@ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, n
         else:
             return None
         if narration_text and CAPTION_COLOR != "transparent":
             try:
-                txt_clip = TextClip(
-                    narration_text,
-                    fontsize=font_size,
-                    color=CAPTION_COLOR,
-                    font='Arial-Bold',
-                    bg_color='rgba(0,0,0,0.5)',
-                    method='caption',
-                    align='center',
-                    size=(TARGET_RESOLUTION[0]*0.9, None),
-                    stroke_color='black',
-                    stroke_width=1
-                ).set_duration(clip.duration)
-                txt_clip = txt_clip.set_position(('center', TARGET_RESOLUTION[1]*0.8))
-                clip = CompositeVideoClip([clip, txt_clip])
             except Exception as sub_error:
                 print(f"Subtitle error: {sub_error}")
                 txt_clip = TextClip(
                     narration_text,
                     fontsize=font_size,
                     color=CAPTION_COLOR,
-                    align='center',
-                    size=(TARGET_RESOLUTION[0]*0.8, None)
-                ).set_position(('center', TARGET_RESOLUTION[1]*0.8)).set_duration(clip.duration)
                 clip = CompositeVideoClip([clip, txt_clip])
         clip = clip.set_audio(audio_clip)
         print(f"Clip created: {clip.duration:.1f}s")
         return clip
@@ -678,30 +760,24 @@ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, n
         print(f"Error in create_clip: {str(e)}")
         return None
-def fix_imagemagick_policy():
-    """Fix ImageMagick security policies."""
-    try:
-        print("Attempting to fix ImageMagick security policies...")
-        policy_paths = [
-            "/etc/ImageMagick-6/policy.xml",
-            "/etc/ImageMagick-7/policy.xml",
-            "/etc/ImageMagick/policy.xml",
-            "/usr/local/etc/ImageMagick-7/policy.xml"
-        ]
-        found_policy = next((path for path in policy_paths if os.path.exists(path)), None)
-        if not found_policy:
-            print("No policy.xml found. Using alternative subtitle method.")
-            return False
-        print(f"Modifying policy file at {found_policy}")
-        os.system(f"sudo cp {found_policy} {found_policy}.bak")
-        os.system(f"sudo sed -i 's/rights=\"none\"/rights=\"read|write\"/g' {found_policy}")
-        os.system(f"sudo sed -i 's/<policy domain=\"path\" pattern=\"@\*\"[^>]*>/<policy domain=\"path\" pattern=\"@*\" rights=\"read|write\"/g' {found_policy}")
-        os.system(f"sudo sed -i 's/<policy domain=\"coder\" rights=\"none\" pattern=\"PDF\"[^>]*>/<!-- <policy domain=\"coder\" rights=\"none\" pattern=\"PDF\"> -->/g' {found_policy}")
-        print("ImageMagick policies updated successfully.")
-        return True
-    except Exception as e:
-        print(f"Error fixing policies: {e}")
-        return False
 # ---------------- Main Video Generation Function ---------------- #
 def generate_video(user_input, resolution, caption_option):
@@ -835,7 +911,7 @@ def generate_video_with_options(user_input, resolution, caption_option, music_fi
     selected_voice = VOICE_CHOICES[voice]
     voice_speed = v_speed
     font_size = caption_size
-    video_clip_probability = vclip_prob / 100
     bg_music_volume = bg_vol
     fps = video_fps
     preset = video_preset
@@ -846,15 +922,8 @@ def generate_video_with_options(user_input, resolution, caption_option, music_fi
         shutil.copy(music_file.name, target_path)
         print(f"Uploaded music saved as: {target_path}")
-    # First generate and return the script
-    print("Generating script from API...")
-    script = generate_script(user_input)
-    if not script:
-        return None, "Failed to generate script. Please try again."
-    # Then generate the video
-    video_path = generate_video(user_input, resolution, caption_option)
-    return video_path, script
 # Create the Gradio interface
 iface = gr.Interface(
@@ -868,21 +937,14 @@ iface = gr.Interface(
         gr.Slider(0, 100, value=25, step=1, label="Video Clip Usage Probability (%)"),
         gr.Slider(0.0, 1.0, value=0.08, step=0.01, label="Background Music Volume"),
         gr.Slider(10, 60, value=30, step=1, label="Video FPS"),
-        gr.Dropdown(
-            choices=["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow"],
-            value="veryfast",
-            label="Export Preset"
-        ),
         gr.Slider(0.5, 1.5, value=0.9, step=0.05, label="Voice Speed"),
         gr.Slider(20, 100, value=45, step=1, label="Caption Font Size")
     ],
-    outputs=[
-        gr.Video(label="Generated Video"),
-        gr.Textbox(label="Generated Script", lines=10, interactive=False)
-    ],
     title="AI Documentary Video Generator",
-    description="Create short documentary videos with AI. Upload music, choose voice, and customize settings.",
-    allow_flagging="never"
 )
 # Launch the interface

 # Import necessary libraries
 from kokoro import KPipeline
 import soundfile as sf
 import torch
+import soundfile as sf
 import os
 from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
 from PIL import Image
 import random
 import cv2
 import math
+import os, requests, io, time, re, random
 from moviepy.editor import (
     VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
     CompositeVideoClip, TextClip, CompositeAudioClip
 )
 import gradio as gr
 import shutil
+import os
+import moviepy.video.fx.all as vfx
 import moviepy.config as mpy_config
 from pydub import AudioSegment
+from pydub.generators import Sine
+from PIL import Image, ImageDraw, ImageFont
+import numpy as np
 from bs4 import BeautifulSoup
+import base64
 from urllib.parse import quote
+import pysrt
 from gtts import gTTS
+import gradio as gr  # Import Gradio
 # Initialize Kokoro TTS pipeline (using American English)
+pipeline = KPipeline(lang_code='a')  # Use voice 'af_heart' for American English
 # Ensure ImageMagick binary is set
 mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
 # ---------------- Global Configuration ---------------- #
 PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
 OPENROUTER_API_KEY = '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'
+OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
 OUTPUT_VIDEO_FILENAME = "final_video.mp4"
 USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+# Additional global variables needed for the Gradio interface
+selected_voice = 'af_heart'  # Default voice
+voice_speed = 0.9  # Default voice speed
+font_size = 45  # Default font size
+video_clip_probability = 0.25  # Default probability for video clips
+bg_music_volume = 0.08  # Default background music volume
+fps = 30  # Default FPS
+preset = "veryfast"  # Default preset
 TARGET_RESOLUTION = None
 CAPTION_COLOR = None
 TEMP_FOLDER = None
 # ---------------- Helper Functions ---------------- #
+# Helper functions in this section: generate_script, parse_script,
+# search_pexels_videos, search_pexels_images, search_google_images, download_image,
+# download_video, generate_media, generate_tts, apply_kenburns_effect,
+# resize_to_fill, find_mp3_files, add_background_music, create_clip.
+# NOTE(review): fix_imagemagick_policy used to be listed here; it appears to have been removed - confirm.
+# Module-level placeholders, declared globally and assigned on each run.
+TARGET_RESOLUTION = None
+CAPTION_COLOR = None
+TEMP_FOLDER = None
 def generate_script(user_input):
     """Generate documentary script with proper OpenRouter handling."""
     headers = {
 And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
 Formatting Rules:
 Title in Square Brackets:
 Each section starts with a one-word title inside [ ] (max two words if necessary).
 This title will be used as a search term for Pexels footage.
 Casual & Funny Narration:
 Each section has 5-10 words of narration.
 Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
 No Special Formatting:
 No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
 Generalized Search Terms:
 If a term is too specific, make it more general for Pexels search.
 Scene-Specific Writing:
 Each section describes only what should be shown in the video.
 Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
 No extra text, just the script.
 Example Output:
 [North Korea]
 [Invisibility]
+North Korea’s internet speed is so fast… it doesn’t exist.
 [Leadership]
+Kim Jong-un once won an election with 100% votes… against himself.
 [Magic]
+North Korea discovered time travel. That’s why their news is always from the past.
 [Warning]
+Subscribe now, or Kim Jong-un will send you a free one-way ticket… to North Korea.
 [Freedom]
+North Korean citizens can do anything… as long as it's government-approved.
 Now here is the Topic/scrip: {user_input}
 """
         return None
 def parse_script(script_text):
+    """
+    Parse the generated script into a list of elements.
+    For each section, create two elements:
+      - A 'media' element using the section title as the visual prompt.
+      - A 'tts' element with the narration text, voice info, and computed duration.
+    """
     sections = {}
     current_title = None
     current_text = ""
         print("Continuing without background music")
         return final_video
 def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
     """Create a video clip with synchronized subtitles and narration."""
     try:
         else:
             return None
+        # Subtitle rendering
         if narration_text and CAPTION_COLOR != "transparent":
             try:
+                words = narration_text.split()
+                chunks = []
+                current_chunk = []
+                for word in words:
+                    current_chunk.append(word)
+                    if len(current_chunk) >= 5:
+                        chunks.append(' '.join(current_chunk))
+                        current_chunk = []
+                if current_chunk:
+                    chunks.append(' '.join(current_chunk))
+                chunk_duration = audio_duration / len(chunks)
+                subtitle_clips = []
+                subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.70)
+                #
+                for i, chunk_text in enumerate(chunks):
+                    start_time = i * chunk_duration
+                    end_time = (i + 1) * chunk_duration
+                    txt_clip = TextClip(
+                        chunk_text,
+                        fontsize=font_size,
+                        font='DejaVu-Sans',
+                        color=CAPTION_COLOR,
+                        stroke_width=2,
+                        stroke_color='black'
+                    ).set_start(start_time).set_end(end_time)
+                    txt_clip = txt_clip.set_position(('center', subtitle_y_position))
+                    subtitle_clips.append(txt_clip)
+                clip = CompositeVideoClip([clip] + subtitle_clips)
             except Exception as sub_error:
                 print(f"Subtitle error: {sub_error}")
                 txt_clip = TextClip(
                     narration_text,
                     fontsize=font_size,
+                    font='DejaVu-Sans',
                     color=CAPTION_COLOR,
+                    stroke_width=2,
+                    stroke_color='black'
+                ).set_position(('center', int(TARGET_RESOLUTION[1] / 3))).set_duration(clip.duration)
                 clip = CompositeVideoClip([clip, txt_clip])
         clip = clip.set_audio(audio_clip)
         print(f"Clip created: {clip.duration:.1f}s")
         return clip
         print(f"Error in create_clip: {str(e)}")
         return None
 # ---------------- Main Video Generation Function ---------------- #
 def generate_video(user_input, resolution, caption_option):
     selected_voice = VOICE_CHOICES[voice]
     voice_speed = v_speed
     font_size = caption_size
+    video_clip_probability = vclip_prob / 100  # Convert from percentage to decimal
     bg_music_volume = bg_vol
     fps = video_fps
     preset = video_preset
         shutil.copy(music_file.name, target_path)
         print(f"Uploaded music saved as: {target_path}")
+    # Generate the video
+    return generate_video(user_input, resolution, caption_option)
 # Create the Gradio interface
 iface = gr.Interface(
         gr.Slider(0, 100, value=25, step=1, label="Video Clip Usage Probability (%)"),
         gr.Slider(0.0, 1.0, value=0.08, step=0.01, label="Background Music Volume"),
         gr.Slider(10, 60, value=30, step=1, label="Video FPS"),
+        gr.Dropdown(choices=["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow"],
+                   value="veryfast", label="Export Preset"),
         gr.Slider(0.5, 1.5, value=0.9, step=0.05, label="Voice Speed"),
         gr.Slider(20, 100, value=45, step=1, label="Caption Font Size")
     ],
+    outputs=gr.Video(label="Generated Video"),
     title="AI Documentary Video Generator",
+    description="Create short documentary videos with AI. Upload music, choose voice, and customize settings."
 )
 # Launch the interface