VeoFlux

Running

App Files Files Community

testdeep123 committed on Apr 23

Commit

369793b

verified ·

1 Parent(s): 06b8655

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -138

app.py CHANGED Viewed

@@ -1,12 +1,7 @@
 # Import necessary libraries
 from kokoro import KPipeline
 import soundfile as sf
 import torch
-import soundfile as sf
 import os
 from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
 from PIL import Image
@@ -14,67 +9,46 @@ import tempfile
 import random
 import cv2
 import math
-import os, requests, io, time, re, random
 from moviepy.editor import (
     VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
     CompositeVideoClip, TextClip, CompositeAudioClip
 )
 import gradio as gr
 import shutil
-import os
-import moviepy.video.fx.all as vfx
 import moviepy.config as mpy_config
 from pydub import AudioSegment
-from pydub.generators import Sine
-from PIL import Image, ImageDraw, ImageFont
-import numpy as np
 from bs4 import BeautifulSoup
-import base64
 from urllib.parse import quote
-import pysrt
 from gtts import gTTS
-import gradio as gr  # Import Gradio
 # Initialize Kokoro TTS pipeline (using American English)
-pipeline = KPipeline(lang_code='a')  # Use voice 'af_heart' for American English
 # Ensure ImageMagick binary is set
 mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
 # ---------------- Global Configuration ---------------- #
 PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
 OPENROUTER_API_KEY = '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'
-OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
 OUTPUT_VIDEO_FILENAME = "final_video.mp4"
 USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
-# Additional global variables needed for the Gradio interface
-selected_voice = 'af_heart'  # Default voice
-voice_speed = 0.9  # Default voice speed
-font_size = 45  # Default font size
-video_clip_probability = 0.25  # Default probability for video clips
-bg_music_volume = 0.08  # Default background music volume
-fps = 30  # Default FPS
-preset = "veryfast"  # Default preset
 TARGET_RESOLUTION = None
 CAPTION_COLOR = None
 TEMP_FOLDER = None
 # ---------------- Helper Functions ---------------- #
-# (Your existing helper functions remain unchanged: generate_script, parse_script,
-# search_pexels_videos, search_pexels_images, search_google_images, download_image,
-# download_video, generate_media, generate_tts, apply_kenburns_effect,
-# resize_to_fill, find_mp3_files, add_background_music, create_clip,
-# fix_imagemagick_policy)
-# Define these globally as they were in your original code but will be set per run
-TARGET_RESOLUTION = None
-CAPTION_COLOR = None
-TEMP_FOLDER = None
 def generate_script(user_input):
     """Generate documentary script with proper OpenRouter handling."""
     headers = {
@@ -91,51 +65,32 @@ If I provide a full script, rewrite it without any changes. Make everything shor
 And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
 Formatting Rules:
 Title in Square Brackets:
 Each section starts with a one-word title inside [ ] (max two words if necessary).
 This title will be used as a search term for Pexels footage.
 Casual & Funny Narration:
 Each section has 5-10 words of narration.
 Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
 No Special Formatting:
 No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
 Generalized Search Terms:
 If a term is too specific, make it more general for Pexels search.
 Scene-Specific Writing:
 Each section describes only what should be shown in the video.
 Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
 No extra text, just the script.
 Example Output:
 [North Korea]
@@ -143,23 +98,23 @@ Top 5 unknown facts about North Korea.
 [Invisibility]
-North Korea’s internet speed is so fast… it doesn’t exist.
 [Leadership]
-Kim Jong-un once won an election with 100% votes… against himself.
 [Magic]
-North Korea discovered time travel. That’s why their news is always from the past.
 [Warning]
-Subscribe now, or Kim Jong-un will send you a free one-way ticket… to North Korea.
 [Freedom]
-North Korean citizens can do anything… as long as it's government-approved.
 Now here is the Topic/scrip: {user_input}
 """
@@ -194,12 +149,7 @@ Now here is the Topic/scrip: {user_input}
         return None
 def parse_script(script_text):
-    """
-    Parse the generated script into a list of elements.
-    For each section, create two elements:
-      - A 'media' element using the section title as the visual prompt.
-      - A 'tts' element with the narration text, voice info, and computed duration.
-    """
     sections = {}
     current_title = None
     current_text = ""
@@ -696,40 +646,20 @@ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, n
         if narration_text and CAPTION_COLOR != "transparent":
             try:
-                words = narration_text.split()
-                chunks = []
-                current_chunk = []
-                for word in words:
-                    current_chunk.append(word)
-                    if len(current_chunk) >= 5:
-                        chunks.append(' '.join(current_chunk))
-                        current_chunk = []
-                if current_chunk:
-                    chunks.append(' '.join(current_chunk))
-                chunk_duration = audio_duration / len(chunks)
-                subtitle_clips = []
-                subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.70)
-                for i, chunk_text in enumerate(chunks):
-                    start_time = i * chunk_duration
-                    end_time = (i + 1) * chunk_duration
-                    txt_clip = TextClip(
-                        chunk_text,
-                        fontsize=45,
-                        font='Arial-Bold',
-                        color=CAPTION_COLOR,
-                        bg_color='rgba(0, 0, 0, 0.25)',
-                        method='caption',
-                        align='center',
-                        stroke_width=2,
-                        stroke_color=CAPTION_COLOR,
-                        size=(TARGET_RESOLUTION[0] * 0.8, None)
-                    ).set_start(start_time).set_end(end_time)
-                    txt_clip = txt_clip.set_position(('center', subtitle_y_position))
-                    subtitle_clips.append(txt_clip)
-                clip = CompositeVideoClip([clip] + subtitle_clips)
             except Exception as sub_error:
                 print(f"Subtitle error: {sub_error}")
                 txt_clip = TextClip(
@@ -737,8 +667,8 @@ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, n
                     fontsize=font_size,
                     color=CAPTION_COLOR,
                     align='center',
-                    size=(TARGET_RESOLUTION[0] * 0.7, None)
-                ).set_position(('center', int(TARGET_RESOLUTION[1] / 3))).set_duration(clip.duration)
                 clip = CompositeVideoClip([clip, txt_clip])
         clip = clip.set_audio(audio_clip)
@@ -773,32 +703,6 @@ def fix_imagemagick_policy():
         print(f"Error fixing policies: {e}")
         return False
 # ---------------- Main Video Generation Function ---------------- #
 def generate_video(user_input, resolution, caption_option):
     """Generate a video based on user input via Gradio."""
@@ -931,7 +835,7 @@ def generate_video_with_options(user_input, resolution, caption_option, music_fi
     selected_voice = VOICE_CHOICES[voice]
     voice_speed = v_speed
     font_size = caption_size
-    video_clip_probability = vclip_prob / 100  # Convert from percentage to decimal
     bg_music_volume = bg_vol
     fps = video_fps
     preset = video_preset
@@ -942,8 +846,15 @@ def generate_video_with_options(user_input, resolution, caption_option, music_fi
         shutil.copy(music_file.name, target_path)
         print(f"Uploaded music saved as: {target_path}")
-    # Generate the video
-    return generate_video(user_input, resolution, caption_option)
 # Create the Gradio interface
 iface = gr.Interface(
@@ -957,14 +868,21 @@ iface = gr.Interface(
         gr.Slider(0, 100, value=25, step=1, label="Video Clip Usage Probability (%)"),
         gr.Slider(0.0, 1.0, value=0.08, step=0.01, label="Background Music Volume"),
         gr.Slider(10, 60, value=30, step=1, label="Video FPS"),
-        gr.Dropdown(choices=["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow"],
-                   value="veryfast", label="Export Preset"),
         gr.Slider(0.5, 1.5, value=0.9, step=0.05, label="Voice Speed"),
         gr.Slider(20, 100, value=45, step=1, label="Caption Font Size")
     ],
-    outputs=gr.Video(label="Generated Video"),
     title="AI Documentary Video Generator",
-    description="Create short documentary videos with AI. Upload music, choose voice, and customize settings."
 )
 # Launch the interface

 # Import necessary libraries
 from kokoro import KPipeline
 import soundfile as sf
 import torch
 import os
 from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
 from PIL import Image
 import random
 import cv2
 import math
+import requests
+import time
+import re
 from moviepy.editor import (
     VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
     CompositeVideoClip, TextClip, CompositeAudioClip
 )
 import gradio as gr
 import shutil
 import moviepy.config as mpy_config
 from pydub import AudioSegment
 from bs4 import BeautifulSoup
 from urllib.parse import quote
 from gtts import gTTS
 # Initialize Kokoro TTS pipeline (using American English)
+pipeline = KPipeline(lang_code='a')
 # Ensure ImageMagick binary is set
 mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
 # ---------------- Global Configuration ---------------- #
 PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
 OPENROUTER_API_KEY = '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'
+OPENROUTER_MODEL = "google/gemini-2.0-flash-exp:free"
 OUTPUT_VIDEO_FILENAME = "final_video.mp4"
 USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+# Additional global variables
+selected_voice = 'af_heart'
+voice_speed = 0.9
+font_size = 45
+video_clip_probability = 0.25
+bg_music_volume = 0.08
+fps = 30
+preset = "veryfast"
 TARGET_RESOLUTION = None
 CAPTION_COLOR = None
 TEMP_FOLDER = None
 # ---------------- Helper Functions ---------------- #
 def generate_script(user_input):
     """Generate documentary script with proper OpenRouter handling."""
     headers = {
 And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
 Formatting Rules:
 Title in Square Brackets:
 Each section starts with a one-word title inside [ ] (max two words if necessary).
 This title will be used as a search term for Pexels footage.
 Casual & Funny Narration:
 Each section has 5-10 words of narration.
 Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
 No Special Formatting:
 No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
 Generalized Search Terms:
 If a term is too specific, make it more general for Pexels search.
 Scene-Specific Writing:
 Each section describes only what should be shown in the video.
 Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
 No extra text, just the script.
 Example Output:
 [North Korea]
 [Invisibility]
+North Korea's internet speed is so fast... it doesn't exist.
 [Leadership]
+Kim Jong-un once won an election with 100% votes... against himself.
 [Magic]
+North Korea discovered time travel. That's why their news is always from the past.
 [Warning]
+Subscribe now, or Kim Jong-un will send you a free one-way ticket... to North Korea.
 [Freedom]
+North Korean citizens can do anything... as long as it's government-approved.
 Now here is the Topic/scrip: {user_input}
 """
         return None
 def parse_script(script_text):
+    """Parse the generated script into a list of elements."""
     sections = {}
     current_title = None
     current_text = ""
         if narration_text and CAPTION_COLOR != "transparent":
             try:
+                txt_clip = TextClip(
+                    narration_text,
+                    fontsize=font_size,
+                    color=CAPTION_COLOR,
+                    font='Arial-Bold',
+                    bg_color='rgba(0,0,0,0.5)',
+                    method='caption',
+                    align='center',
+                    size=(TARGET_RESOLUTION[0]*0.9, None),
+                    stroke_color='black',
+                    stroke_width=1
+                ).set_duration(clip.duration)
+                txt_clip = txt_clip.set_position(('center', TARGET_RESOLUTION[1]*0.8))
+                clip = CompositeVideoClip([clip, txt_clip])
             except Exception as sub_error:
                 print(f"Subtitle error: {sub_error}")
                 txt_clip = TextClip(
                     fontsize=font_size,
                     color=CAPTION_COLOR,
                     align='center',
+                    size=(TARGET_RESOLUTION[0]*0.8, None)
+                ).set_position(('center', TARGET_RESOLUTION[1]*0.8)).set_duration(clip.duration)
                 clip = CompositeVideoClip([clip, txt_clip])
         clip = clip.set_audio(audio_clip)
         print(f"Error fixing policies: {e}")
         return False
 # ---------------- Main Video Generation Function ---------------- #
 def generate_video(user_input, resolution, caption_option):
     """Generate a video based on user input via Gradio."""
     selected_voice = VOICE_CHOICES[voice]
     voice_speed = v_speed
     font_size = caption_size
+    video_clip_probability = vclip_prob / 100
     bg_music_volume = bg_vol
     fps = video_fps
     preset = video_preset
         shutil.copy(music_file.name, target_path)
         print(f"Uploaded music saved as: {target_path}")
+    # First generate and return the script
+    print("Generating script from API...")
+    script = generate_script(user_input)
+    if not script:
+        return None, "Failed to generate script. Please try again."
+    # Then generate the video
+    video_path = generate_video(user_input, resolution, caption_option)
+    return video_path, script
 # Create the Gradio interface
 iface = gr.Interface(
         gr.Slider(0, 100, value=25, step=1, label="Video Clip Usage Probability (%)"),
         gr.Slider(0.0, 1.0, value=0.08, step=0.01, label="Background Music Volume"),
         gr.Slider(10, 60, value=30, step=1, label="Video FPS"),
+        gr.Dropdown(
+            choices=["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow"],
+            value="veryfast",
+            label="Export Preset"
+        ),
         gr.Slider(0.5, 1.5, value=0.9, step=0.05, label="Voice Speed"),
         gr.Slider(20, 100, value=45, step=1, label="Caption Font Size")
     ],
+    outputs=[
+        gr.Video(label="Generated Video"),
+        gr.Textbox(label="Generated Script", lines=10, interactive=False)
+    ],
     title="AI Documentary Video Generator",
+    description="Create short documentary videos with AI. Upload music, choose voice, and customize settings.",
+    allow_flagging="never"
 )
 # Launch the interface