Create app2.py
app2.py
ADDED
@@ -0,0 +1,582 @@
import gradio as gr
from PIL import Image
from moviepy.editor import VideoFileClip, AudioFileClip
import os
from openai import OpenAI
import subprocess
from pathlib import Path
import uuid
import tempfile
import shlex
import shutil
import logging  # logging for diagnostics

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Supported models configuration
MODELS = {
    "deepseek-ai/DeepSeek-V3": {
        "base_url": "https://api.deepseek.com/v1",
        "env_key": "DEEPSEEK_API_KEY",
    },
    "Qwen/Qwen2.5-Coder-32B-Instruct": {
        "base_url": "https://api-inference.huggingface.co/v1/",
        "env_key": "HF_TOKEN",
    },
    # Add more models here if needed
}

# Return the (model, config) pair for the first model whose API key is available
def get_first_available_key_config():
    for model, config in MODELS.items():
        if config["env_key"] in os.environ and os.environ[config["env_key"]]:
            logging.info(f"Using API key for model: {model}")
            return model, config
    return None

# Initialize client with the first available model configuration
initial_selection = get_first_available_key_config()
if initial_selection:
    initial_model_choice, initial_config = initial_selection  # Keep track of which model config was used initially
    client = OpenAI(
        base_url=initial_config["base_url"],
        api_key=os.environ[initial_config["env_key"]],
    )
else:
    logging.warning("No API keys found in environment variables for configured models. API calls will fail.")
    # Initialize with placeholder values or handle error as appropriate
    client = None  # Or raise an error, or use a default config if applicable
    initial_model_choice = list(MODELS.keys())[0]  # Default UI selection

allowed_medias = [
    ".png", ".jpg", ".webp", ".jpeg", ".tiff", ".bmp", ".gif", ".svg",
    ".mp3", ".wav", ".ogg", ".aac", ".flac",  # Added more audio types
    ".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv", ".webm", ".mpg", ".mpeg", ".m4v",
    ".3gp", ".3g2", ".3gpp",
]
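
# Illustrative note (not part of the original logic): for the client above to work,
# at least one of the keys named in MODELS must be present in the environment, e.g.
#   DEEPSEEK_API_KEY  -> deepseek-ai/DeepSeek-V3
#   HF_TOKEN          -> Qwen/Qwen2.5-Coder-32B-Instruct
# On a Hugging Face Space these would typically be set as repository secrets;
# locally, exporting either variable before launching is enough.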


def get_files_infos(files):
    results = []
    if not files:
        return results

    for file_obj in files:
        file_path = Path(file_obj.name)
        info = {"error": None}  # Initialize error as None
        try:
            info["size"] = os.path.getsize(file_path)
            # Sanitize filename by replacing spaces with underscores
            original_name = file_path.name
            info["name"] = original_name.replace(" ", "_")
            info["original_name"] = original_name  # Keep original name for user display if needed
            file_extension = file_path.suffix.lower()  # Use lower case for consistency

            if file_extension in (".mp4", ".avi", ".mkv", ".mov", ".webm", ".flv", ".wmv", ".mpg", ".mpeg", ".m4v", ".3gp", ".3g2", ".3gpp"):
                info["type"] = "video"
                try:
                    video = VideoFileClip(str(file_path))  # Use string path
                    info["duration"] = video.duration
                    info["dimensions"] = f"{video.size[0]}x{video.size[1]}"
                    if video.audio:
                        info["type"] = "video/audio"
                        info["audio_channels"] = video.audio.nchannels
                    video.close()
                except UnicodeDecodeError as ude:
                    info["error"] = f"Metadata decoding error ({ude}). Basic info might be missing."
                    logging.warning(f"UnicodeDecodeError processing video {info['name']}: {ude}")
                except Exception as e:
                    info["error"] = f"Error reading video metadata ({type(e).__name__})."
                    logging.warning(f"Error processing video {info['name']}: {e}", exc_info=True)  # Log full traceback

            elif file_extension in (".mp3", ".wav", ".ogg", ".aac", ".flac"):
                info["type"] = "audio"
                try:
                    audio = AudioFileClip(str(file_path))  # Use string path
                    info["duration"] = audio.duration
                    info["audio_channels"] = audio.nchannels
                    audio.close()
                except UnicodeDecodeError as ude:
                    info["error"] = f"Metadata decoding error ({ude}). Basic info might be missing."
                    logging.warning(f"UnicodeDecodeError processing audio {info['name']}: {ude}")
                except Exception as e:
                    info["error"] = f"Error reading audio metadata ({type(e).__name__})."
                    logging.warning(f"Error processing audio {info['name']}: {e}", exc_info=True)  # Log full traceback

            elif file_extension in (".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif", ".svg", ".webp"):
                info["type"] = "image"
                try:
                    with Image.open(file_path) as img:
                        info["dimensions"] = f"{img.size[0]}x{img.size[1]}"
                except Exception as e:
                    info["error"] = f"Error reading image metadata ({type(e).__name__})."
                    logging.warning(f"Error processing image {info['name']}: {e}", exc_info=True)

            else:
                info["type"] = "unknown"
                info["error"] = "Unsupported file type."
                logging.warning(f"Unsupported file type: {info['name']}")

        except OSError as ose:
            info["error"] = f"File system error: {ose}"
            logging.error(f"OSError accessing file {file_path}: {ose}", exc_info=True)
            if "name" not in info:
                info["name"] = file_path.name  # Ensure name is present even on early error
        except Exception as e:
            info["error"] = f"Unexpected error processing file: {e}"
            logging.error(f"Unexpected error processing file {file_path}: {e}", exc_info=True)
            if "name" not in info:
                info["name"] = file_path.name

        results.append(info)

    return results
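
# Illustrative sketch of a single entry get_files_infos() can produce for a short clip
# (filename and values are hypothetical; the keys match the code above):
#   {"error": None, "size": 10485760, "name": "my_clip.mp4", "original_name": "my clip.mp4",
#    "type": "video/audio", "duration": 12.5, "dimensions": "1920x1080", "audio_channels": 2}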


def get_completion(prompt, files_info, top_p, temperature, model_choice):
    global client  # Ensure we are using the global client object

    if client is None:
        raise gr.Error("API Client not initialized. Please check API key configuration.")

    # --- Create files info table ---
    files_info_string = "| Type | Name | Dimensions | Duration | Audio Channels | Status |\n"
    files_info_string += "|------|------|------------|----------|----------------|--------|\n"

    for file_info in files_info:
        # Use sanitized name for the command context
        name = file_info.get("name", "N/A")
        # Use original name or sanitized name for display in the table, decide which is better
        display_name = file_info.get("original_name", name)  # Prefer original name for user readability

        file_type = file_info.get("type", "N/A")
        dimensions = file_info.get("dimensions", "-")
        duration = f"{file_info.get('duration', '-'):.2f}s" if "duration" in file_info and file_info['duration'] is not None else "-"
        audio = f"{file_info.get('audio_channels', '-')}" if "audio_channels" in file_info and file_info['audio_channels'] is not None else "-"
        status = "Error" if file_info.get("error") else "OK"

        files_info_string += f"| {file_type} | {display_name} | {dimensions} | {duration} | {audio} | {status} |\n"
        if file_info.get("error"):
            files_info_string += f"| `Error Details` | `{file_info['error']}` | - | - | - | - |\n"  # Add error details row

    # --- Construct Messages ---
    messages = [
        {
            "role": "system",
            "content": """
You are a very experienced media engineer, controlling a UNIX terminal.
You are an FFMPEG expert with years of experience and multiple contributions to the FFMPEG project.

You are given:
(1) A list of media assets (video, audio, images) with details like name, type, dimensions, duration, and status (including potential errors reading metadata). Use the 'Name' column from the table as the input filename in your command.
(2) A user's objective describing a new video to be created from these assets.

Your objective is to generate the SIMPLEST POSSIBLE, SINGLE ffmpeg command to achieve the user's goal.

Key requirements:
- Output exactly ONE ffmpeg command.
- The command MUST be on a single line (no line breaks).
- Use the absolute minimum number of ffmpeg options needed.
- Avoid complex filter chains (`-filter_complex`) unless absolutely necessary. Prefer simpler filters, concatenation, scaling etc.
- The final output file MUST be named exactly `output.mp4`.
- Input filenames in the command MUST match the 'Name' column provided in the asset list (which uses underscores instead of spaces).
- If the user asks for waveform visualization: use `-filter_complex "[0:a]showwaves=s=WxH:mode=line,format=pix_fmts=yuv420p[v]" -map "[v]" -map 0:a?` (replace WxH with desired video dimensions), and ensure audio is mono (`-ac 1`). Assume full video width if not specified.
- For image sequences: Prefer `-framerate` and pattern matching (e.g., `img%03d.png`) if inputs suggest a sequence. Otherwise, use `-loop 1 -t duration` for single images.
- Handle potential errors noted in the asset list gracefully if possible (e.g., if metadata is missing, use sensible defaults or inform the user if the task is impossible).
- NEVER output multiple commands chained with `&&` or `;`.
- NEVER use wildcards like `*` in filenames. Use specific filenames from the list.

Remember: Simplicity and correctness are key. Generate only the ffmpeg command itself, no explanations.
""",
        },
        {
            "role": "user",
            "content": f"""Provide only the single-line FFMPEG shell command to achieve the objective.

AVAILABLE ASSETS LIST:

{files_info_string}

OBJECTIVE: {prompt}
Make sure the final output file is named exactly "output.mp4".

YOUR FFMPEG COMMAND:
""",
        },
    ]
    try:
        # Log the complete prompt for debugging
        logging.info("\n=== COMPLETE PROMPT ===\n")
        for msg in messages:
            logging.info(f"\n[{msg['role'].upper()}]:\n{msg['content']}")
        logging.info("=====================\n")

        if model_choice not in MODELS:
            raise ValueError(f"Model {model_choice} is not supported")

        model_config = MODELS[model_choice]
        api_key = os.environ.get(model_config["env_key"])

        if not api_key:
            raise gr.Error(f"API Key ({model_config['env_key']}) not found in environment variables for model {model_choice}.")

        # Update client configuration for the selected model
        client.base_url = model_config["base_url"]
        client.api_key = api_key
        # Determine model name based on provider convention
        model_name = "deepseek-chat" if "deepseek" in model_choice.lower() else model_choice

        completion = client.chat.completions.create(
            model=model_name,
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            max_tokens=2048,
        )
        content = completion.choices[0].message.content.strip()  # Strip leading/trailing whitespace

        # Extract command: prioritize code blocks, then raw content
        command = content
        if "```" in content:
            import re
            match = re.search(r"```(?:sh|bash)?\s*(ffmpeg.*?)\s*```", content, re.DOTALL | re.IGNORECASE)
            if match:
                command = match.group(1).strip()
                logging.info(f"Extracted command from code block: {command}")
            else:
                # Fallback if block markers exist but pattern fails
                command = content.replace("```sh", "").replace("```bash", "").replace("```", "").strip()
                logging.warning(f"Could not extract command reliably from code block, using fallback: {command}")
        else:
            logging.info(f"No code block detected, using raw content as command: {command}")

        # Basic validation: ensure it starts with ffmpeg
        if not command.lower().startswith("ffmpeg "):
            logging.error(f"Generated content does not start with ffmpeg: {command}")
            raise ValueError("AI did not generate a valid ffmpeg command.")

        # Remove potential leading/trailing quotes if the AI wrapped the whole command
        command = command.strip('\'"')

        return command

    except Exception as e:
        logging.error(f"API Error or processing error in get_completion: {e}", exc_info=True)
        # Re-raise specific Gradio error for UI display
        raise gr.Error(f"Failed to get command from AI: {e}")
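
# Illustrative sketch only: for an objective like "keep seconds 10 to 25 of my_clip.mp4",
# get_completion() is expected to return a single line such as
#   ffmpeg -ss 10 -t 15 -i my_clip.mp4 -c copy output.mp4
# The actual command depends entirely on the model's response; only the "starts with
# ffmpeg" check above is enforced here.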


def update(
    files,
    prompt,
    top_p=1,
    temperature=1,
    model_choice=initial_model_choice,  # Use the initial model as default
):
    global client  # Declared up front because 'client' may be re-initialized below

    if not files:
        raise gr.Error("Please upload at least one media file.")
    if not prompt:
        raise gr.Error("Please enter editing instructions (prompt).")
    if client is None and model_choice in MODELS:
        # Check again if client wasn't initialized but a model is chosen
        env_key = MODELS[model_choice]["env_key"]
        if env_key not in os.environ or not os.environ[env_key]:
            raise gr.Error(f"API Key ({env_key}) for the selected model '{model_choice}' is missing. Please set it as an environment variable.")
        # Try to re-initialize (or update if partially initialized)
        try:
            client = OpenAI(
                base_url=MODELS[model_choice]["base_url"],
                api_key=os.environ[env_key],
            )
            logging.info(f"API Client initialized/updated for model: {model_choice}")
        except Exception as e:
            raise gr.Error(f"Failed to initialize API client: {e}")

    # 1. Get File Infos and Check for Initial Errors
    files_info = get_files_infos(files)
    file_errors = [f"- {f.get('original_name', f.get('name', 'Unknown file'))}: {f['error']}" for f in files_info if f.get("error")]
    if file_errors:
        error_message = "Errors occurred while processing uploaded files:\n" + "\n".join(file_errors)
        logging.error(error_message)
        raise gr.Error(error_message)

    # 2. Validate File Sizes and Durations (optional, based on your constraints)
    for file_info in files_info:
        if file_info["size"] > 100 * 1024 * 1024:  # 100MB limit
            raise gr.Error(f"File '{file_info.get('original_name', file_info['name'])}' exceeds the 100MB size limit.")
        if file_info.get("type", "").startswith("video") and file_info.get("duration", 0) > 120:  # 2 minute limit for videos
            raise gr.Error(f"Video '{file_info.get('original_name', file_info['name'])}' exceeds the 2-minute duration limit.")

    # 3. Get FFMPEG Command from AI (with retries if needed)
    command_string = None
    attempts = 0
    max_attempts = 2  # Allow one retry
    last_exception = None

    while attempts < max_attempts:
        logging.info(f"Attempt {attempts + 1} to generate FFMPEG command.")
        try:
            command_string = get_completion(
                prompt, files_info, top_p, temperature, model_choice
            )
            logging.info(
                f"Generated FFMPEG command string:\n{command_string}\n"
            )
            break  # Success, exit loop
        except Exception as e:
            last_exception = e
            logging.warning(f"Attempt {attempts + 1} failed: {e}")
            attempts += 1
            if attempts >= max_attempts:
                logging.error("Max attempts reached. Failed to generate valid command.")
                raise gr.Error(f"Failed to generate FFMPEG command after {max_attempts} attempts. Last error: {last_exception}")

    # 4. Prepare Temporary Directory and Files
    # Use mkdtemp so the output file is still on disk when Gradio serves it;
    # the directory is removed explicitly only if an error occurs below.
    temp_dir = tempfile.mkdtemp()
    logging.info(f"Created temporary directory: {temp_dir}")
    try:
        copied_file_paths = {}
        for i, file_obj in enumerate(files):
            original_path = Path(file_obj.name)
            # Use the sanitized name consistent with files_info sent to AI
            sanitized_name = original_path.name.replace(" ", "_")
            destination_path = Path(temp_dir) / sanitized_name
            shutil.copy(original_path, destination_path)
            logging.info(f"Copied '{original_path.name}' to '{destination_path}'")
            copied_file_paths[i] = destination_path  # Keep track if needed

        # 5. Validate and Execute FFMPEG Command
        try:
            # Split command string safely for shell execution
            args = shlex.split(command_string)
        except ValueError as e:
            raise gr.Error(f"Generated command has syntax errors (e.g., unbalanced quotes): {e}\nCommand: {command_string}")

        if not args or args[0].lower() != "ffmpeg":
            raise gr.Error(f"Generated command does not start with 'ffmpeg'. Command: {command_string}")

        # IMPORTANT: Check and remove the placeholder 'output.mp4' if it's the last argument
        if args[-1] == "output.mp4":
            logging.info("Removing placeholder 'output.mp4' from the end of the command.")
            args.pop()
        elif "output.mp4" in args:
            logging.warning("Placeholder 'output.mp4' found but not at the end of the command. Execution might fail.")
            # Decide if you want to raise an error here or let ffmpeg handle it

        # Define the actual output path
        output_file_name = f"output_{uuid.uuid4()}.mp4"
        output_file_path = str(Path(temp_dir) / output_file_name)

        # Dry Run (optional but recommended)
        # Note: Dry run might fail for complex commands even if they are valid for execution
        # Consider making dry run optional or improving its robustness if needed
        # dry_run_args = args + ["-f", "null", "-"]
        # logging.info(f"Performing dry run: {' '.join(dry_run_args)}")
        # ffmpg_dry_run = subprocess.run(
        #     dry_run_args,
        #     stderr=subprocess.PIPE,
        #     stdout=subprocess.PIPE,  # Capture stdout too
        #     text=True,
        #     encoding='utf-8', errors='replace',  # Handle potential weird output
        #     cwd=temp_dir,
        #     timeout=30  # Add a timeout
        # )
        # if ffmpg_dry_run.returncode != 0:
        #     error_output = ffmpg_dry_run.stderr or ffmpg_dry_run.stdout
        #     logging.error(f"FFMPEG dry run failed. Return code: {ffmpg_dry_run.returncode}\nOutput:\n{error_output}")
        #     raise gr.Error(f"Generated FFMPEG command seems invalid (Dry Run Failed). Please check the command or try different instructions.\nError: {error_output[:500]}...")  # Show partial error

        # Final Execution
        final_command = args + ["-y", output_file_path]  # Add overwrite flag and final output path
        logging.info(f"Executing FFMPEG command: ffmpeg {' '.join(final_command[1:])}")

        try:
            process = subprocess.run(
                final_command,
                cwd=temp_dir,
                stderr=subprocess.PIPE,
                stdout=subprocess.PIPE,
                text=True,
                encoding='utf-8', errors='replace',
                check=True,  # Raise CalledProcessError if return code is non-zero
                timeout=300,  # 5-minute timeout; generous for the 100MB / 2-minute input limits
            )
            logging.info("FFMPEG command executed successfully.")
            logging.info(f"FFMPEG stdout:\n{process.stdout}")
            logging.info(f"FFMPEG stderr:\n{process.stderr}")

        except subprocess.CalledProcessError as e:
            error_output = e.stderr or e.stdout
            logging.error(f"FFMPEG execution failed! Return code: {e.returncode}\nCommand: {' '.join(e.cmd)}\nOutput:\n{error_output}")
            raise gr.Error(f"FFMPEG execution failed.\nCommand: ffmpeg {' '.join(final_command[1:])}\nError: {error_output[:1000]}...")  # Show more error context
        except subprocess.TimeoutExpired as e:
            logging.error(f"FFMPEG command timed out after {e.timeout} seconds.\nCommand: {' '.join(e.cmd)}")
            raise gr.Error(f"FFMPEG command timed out after {e.timeout} seconds. The operation might be too complex or the files too large.")

        # 6. Prepare Output
        # Display the command used (using the originally generated args + output)
        display_command_args = args + ["-y", "output.mp4"]  # Reconstruct for display
        generated_command_markdown = f"### Generated Command\n```bash\nffmpeg {' '.join(display_command_args[1:])}\n```"

        # Return the path to the generated video and the command markdown.
        # The temp directory is intentionally left in place on success so the output
        # file still exists when Gradio serves it.
        return output_file_path, gr.update(value=generated_command_markdown)

    except Exception as e:
        # Catch any other unexpected errors during setup or execution
        logging.error(f"Error in update function: {e}", exc_info=True)
        # Clean up the temp directory if an error occurred before returning
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise gr.Error(f"An unexpected error occurred: {e}")
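
# Worked example of the command rewriting above (illustrative): if the model returned
#   ffmpeg -i my_clip.mp4 -vf scale=1280:720 output.mp4
# the trailing "output.mp4" placeholder is popped off and the process actually run
# inside the temp directory is
#   ffmpeg -i my_clip.mp4 -vf scale=1280:720 -y output_<uuid>.mp4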


# --- Gradio Interface ---
with gr.Blocks(title="AI Video Editor - Edit with Natural Language", theme=gr.themes.Soft(), analytics_enabled=False) as demo:
    gr.Markdown(
        """
# 🏞 AI Video Editor: Your Smart Editing Assistant 🎬

Welcome to the AI Video Editor! This powerful tool leverages advanced AI models like **Qwen2.5-Coder** and **DeepSeek-V3** to understand your editing needs expressed in plain English. Simply upload your video, audio, or image files, describe the desired outcome, and watch as the AI generates the necessary **FFMPEG command** to create your final video.

**No complex software or coding required!** Perfect for quick edits, batch processing ideas, learning FFMPEG syntax, or automating simple video tasks. Whether you need to trim, merge, add text, change speed, apply filters, or combine different media types, just tell the AI what you want.

**Get started now:** Upload your files, type your instructions, and hit "Run"!
""",
        elem_id="header",
    )

    with gr.Accordion("📋 Usage Instructions & Examples", open=False):
        gr.Markdown(
            """
### How to Use AI Video Editor

1. **Upload Media Files**: Drag & drop or click to upload your video, image, or audio files (`.mp4`, `.mov`, `.mp3`, `.wav`, `.jpg`, `.png`, etc.) into the "Upload Media Files" area. Multiple files are allowed.
2. **Write Instructions**: Clearly describe the editing task in the "Instructions" textbox. Be specific for best results.
3. **(Optional) Adjust Parameters**:
    * **Model**: Choose the AI model you want to use. Different models might have varying strengths in understanding instructions or FFMPEG knowledge.
    * **Top-p & Temperature**: Fine-tune the AI's creativity and randomness. A lower temperature (e.g., 0.1) leads to more predictable results, while higher values increase randomness. Top-p controls the diversity of the AI's choices. The default values are usually good starting points.
4. **Generate**: Click the **"Run"** button. The AI will generate an FFMPEG command, which will then be executed to produce your video.
5. **Review**: The resulting video will appear in the "Generated Video" player. The exact FFMPEG command used will be shown below it.

### Example Instructions

* `Trim the video to keep only the segment from 10 seconds to 25 seconds.`
* `Concatenate video1.mp4 and video2.mp4 into a single video.`
* `Add a text overlay "My Vacation 2024" at the bottom center with a white font.`
* `Convert the input video to black and white.`
* `Create a slideshow from image1.png and image2.png, each shown for 5 seconds, with background_music.mp3.`
* `Resize the video to 1280x720 pixels.`
* `Speed up the video by 2x.`
* `Extract the audio track from the video as an mp3 file.` (Note: the current setup always produces an mp4; adjust the system prompt if other output formats are needed)
* `Create a picture-in-picture effect with small_video.mp4 overlaid on the top right corner of main_video.mp4.`
* `Generate a waveform visualization for the audio file.`

### Tips for Better Results

* **Be Specific**: Instead of "make it shorter," say "remove the first 5 seconds."
* **Use Filenames**: Refer to files by their names (e.g., `Combine intro.mp4 and main.mp4`). The AI uses names with spaces replaced by underscores.
* **Specify Details**: For text, mention font size, color, position (e.g., `top_left`, `center`, `bottom_right`). For effects, specify parameters (e.g., `fade duration of 1 second`).
* **Keep it Simple**: Aim for one primary goal per instruction. Complex multi-step edits might require breaking down the task or might exceed the AI's ability to generate a single, simple command.
"""
        )

    with gr.Row():
        with gr.Column(scale=1):
            user_files = gr.File(
                file_count="multiple",
                label="Upload Media Files",
                file_types=allowed_medias,
                # Consider adding interactive=True if needed, default is True
            )
            user_prompt = gr.Textbox(
                placeholder="e.g., 'Combine video1.mp4 and video2.mp4'",
                label="Instructions / Editing Objective",
                lines=3,
            )
            with gr.Accordion("Advanced Parameters", open=False):
                model_choice = gr.Radio(
                    choices=list(MODELS.keys()),
                    value=initial_model_choice,  # Use the determined initial model
                    label="Select AI Model",
                )
                top_p = gr.Slider(
                    minimum=0.0, maximum=1.0, value=0.7, step=0.05,
                    label="Top-p (Controls diversity)",
                )
                temperature = gr.Slider(
                    minimum=0.0, maximum=2.0, value=0.1, step=0.1,  # Max temp usually 1.0 or 2.0
                    label="Temperature (Controls randomness)",
                )
            btn = gr.Button("🚀 Run Edit", variant="primary")

        with gr.Column(scale=1):
            generated_video = gr.Video(
                label="Generated Video Output",
                interactive=False,  # User cannot change the video here
                include_audio=True,
            )
            generated_command = gr.Markdown(label="Generated FFMPEG Command")

    # Link button click to the update function
    btn.click(
        fn=update,
        inputs=[user_files, user_prompt, top_p, temperature, model_choice],
        outputs=[generated_video, generated_command],
        api_name="generate_edit",  # Optional: Define API endpoint name
    )

    # Examples Section
    gr.Examples(
        examples=[
            [
                ["./examples/Jiangnan_Rain.mp4"],  # Make sure this path exists or adjust
                "Add a text watermark 'Sample Video' to the upper right corner of the video with white text and semi-transparent background.",
                0.7, 0.1, list(MODELS.keys())[0],
            ],
            [
                ["./examples/Jiangnan_Rain.mp4"],
                "Cut the video to extract only the middle 30 seconds (starting at 00:30 and ending at 01:00).",
                0.7, 0.1, list(MODELS.keys())[min(1, len(MODELS) - 1)],  # Use second model if available
            ],
            [
                ["./examples/Lotus_Pond01.mp4"],  # Make sure this path exists or adjust
                "Convert the video to black and white (grayscale) while maintaining the original audio.",
                0.7, 0.1, list(MODELS.keys())[0],
            ],
            [
                ["./examples/Lotus_Pond01.mp4"],
                "Create a slow-motion version of the video by reducing the speed to 0.5x.",
                0.7, 0.1, list(MODELS.keys())[min(1, len(MODELS) - 1)],
            ],
            [
                ["./examples/image1.jpg", "./examples/image2.png", "./examples/background.mp3"],  # Example with images and audio
                "Create a video slideshow from image1.jpg and image2.png, showing each image for 4 seconds. Use background.mp3 as the audio track.",
                0.7, 0.1, list(MODELS.keys())[0],
            ],
        ],
        inputs=[user_files, user_prompt, top_p, temperature, model_choice],
        outputs=[generated_video, generated_command],
        fn=update,
        cache_examples=False,  # Set to True if example files are stable and processing is slow
        label="Example Use Cases (Click to Run)",
        run_on_click=True,
    )

# Removed the footer markdown about pull requests

# --- Launch the App ---
# Consider adding concurrency limits based on your hosting capabilities
demo.queue(default_concurrency_limit=50)
# demo.launch(show_api=False, server_name="0.0.0.0")  # Allow external access if needed
demo.launch(show_api=False)  # Default launch for local/Hugging Face Spaces; analytics disabled via gr.Blocks above
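
# Illustrative local run (assumptions: gradio, moviepy, openai and Pillow installed,
# an ffmpeg binary on PATH, and one of the API keys listed in MODELS set in the environment):
#   python app2.py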