innoai committed on
Commit
19f3ed7
·
verified ·
1 Parent(s): a7c2e85

Create app2.py

Browse files
Files changed (1) hide show
  1. app2.py +582 -0
app2.py ADDED
@@ -0,0 +1,582 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Third-party and standard-library imports for the AI video editor app.
import gradio as gr
from PIL import Image
from moviepy.editor import VideoFileClip, AudioFileClip
import os
from openai import OpenAI
import subprocess
from pathlib import Path
import uuid
import tempfile
import shlex
import shutil
import logging  # add logging support

# Configure logging: timestamped INFO-level messages to stderr.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
16
+
17
# Supported models configuration.
# Maps a model identifier (as shown in the UI) to the OpenAI-compatible API
# base URL and the environment variable expected to hold its API key.
MODELS = {
    "deepseek-ai/DeepSeek-V3": {
        "base_url": "https://api.deepseek.com/v1",
        "env_key": "DEEPSEEK_API_KEY",
    },
    "Qwen/Qwen2.5-Coder-32B-Instruct": {
        "base_url": "https://api-inference.huggingface.co/v1/",
        "env_key": "HF_TOKEN",
    },
    # Add more models here if needed
}
29
+
30
+ # Function to get the first available API key
31
def get_first_available_key_config():
    """Return the config dict of the first model whose API key is set.

    Iterates ``MODELS`` in declaration order and returns the first entry
    whose environment variable exists AND is non-empty; returns ``None``
    when no configured key is available.
    """
    for model, config in MODELS.items():
        # Single os.environ lookup instead of membership test + subscript.
        if os.environ.get(config["env_key"]):
            logging.info(f"Using API key for model: {model}")
            return config
    return None
37
+
38
# Initialize the OpenAI-compatible client with the first model whose API key
# is present in the environment.
initial_config = get_first_available_key_config()
if initial_config:
    client = OpenAI(
        base_url=initial_config["base_url"],
        api_key=os.environ[initial_config["env_key"]],
    )
    # Fix: track the model the found config actually belongs to. The original
    # always picked the FIRST entry of MODELS, which could disagree with the
    # config returned above (e.g. when only the second model's key is set).
    initial_model_choice = next(
        model for model, config in MODELS.items() if config is initial_config
    )
else:
    logging.warning("No API keys found in environment variables for configured models. API calls will fail.")
    # Leave the client unset; update() re-checks and re-initializes on demand.
    client = None
    initial_model_choice = next(iter(MODELS))  # default UI selection only
51
+
52
# File extensions accepted by the upload widget (images, audio, video).
# Keep in sync with the extension checks inside get_files_infos().
allowed_medias = [
    ".png", ".jpg", ".webp", ".jpeg", ".tiff", ".bmp", ".gif", ".svg",
    ".mp3", ".wav", ".ogg", ".aac", ".flac",  # common audio types
    ".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv", ".webm", ".mpg", ".mpeg", ".m4v",
    ".3gp", ".3g2", ".3gpp",
]
58
+
59
+
60
# Extension groups used to classify uploads; mirror `allowed_medias` above.
_VIDEO_EXTS = (".mp4", ".avi", ".mkv", ".mov", ".webm", ".flv", ".wmv",
               ".mpg", ".mpeg", ".m4v", ".3gp", ".3g2", ".3gpp")
_AUDIO_EXTS = (".mp3", ".wav", ".ogg", ".aac", ".flac")
_IMAGE_EXTS = (".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif", ".svg", ".webp")


def _probe_video(file_path, info):
    """Fill ``info`` with video metadata (duration, dimensions, audio)."""
    info["type"] = "video"
    video = None
    try:
        video = VideoFileClip(str(file_path))  # moviepy expects a string path
        info["duration"] = video.duration
        info["dimensions"] = f"{video.size[0]}x{video.size[1]}"
        if video.audio:
            info["type"] = "video/audio"
            info["audio_channels"] = video.audio.nchannels
    except UnicodeDecodeError as ude:
        info["error"] = f"Metadata decoding error ({ude}). Basic info might be missing."
        logging.warning(f"UnicodeDecodeError processing video {info['name']}: {ude}")
    except Exception as e:
        info["error"] = f"Error reading video metadata ({type(e).__name__})."
        logging.warning(f"Error processing video {info['name']}: {e}", exc_info=True)
    finally:
        # Fix: the original closed the clip only on full success, leaking the
        # underlying ffmpeg reader process when a metadata read failed mid-way.
        if video is not None:
            try:
                video.close()
            except Exception:
                pass


def _probe_audio(file_path, info):
    """Fill ``info`` with audio metadata (duration, channel count)."""
    info["type"] = "audio"
    audio = None
    try:
        audio = AudioFileClip(str(file_path))  # moviepy expects a string path
        info["duration"] = audio.duration
        info["audio_channels"] = audio.nchannels
    except UnicodeDecodeError as ude:
        info["error"] = f"Metadata decoding error ({ude}). Basic info might be missing."
        logging.warning(f"UnicodeDecodeError processing audio {info['name']}: {ude}")
    except Exception as e:
        info["error"] = f"Error reading audio metadata ({type(e).__name__})."
        logging.warning(f"Error processing audio {info['name']}: {e}", exc_info=True)
    finally:
        # Same resource-safety fix as _probe_video: always release the reader.
        if audio is not None:
            try:
                audio.close()
            except Exception:
                pass


def _probe_image(file_path, info):
    """Fill ``info`` with image dimensions."""
    info["type"] = "image"
    try:
        with Image.open(file_path) as img:
            info["dimensions"] = f"{img.size[0]}x{img.size[1]}"
    except Exception as e:
        info["error"] = f"Error reading image metadata ({type(e).__name__})."
        logging.warning(f"Error processing image {info['name']}: {e}", exc_info=True)


def get_files_infos(files):
    """Return a metadata dict per uploaded file.

    Each dict carries: ``name`` (sanitized, spaces -> underscores, the name
    shown to the AI), ``original_name``, ``size``, ``type``, and — when
    readable — ``duration``, ``dimensions``, ``audio_channels``. Per-file
    problems are reported via the ``error`` key instead of being raised, so
    one bad upload does not abort probing of the others.
    """
    results = []
    if not files:
        return results

    for file_obj in files:
        file_path = Path(file_obj.name)
        info = {"error": None}  # "error" stays None on success
        try:
            info["size"] = os.path.getsize(file_path)
            original_name = file_path.name
            # Sanitize filename: the AI-facing name uses underscores so the
            # generated shell command needs no quoting around spaces.
            info["name"] = original_name.replace(" ", "_")
            info["original_name"] = original_name  # kept for user display
            file_extension = file_path.suffix.lower()

            if file_extension in _VIDEO_EXTS:
                _probe_video(file_path, info)
            elif file_extension in _AUDIO_EXTS:
                _probe_audio(file_path, info)
            elif file_extension in _IMAGE_EXTS:
                _probe_image(file_path, info)
            else:
                info["type"] = "unknown"
                info["error"] = "Unsupported file type."
                logging.warning(f"Unsupported file type: {info['name']}")

        except OSError as ose:
            info["error"] = f"File system error: {ose}"
            logging.error(f"OSError accessing file {file_path}: {ose}", exc_info=True)
            if "name" not in info:
                info["name"] = file_path.name  # ensure a name even on early failure
        except Exception as e:
            info["error"] = f"Unexpected error processing file: {e}"
            logging.error(f"Unexpected error processing file {file_path}: {e}", exc_info=True)
            if "name" not in info:
                info["name"] = file_path.name

        results.append(info)

    return results
133
+
134
+
135
def _format_files_table(files_info):
    """Render the asset list as a markdown table for the LLM prompt.

    Uses the sanitized ``name`` for command context but prefers
    ``original_name`` for display; files with probe errors get an extra
    details row.
    """
    table = "| Type | Name | Dimensions | Duration | Audio Channels | Status |\n"
    table += "|------|------|------------|----------|----------------|--------|\n"

    for file_info in files_info:
        name = file_info.get("name", "N/A")
        display_name = file_info.get("original_name", name)

        file_type = file_info.get("type", "N/A")
        dimensions = file_info.get("dimensions", "-")
        duration = f"{file_info.get('duration', '-'):.2f}s" if "duration" in file_info and file_info['duration'] is not None else "-"
        audio = f"{file_info.get('audio_channels', '-')}" if "audio_channels" in file_info and file_info['audio_channels'] is not None else "-"
        status = "Error" if file_info.get("error") else "OK"

        table += f"| {file_type} | {display_name} | {dimensions} | {duration} | {audio} | {status} |\n"
        if file_info.get("error"):
            table += f"| `Error Details` | `{file_info['error']}` | - | - | - | - |\n"

    return table


def _extract_ffmpeg_command(content):
    """Extract a single ffmpeg command from the model's reply.

    Prefers a fenced ```sh/```bash code block; falls back to stripping the
    fence markers, then to the raw reply. Raises ValueError when the result
    does not start with ``ffmpeg``.
    """
    import re  # local import: only needed on this path

    command = content
    if "```" in content:
        match = re.search(r"```(?:sh|bash)?\s*(ffmpeg.*?)\s*```", content, re.DOTALL | re.IGNORECASE)
        if match:
            command = match.group(1).strip()
            logging.info(f"Extracted command from code block: {command}")
        else:
            # Fence markers present but the pattern failed: strip them blindly.
            command = content.replace("```sh", "").replace("```bash", "").replace("```", "").strip()
            logging.warning(f"Could not extract command reliably from code block, using fallback: {command}")
    else:
        logging.info(f"No code block detected, using raw content as command: {command}")

    # Basic validation: ensure it starts with ffmpeg.
    if not command.lower().startswith("ffmpeg "):
        logging.error(f"Generated content does not start with ffmpeg: {command}")
        raise ValueError("AI did not generate a valid ffmpeg command.")

    # Remove potential surrounding quotes if the AI wrapped the whole command.
    return command.strip('\'"')


def get_completion(prompt, files_info, top_p, temperature, model_choice):
    """Ask the selected LLM for a single-line ffmpeg command.

    Builds a system+user prompt from the asset table and the user's
    objective, reconfigures the shared ``client`` for ``model_choice``,
    and returns the validated command string.

    Raises gr.Error on configuration or API failure.
    """
    global client  # the shared client is reconfigured in place per call

    if client is None:
        raise gr.Error("API Client not initialized. Please check API key configuration.")

    files_info_string = _format_files_table(files_info)

    messages = [
        {
            "role": "system",
            "content": """
You are a very experienced media engineer, controlling a UNIX terminal.
You are an FFMPEG expert with years of experience and multiple contributions to the FFMPEG project.

You are given:
(1) A list of media assets (video, audio, images) with details like name, type, dimensions, duration, and status (including potential errors reading metadata). Use the 'Name' column from the table as the input filename in your command.
(2) A user's objective describing a new video to be created from these assets.

Your objective is to generate the SIMPLEST POSSIBLE, SINGLE ffmpeg command to achieve the user's goal.

Key requirements:
- Output exactly ONE ffmpeg command.
- The command MUST be on a single line (no line breaks).
- Use the absolute minimum number of ffmpeg options needed.
- Avoid complex filter chains (`-filter_complex`) unless absolutely necessary. Prefer simpler filters, concatenation, scaling etc.
- The final output file MUST be named exactly `output.mp4`.
- Input filenames in the command MUST match the 'Name' column provided in the asset list (which uses underscores instead of spaces).
- If the user asks for waveform visualization: use `-filter_complex "[0:a]showwaves=s=WxH:mode=line,format=pix_fmts=yuv420p[v]" -map "[v]" -map 0:a?` (replace WxH with desired video dimensions), and ensure audio is mono (`-ac 1`). Assume full video width if not specified.
- For image sequences: Prefer `-framerate` and pattern matching (e.g., `img%03d.png`) if inputs suggest a sequence. Otherwise, use `-loop 1 -t duration` for single images.
- Handle potential errors noted in the asset list gracefully if possible (e.g., if metadata is missing, use sensible defaults or inform the user if the task is impossible).
- NEVER output multiple commands chained with `&&` or `;`.
- NEVER use wildcards like `*` in filenames. Use specific filenames from the list.

Remember: Simplicity and correctness are key. Generate only the ffmpeg command itself, no explanations.
""",
        },
        {
            "role": "user",
            "content": f"""Provide only the single-line FFMPEG shell command to achieve the objective.

AVAILABLE ASSETS LIST:

{files_info_string}

OBJECTIVE: {prompt}
Make sure the final output file is named exactly "output.mp4".

YOUR FFMPEG COMMAND:
""",
        },
    ]
    try:
        # Log the complete prompt for debugging.
        logging.info("\n=== COMPLETE PROMPT ===\n")
        for msg in messages:
            logging.info(f"\n[{msg['role'].upper()}]:\n{msg['content']}")
        logging.info("=====================\n")

        if model_choice not in MODELS:
            raise ValueError(f"Model {model_choice} is not supported")

        model_config = MODELS[model_choice]
        api_key = os.environ.get(model_config["env_key"])

        if not api_key:
            raise gr.Error(f"API Key ({model_config['env_key']}) not found in environment variables for model {model_choice}.")

        # Repoint the shared client at the selected provider.
        client.base_url = model_config["base_url"]
        client.api_key = api_key
        # Provider naming convention: DeepSeek's endpoint expects "deepseek-chat".
        model_name = "deepseek-chat" if "deepseek" in model_choice.lower() else model_choice

        completion = client.chat.completions.create(
            model=model_name,
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            max_tokens=2048,
        )
        content = completion.choices[0].message.content.strip()

        return _extract_ffmpeg_command(content)

    except gr.Error:
        # Fix: the original blanket `except Exception` also caught gr.Error
        # (e.g. the missing-API-key message) and re-wrapped it in a generic
        # "Failed to get command" error, hiding the real cause from the user.
        raise
    except Exception as e:
        logging.error(f"API Error or processing error in get_completion: {e}", exc_info=True)
        # Re-raise a Gradio error for UI display.
        raise gr.Error(f"Failed to get command from AI: {e}")
266
+
267
+
268
def update(
    files,
    prompt,
    top_p=1,
    temperature=1,
    model_choice=initial_model_choice,  # default to the initially detected model
):
    """Validate inputs, obtain an ffmpeg command from the AI, execute it,
    and return ``(output_video_path, markdown_update)`` for the UI.

    Raises gr.Error for every user-facing failure: missing files/prompt,
    missing API key, oversized/too-long media, invalid generated command,
    ffmpeg failure, or timeout.
    """
    # Fix: `global client` must precede every use of `client` in this
    # function. The original declared it mid-function AFTER reading `client`,
    # which is a SyntaxError ("name 'client' is used prior to global
    # declaration") and prevented the module from importing at all.
    global client

    if not files:
        raise gr.Error("Please upload at least one media file.")
    if not prompt:
        raise gr.Error("Please enter editing instructions (prompt).")
    if client is None and model_choice in MODELS:
        # Client was never initialized at import time; try again now that a
        # model has been explicitly chosen.
        env_key = MODELS[model_choice]["env_key"]
        if env_key not in os.environ or not os.environ[env_key]:
            raise gr.Error(f"API Key ({env_key}) for the selected model '{model_choice}' is missing. Please set it as an environment variable.")
        try:
            client = OpenAI(
                base_url=MODELS[model_choice]["base_url"],
                api_key=os.environ[env_key],
            )
            logging.info(f"API Client initialized/updated for model: {model_choice}")
        except Exception as e:
            raise gr.Error(f"Failed to initialize API client: {e}")

    # 1. Probe uploads and surface any per-file metadata errors immediately.
    files_info = get_files_infos(files)
    file_errors = [f"- {f.get('original_name', f.get('name', 'Unknown file'))}: {f['error']}" for f in files_info if f.get("error")]
    if file_errors:
        error_message = "Errors occurred while processing uploaded files:\n" + "\n".join(file_errors)
        logging.error(error_message)
        raise gr.Error(error_message)

    # 2. Enforce size and duration limits.
    for file_info in files_info:
        # Fix: the original compared against 1000 MB while both the comment
        # and the user-facing message said 100 MB.
        if file_info["size"] > 100 * 1024 * 1024:  # 100MB limit
            raise gr.Error(f"File '{file_info.get('original_name', file_info['name'])}' exceeds the 100MB size limit.")
        if file_info.get("type", "").startswith("video") and file_info.get("duration", 0) > 120:  # 2 minute limit
            raise gr.Error(f"Video '{file_info.get('original_name', file_info['name'])}' exceeds the 2-minute duration limit.")

    # 3. Ask the AI for an ffmpeg command, allowing one retry.
    command_string = None
    attempts = 0
    max_attempts = 2
    last_exception = None

    while attempts < max_attempts:
        logging.info(f"Attempt {attempts + 1} to generate FFMPEG command.")
        try:
            command_string = get_completion(
                prompt, files_info, top_p, temperature, model_choice
            )
            logging.info(
                f"Generated FFMPEG command string:\n{command_string}\n"
            )
            break  # success
        except Exception as e:
            last_exception = e
            logging.warning(f"Attempt {attempts + 1} failed: {e}")
            attempts += 1
            if attempts >= max_attempts:
                logging.error("Max attempts reached. Failed to generate valid command.")
                raise gr.Error(f"Failed to generate FFMPEG command after {max_attempts} attempts. Last error: {last_exception}")

    # 4. Work in a persistent temp dir. Fix: the original used
    # tempfile.TemporaryDirectory(), whose finalizer deletes the directory as
    # soon as the object is garbage-collected after return — i.e. the output
    # file could vanish before Gradio serves it. mkdtemp persists; we remove
    # it ourselves only on error.
    temp_dir = tempfile.mkdtemp()
    logging.info(f"Created temporary directory: {temp_dir}")
    try:
        for file_obj in files:
            original_path = Path(file_obj.name)
            # Copy under the sanitized (space-free) name the AI saw in the
            # asset table, so the generated command's filenames resolve.
            sanitized_name = original_path.name.replace(" ", "_")
            destination_path = Path(temp_dir) / sanitized_name
            shutil.copy(original_path, destination_path)
            logging.info(f"Copied '{original_path.name}' to '{destination_path}'")

        # 5. Validate and execute the generated command.
        try:
            args = shlex.split(command_string)
        except ValueError as e:
            raise gr.Error(f"Generated command has syntax errors (e.g., unbalanced quotes): {e}\nCommand: {command_string}")

        if not args or args[0].lower() != "ffmpeg":
            raise gr.Error(f"Generated command does not start with 'ffmpeg'. Command: {command_string}")

        # Replace the placeholder 'output.mp4' with our unique output path.
        if args[-1] == "output.mp4":
            logging.info("Removing placeholder 'output.mp4' from the end of the command.")
            args.pop()
        elif "output.mp4" in args:
            logging.warning("Placeholder 'output.mp4' found but not at the end of the command. Execution might fail.")

        # Unique output name avoids collisions across concurrent requests.
        output_file_name = f"output_{uuid.uuid4()}.mp4"
        output_file_path = str(Path(temp_dir) / output_file_name)

        final_command = args + ["-y", output_file_path]  # -y = overwrite
        logging.info(f"Executing FFMPEG command: ffmpeg {' '.join(final_command[1:])}")

        try:
            process = subprocess.run(
                final_command,
                cwd=temp_dir,
                stderr=subprocess.PIPE,
                stdout=subprocess.PIPE,
                text=True,
                encoding='utf-8', errors='replace',
                check=True,  # raise CalledProcessError on non-zero exit
                # Fix: the original passed 3000 (~50 min) while its own
                # comment said 5 minutes; 300 s matches the intent.
                timeout=300,
            )
            logging.info("FFMPEG command executed successfully.")
            logging.info(f"FFMPEG stdout:\n{process.stdout}")
            logging.info(f"FFMPEG stderr:\n{process.stderr}")

        except subprocess.CalledProcessError as e:
            error_output = e.stderr or e.stdout
            logging.error(f"FFMPEG execution failed! Return code: {e.returncode}\nCommand: {' '.join(e.cmd)}\nOutput:\n{error_output}")
            raise gr.Error(f"FFMPEG execution failed.\nCommand: ffmpeg {' '.join(final_command[1:])}\nError: {error_output[:1000]}...")
        except subprocess.TimeoutExpired as e:
            logging.error(f"FFMPEG command timed out after {e.timeout} seconds.\nCommand: {' '.join(e.cmd)}")
            raise gr.Error(f"FFMPEG command timed out after {e.timeout} seconds. The operation might be too complex or the files too large.")

        # 6. Prepare the UI output: the video path plus the command markdown
        # (shown with the generic 'output.mp4' placeholder for readability).
        display_command_args = args + ["-y", "output.mp4"]
        generated_command_markdown = f"### Generated Command\n```bash\nffmpeg {' '.join(display_command_args[1:])}\n```"

        return output_file_path, gr.update(value=generated_command_markdown)

    except gr.Error:
        # Fix: let user-facing errors through unwrapped (the original
        # re-wrapped them as "An unexpected error occurred"), but still
        # clean up the working directory.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise
    except Exception as e:
        logging.error(f"Error in update function: {e}", exc_info=True)
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise gr.Error(f"An unexpected error occurred: {e}")
439
+
440
+ # No finally block needed for temp_dir_obj.cleanup() if using 'with TemporaryDirectory()'
441
+ # If not using 'with', ensure cleanup happens in try/except/finally
442
+
443
+
444
# --- Gradio Interface ---
# Top-level UI definition: header, collapsible help, upload + prompt inputs,
# advanced model/sampling parameters, output video player, and examples.
with gr.Blocks(title="AI Video Editor - Edit with Natural Language", theme=gr.themes.Soft()) as demo:
    # Page header / introduction.
    gr.Markdown(
        """
# 🏞 AI Video Editor: Your Smart Editing Assistant 🎬

Welcome to the AI Video Editor! This powerful tool leverages advanced AI models like **Qwen2.5-Coder** and **DeepSeek-V3** to understand your editing needs expressed in plain English. Simply upload your video, audio, or image files, describe the desired outcome, and watch as the AI generates the necessary **FFMPEG command** to create your final video.

**No complex software or coding required!** Perfect for quick edits, batch processing ideas, learning FFMPEG syntax, or automating simple video tasks. Whether you need to trim, merge, add text, change speed, apply filters, or combine different media types, just tell the AI what you want.

**Get started now:** Upload your files, type your instructions, and hit "Run"!
""",
        elem_id="header",
    )

    # Collapsible usage instructions and example prompts.
    with gr.Accordion("📋 Usage Instructions & Examples", open=False):
        gr.Markdown(
            """
### How to Use AI Video Editor

1. **Upload Media Files**: Drag & drop or click to upload your video, image, or audio files (`.mp4`, `.mov`, `.mp3`, `.wav`, `.jpg`, `.png`, etc.) into the "Media files" area. Multiple files are allowed.
2. **Write Instructions**: Clearly describe the editing task in the "Instructions" textbox. Be specific for best results.
3. **(Optional) Adjust Parameters**:
* **Model**: Choose the AI model you want to use. Different models might have varying strengths in understanding instructions or FFMPEG knowledge.
* **Top-p & Temperature**: Fine-tune the AI's creativity and randomness. Lower temperature (e.g., 0.1) leads to more predictable results, higher values increase randomness. Top-p controls the diversity of the AI's choices. Default values are usually good starting points.
4. **Generate**: Click the **"Run"** button. The AI will generate an FFMPEG command, which will then be executed to produce your video.
5. **Review**: The resulting video will appear in the "Generated Video" player. The exact FFMPEG command used will be shown below it.

### Example Instructions

* `Trim the video to keep only the segment from 10 seconds to 25 seconds.`
* `Concatenate video1.mp4 and video2.mp4 into a single video.`
* `Add a text overlay "My Vacation 2024" at the bottom center with a white font.`
* `Convert the input video to black and white.`
* `Create a slideshow from image1.png and image2.png, each shown for 5 seconds, with background_music.mp3.`
* `Resize the video to 1280x720 pixels.`
* `Speed up the video by 2x.`
* `Extract the audio track from the video as an mp3 file.` (Note: Current setup forces mp4 output, adjust system prompt if other outputs needed)
* `Create a picture-in-picture effect with small_video.mp4 overlaid on the top right corner of main_video.mp4.`
* `Generate a waveform visualization for the audio file.`

### Tips for Better Results

* **Be Specific**: Instead of "make it shorter," say "remove the first 5 seconds."
* **Use Filenames**: Refer to files by their names (e.g., `Combine intro.mp4 and main.mp4`). The AI uses names with spaces replaced by underscores.
* **Specify Details**: For text, mention font size, color, position (e.g., `top_left`, `center`, `bottom_right`). For effects, specify parameters (e.g., `fade duration of 1 second`).
* **Keep it Simple**: Aim for one primary goal per instruction. Complex multi-step edits might require breaking down the task or might exceed the AI's ability to generate a single, simple command.
"""
        )

    # Two-column layout: inputs on the left, generated output on the right.
    with gr.Row():
        with gr.Column(scale=1):
            user_files = gr.File(
                file_count="multiple",
                label="Upload Media Files",
                file_types=allowed_medias,
                # interactive defaults to True for gr.File
            )
            user_prompt = gr.Textbox(
                placeholder="e.g., 'Combine video1.mp4 and video2.mp4'",
                label="Instructions / Editing Objective",
                lines=3,
            )
            with gr.Accordion("Advanced Parameters", open=False):
                model_choice = gr.Radio(
                    choices=list(MODELS.keys()),
                    value=initial_model_choice,  # model detected at startup
                    label="Select AI Model",
                )
                top_p = gr.Slider(
                    minimum=0.0, maximum=1.0, value=0.7, step=0.05,
                    label="Top-p (Controls diversity)",
                )
                temperature = gr.Slider(
                    minimum=0.0, maximum=2.0, value=0.1, step=0.1,  # max temp usually 1.0 or 2.0
                    label="Temperature (Controls randomness)",
                )
            btn = gr.Button("🚀 Run Edit", variant="primary")

        with gr.Column(scale=1):
            generated_video = gr.Video(
                label="Generated Video Output",
                interactive=False,  # display-only; user cannot change the video here
                include_audio=True,
            )
            generated_command = gr.Markdown(label="Generated FFMPEG Command")

    # Wire the button to the main pipeline (validate -> AI -> ffmpeg -> video).
    btn.click(
        fn=update,
        inputs=[user_files, user_prompt, top_p, temperature, model_choice],
        outputs=[generated_video, generated_command],
        api_name="generate_edit"  # optional API endpoint name
    )

    # Clickable examples; the referenced ./examples/* files must exist for
    # these to run successfully.
    gr.Examples(
        examples=[
            [
                ["./examples/Jiangnan_Rain.mp4"],  # make sure this path exists or adjust
                "Add a text watermark 'Sample Video' to the upper right corner of the video with white text and semi-transparent background.",
                0.7, 0.1, list(MODELS.keys())[0],
            ],
            [
                ["./examples/Jiangnan_Rain.mp4"],
                "Cut the video to extract only the middle 30 seconds (starting at 00:30 and ending at 01:00).",
                0.7, 0.1, list(MODELS.keys())[min(1, len(MODELS)-1)],  # second model if available
            ],
            [
                ["./examples/Lotus_Pond01.mp4"],  # make sure this path exists or adjust
                "Convert the video to black and white (grayscale) while maintaining the original audio.",
                0.7, 0.1, list(MODELS.keys())[0],
            ],
            [
                ["./examples/Lotus_Pond01.mp4"],
                "Create a slow-motion version of the video by reducing the speed to 0.5x.",
                0.7, 0.1, list(MODELS.keys())[min(1, len(MODELS)-1)],
            ],
            [
                ["./examples/image1.jpg", "./examples/image2.png", "./examples/background.mp3"],  # images + audio
                "Create a video slideshow from image1.jpg and image2.png, showing each image for 4 seconds. Use background.mp3 as the audio track.",
                0.7, 0.1, list(MODELS.keys())[0],
            ],
        ],
        inputs=[user_files, user_prompt, top_p, temperature, model_choice],
        outputs=[generated_video, generated_command],
        fn=update,
        cache_examples=False,  # set True only if example files are stable and processing is slow
        label="Example Use Cases (Click to Run)",
        run_on_click=True,
    )


# --- Launch the App ---
# Concurrency limit bounds simultaneous ffmpeg/API jobs; tune to the host.
demo.queue(default_concurrency_limit=50)
# demo.launch(show_api=False, server_name="0.0.0.0")  # allow external access if needed
# NOTE(review): Gradio documents `analytics_enabled` on the Blocks
# constructor; confirm this Gradio version's launch() accepts
# `enable_analytics` — otherwise this raises a TypeError at startup.
demo.launch(show_api=False,enable_analytics=False)  # default launch for local/Hugging Face Spaces