|
import gradio as gr |
|
import openai |
|
import ffmpeg |
|
import os |
|
import uuid |
|
import base64 |
|
import requests |
|
import tempfile |
|
import shutil |
|
import re |
|
import time |
|
import concurrent.futures |
|
from pathlib import Path |
|
from dotenv import load_dotenv |
|
from huggingface_hub import SpaceStage |
|
from huggingface_hub.utils import HfHubHTTPError |
|
|
|
|
|
# Optional Hugging Face Spaces GPU support.
# When the `spaces` package can be imported, a trivial GPU-decorated function
# is defined and invoked once; when it cannot, the app falls back to running
# without GPU acceleration and records that in `use_gpu`.
try:
    from spaces import GPU

    use_gpu = True

    @GPU
    def get_gpu():
        # Body is intentionally trivial; only the decorated call matters.
        return True

    get_gpu()
except ImportError:
    use_gpu = False
    print("Running without GPU acceleration")
|
|
|
|
|
# Load variables from a .env file (when one exists) into the environment
# before any configuration is read.
load_dotenv()

# Server-side OpenAI key used when the user does not supply one in the UI;
# an empty string means "no default configured".
DEFAULT_API_KEY = os.environ.get("OPENAI_API_KEY", "")
|
|
|
def process_frame(frame_path, style_prompt, api_key):
    """Restyle a single extracted frame in place using GPT-4o and DALL-E 3.

    The frame is read from ``frame_path`` and sent to GPT-4o, which returns a
    textual description. That description, combined with ``style_prompt``, is
    sent to DALL-E 3; the generated image is downloaded and written back to
    ``frame_path``, overwriting the original frame.

    Args:
        frame_path: Path of the PNG frame on disk. Overwritten on success,
            left untouched on failure.
        style_prompt: Additional style instructions appended to the DALL-E
            prompt.
        api_key: OpenAI API key used for both the analysis and the image
            generation request.

    Returns:
        True when the stylized image was saved to ``frame_path``; False on any
        failure. Errors are printed with a traceback and never raised.
    """
    frame_name = os.path.basename(frame_path)

    try:
        with open(frame_path, "rb") as img_file:
            encoded_frame = base64.b64encode(img_file.read()).decode("utf-8")

        # Use a client scoped to this call instead of assigning the global
        # ``openai.api_key``: this function runs concurrently in worker
        # threads, and a shared global key could be overwritten by a
        # simultaneous request that uses a different user's key.
        client = openai.OpenAI(api_key=api_key)

        analysis_messages = [
            {
                "role": "system",
                "content": (
                    "You are an expert at analyzing images and describing them "
                    "for AI image generation. For each image, provide a detailed "
                    "description focusing on its visual content, composition, "
                    "and elements that would help generate a Studio Ghibli "
                    "style version."
                ),
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": (
                            "Analyze this image and provide a detailed "
                            "description that could be used to recreate it in "
                            "Studio Ghibli animation style. Focus on the "
                            "essential visual elements that should be preserved "
                            "and how they should be adapted to Ghibli aesthetic."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{encoded_frame}"
                        },
                    },
                ],
            },
        ]

        analysis_response = client.chat.completions.create(
            model="gpt-4o",
            messages=analysis_messages,
            max_tokens=800,
        )

        image_description = analysis_response.choices[0].message.content
        print(f"GPT-4o analysis for frame {frame_name}: {image_description[:150]}...")

        dall_e_prompt = (
            "Create a Studio Ghibli style animation frame that shows: "
            f"{image_description}. {style_prompt}. "
            "Hand-drawn animation style, soft colors, attention to detail, "
            "Miyazaki aesthetic."
        )

        # Keep the prompt within 4000 characters, marking the cut with "...".
        if len(dall_e_prompt) > 4000:
            dall_e_prompt = dall_e_prompt[:3997] + "..."

        dalle_response = client.images.generate(
            model="dall-e-3",
            prompt=dall_e_prompt,
            n=1,
            size="1024x1024",
            quality="standard",
        )

        img_url = dalle_response.data[0].url
        print(f"Generated DALL-E image for frame {frame_name}")

        img_response = requests.get(img_url, timeout=30)
        if img_response.status_code != 200:
            print(f"Failed to download image: HTTP {img_response.status_code}")
            return False

        # Replace the original frame only after a successful download.
        with open(frame_path, "wb") as out_img:
            out_img.write(img_response.content)
        print(f"Successfully saved stylized frame: {frame_name}")
        return True

    except Exception as e:
        # Best-effort per frame: report the problem and let the caller decide
        # what to do with the frames that did succeed.
        import traceback

        print(f"Error processing frame {frame_name}: {str(e)}")
        print(traceback.format_exc())
        return False
|
|
|
def stylize_video(video_path, style_prompt, api_key):
    """Turn a video into a stylized video sampled at one frame per second.

    The input is copied (or downloaded) into a temporary working directory,
    sampled at 1 fps, and up to 15 frames are restyled concurrently with
    ``process_frame``. Only frames that were restyled successfully are
    assembled into the output video, which is copied to ``outputs/`` under a
    unique name. The working directory is always removed before returning.

    Args:
        video_path: Local file path, http(s) URL, or raw video bytes.
        style_prompt: Style instructions forwarded to ``process_frame``.
        api_key: OpenAI API key; when empty, ``DEFAULT_API_KEY`` is used.

    Returns:
        A ``(output_path, status_message)`` tuple. ``output_path`` is ``None``
        when processing failed, in which case the message explains why.
        Exceptions are caught, logged and reported through the message.
    """
    # Upper bound on the number of frames sent through the OpenAI pipeline.
    max_frames = 15

    actual_api_key = api_key if api_key else DEFAULT_API_KEY
    if not actual_api_key:
        return None, "Please provide your OpenAI API key"

    if video_path is None:
        return None, "Please upload a video"

    temp_dir = None
    try:
        temp_dir = tempfile.mkdtemp()
        input_filename = os.path.join(temp_dir, "input.mp4")
        frames_dir = os.path.join(temp_dir, "frames")
        stylized_dir = os.path.join(temp_dir, "stylized_frames")
        os.makedirs(frames_dir, exist_ok=True)
        os.makedirs(stylized_dir, exist_ok=True)

        # Materialize the input as a local file inside the working directory.
        if isinstance(video_path, str):
            if video_path.startswith(("http://", "https://")):
                with requests.get(video_path, stream=True, timeout=60) as response:
                    # Do not save an HTTP error page as if it were a video.
                    response.raise_for_status()
                    with open(input_filename, "wb") as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            f.write(chunk)
            elif os.path.exists(video_path):
                shutil.copy(video_path, input_filename)
            else:
                return None, f"Video file not found: {video_path}"
        elif isinstance(video_path, (bytes, bytearray, memoryview)):
            with open(input_filename, "wb") as f:
                f.write(video_path)
        else:
            return None, f"Unsupported video input type: {type(video_path).__name__}"

        if not os.path.exists(input_filename):
            return None, "Failed to save input video"

        # Sample one frame per second. Reading is limited to the first
        # ``max_frames`` seconds so long uploads do not produce frames that
        # would be discarded anyway.
        ffmpeg.input(input_filename, t=max_frames).output(
            os.path.join(frames_dir, "%04d.png"), vf="fps=1"
        ).run(quiet=True)

        frames = sorted(
            os.path.join(frames_dir, name)
            for name in os.listdir(frames_dir)
            if name.endswith(".png")
        )
        if not frames:
            return None, "No frames were extracted from the video"

        frames = frames[:max_frames]
        print(f"Processing {len(frames)} frames")

        num_workers = 3 if use_gpu else 2
        processed_frames = []
        with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
            futures = {
                executor.submit(process_frame, frame, style_prompt, actual_api_key): frame
                for frame in frames
            }
            for future in concurrent.futures.as_completed(futures):
                frame = futures[future]
                if future.result():
                    processed_frames.append(frame)
                    print(
                        f"Completed frame {os.path.basename(frame)} "
                        f"({len(processed_frames)}/{len(frames)})"
                    )

        if not processed_frames:
            return None, "Failed to process any frames. Please make sure your OpenAI API key has access to both GPT-4o and DALL-E 3."

        print(f"Successfully processed {len(processed_frames)}/{len(frames)} frames")

        # Build the output only from frames that were actually restyled.
        # Frames that failed still hold the original image, so they are left
        # behind, and the survivors are renumbered contiguously because the
        # ffmpeg image-sequence pattern stops at the first gap.
        processed_frames.sort()
        for index, frame in enumerate(processed_frames, start=1):
            shutil.move(frame, os.path.join(stylized_dir, f"{index:04d}.png"))

        output_filename = os.path.join(temp_dir, "stylized.mp4")
        ffmpeg.input(os.path.join(stylized_dir, "%04d.png"), framerate=1).output(
            output_filename, vcodec="libx264", pix_fmt="yuv420p", crf=18
        ).run(quiet=True)

        if not os.path.exists(output_filename) or os.path.getsize(output_filename) == 0:
            return None, "Failed to create output video"

        # Copy the result out of the working directory before it is removed.
        os.makedirs("outputs", exist_ok=True)
        persistent_output = os.path.join("outputs", f"stylized_{uuid.uuid4()}.mp4")
        shutil.copy(output_filename, persistent_output)
        print(f"Output video created at: {persistent_output}")

        return persistent_output, f"Video stylized successfully with {len(processed_frames)} frames!"

    except Exception as e:
        import traceback

        traceback_str = traceback.format_exc()
        print(f"Error: {str(e)}\n{traceback_str}")
        # ffmpeg-python attaches the captured stderr to its exception when
        # run with quiet=True; log it so failures can be diagnosed.
        stderr = getattr(e, "stderr", None)
        if stderr:
            if isinstance(stderr, bytes):
                stderr = stderr.decode("utf-8", errors="replace")
            print(stderr)
        return None, f"Error: {str(e)}"

    finally:
        # Remove the working directory on every exit path, including early
        # returns and exceptions.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
|
|
|
|
|
# Example inputs, one entry per example: [video file, style prompt].
example_videos = [
    [
        "sample_video.mp4",
        "Studio Ghibli animation with Hayao Miyazaki's distinctive hand-drawn art style",
    ],
]
|
|
|
# Gradio UI: inputs in the left column, results in the right column, and
# usage instructions underneath.
with gr.Blocks(title="Video-to-Ghibli Style Converter") as iface:
    gr.Markdown("# Video-to-Ghibli Style Converter")
    gr.Markdown("Upload a video and convert it to Studio Ghibli animation style using GPT-4o and DALL-E 3.")

    with gr.Row():
        # Left column: source video, credentials and style controls.
        with gr.Column(scale=2):
            video_input = gr.Video(label="Upload Video (up to 15 seconds)")

            api_key = gr.Textbox(
                label="OpenAI API Key (requires GPT-4o and DALL-E 3 access)",
                type="password",
                placeholder="Enter your OpenAI API key",
            )
            style_prompt = gr.Textbox(
                label="Style Prompt",
                value="Studio Ghibli animation with Hayao Miyazaki's distinctive hand-drawn art style",
            )

            submit_btn = gr.Button("Stylize Video", variant="primary")

        # Right column: resulting video and a status line.
        with gr.Column(scale=2):
            video_output = gr.Video(label="Stylized Video")
            status_output = gr.Textbox(label="Status", value="Ready. Upload a video to start.")

    # stylize_video returns (video path or None, status message), matching
    # the two output components in order.
    submit_btn.click(
        fn=stylize_video,
        inputs=[video_input, style_prompt, api_key],
        outputs=[video_output, status_output],
    )

    gr.Markdown("""
    ## Instructions
    1. Upload a video up to 15 seconds long
    2. Enter your OpenAI API key with GPT-4o and DALL-E 3 access
    3. Customize the style prompt if desired
    4. Click "Stylize Video" and wait for processing

    ## Example Style Prompts
    - "Studio Ghibli animation with Hayao Miyazaki's distinctive hand-drawn art style"
    - "Studio Ghibli style with magical and dreamy atmosphere"
    - "Nostalgic Studio Ghibli animation style with watercolor backgrounds and clean linework"
    - "Ghibli-inspired animation with vibrant colors and fantasy elements"

    Note: Each frame is analyzed by GPT-4o and then transformed by DALL-E 3.
    Videos are processed at 1 frame per second to keep processing time reasonable.
    """)


# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()