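"""Video-to-Ghibli Style Converter.

A Gradio app that saves an uploaded video, extracts frames at 1 fps with
ffmpeg, has GPT-4o describe each frame, regenerates each frame with DALL-E 3
in Studio Ghibli style, and reassembles the stylized frames into a video.
"""
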
import gradio as gr
import openai
import ffmpeg
import os
import uuid
import base64
import requests
import tempfile
import shutil
import time
import concurrent.futures
from dotenv import load_dotenv

# Detect optional GPU support on Hugging Face Spaces. The heavy lifting here
# happens via the OpenAI API, so the flag is only used to size the worker
# pool below.
try:
    from spaces import GPU
    use_gpu = True
    @GPU
    def get_gpu():
        return True
    # Call the decorated function once to trigger GPU allocation on Spaces
    get_gpu()
except ImportError:
    use_gpu = False
    print("Running without GPU acceleration")

# Load environment variables from .env file if it exists
load_dotenv()

# Get default API key from environment (will be '' if not set)
DEFAULT_API_KEY = os.getenv("OPENAI_API_KEY", "")
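
# Optional sketch, not part of the original app: the parallel DALL-E calls
# below can hit 429 rate limits. The helper name and defaults are illustrative;
# wrap the OpenAI calls in process_frame with it if your account's limits are
# tight, e.g. with_retries(lambda: client.images.generate(...)).
def with_retries(fn, attempts=3, base_delay=2.0):
    """Call fn() and return its result, retrying with exponential backoff."""
    for attempt in range(attempts):
        try:
            return fn()
        except Exception:
            if attempt == attempts - 1:
                raise
            time.sleep(base_delay * (2 ** attempt))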

def process_frame(frame_path, style_prompt, api_key):
    """Process a single frame with GPT-4o analysis and DALL-E 3 generation"""
    try:
        # Read the image and encode to base64
        with open(frame_path, "rb") as img_file:
            img_bytes = img_file.read()
            
        # First use GPT-4o to analyze the image
        analysis_messages = [
            {"role": "system", "content": "You are an expert at analyzing images and describing them for AI image generation. For each image, provide a detailed description focusing on its visual content, composition, and elements that would help generate a Studio Ghibli style version."},
            {"role": "user", "content": [
                {"type": "text", "text": f"Analyze this image and provide a detailed description that could be used to recreate it in Studio Ghibli animation style. Focus on the essential visual elements that should be preserved and how they should be adapted to Ghibli aesthetic."},
                {"type": "image_url", "image_url": {
                    "url": f"data:image/png;base64,{base64.b64encode(img_bytes).decode('utf-8')}"
                }}
            ]}
        ]
        
        # Use an explicit client instance; thread-safe under the parallel
        # executor in stylize_video
        client = openai.OpenAI(api_key=api_key)
        analysis_response = client.chat.completions.create(
            model="gpt-4o",
            messages=analysis_messages,
            max_tokens=800
        )
        
        # Get the image description
        image_description = analysis_response.choices[0].message.content
        print(f"GPT-4o analysis for frame {os.path.basename(frame_path)}: {image_description[:150]}...")
        
        # Now use DALL-E 3 to generate a stylized version based on the description
        dall_e_prompt = f"Create a Studio Ghibli style animation frame that shows: {image_description}. {style_prompt}. Hand-drawn animation style, soft colors, attention to detail, Miyazaki aesthetic."
        
        # Ensure prompt isn't too long
        if len(dall_e_prompt) > 4000:
            dall_e_prompt = dall_e_prompt[:3997] + "..."
            
        dalle_response = client.images.generate(
            model="dall-e-3",
            prompt=dall_e_prompt,
            n=1,
            size="1024x1024",
            quality="standard"
        )
        
        # Get the generated image URL
        img_url = dalle_response.data[0].url
        print(f"Generated DALL-E image for frame {os.path.basename(frame_path)}")
        
        # Download the image
        img_response = requests.get(img_url, timeout=30)
        if img_response.status_code == 200:
            with open(frame_path, "wb") as out_img:
                out_img.write(img_response.content)
            print(f"Successfully saved stylized frame: {os.path.basename(frame_path)}")
            return True
        else:
            print(f"Failed to download image: HTTP {img_response.status_code}")
            return False
        
    except Exception as e:
        import traceback
        print(f"Error processing frame {os.path.basename(frame_path)}: {str(e)}")
        print(traceback.format_exc())
        return False

def stylize_video(video_path, style_prompt, api_key):
    # Use the provided API key, or fall back to the default one
    actual_api_key = api_key if api_key else DEFAULT_API_KEY
    
    if not actual_api_key:
        return None, "Please provide your OpenAI API key"
    
    try:
        # Create temp directories
        temp_dir = tempfile.mkdtemp()
        input_filename = os.path.join(temp_dir, "input.mp4")
        frames_dir = os.path.join(temp_dir, "frames")
        os.makedirs(frames_dir, exist_ok=True)
        
        # Save the input video to a temporary file
        if isinstance(video_path, str):
            if video_path.startswith('http'):
                # It's a URL, download it
                response = requests.get(video_path, stream=True, timeout=60)
                response.raise_for_status()
                with open(input_filename, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
            elif os.path.exists(video_path):
                # It's a file path, copy it
                shutil.copy(video_path, input_filename)
            else:
                return None, f"Video file not found: {video_path}"
        else:
            # Assume it's binary data
            with open(input_filename, "wb") as f:
                f.write(video_path)
                
        # Make sure the video file exists
        if not os.path.exists(input_filename):
            return None, "Failed to save input video"
            
        # Extract frames at 1 frame per second to keep API usage and
        # processing time manageable
        ffmpeg.input(input_filename).output(f"{frames_dir}/%04d.png", vf="fps=1").run(quiet=True)
        
        # Check if frames were extracted
        frames = sorted([os.path.join(frames_dir, f) for f in os.listdir(frames_dir) if f.endswith('.png')])
        if not frames:
            return None, "No frames were extracted from the video"
            
        # Limit processing to 15 frames to keep runtime and API cost reasonable
        if len(frames) > 15:
            # Take evenly distributed frames across the whole video
            step = len(frames) / 15
            frames = [frames[int(i * step)] for i in range(15)]
        
        print(f"Processing {len(frames)} frames")
        
        # Process frames in parallel with a small worker pool to stay under
        # OpenAI rate limits
        num_workers = 3 if use_gpu else 2  # Slightly larger pool on GPU Spaces
        with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
            futures = {executor.submit(process_frame, frame, style_prompt, actual_api_key): frame for frame in frames}
            
            # Collect results
            processed_frames = []
            for future in concurrent.futures.as_completed(futures):
                frame = futures[future]
                if future.result():
                    processed_frames.append(frame)
                    print(f"Completed frame {os.path.basename(frame)} ({len(processed_frames)}/{len(frames)})")
                    
        if not processed_frames:
            return None, "Failed to process any frames. Please make sure your OpenAI API key has access to both GPT-4o and DALL-E 3."
        
        # Even if not all frames were processed, try to create a video with what we have
        print(f"Successfully processed {len(processed_frames)}/{len(frames)} frames")
                    
        # Ensure frames are in the correct order (important for video continuity)
        processed_frames.sort()
        
        # Reassemble frames into video
        output_filename = os.path.join(temp_dir, "stylized.mp4")
        
        # Copy only the successfully stylized frames into a fresh directory
        # with sequential names, so unprocessed frames never leak into the output
        stylized_dir = os.path.join(temp_dir, "stylized_frames")
        os.makedirs(stylized_dir, exist_ok=True)
        for i, frame in enumerate(processed_frames, start=1):
            shutil.copy(frame, os.path.join(stylized_dir, f"{i:04d}.png"))
        
        # Use libx264 with a low CRF for good quality at a reasonable size
        ffmpeg.input(f"{stylized_dir}/%04d.png", framerate=1) \
              .output(output_filename, vcodec='libx264', pix_fmt='yuv420p', crf=18) \
              .run(quiet=True)
        
        # Check if the output file exists and has content
        if not os.path.exists(output_filename) or os.path.getsize(output_filename) == 0:
            return None, "Failed to create output video"
            
        # Copy to a persistent location for Gradio to serve
        os.makedirs("outputs", exist_ok=True)
        persistent_output = os.path.join("outputs", f"stylized_{uuid.uuid4()}.mp4")
        shutil.copy(output_filename, persistent_output)
        
        # Return the relative path (Gradio can handle this)
        print(f"Output video created at: {persistent_output}")
        
        # Cleanup temp files
        shutil.rmtree(temp_dir)
        
        return persistent_output, f"Video stylized successfully with {len(processed_frames)} frames!"
        
    except Exception as e:
        import traceback
        print(f"Error: {str(e)}\n{traceback.format_exc()}")
        # Clean up the temp directory on failure too, if it was created
        if 'temp_dir' in locals():
            shutil.rmtree(temp_dir, ignore_errors=True)
        return None, f"Error: {str(e)}"

# Example inputs for Gradio's Examples feature (wired up via gr.Examples below)
example_videos = [
    ["sample_video.mp4", "Studio Ghibli animation with Hayao Miyazaki's distinctive hand-drawn art style"]
]

with gr.Blocks(title="Video-to-Ghibli Style Converter") as iface:
    gr.Markdown("# Video-to-Ghibli Style Converter")
    gr.Markdown("Upload a video and convert it to Studio Ghibli animation style using GPT-4o and DALL-E 3.")
    
    with gr.Row():
        with gr.Column(scale=2):
            # Main input column
            video_input = gr.Video(label="Upload Video (up to 15 seconds)")
            
            api_key = gr.Textbox(
                label="OpenAI API Key (requires GPT-4o and DALL-E 3 access)", 
                type="password",
                placeholder="Enter your OpenAI API key"
            )
            style_prompt = gr.Textbox(
                label="Style Prompt", 
                value="Studio Ghibli animation with Hayao Miyazaki's distinctive hand-drawn art style"
            )
            
            submit_btn = gr.Button("Stylize Video", variant="primary")
        
        with gr.Column(scale=2):
            # Output column
            video_output = gr.Video(label="Stylized Video")
            status_output = gr.Textbox(label="Status", value="Ready. Upload a video to start.")
    
    submit_btn.click(
        fn=stylize_video,
        inputs=[video_input, style_prompt, api_key],
        outputs=[video_output, status_output]
    )
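
    # Wire up the example list defined above. This assumes sample_video.mp4
    # ships alongside this script; drop the entry if the file is absent.
    gr.Examples(
        examples=example_videos,
        inputs=[video_input, style_prompt]
    )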
    
    gr.Markdown("""
    ## Instructions
    1. Upload a video up to 15 seconds long
    2. Enter your OpenAI API key with GPT-4o and DALL-E 3 access
    3. Customize the style prompt if desired
    4. Click "Stylize Video" and wait for processing
    
    ## Example Style Prompts
    - "Studio Ghibli animation with Hayao Miyazaki's distinctive hand-drawn art style"
    - "Studio Ghibli style with magical and dreamy atmosphere"
    - "Nostalgic Studio Ghibli animation style with watercolor backgrounds and clean linework"
    - "Ghibli-inspired animation with vibrant colors and fantasy elements"
    
    Note: Each frame is analyzed by GPT-4o and then transformed by DALL-E 3.
    Videos are processed at 1 frame per second to keep processing time reasonable.
    """)

if __name__ == "__main__":
    # Queue requests so long-running stylization jobs don't hit HTTP timeouts
    iface.queue().launch()