import torch
import gradio as gr
import imageio
import os
import random
import requests
from safetensors.torch import load_file
from torchvision import transforms
from PIL import Image
import numpy as np

# Define model URL and local path
MODEL_URL = "https://huggingface.co/sarthak247/Wan2.1-T2V-1.3B-nf4/resolve/main/diffusion_pytorch_model.safetensors"
MODEL_FILE = "diffusion_pytorch_model.safetensors"
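# Per the repo name, this checkpoint is an nf4-quantized export of the
# 1.3B-parameter Wan2.1 text-to-video model.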

# Download the weights once if not already present, streaming in chunks so
# the large checkpoint is never held fully in memory
def download_model():
    if not os.path.exists(MODEL_FILE):
        print("Downloading model...")
        response = requests.get(MODEL_URL, stream=True)
        if response.status_code == 200:
            with open(MODEL_FILE, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            print("Download complete!")
        else:
            raise RuntimeError(f"Failed to download model: {response.status_code}")
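
# An alternative worth noting: huggingface_hub's hf_hub_download offers
# caching and resumable downloads; the manual requests loop above keeps
# dependencies minimal.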

# Load model weights manually. Note that load_file returns a flat dict of
# parameter names to tensors; no network architecture is instantiated here.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Loading model on {device}...")

try:
    download_model()
    model_weights = load_file(MODEL_FILE, device=device)
    print("Model loaded successfully!")
except Exception as e:
    print(f"Error loading model: {e}")
    model_weights = None

# Function to generate video using the model
def generate_video(prompt):
    """
    Generates a video using the model based on the provided text prompt.
    """
    if model_weights is None:
        raise gr.Error("Model failed to load. Please check the logs.")

    # Placeholder: real Wan2.1 inference is not implemented yet. Until it is,
    # generate a single random solid-color frame so the rest of the pipeline
    # (tensor conversion, video encoding, Gradio output) can be exercised.
    width, height = 512, 512
    img = Image.new("RGB", (width, height),
                    color=(random.randint(0, 255),
                           random.randint(0, 255),
                           random.randint(0, 255)))

    # Round-trip through a tensor (CxHxW float in [0, 1]) and back to an
    # HxWxC uint8 array, the layout imageio expects
    transform = transforms.ToTensor()
    frame = (transform(img).permute(1, 2, 0).numpy() * 255).astype(np.uint8)

    # Repeat the single frame 16 times to form a short stand-in clip;
    # real model output would supply distinct frames here.
    frames = [frame] * 16
    output_path = "output.mp4"
    
    # Save frames as a video with 8 fps
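    # (MP4 output via imageio requires the imageio-ffmpeg backend)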
    imageio.mimsave(output_path, frames, fps=8)

    return output_path
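
# A minimal sketch of what real inference could look like, assuming the
# diffusers library's Wan2.1 support; WanPipeline, the
# "Wan-AI/Wan2.1-T2V-1.3B-Diffusers" repo id, and export_to_video come from
# diffusers' examples, not from this Space's code:
#
#   from diffusers import WanPipeline
#   from diffusers.utils import export_to_video
#
#   pipe = WanPipeline.from_pretrained(
#       "Wan-AI/Wan2.1-T2V-1.3B-Diffusers", torch_dtype=torch.bfloat16
#   ).to(device)
#   frames = pipe(prompt=prompt, num_frames=16).frames[0]
#   export_to_video(frames, "output.mp4", fps=8)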

# Gradio UI
iface = gr.Interface(
    fn=generate_video,
    inputs=gr.Textbox(label="Enter Text Prompt"),
    outputs=gr.Video(label="Generated Video"),
    title="Wan2.1-T2V-1.3B Video Generation",
    description="This app loads the model manually and generates text-to-video output."
)

iface.launch()
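
# launch() serves the UI locally (Gradio's default is http://127.0.0.1:7860);
# on Hugging Face Spaces the same entry point is started automatically.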