import os
import random

import torch
import gradio as gr
import imageio
import numpy as np
import requests
from PIL import Image
from safetensors.torch import load_file
from torchvision import transforms
# Define model URL and local path
MODEL_URL = "https://huggingface.co/sarthak247/Wan2.1-T2V-1.3B-nf4/resolve/main/diffusion_pytorch_model.safetensors"
MODEL_FILE = "diffusion_pytorch_model.safetensors"
# Function to download model if not present
def download_model():
    if not os.path.exists(MODEL_FILE):
        print("Downloading model...")
        response = requests.get(MODEL_URL, stream=True)
        if response.status_code == 200:
            with open(MODEL_FILE, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            print("Download complete!")
        else:
            raise RuntimeError(f"Failed to download model: {response.status_code}")
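# An equivalent download via the huggingface_hub client (a hedged
# alternative, not what this app uses) would add caching and resume support:
#
#   from huggingface_hub import hf_hub_download
#   MODEL_FILE = hf_hub_download(
#       repo_id="sarthak247/Wan2.1-T2V-1.3B-nf4",
#       filename="diffusion_pytorch_model.safetensors",
#   )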
# Load model weights manually
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Loading model on {device}...")
try:
    download_model()
    model_weights = load_file(MODEL_FILE, device=device)
    print("Model loaded successfully!")
except Exception as e:
    print(f"Error loading model: {e}")
    model_weights = None
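# Note: load_file returns a plain dict mapping tensor names to tensors,
# not a runnable pipeline. A quick sanity check could look like:
#   if model_weights is not None:
#       print(f"{len(model_weights)} tensors, first key: {next(iter(model_weights))}")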
# Function to generate video using the model
def generate_video(prompt):
    """
    Generates a video based on the provided text prompt.

    The actual Wan2.1 inference logic is not implemented yet; for now this
    returns a placeholder clip built from a single random-color frame.
    """
    if model_weights is None:
        return "Model failed to load. Please check the logs."
    # Placeholder: render one solid random-color frame instead of running
    # the model. Replace this block with actual inference logic.
    width, height = 512, 512
    img = Image.new("RGB", (width, height),
                    color=(random.randint(0, 255),
                           random.randint(0, 255),
                           random.randint(0, 255)))
    # Convert the PIL image to an H x W x 3 uint8 array for imageio
    transform = transforms.ToTensor()
    frame = (transform(img).permute(1, 2, 0).numpy() * 255).astype(np.uint8)
    # Repeat the frame 16 times to make a 2-second clip at 8 fps
    frames = [frame] * 16
    output_path = "output.mp4"
    imageio.mimsave(output_path, frames, fps=8)
    return output_path
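# One possible shape for the real inference path, sketched as comments only:
# it assumes the `diffusers` port of Wan2.1 (WanPipeline, available in recent
# diffusers releases) rather than the raw safetensors weights loaded above,
# and it is not wired into this app.
#
#   from diffusers import WanPipeline
#   from diffusers.utils import export_to_video
#
#   pipe = WanPipeline.from_pretrained(
#       "Wan-AI/Wan2.1-T2V-1.3B-Diffusers", torch_dtype=torch.bfloat16
#   ).to(device)
#   frames = pipe(prompt=prompt, num_frames=16).frames[0]
#   export_to_video(frames, "output.mp4", fps=8)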
# Gradio UI
iface = gr.Interface(
    fn=generate_video,
    inputs=gr.Textbox(label="Enter Text Prompt"),
    outputs=gr.Video(label="Generated Video"),
    title="Wan2.1-T2V-1.3B Video Generation",
    description="This app loads the model manually and generates text-to-video output.",
)

iface.launch()