import os
import random

import gradio as gr
import imageio
import numpy as np
import requests
import torch
from PIL import Image
from safetensors.torch import load_file
from torchvision import transforms

# Model URL and local path
MODEL_URL = "https://huggingface.co/sarthak247/Wan2.1-T2V-1.3B-nf4/resolve/main/diffusion_pytorch_model.safetensors"
MODEL_FILE = "diffusion_pytorch_model.safetensors"


def download_model():
    """Download the model weights if they are not already present locally."""
    if not os.path.exists(MODEL_FILE):
        print("Downloading model...")
        response = requests.get(MODEL_URL, stream=True)
        if response.status_code == 200:
            with open(MODEL_FILE, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            print("Download complete!")
        else:
            raise RuntimeError(f"Failed to download model: {response.status_code}")


# Load model weights manually
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Loading model on {device}...")

try:
    download_model()
    model_weights = load_file(MODEL_FILE, device=device)
    print("Model loaded successfully!")
except Exception as e:
    print(f"Error loading model: {e}")
    model_weights = None


def generate_video(prompt):
    """Generate a video from the given text prompt."""
    if model_weights is None:
        raise gr.Error("Model failed to load. Please check the logs.")

    # Placeholder: the actual inference logic using the loaded weights goes here.
    # For now, a solid random-color image stands in for a model-generated frame.
    width, height = 512, 512
    img = Image.new(
        "RGB",
        (width, height),
        color=(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)),
    )

    # Convert the PIL image to an (H, W, C) uint8 numpy array for imageio
    transform = transforms.ToTensor()
    frame = (transform(img).permute(1, 2, 0).numpy() * 255).astype(np.uint8)

    # Build a dummy clip by repeating the frame (replace with real model frames)
    frames = [frame] * 16
    output_path = "output.mp4"

    # Save the frames as an MP4 at 8 fps
    imageio.mimsave(output_path, frames, fps=8)
    return output_path


# Gradio UI
iface = gr.Interface(
    fn=generate_video,
    inputs=gr.Textbox(label="Enter Text Prompt"),
    outputs=gr.Video(label="Generated Video"),
    title="Wan2.1-T2V-1.3B Video Generation",
    description="This app loads the model manually and generates text-to-video output.",
)

iface.launch()