Anurag Bhardwaj committed
Commit 0efa5c1 · verified · 1 Parent(s): becb58f

Update app.py
Files changed (1)
  1. app.py +98 -88
app.py CHANGED
@@ -1,98 +1,108 @@
- import sys
- import subprocess
-
- def install(package):
-     subprocess.check_call([sys.executable, "-m", "pip", "install", package])
-
- # Ensure required packages are installed
- try:
-     from diffsynth import ModelManager, WanVideoPipeline, save_video
- except ModuleNotFoundError:
-     # Install diffsynth directly from GitHub if not found
-     install("git+https://github.com/Isi-dev/DiffSynth-Studio.git#egg=diffsynth")
-     from diffsynth import ModelManager, WanVideoPipeline, save_video
-
- try:
-     import gradio as gr
- except ModuleNotFoundError:
-     install("gradio")
-     import gradio as gr
-
- try:
-     import torch
- except ModuleNotFoundError:
-     install("torch")
-     import torch
-
- # If needed, you can add similar checks for other dependencies
-
- # Initialize model manager and load the models (do this once at startup)
- model_manager = ModelManager(device="cpu")
- model_manager.load_models(
-     [
-         "models/Wan-AI/Wan2.1-T2V-14B/diffusion_pytorch_model.safetensors",
-         "models/Wan-AI/Wan2.1-T2V-14B/models_t5_umt5-xxl-enc-bf16.safetensors",
-         "models/Wan-AI/Wan2.1-T2V-14B/Wan2.1_VAE.pth",
-     ],
-     torch_dtype=torch.float8_e4m3fn # or torch.bfloat16 to disable FP8 quantization
  )

- # Initialize the video pipeline (using CUDA if available)
- pipe = WanVideoPipeline.from_model_manager(model_manager, torch_dtype=torch.bfloat16, device="cuda")
- pipe.enable_vram_management(num_persistent_param_in_dit=None)
- print("✅ All models loaded successfully!")

- def generate_video(prompt, negative_prompt, sample_steps, resolution, seed):
-     """
-     Generate a video based on the provided text prompt and parameters.
-     """
-     # Parse resolution string (e.g., "480*832" splits into width and height)
      try:
-         width, height = map(int, resolution.split('*'))
      except Exception as e:
-         return f"Error parsing resolution: {e}"

-     # Generate video using the pipeline
-     video = pipe(
-         prompt=prompt,
-         negative_prompt=negative_prompt,
-         height=height,
-         width=width,
-         num_frames=81,
-         num_inference_steps=sample_steps,
-         seed=seed,
-         tiled=True
-     )

-     # Save the generated video to a file
-     output_path = "video1.mp4"
-     save_video(video, output_path, fps=15, quality=5)

-     return output_path
-
- # Create the Gradio interface for the Hugging Face Space
- interface = gr.Interface(
-     fn=generate_video,
-     inputs=[
-         gr.Textbox(
-             label="Prompt",
-             value="A highly detailed, realistic AI-generated portrait of a very beautiful female soldier representing China. She has long hair, a confident and friendly smile, and striking facial features. She is wearing a camouflage military uniform with an open front, revealing her huge cleavage. She holds a modern assault rifle in a relaxed yet ready position. She walks towards the camera as the camera moves back to track her movements. The background shows a slightly blurred battlefield with other soldiers in formation, creating a sense of military action. The Chinese flag is displayed on her uniform on her shoulder. The lighting is natural, with a warm and slightly cinematic tone. The image should have a sharp focus on her face and outfit while maintaining a professional military aesthetic."
-         ),
-         gr.Textbox(
-             label="Negative Prompt",
-             value="色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
-         ),
-         gr.Slider(label="Sample Steps", minimum=1, maximum=100, step=1, value=30),
-         gr.Dropdown(
-             label="Resolution (Width*Height)",
-             choices=["720*1280", "1280*720", "480*832", "832*480", "1024*1024"],
-             value="480*832"
-         ),
-         gr.Number(label="Seed", value=1)
-     ],
-     outputs=gr.Video(label="Generated Video"),
-     title="DiffSynth Video Generator"
- )

- # Launch the Gradio app in the Hugging Face Space
- interface.launch()
+ import gradio as gr
+ import torch
+ import os
+ from huggingface_hub import snapshot_download
+ from diffsynth import ModelManager, WanVideoPipeline, save_video, VideoData

+ # Download models (run once at startup)
+ REPO_ID = "Isi99999/Wan2.1-T2V-14B"
+ MODEL_PATH = snapshot_download(
+     repo_id=REPO_ID,
+     allow_patterns=["*.safetensors", "*.pth", "*.json"],
+     local_dir="models/Wan-AI/Wan2.1-T2V-14B",
  )

+ # Initialize model manager and pipeline (cache these)
+ def load_models():
+     model_manager = ModelManager(device="cuda")
+     model_manager.load_models(
+         [
+             f"{MODEL_PATH}/diffusion_pytorch_model.safetensors",
+             f"{MODEL_PATH}/models_t5_umt5-xxl-enc-bf16.safetensors",
+             f"{MODEL_PATH}/Wan2.1_VAE.pth",
+         ],
+         torch_dtype=torch.float8_e4m3fn
+     )
+     pipe = WanVideoPipeline.from_model_manager(
+         model_manager,
+         torch_dtype=torch.bfloat16,
+         device="cuda"
+     )
+     pipe.enable_vram_management(num_persistent_param_in_dit=None)
+     return pipe
+
+ pipe = load_models()

+ def generate_video(
+     prompt,
+     negative_prompt,
+     sample_steps,
+     width,
+     height,
+     seed
+ ):
      try:
+         # Generate video
+         video = pipe(
+             prompt=prompt,
+             negative_prompt=negative_prompt,
+             height=height,
+             width=width,
+             num_frames=81,
+             num_inference_steps=sample_steps,
+             seed=seed,
+             tiled=True
+         )
+
+         # Save video
+         output_path = "output_video.mp4"
+         save_video(video, output_path, fps=15, quality=5)
+         return output_path
+
      except Exception as e:
+         return f"Error generating video: {str(e)}"

+ # Gradio UI
+ with gr.Blocks(title="Wan Video Generator") as demo:
+     gr.Markdown("# 🎥 Wan 2.1 Text-to-Video Generator")

+     with gr.Row():
+         with gr.Column():
+             prompt = gr.Textbox(
+                 label="Prompt",
+                 value="A highly detailed, realistic AI-generated portrait..."
+             )
+             negative_prompt = gr.Textbox(
+                 label="Negative Prompt",
+                 value="色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品..."
+             )
+             sample_steps = gr.Slider(
+                 minimum=1, maximum=100, value=30, label="Inference Steps"
+             )
+
+             with gr.Row():
+                 width = gr.Dropdown(
+                     [480, 720, 832, 1024, 1280],
+                     value=480,
+                     label="Width"
+                 )
+                 height = gr.Dropdown(
+                     [832, 480, 720, 1024, 1280],
+                     value=832,
+                     label="Height"
+                 )
+
+             seed = gr.Number(value=1, label="Seed")
+             generate_btn = gr.Button("Generate Video")
+
+         with gr.Column():
+             output_video = gr.Video(label="Generated Video")

+     generate_btn.click(
+         fn=generate_video,
+         inputs=[prompt, negative_prompt, sample_steps, width, height, seed],
+         outputs=output_video
+     )

+ if __name__ == "__main__":
+     demo.launch(debug=True, share=True)