Spaces: Running
Anurag Bhardwaj committed: Update app.py

app.py CHANGED
@@ -1,98 +1,108 @@
-import …
-import …

-…
-except ModuleNotFoundError:
-    # Install diffsynth directly from GitHub if not found
-    install("git+https://github.com/Isi-dev/DiffSynth-Studio.git#egg=diffsynth")
-    from diffsynth import ModelManager, WanVideoPipeline, save_video
-
-try:
-    import gradio as gr
-except ModuleNotFoundError:
-    install("gradio")
-    import gradio as gr
-
-try:
-    import torch
-except ModuleNotFoundError:
-    install("torch")
-    import torch
-
-# If needed, you can add similar checks for other dependencies
-
-# Initialize model manager and load the models (do this once at startup)
-model_manager = ModelManager(device="cpu")
-model_manager.load_models(
-    [
-        "models/Wan-AI/Wan2.1-T2V-14B/diffusion_pytorch_model.safetensors",
-        "models/Wan-AI/Wan2.1-T2V-14B/models_t5_umt5-xxl-enc-bf16.safetensors",
-        "models/Wan-AI/Wan2.1-T2V-14B/Wan2.1_VAE.pth",
-    ],
-    torch_dtype=torch.float8_e4m3fn  # or torch.bfloat16 to disable FP8 quantization
 )

-# Initialize …
-…

-def generate_video(
-    …
     try:
-        …
     except Exception as e:
-        return f"Error …

-…
-        negative_prompt=negative_prompt,
-        height=height,
-        width=width,
-        num_frames=81,
-        num_inference_steps=sample_steps,
-        seed=seed,
-        tiled=True
-    )

-…
-    inputs=[
-        gr.Textbox(
-            label="Prompt",
-            value="A highly detailed, realistic AI-generated portrait of a very beautiful female soldier representing China. She has long hair, a confident and friendly smile, and striking facial features. She is wearing a camouflage military uniform. She holds a modern assault rifle in a relaxed yet ready position. She walks towards the camera as the camera moves back to track her movements. The background shows a slightly blurred battlefield with other soldiers in formation, creating a sense of military action. The Chinese flag is displayed on her uniform on her shoulder. The lighting is natural, with a warm and slightly cinematic tone. The image should have a sharp focus on her face and outfit while maintaining a professional military aesthetic."
-        ),
-        gr.Textbox(
-            label="Negative Prompt",
-            value="色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
-        ),
-        gr.Slider(label="Sample Steps", minimum=1, maximum=100, step=1, value=30),
-        gr.Dropdown(
-            label="Resolution (Width*Height)",
-            choices=["720*1280", "1280*720", "480*832", "832*480", "1024*1024"],
-            value="480*832"
-        ),
-        gr.Number(label="Seed", value=1)
-    ],
-    outputs=gr.Video(label="Generated Video"),
-    title="DiffSynth Video Generator"
-)

-…
+import gradio as gr
+import torch
+import os
+from huggingface_hub import snapshot_download
+from diffsynth import ModelManager, WanVideoPipeline, save_video, VideoData

+# Download models (run once at startup)
+REPO_ID = "Isi99999/Wan2.1-T2V-14B"
+MODEL_PATH = snapshot_download(
+    repo_id=REPO_ID,
+    allow_patterns=["*.safetensors", "*.pth", "*.json"],
+    local_dir="models/Wan-AI/Wan2.1-T2V-14B",
 )

+# Initialize model manager and pipeline (cache these)
+def load_models():
+    model_manager = ModelManager(device="cuda")
+    model_manager.load_models(
+        [
+            f"{MODEL_PATH}/diffusion_pytorch_model.safetensors",
+            f"{MODEL_PATH}/models_t5_umt5-xxl-enc-bf16.safetensors",
+            f"{MODEL_PATH}/Wan2.1_VAE.pth",
+        ],
+        torch_dtype=torch.float8_e4m3fn
+    )
+    pipe = WanVideoPipeline.from_model_manager(
+        model_manager,
+        torch_dtype=torch.bfloat16,
+        device="cuda"
+    )
+    pipe.enable_vram_management(num_persistent_param_in_dit=None)
+    return pipe
+
+pipe = load_models()

+def generate_video(
+    prompt,
+    negative_prompt,
+    sample_steps,
+    width,
+    height,
+    seed
+):
     try:
+        # Generate video
+        video = pipe(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            height=height,
+            width=width,
+            num_frames=81,
+            num_inference_steps=sample_steps,
+            seed=seed,
+            tiled=True
+        )
+
+        # Save video
+        output_path = "output_video.mp4"
+        save_video(video, output_path, fps=15, quality=5)
+        return output_path
+
     except Exception as e:
+        return f"Error generating video: {str(e)}"

+# Gradio UI
+with gr.Blocks(title="Wan Video Generator") as demo:
+    gr.Markdown("# 🎥 Wan 2.1 Text-to-Video Generator")

+    with gr.Row():
+        with gr.Column():
+            prompt = gr.Textbox(
+                label="Prompt",
+                value="A highly detailed, realistic AI-generated portrait..."
+            )
+            negative_prompt = gr.Textbox(
+                label="Negative Prompt",
+                value="色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品..."
+            )
+            sample_steps = gr.Slider(
+                minimum=1, maximum=100, value=30, label="Inference Steps"
+            )
+
+            with gr.Row():
+                width = gr.Dropdown(
+                    [480, 720, 832, 1024, 1280],
+                    value=480,
+                    label="Width"
+                )
+                height = gr.Dropdown(
+                    [832, 480, 720, 1024, 1280],
+                    value=832,
+                    label="Height"
+                )
+
+            seed = gr.Number(value=1, label="Seed")
+            generate_btn = gr.Button("Generate Video")
+
+        with gr.Column():
+            output_video = gr.Video(label="Generated Video")

+    generate_btn.click(
+        fn=generate_video,
+        inputs=[prompt, negative_prompt, sample_steps, width, height, seed],
+        outputs=output_video
+    )

+if __name__ == "__main__":
+    demo.launch(debug=True, share=True)
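In short, the commit drops the install-on-import fallbacks and the CPU-side ModelManager, downloads the Wan2.1-T2V-14B weights once with snapshot_download, builds a single cached CUDA WanVideoPipeline at startup, and replaces the single-call interface with a gr.Blocks layout that exposes width and height as separate dropdowns. A minimal sketch of exercising the cached pipeline without the Gradio UI follows, assuming `pipe` and `save_video` are defined exactly as in the new app.py above; the prompt text and output filename are illustrative placeholders, not part of the commit.

# Smoke test: call the cached pipeline directly, bypassing Gradio.
# Assumes `pipe` and `save_video` come from the new app.py above;
# the prompt and output filename are illustrative placeholders.
video = pipe(
    prompt="A corgi running along a beach at sunset",
    negative_prompt="",
    height=832,
    width=480,
    num_frames=81,
    num_inference_steps=30,
    seed=1,
    tiled=True
)
save_video(video, "smoke_test.mp4", fps=15, quality=5)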