Anurag Bhardwaj committed
Commit 0efa5c1 · verified · 1 Parent(s): becb58f

Update app.py
Files changed (1)
  1. app.py +98 -88
app.py CHANGED
@@ -1,98 +1,108 @@
- import sys
- import subprocess
-
- def install(package):
-     subprocess.check_call([sys.executable, "-m", "pip", "install", package])
-
- # Ensure required packages are installed
- try:
-     from diffsynth import ModelManager, WanVideoPipeline, save_video
- except ModuleNotFoundError:
-     # Install diffsynth directly from GitHub if not found
-     install("git+https://github.com/Isi-dev/DiffSynth-Studio.git#egg=diffsynth")
-     from diffsynth import ModelManager, WanVideoPipeline, save_video
-
- try:
-     import gradio as gr
- except ModuleNotFoundError:
-     install("gradio")
-     import gradio as gr
-
- try:
-     import torch
- except ModuleNotFoundError:
-     install("torch")
-     import torch
-
- # If needed, you can add similar checks for other dependencies
-
- # Initialize model manager and load the models (do this once at startup)
- model_manager = ModelManager(device="cpu")
- model_manager.load_models(
-     [
-         "models/Wan-AI/Wan2.1-T2V-14B/diffusion_pytorch_model.safetensors",
-         "models/Wan-AI/Wan2.1-T2V-14B/models_t5_umt5-xxl-enc-bf16.safetensors",
-         "models/Wan-AI/Wan2.1-T2V-14B/Wan2.1_VAE.pth",
-     ],
-     torch_dtype=torch.float8_e4m3fn # or torch.bfloat16 to disable FP8 quantization
  )

- # Initialize the video pipeline (using CUDA if available)
- pipe = WanVideoPipeline.from_model_manager(model_manager, torch_dtype=torch.bfloat16, device="cuda")
- pipe.enable_vram_management(num_persistent_param_in_dit=None)
- print("✅ All models loaded successfully!")

- def generate_video(prompt, negative_prompt, sample_steps, resolution, seed):
-     """
-     Generate a video based on the provided text prompt and parameters.
-     """
-     # Parse resolution string (e.g., "480*832" splits into width and height)
      try:
-         width, height = map(int, resolution.split('*'))
      except Exception as e:
-         return f"Error parsing resolution: {e}"

-     # Generate video using the pipeline
-     video = pipe(
-         prompt=prompt,
-         negative_prompt=negative_prompt,
-         height=height,
-         width=width,
-         num_frames=81,
-         num_inference_steps=sample_steps,
-         seed=seed,
-         tiled=True
-     )

-     # Save the generated video to a file
-     output_path = "video1.mp4"
-     save_video(video, output_path, fps=15, quality=5)

-     return output_path
-
- # Create the Gradio interface for the Hugging Face Space
- interface = gr.Interface(
-     fn=generate_video,
-     inputs=[
-         gr.Textbox(
-             label="Prompt",
-             value="A highly detailed, realistic AI-generated portrait of a very beautiful female soldier representing China. She has long hair, a confident and friendly smile, and striking facial features. She is wearing a camouflage military uniform with an open front, revealing her huge cleavage. She holds a modern assault rifle in a relaxed yet ready position. She walks towards the camera as the camera moves back to track her movements. The background shows a slightly blurred battlefield with other soldiers in formation, creating a sense of military action. The Chinese flag is displayed on her uniform on her shoulder. The lighting is natural, with a warm and slightly cinematic tone. The image should have a sharp focus on her face and outfit while maintaining a professional military aesthetic."
-         ),
-         gr.Textbox(
-             label="Negative Prompt",
-             value="色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
-         ),
-         gr.Slider(label="Sample Steps", minimum=1, maximum=100, step=1, value=30),
-         gr.Dropdown(
-             label="Resolution (Width*Height)",
-             choices=["720*1280", "1280*720", "480*832", "832*480", "1024*1024"],
-             value="480*832"
-         ),
-         gr.Number(label="Seed", value=1)
-     ],
-     outputs=gr.Video(label="Generated Video"),
-     title="DiffSynth Video Generator"
- )

- # Launch the Gradio app in the Hugging Face Space
- interface.launch()
+ import gradio as gr
+ import torch
+ import os
+ from huggingface_hub import snapshot_download
+ from diffsynth import ModelManager, WanVideoPipeline, save_video, VideoData

+ # Download models (run once at startup)
+ REPO_ID = "Isi99999/Wan2.1-T2V-14B"
+ MODEL_PATH = snapshot_download(
+     repo_id=REPO_ID,
+     allow_patterns=["*.safetensors", "*.pth", "*.json"],
+     local_dir="models/Wan-AI/Wan2.1-T2V-14B",
  )

+ # Initialize model manager and pipeline (cache these)
+ def load_models():
+     model_manager = ModelManager(device="cuda")
+     model_manager.load_models(
+         [
+             f"{MODEL_PATH}/diffusion_pytorch_model.safetensors",
+             f"{MODEL_PATH}/models_t5_umt5-xxl-enc-bf16.safetensors",
+             f"{MODEL_PATH}/Wan2.1_VAE.pth",
+         ],
+         torch_dtype=torch.float8_e4m3fn
+     )
+     pipe = WanVideoPipeline.from_model_manager(
+         model_manager,
+         torch_dtype=torch.bfloat16,
+         device="cuda"
+     )
+     pipe.enable_vram_management(num_persistent_param_in_dit=None)
+     return pipe
+
+ pipe = load_models()

+ def generate_video(
+     prompt,
+     negative_prompt,
+     sample_steps,
+     width,
+     height,
+     seed
+ ):
      try:
+         # Generate video
+         video = pipe(
+             prompt=prompt,
+             negative_prompt=negative_prompt,
+             height=height,
+             width=width,
+             num_frames=81,
+             num_inference_steps=sample_steps,
+             seed=seed,
+             tiled=True
+         )
+
+         # Save video
+         output_path = "output_video.mp4"
+         save_video(video, output_path, fps=15, quality=5)
+         return output_path
+
      except Exception as e:
+         return f"Error generating video: {str(e)}"

+ # Gradio UI
+ with gr.Blocks(title="Wan Video Generator") as demo:
+     gr.Markdown("# 🎥 Wan 2.1 Text-to-Video Generator")

+     with gr.Row():
+         with gr.Column():
+             prompt = gr.Textbox(
+                 label="Prompt",
+                 value="A highly detailed, realistic AI-generated portrait..."
+             )
+             negative_prompt = gr.Textbox(
+                 label="Negative Prompt",
+                 value="色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品..."
+             )
+             sample_steps = gr.Slider(
+                 minimum=1, maximum=100, value=30, label="Inference Steps"
+             )
+
+             with gr.Row():
+                 width = gr.Dropdown(
+                     [480, 720, 832, 1024, 1280],
+                     value=480,
+                     label="Width"
+                 )
+                 height = gr.Dropdown(
+                     [832, 480, 720, 1024, 1280],
+                     value=832,
+                     label="Height"
+                 )
+
+             seed = gr.Number(value=1, label="Seed")
+             generate_btn = gr.Button("Generate Video")
+
+         with gr.Column():
+             output_video = gr.Video(label="Generated Video")

+     generate_btn.click(
+         fn=generate_video,
+         inputs=[prompt, negative_prompt, sample_steps, width, height, seed],
+         outputs=output_video
+     )

+ if __name__ == "__main__":
+     demo.launch(debug=True, share=True)