# DiffSynth Studio

Welcome to DiffSynth Studio! This is an example of Diffutoon.

## Install

In [None]:
!git clone https://github.com/Artiprocher/DiffSynth-Studio.git
!pip install -q transformers controlnet-aux==0.0.7 streamlit streamlit-drawable-canvas imageio imageio[ffmpeg] safetensors einops cupy-cuda12x
%cd /content/DiffSynth-Studio

## Download Models

In [None]:
import requests


def download_model(url, file_path):
    """Download ``url`` and write the response body to ``file_path``.

    The body is streamed to disk in chunks instead of being held in memory,
    and the destination's parent directory is created when it is missing.

    Args:
        url: Address of the file to fetch; redirects are followed.
        file_path: Destination path of the downloaded file.

    Raises:
        requests.HTTPError: If the server answers with an error status, so an
            error page is never saved in place of the requested file.
    """
    import os

    parent_dir = os.path.dirname(file_path)
    if parent_dir:  # a bare file name has no directory component to create
        os.makedirs(parent_dir, exist_ok=True)

    # With stream=True the timeout bounds the connection attempt and each
    # individual read, not the duration of the whole transfer.
    with requests.get(url, allow_redirects=True, stream=True, timeout=60) as response:
        response.raise_for_status()
        with open(file_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)

# (url, destination) pairs, fetched one after another in the listed order.
_MODEL_DOWNLOADS = [
    ("https://civitai.com/api/download/models/229575",
     "models/stable_diffusion/aingdiffusion_v12.safetensors"),
    ("https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt",
     "models/AnimateDiff/mm_sd_v15_v2.ckpt"),
    ("https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth",
     "models/ControlNet/control_v11p_sd15_lineart.pth"),
    ("https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1e_sd15_tile.pth",
     "models/ControlNet/control_v11f1e_sd15_tile.pth"),
    ("https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1p_sd15_depth.pth",
     "models/ControlNet/control_v11f1p_sd15_depth.pth"),
    ("https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_softedge.pth",
     "models/ControlNet/control_v11p_sd15_softedge.pth"),
    ("https://huggingface.co/lllyasviel/Annotators/resolve/main/dpt_hybrid-midas-501f0c75.pt",
     "models/Annotators/dpt_hybrid-midas-501f0c75.pt"),
    ("https://huggingface.co/lllyasviel/Annotators/resolve/main/ControlNetHED.pth",
     "models/Annotators/ControlNetHED.pth"),
    ("https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth",
     "models/Annotators/sk_model.pth"),
    ("https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth",
     "models/Annotators/sk_model2.pth"),
    ("https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16",
     "models/textual_inversion/verybadimagenegative_v1.3.pt"),
]

for _model_url, _model_path in _MODEL_DOWNLOADS:
    download_model(_model_url, _model_path)

## Run Diffutoon

### Config Template

In [None]:
def _build_stage_1_template():
    """Assemble the stage-1 configuration template.

    Every nested dict and list is created fresh, so no two entries of the
    returned template share an object.
    """
    checkpoint = "models/stable_diffusion/aingdiffusion_v12.safetensors"
    softedge_model = "models/ControlNet/control_v11p_sd15_softedge.pth"
    depth_model = "models/ControlNet/control_v11f1p_sd15_depth.pth"

    def frames():
        # 512x512 frame source read from the uploaded input video.
        return {
            "video_file": "/content/input_video.mp4",
            "image_folder": None,
            "height": 512,
            "width": 512,
            "start_frame_id": 0,
            "end_frame_id": 30,
        }

    models = {
        "model_list": [checkpoint, softedge_model, depth_model],
        "textual_inversion_folder": "models/textual_inversion",
        "device": "cuda",
        "lora_alphas": [],
        "controlnet_units": [
            {"processor_id": "softedge", "model_path": softedge_model, "scale": 0.5},
            {"processor_id": "depth", "model_path": depth_model, "scale": 0.5},
        ],
    }

    data = {
        "input_frames": frames(),
        # One frame source per ControlNet unit, in the same order as above.
        "controlnet_frames": [frames(), frames()],
        "output_folder": "data/examples/diffutoon_edit/color_video",
        "fps": 25,
    }

    pipeline_inputs = {
        "prompt": "best quality, perfect anime illustration, orange clothes, night, a girl is dancing, smile, solo, black silk stockings",
        "negative_prompt": "verybadimagenegative_v1.3",
        "cfg_scale": 7.0,
        "clip_skip": 1,
        "denoising_strength": 0.9,
        "num_inference_steps": 20,
        "animatediff_batch_size": 8,
        "animatediff_stride": 4,
        "unet_batch_size": 8,
        "controlnet_batch_size": 8,
        "cross_frame_attention": True,
        "smoother_progress_ids": [-1],
        # The following parameters will be overwritten. You don't need to modify them.
        "input_frames": [],
        "num_frames": 30,
        "width": 512,
        "height": 512,
        "controlnet_frames": [],
    }

    return {
        "models": models,
        "data": data,
        "smoother_configs": [{"processor_type": "FastBlend", "config": {}}],
        "pipeline": {"seed": 0, "pipeline_inputs": pipeline_inputs},
    }


config_stage_1_template = _build_stage_1_template()

def _build_stage_2_template():
    """Assemble the stage-2 configuration template.

    Every nested dict and list is created fresh, so no two entries of the
    returned template share an object.
    """
    checkpoint = "models/stable_diffusion/aingdiffusion_v12.safetensors"
    motion_module = "models/AnimateDiff/mm_sd_v15_v2.ckpt"
    tile_model = "models/ControlNet/control_v11f1e_sd15_tile.pth"
    lineart_model = "models/ControlNet/control_v11p_sd15_lineart.pth"

    def frames():
        # 1024x1024 frame source read from the uploaded input video.
        return {
            "video_file": "/content/input_video.mp4",
            "image_folder": None,
            "height": 1024,
            "width": 1024,
            "start_frame_id": 0,
            "end_frame_id": 30,
        }

    models = {
        "model_list": [checkpoint, motion_module, tile_model, lineart_model],
        "textual_inversion_folder": "models/textual_inversion",
        "device": "cuda",
        "lora_alphas": [],
        "controlnet_units": [
            {"processor_id": "tile", "model_path": tile_model, "scale": 0.5},
            {"processor_id": "lineart", "model_path": lineart_model, "scale": 0.5},
        ],
    }

    data = {
        "input_frames": frames(),
        # One frame source per ControlNet unit, in the same order as above.
        "controlnet_frames": [frames(), frames()],
        "output_folder": "/content/output",
        "fps": 25,
    }

    pipeline_inputs = {
        "prompt": "best quality, perfect anime illustration, light, a girl is dancing, smile, solo",
        "negative_prompt": "verybadimagenegative_v1.3",
        "cfg_scale": 7.0,
        "clip_skip": 2,
        "denoising_strength": 1.0,
        "num_inference_steps": 10,
        "animatediff_batch_size": 16,
        "animatediff_stride": 8,
        "unet_batch_size": 1,
        "controlnet_batch_size": 1,
        "cross_frame_attention": False,
        # The following parameters will be overwritten. You don't need to modify them.
        "input_frames": [],
        "num_frames": 30,
        "width": 1536,
        "height": 1536,
        "controlnet_frames": [],
    }

    return {
        "models": models,
        "data": data,
        "pipeline": {"seed": 0, "pipeline_inputs": pipeline_inputs},
    }


config_stage_2_template = _build_stage_2_template()

### Upload Input Video

Before you run the following code, please upload your input video to `/content/input_video.mp4`.

### Toon Shading

Render your video in an anime style.

We highly recommend using a higher resolution for better visual quality. The default resolution of Diffutoon is 1536x1536, which requires 22GB of VRAM. If you don't have enough VRAM, 1024x1024 is also acceptable.


In [None]:
from diffsynth import SDVideoPipelineRunner


import copy

# dict.copy() is shallow: the nested "data" dict would still be the template's
# own object, so the assignments below would silently rewrite the template.
# A deep copy keeps config_stage_2_template untouched.
config = copy.deepcopy(config_stage_2_template)
config["data"]["input_frames"] = {
    "video_file": "/content/input_video.mp4",
    "image_folder": None,
    "height": 1024,
    "width": 1024,
    "start_frame_id": 0,
    "end_frame_id": 30
}
# Both ControlNet units read the same frames as the main input.
config["data"]["controlnet_frames"] = [config["data"]["input_frames"], config["data"]["input_frames"]]
config["data"]["output_folder"] = "/content/toon_video"
config["data"]["fps"] = 25

runner = SDVideoPipelineRunner()
runner.run(config)

Let's see the video!

In [None]:
from IPython.display import HTML
from base64 import b64encode

# Read the rendered clip and embed it in the notebook as a base64 data URL.
with open("/content/toon_video/video.mp4", "rb") as video_file:
    mp4 = video_file.read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
# The template needs a %s placeholder for the data URL; without one the
# % formatting below raises TypeError.
HTML("""
<video width=600 controls>
  <source src="%s" type="video/mp4">
</video>
""" % data_url)

### Toon Shading with Editing Signals

In stage 1, enter your prompt, and Diffutoon will generate the editing signals in the form of a low-resolution color video.

In [None]:
from diffsynth import SDVideoPipelineRunner


import copy

# dict.copy() is shallow: the nested "data" and "pipeline" dicts would still be
# the template's own objects, so the assignments below would silently rewrite
# the template. A deep copy keeps config_stage_1_template untouched.
config_stage_1 = copy.deepcopy(config_stage_1_template)
config_stage_1["data"]["input_frames"] = {
    "video_file": "/content/input_video.mp4",
    "image_folder": None,
    "height": 512,
    "width": 512,
    "start_frame_id": 0,
    "end_frame_id": 30
}
# Both ControlNet units read the same frames as the main input.
config_stage_1["data"]["controlnet_frames"] = [config_stage_1["data"]["input_frames"], config_stage_1["data"]["input_frames"]]
config_stage_1["data"]["output_folder"] = "/content/color_video"
config_stage_1["data"]["fps"] = 25
config_stage_1["pipeline"]["pipeline_inputs"]["prompt"] = "best quality, perfect anime illustration, orange clothes, night, a girl is dancing, smile, solo, black silk stockings"

runner = SDVideoPipelineRunner()
runner.run(config_stage_1)

In stage 2, Diffutoon will re-render the whole video according to the editing signals.

In [None]:
from diffsynth import SDVideoPipelineRunner


import copy

# dict.copy() is shallow: the nested "data" dict would still be the template's
# own object, so the assignments below would silently rewrite the template.
# A deep copy keeps config_stage_2_template untouched.
config_stage_2 = copy.deepcopy(config_stage_2_template)
config_stage_2["data"]["input_frames"] = {
    "video_file": "/content/input_video.mp4",
    "image_folder": None,
    "height": 1024,
    "width": 1024,
    "start_frame_id": 0,
    "end_frame_id": 30
}
# The first ControlNet unit reads the color video written by stage 1, sized to
# match the main input frames.
config_stage_2["data"]["controlnet_frames"][0] = {
    "video_file": "/content/color_video/video.mp4",
    "image_folder": None,
    "height": config_stage_2["data"]["input_frames"]["height"],
    "width": config_stage_2["data"]["input_frames"]["width"],
    "start_frame_id": None,
    "end_frame_id": None
}
# The second ControlNet unit reads this stage's own input frames; use this
# cell's config rather than a variable left over from another cell.
config_stage_2["data"]["controlnet_frames"][1] = config_stage_2["data"]["input_frames"]
config_stage_2["data"]["output_folder"] = "/content/edit_video"
config_stage_2["data"]["fps"] = 25

runner = SDVideoPipelineRunner()
# Run the configuration built in this cell.
runner.run(config_stage_2)

Let's see the video!

In [None]:
from IPython.display import HTML
from base64 import b64encode

# Read the rendered clip and embed it in the notebook as a base64 data URL.
with open("/content/edit_video/video.mp4", "rb") as video_file:
    mp4 = video_file.read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
# The template needs a %s placeholder for the data URL; without one the
# % formatting below raises TypeError.
HTML("""
<video width=600 controls>
  <source src="%s" type="video/mp4">
</video>
""" % data_url)