{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "8ObdI5jCB8xy" }, "source": [ "# DiffSynth Studio\n", "\n", "Welcome to DiffSynth Studio! This is an example of Diffutoon." ] }, { "cell_type": "markdown", "metadata": { "id": "XSkKX7O2BwuM" }, "source": [ "## Install" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "msCpt0pLnT8W", "outputId": "35d93b35-451b-4760-d1ee-ef7ff190916e" }, "outputs": [], "source": [ "!git clone https://github.com/Artiprocher/DiffSynth-Studio.git\n", "!pip install -q transformers controlnet-aux==0.0.7 streamlit streamlit-drawable-canvas imageio imageio[ffmpeg] safetensors einops cupy-cuda12x\n", "%cd /content/DiffSynth-Studio" ] }, { "cell_type": "markdown", "metadata": { "id": "5eCu_rlKB3kK" }, "source": [ "## Download Models" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "9znMkpVj3qZ1" }, "outputs": [], "source": [ "import os\n", "\n", "import requests\n", "\n", "\n", "def download_model(url, file_path):\n", "    \"\"\"Download the file at `url` and save it to `file_path`.\n", "\n", "    Args:\n", "        url: HTTP(S) address of the file; redirects are followed.\n", "        file_path: Destination path; missing parent folders are created.\n", "\n", "    Raises:\n", "        requests.HTTPError: If the server responds with a 4xx/5xx status.\n", "    \"\"\"\n", "    parent_dir = os.path.dirname(file_path)\n", "    if parent_dir:\n", "        os.makedirs(parent_dir, exist_ok=True)\n", "    # Stream the body in chunks so a large checkpoint is never held in memory in full.\n", "    with requests.get(url, allow_redirects=True, stream=True) as response:\n", "        # Fail loudly rather than saving an error page under a model file name.\n", "        response.raise_for_status()\n", "        with open(file_path, \"wb\") as f:\n", "            for chunk in response.iter_content(chunk_size=1024 * 1024):\n", "                f.write(chunk)\n", "\n", "download_model(\"https://civitai.com/api/download/models/229575\", \"models/stable_diffusion/aingdiffusion_v12.safetensors\")\n", "download_model(\"https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt\", \"models/AnimateDiff/mm_sd_v15_v2.ckpt\")\n", "download_model(\"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth\", \"models/ControlNet/control_v11p_sd15_lineart.pth\")\n", "download_model(\"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1e_sd15_tile.pth\", \"models/ControlNet/control_v11f1e_sd15_tile.pth\")\n", "download_model(\"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1p_sd15_depth.pth\", \"models/ControlNet/control_v11f1p_sd15_depth.pth\")\n", 
"download_model(\"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_softedge.pth\", \"models/ControlNet/control_v11p_sd15_softedge.pth\")\n", "download_model(\"https://huggingface.co/lllyasviel/Annotators/resolve/main/dpt_hybrid-midas-501f0c75.pt\", \"models/Annotators/dpt_hybrid-midas-501f0c75.pt\")\n", "download_model(\"https://huggingface.co/lllyasviel/Annotators/resolve/main/ControlNetHED.pth\", \"models/Annotators/ControlNetHED.pth\")\n", "download_model(\"https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth\", \"models/Annotators/sk_model.pth\")\n", "download_model(\"https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth\", \"models/Annotators/sk_model2.pth\")\n", "download_model(\"https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16\", \"models/textual_inversion/verybadimagenegative_v1.3.pt\")" ] }, { "cell_type": "markdown", "metadata": { "id": "iwOq2lWtKVYS" }, "source": [ "## Run Diffutoon" ] }, { "cell_type": "markdown", "metadata": { "id": "tII_XRY-PJeo" }, "source": [ "### Config Template" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "vsd2alA3PrGe" }, "outputs": [], "source": [ "# Stage 1 template: Stable Diffusion checkpoint with softedge and depth ControlNets, 512x512 frames and a FastBlend smoother.\n", "# The notebook uses a copy of it to produce the low-resolution color video that serves as editing signals.\n", "# NOTE(review): controlnet_frames entries are presumably paired by index with controlnet_units - confirm against SDVideoPipelineRunner.\n", "config_stage_1_template = {\n", " \"models\": {\n", " \"model_list\": [\n", " \"models/stable_diffusion/aingdiffusion_v12.safetensors\",\n", " \"models/ControlNet/control_v11p_sd15_softedge.pth\",\n", " \"models/ControlNet/control_v11f1p_sd15_depth.pth\"\n", " ],\n", " \"textual_inversion_folder\": \"models/textual_inversion\",\n", " \"device\": \"cuda\",\n", " \"lora_alphas\": [],\n", " \"controlnet_units\": [\n", " {\n", " \"processor_id\": \"softedge\",\n", " \"model_path\": \"models/ControlNet/control_v11p_sd15_softedge.pth\",\n", " \"scale\": 0.5\n", " },\n", " {\n", " \"processor_id\": \"depth\",\n", " \"model_path\": \"models/ControlNet/control_v11f1p_sd15_depth.pth\",\n", " \"scale\": 0.5\n", " }\n", " ]\n", " },\n", " \"data\": {\n", " 
\"input_frames\": {\n", " \"video_file\": \"/content/input_video.mp4\",\n", " \"image_folder\": None,\n", " \"height\": 512,\n", " \"width\": 512,\n", " \"start_frame_id\": 0,\n", " \"end_frame_id\": 30\n", " },\n", " \"controlnet_frames\": [\n", " {\n", " \"video_file\": \"/content/input_video.mp4\",\n", " \"image_folder\": None,\n", " \"height\": 512,\n", " \"width\": 512,\n", " \"start_frame_id\": 0,\n", " \"end_frame_id\": 30\n", " },\n", " {\n", " \"video_file\": \"/content/input_video.mp4\",\n", " \"image_folder\": None,\n", " \"height\": 512,\n", " \"width\": 512,\n", " \"start_frame_id\": 0,\n", " \"end_frame_id\": 30\n", " }\n", " ],\n", " \"output_folder\": \"data/examples/diffutoon_edit/color_video\",\n", " \"fps\": 25\n", " },\n", " \"smoother_configs\": [\n", " {\n", " \"processor_type\": \"FastBlend\",\n", " \"config\": {}\n", " }\n", " ],\n", " \"pipeline\": {\n", " \"seed\": 0,\n", " \"pipeline_inputs\": {\n", " \"prompt\": \"best quality, perfect anime illustration, orange clothes, night, a girl is dancing, smile, solo, black silk stockings\",\n", " \"negative_prompt\": \"verybadimagenegative_v1.3\",\n", " \"cfg_scale\": 7.0,\n", " \"clip_skip\": 1,\n", " \"denoising_strength\": 0.9,\n", " \"num_inference_steps\": 20,\n", " \"animatediff_batch_size\": 8,\n", " \"animatediff_stride\": 4,\n", " \"unet_batch_size\": 8,\n", " \"controlnet_batch_size\": 8,\n", " \"cross_frame_attention\": True,\n", " \"smoother_progress_ids\": [-1],\n", " # The following parameters will be overwritten. 
You don't need to modify them.\n", " \"input_frames\": [],\n", " \"num_frames\": 30,\n", " \"width\": 512,\n", " \"height\": 512,\n", " \"controlnet_frames\": []\n", " }\n", " }\n", "}\n", "\n", "# Stage 2 template: adds the AnimateDiff checkpoint and switches to tile and lineart ControlNets on 1024x1024 frames.\n", "# The notebook uses copies of it both for plain toon shading and for re-rendering with the stage-1 color video.\n", "config_stage_2_template = {\n", " \"models\": {\n", " \"model_list\": [\n", " \"models/stable_diffusion/aingdiffusion_v12.safetensors\",\n", " \"models/AnimateDiff/mm_sd_v15_v2.ckpt\",\n", " \"models/ControlNet/control_v11f1e_sd15_tile.pth\",\n", " \"models/ControlNet/control_v11p_sd15_lineart.pth\"\n", " ],\n", " \"textual_inversion_folder\": \"models/textual_inversion\",\n", " \"device\": \"cuda\",\n", " \"lora_alphas\": [],\n", " \"controlnet_units\": [\n", " {\n", " \"processor_id\": \"tile\",\n", " \"model_path\": \"models/ControlNet/control_v11f1e_sd15_tile.pth\",\n", " \"scale\": 0.5\n", " },\n", " {\n", " \"processor_id\": \"lineart\",\n", " \"model_path\": \"models/ControlNet/control_v11p_sd15_lineart.pth\",\n", " \"scale\": 0.5\n", " }\n", " ]\n", " },\n", " \"data\": {\n", " \"input_frames\": {\n", " \"video_file\": \"/content/input_video.mp4\",\n", " \"image_folder\": None,\n", " \"height\": 1024,\n", " \"width\": 1024,\n", " \"start_frame_id\": 0,\n", " \"end_frame_id\": 30\n", " },\n", " \"controlnet_frames\": [\n", " {\n", " \"video_file\": \"/content/input_video.mp4\",\n", " \"image_folder\": None,\n", " \"height\": 1024,\n", " \"width\": 1024,\n", " \"start_frame_id\": 0,\n", " \"end_frame_id\": 30\n", " },\n", " {\n", " \"video_file\": \"/content/input_video.mp4\",\n", " \"image_folder\": None,\n", " \"height\": 1024,\n", " \"width\": 1024,\n", " \"start_frame_id\": 0,\n", " \"end_frame_id\": 30\n", " }\n", " ],\n", " \"output_folder\": \"/content/output\",\n", " \"fps\": 25\n", " },\n", " \"pipeline\": {\n", " \"seed\": 0,\n", " \"pipeline_inputs\": {\n", " \"prompt\": \"best quality, perfect anime illustration, light, a girl is dancing, smile, solo\",\n", " \"negative_prompt\": \"verybadimagenegative_v1.3\",\n", " \"cfg_scale\": 7.0,\n", " 
\"clip_skip\": 2,\n", " \"denoising_strength\": 1.0,\n", " \"num_inference_steps\": 10,\n", " \"animatediff_batch_size\": 16,\n", " \"animatediff_stride\": 8,\n", " \"unet_batch_size\": 1,\n", " \"controlnet_batch_size\": 1,\n", " \"cross_frame_attention\": False,\n", " # The following parameters will be overwritten. You don't need to modify them.\n", " \"input_frames\": [],\n", " \"num_frames\": 30,\n", " \"width\": 1536,\n", " \"height\": 1536,\n", " \"controlnet_frames\": []\n", " }\n", " }\n", "}" ] }, { "cell_type": "markdown", "metadata": { "id": "113QAmNHP6T_" }, "source": [ "### Upload Input Video\n", "\n", "Before you run the following code, please upload your input video to `/content/input_video.mp4`." ] }, { "cell_type": "markdown", "metadata": { "id": "CyqAsj1o5U9B" }, "source": [ "### Toon Shading\n", "\n", "Render your video in an anime style.\n", "\n", "We highly recommend you to use a higher resolution for better visual quality. The default resolution of Diffutoon is 1536x1536, which requires 22GB VRAM. 
If you don't have enough VRAM, 1024x1024 is also acceptable.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "761nbrgeKMvj", "outputId": "c0d47d5f-16e9-4a65-e664-9bd5fc491111" }, "outputs": [], "source": [ "import copy\n", "\n", "from diffsynth import SDVideoPipelineRunner\n", "\n", "\n", "# Deep-copy the template: dict.copy() is shallow, so assigning into config[\"data\"]\n", "# would also rewrite config_stage_2_template for every later cell.\n", "config = copy.deepcopy(config_stage_2_template)\n", "config[\"data\"][\"input_frames\"] = {\n", "    \"video_file\": \"/content/input_video.mp4\",\n", "    \"image_folder\": None,\n", "    \"height\": 1024,\n", "    \"width\": 1024,\n", "    \"start_frame_id\": 0,\n", "    \"end_frame_id\": 30\n", "}\n", "# Both controlnet_frames entries reuse the input video settings.\n", "config[\"data\"][\"controlnet_frames\"] = [config[\"data\"][\"input_frames\"], config[\"data\"][\"input_frames\"]]\n", "config[\"data\"][\"output_folder\"] = \"/content/toon_video\"\n", "config[\"data\"][\"fps\"] = 25\n", "\n", "runner = SDVideoPipelineRunner()\n", "runner.run(config)" ] }, { "cell_type": "markdown", "metadata": { "id": "9wujhGUmDIwY" }, "source": [ "Let's see the video!" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 420 }, "id": "TBNAigacAq6h", "outputId": "8f57c3b4-982b-4643-f3dc-53c51bd85a4b" }, "outputs": [], "source": [ "from IPython.display import HTML\n", "from base64 import b64encode\n", "\n", "# Embed the rendered file in the page as a base64 data URL.\n", "with open(\"/content/toon_video/video.mp4\", \"rb\") as video_file:\n", "    mp4 = video_file.read()\n", "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n", "# The markup needs a %s placeholder for data_url; with an empty template the % operator raises TypeError.\n", "HTML(\"\"\"\n", "<video width=400 controls>\n", "    <source src=\"%s\" type=\"video/mp4\">\n", "</video>\n", "\"\"\" % data_url)" ] }, { "cell_type": "markdown", "metadata": { "id": "48hQfX--5YGi" }, "source": [ "### Toon Shading with Editing Signals" ] }, { "cell_type": "markdown", "metadata": { "id": "bAQ9Zq-3-MH6" }, "source": [ "In stage 1, input your prompt, and diffutoon will generate the editing signals in the format of low-resolution color video." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "BtDzYgIq5bgg", "outputId": "bb27b7b9-7979-4409-f476-f25f0a164ef4" }, "outputs": [], "source": [ "import copy\n", "\n", "from diffsynth import SDVideoPipelineRunner\n", "\n", "\n", "# Deep copy so the edits below stay local to this run and leave config_stage_1_template untouched.\n", "config_stage_1 = copy.deepcopy(config_stage_1_template)\n", "config_stage_1[\"data\"][\"input_frames\"] = {\n", "    \"video_file\": \"/content/input_video.mp4\",\n", "    \"image_folder\": None,\n", "    \"height\": 512,\n", "    \"width\": 512,\n", "    \"start_frame_id\": 0,\n", "    \"end_frame_id\": 30\n", "}\n", "# Both controlnet_frames entries reuse the input video settings.\n", "config_stage_1[\"data\"][\"controlnet_frames\"] = [config_stage_1[\"data\"][\"input_frames\"], config_stage_1[\"data\"][\"input_frames\"]]\n", "config_stage_1[\"data\"][\"output_folder\"] = \"/content/color_video\"\n", "config_stage_1[\"data\"][\"fps\"] = 25\n", "config_stage_1[\"pipeline\"][\"pipeline_inputs\"][\"prompt\"] = \"best quality, perfect anime illustration, orange clothes, night, a girl is dancing, smile, solo, black silk stockings\"\n", "\n", "runner = SDVideoPipelineRunner()\n", "runner.run(config_stage_1)" ] }, { "cell_type": "markdown", "metadata": { "id": "D9_AWwhi-pA9" }, "source": [ "In stage 2, diffutoon will rerender the whole video according to the editing signals." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "JFysCk7y51i_", "outputId": "475050d3-c72e-4e08-b55c-d59ed86b5497" }, "outputs": [], "source": [ "import copy\n", "\n", "from diffsynth import SDVideoPipelineRunner\n", "\n", "\n", "# Deep copy so this cell neither depends on nor alters the plain toon-shading run.\n", "config_stage_2 = copy.deepcopy(config_stage_2_template)\n", "config_stage_2[\"data\"][\"input_frames\"] = {\n", "    \"video_file\": \"/content/input_video.mp4\",\n", "    \"image_folder\": None,\n", "    \"height\": 1024,\n", "    \"width\": 1024,\n", "    \"start_frame_id\": 0,\n", "    \"end_frame_id\": 30\n", "}\n", "# Entry 0 reads the stage-1 color video, using the same height and width as the input frames.\n", "config_stage_2[\"data\"][\"controlnet_frames\"][0] = {\n", "    \"video_file\": \"/content/color_video/video.mp4\",\n", "    \"image_folder\": None,\n", "    \"height\": config_stage_2[\"data\"][\"input_frames\"][\"height\"],\n", "    \"width\": config_stage_2[\"data\"][\"input_frames\"][\"width\"],\n", "    \"start_frame_id\": None,\n", "    \"end_frame_id\": None\n", "}\n", "# Entry 1 reads the original input video, taken from this cell's own config.\n", "config_stage_2[\"data\"][\"controlnet_frames\"][1] = config_stage_2[\"data\"][\"input_frames\"]\n", "config_stage_2[\"data\"][\"output_folder\"] = \"/content/edit_video\"\n", "config_stage_2[\"data\"][\"fps\"] = 25\n", "\n", "runner = SDVideoPipelineRunner()\n", "# Run the stage-2 config built above, not the `config` variable of the plain toon-shading cell.\n", "runner.run(config_stage_2)" ] }, { "cell_type": "markdown", "metadata": { "id": "HIPrCAIS_Im0" }, "source": [ "Let's see the video!" 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 420 }, "id": "Y2nz7rew-7VI", "outputId": "fbcbadc6-4045-4aac-dfb0-80bacec003bf" }, "outputs": [], "source": [ "from IPython.display import HTML\n", "from base64 import b64encode\n", "\n", "# Embed the rendered file in the page as a base64 data URL.\n", "with open(\"/content/edit_video/video.mp4\", \"rb\") as video_file:\n", "    mp4 = video_file.read()\n", "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n", "# The markup needs a %s placeholder for data_url; with an empty template the % operator raises TypeError.\n", "HTML(\"\"\"\n", "<video width=400 controls>\n", "    <source src=\"%s\" type=\"video/mp4\">\n", "</video>\n", "\"\"\" % data_url)" ] } ], "metadata": { "accelerator": "GPU", "colab": { "collapsed_sections": [ "tII_XRY-PJeo" ], "gpuType": "T4", "provenance": [], "toc_visible": true }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 }