File size: 1,976 Bytes
7cc4b41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/bin/bash
# Settings for the single-image inference run launched at the bottom of this
# script (inference/inference_single_image.py).
#
# The shebang must be exactly "#!" with no space between the two characters;
# otherwise it is an ordinary comment and the script is not guaranteed to run
# under bash, which the MASK_TYPE array below requires.

# Abort on command failure, on use of an unset variable, and on pipeline errors.
set -euo pipefail

# ----------------------------------------------------------------------------------------------------
HEIGHT="1024"               # Base height.
WIDTH="1024"                # Base width.
SAMPLES_PER_PROMPT="4"      # Num of samples to generate per prompt.
NROW="2"                    # Grid images per row.

OUTPUT_DIR="outputs/test"   # Passed to the inference script as --output_dir.

# ----------------------------------------------------------------------------------------------------
# Mask strategy name(s), passed as --mask_strategy. Kept as an array so that
# more than one name can be listed.
# Commonly used:
#   "max_norm" "crossmap_32" "selfmap_min_max_per_channel" "selfmap_64"
# Other names listed by the original author:
#   "max_norm" "min_max_norm" "binary" "min_max_per_channel" "decoder_map"
#   "selfmap" "selfmap_min_max_per_channel" "selfmap_64"
MASK_TYPE=("max_norm")

CFG=7.5                     # Passed as --guidance_weight.
STEPS=25                    # Passed as --sample_steps.
# NOTE(review): mask_reused_step is not referenced by the command visible in
# this script; confirm whether it should be forwarded to the inference script.
mask_reused_step=12

UNET_CONFIG="configs/realcustom_sigdino_highres.json"
UNET_CHECKPOINT="ckpts/realcustom/RealCustom_0025000_ema_highres.pth"
UNET_CHECKPOINT_BASE_MODEL="ckpts/sdxl/unet/general_v1-3_sdxl_03.pth"
# ----------------------------------------------------------------------------------------------------
# Both text-encoder directories are passed together as --text_encoder_variant.
CLIP1_DIR="ckpts/sdxl/clip-sdxl-1"
CLIP2_DIR="ckpts/sdxl/clip-sdxl-2"
VAE_CONFIG_PATH="ckpts/sdxl/vae/sdxl.json"
VAE_CHECKPOINT_PATH="ckpts/sdxl/vae/sdxl-vae.pth"


# Launch single-image inference with the settings defined above.
# Every expansion is double-quoted so that a value containing spaces or glob
# characters reaches the Python script as exactly one argument. MASK_TYPE is
# expanded with "${MASK_TYPE[@]}", which yields one argument per array element.
echo "Start inference"
python3 inference/inference_single_image.py \
    --width "$WIDTH" \
    --height "$HEIGHT" \
    --samples_per_prompt "$SAMPLES_PER_PROMPT" \
    --nrow "$NROW" \
    --sample_steps "$STEPS" \
    --guidance_weight "$CFG" \
    --text_encoder_variant \
        "$CLIP1_DIR" \
        "$CLIP2_DIR" \
    --unet_config "$UNET_CONFIG" \
    --unet_checkpoint "$UNET_CHECKPOINT" \
    --unet_checkpoint_base_model "$UNET_CHECKPOINT_BASE_MODEL" \
    --vae_config "$VAE_CONFIG_PATH" \
    --vae_checkpoint "$VAE_CHECKPOINT_PATH" \
    --output_dir "$OUTPUT_DIR" \
    --seed 2024 \
    --text_prompt "the figurine is flying in the sky" \
    --image_prompt_path "prompts/figurine.png" \
    --target_phrase "figurine" \
    --mask_scope 0.25 \
    --mask_strategy "${MASK_TYPE[@]}"