# RealCustom / inference /inference_single_image.sh
# CoreloneH's picture
# Add application file
# 7cc4b41
#!/bin/bash
#
# Launches inference/inference_single_image.py once with the fixed settings
# defined below (one text prompt, one reference image).
#
# Every path in this script is relative, so run it from the directory that
# contains inference/, configs/, ckpts/ and prompts/.

# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere inside a pipeline.
set -euo pipefail

# ----------------------------------------------------------------------------------------------------
HEIGHT="1024" # Base height.
WIDTH="1024" # Base width.
SAMPLES_PER_PROMPT="4" # Num of samples to generate per prompt.
NROW="2" # Grid images per row.
OUTPUT_DIR="outputs/test"
# ----------------------------------------------------------------------------------------------------
# Mask strategies handed to --mask_strategy; each array element becomes one
# command-line argument.
# Usually one of: "max_norm" "crossmap_32" "selfmap_min_max_per_channel" "selfmap_64"
# Other values listed by the original author:
#   "max_norm" "min_max_norm" "binary" "min_max_per_channel" "decoder_map"
#   "selfmap" "selfmap_min_max_per_channel" "selfmap_64"
MASK_TYPE=("max_norm")
CFG=7.5
STEPS=25
# NOTE(review): this value is assigned but never passed to the python command
# below and is not exported. Confirm whether a matching command-line flag was
# intended, or whether the variable can be dropped.
mask_reused_step=12
UNET_CONFIG="configs/realcustom_sigdino_highres.json"
UNET_CHECKPOINT="ckpts/realcustom/RealCustom_0025000_ema_highres.pth"
UNET_CHECKPOINT_BASE_MODEL="ckpts/sdxl/unet/general_v1-3_sdxl_03.pth"
# ----------------------------------------------------------------------------------------------------
CLIP1_DIR="ckpts/sdxl/clip-sdxl-1"
CLIP2_DIR="ckpts/sdxl/clip-sdxl-2"
VAE_CONFIG_PATH="ckpts/sdxl/vae/sdxl.json"
VAE_CHECKPOINT_PATH="ckpts/sdxl/vae/sdxl-vae.pth"

echo "Start inference"

# All expansions are quoted so that a value containing whitespace or glob
# characters still reaches python as a single, unmodified argument.
# --text_encoder_variant receives two values: both CLIP directories.
python3 inference/inference_single_image.py \
  --width "$WIDTH" \
  --height "$HEIGHT" \
  --samples_per_prompt "$SAMPLES_PER_PROMPT" \
  --nrow "$NROW" \
  --sample_steps "$STEPS" \
  --guidance_weight "$CFG" \
  --text_encoder_variant \
  "$CLIP1_DIR" \
  "$CLIP2_DIR" \
  --unet_config "$UNET_CONFIG" \
  --unet_checkpoint "$UNET_CHECKPOINT" \
  --unet_checkpoint_base_model "$UNET_CHECKPOINT_BASE_MODEL" \
  --vae_config "$VAE_CONFIG_PATH" \
  --vae_checkpoint "$VAE_CHECKPOINT_PATH" \
  --output_dir "$OUTPUT_DIR" \
  --seed 2024 \
  --text_prompt "the figurine is flying in the sky" \
  --image_prompt_path "prompts/figurine.png" \
  --target_phrase "figurine" \
  --mask_scope 0.25 \
  --mask_strategy "${MASK_TYPE[@]}"