#!/bin/bash
# Single-image inference launcher: runs inference/inference_single_image.py
# once with the settings defined below.
# All paths are relative; presumably the script is meant to be started from
# the repository root -- TODO confirm.

# Abort on the first failing command, unset variable, or failed pipeline stage.
set -euo pipefail

# ----------------------------------------------------------------------------------------------------
HEIGHT="1024"              # Base height.
WIDTH="1024"               # Base width.
SAMPLES_PER_PROMPT="4"     # Num of samples to generate per prompt.
NROW="2"                   # Grid images per row.
OUTPUT_DIR="outputs/test"

# ----------------------------------------------------------------------------------------------------
# Values forwarded to --mask_strategy, one argument per array element.
# usually: "max_norm" "crossmap_32" "selfmap_min_max_per_channel" "selfmap_64"
# [
#   "max_norm", "min_max_norm", "binary", "min_max_per_channel", "decoder_map"
#   "selfmap", "selfmap_min_max_per_channel" "selfmap_64"
# ]
MASK_TYPE=("max_norm")

CFG=7.5                    # Forwarded as --guidance_weight.
STEPS=25                   # Forwarded as --sample_steps.
# NOTE(review): mask_reused_step is assigned but never passed to the command
# below -- confirm whether a matching command-line option is missing.
mask_reused_step=12

UNET_CONFIG="configs/realcustom_sigdino_highres.json"
UNET_CHECKPOINT="ckpts/realcustom/RealCustom_0025000_ema_highres.pth"
UNET_CHECKPOINT_BASE_MODEL="ckpts/sdxl/unet/general_v1-3_sdxl_03.pth"

# ----------------------------------------------------------------------------------------------------
# Both directories are passed, in this order, as the values of
# --text_encoder_variant.
CLIP1_DIR="ckpts/sdxl/clip-sdxl-1"
CLIP2_DIR="ckpts/sdxl/clip-sdxl-2"
VAE_CONFIG_PATH="ckpts/sdxl/vae/sdxl.json"
VAE_CHECKPOINT_PATH="ckpts/sdxl/vae/sdxl-vae.pth"

echo "Start inference"

# Every expansion is quoted so a value containing spaces or glob characters
# reaches the Python script as a single, unmodified argument.
python3 inference/inference_single_image.py \
  --width "$WIDTH" \
  --height "$HEIGHT" \
  --samples_per_prompt "$SAMPLES_PER_PROMPT" \
  --nrow "$NROW" \
  --sample_steps "$STEPS" \
  --guidance_weight "$CFG" \
  --text_encoder_variant \
    "$CLIP1_DIR" \
    "$CLIP2_DIR" \
  --unet_config "$UNET_CONFIG" \
  --unet_checkpoint "$UNET_CHECKPOINT" \
  --unet_checkpoint_base_model "$UNET_CHECKPOINT_BASE_MODEL" \
  --vae_config "$VAE_CONFIG_PATH" \
  --vae_checkpoint "$VAE_CHECKPOINT_PATH" \
  --output_dir "$OUTPUT_DIR" \
  --seed 2024 \
  --text_prompt "the figurine is flying in the sky" \
  --image_prompt_path "prompts/figurine.png" \
  --target_phrase "figurine" \
  --mask_scope 0.25 \
  --mask_strategy "${MASK_TYPE[@]}"