# Zeyue7's picture
# AudioX
# 8ab1cf8
import json
from .factory import create_model_from_config
from .utils import load_ckpt_state_dict
from huggingface_hub import hf_hub_download
import torch
def _get_nested_attr(root, dotted_path):
    """Follow ``dotted_path`` from ``root``; return ``None`` if any link is missing or ``None``."""
    current = root
    for attr_name in dotted_path.split("."):
        current = getattr(current, attr_name, None)
        if current is None:
            return None
    return current


def get_pretrained_model(name: str):
    """Download a pretrained model from the Hugging Face Hub and load its weights.

    The repository's ``config.json`` is fetched and parsed, the model is built
    from it with ``create_model_from_config``, and the checkpoint is loaded
    non-strictly. ``model.safetensors`` is preferred; ``model.ckpt`` is used
    only when the safetensors file is not available.

    Args:
        name: Hub model repository id, passed to ``hf_hub_download``.

    Returns:
        A ``(model, model_config)`` tuple: the constructed model with the
        checkpoint weights applied, and the parsed ``config.json`` contents.

    Raises:
        Errors raised by ``hf_hub_download`` propagate when the config or
        neither checkpoint file can be fetched.
    """
    # Imported locally so this function carries its own dependency on the
    # specific "file not in repo" error type.
    from huggingface_hub.utils import EntryNotFoundError

    model_config_path = hf_hub_download(name, filename="config.json", repo_type='model')
    # JSON is UTF-8 by specification; do not depend on the platform's locale default.
    with open(model_config_path, encoding="utf-8") as f:
        model_config = json.load(f)

    model = create_model_from_config(model_config)

    # Prefer model.safetensors; fall back to model.ckpt only when the
    # safetensors entry is missing. Other failures (auth, HTTP errors, ...)
    # are raised immediately instead of being masked by the fallback attempt.
    try:
        model_ckpt_path = hf_hub_download(name, filename="model.safetensors", repo_type='model')
    except EntryNotFoundError:
        model_ckpt_path = hf_hub_download(name, filename="model.ckpt", repo_type='model')

    # strict=False: keys missing from (or unexpected in) the checkpoint are ignored.
    state_dict = load_ckpt_state_dict(model_ckpt_path)
    model.load_state_dict(state_dict, strict=False)

    # (Re)create position_ids on the video prompt's vision embeddings, since the
    # checkpoint may not carry them. Walk the attribute chain defensively so
    # models without a video_prompt conditioner skip this step instead of
    # raising AttributeError.
    embeddings = _get_nested_attr(
        model,
        "conditioner.conditioners.video_prompt.visual_encoder_model.vision_model.embeddings",
    )
    if embeddings is not None and hasattr(embeddings, 'num_positions'):
        embeddings.position_ids = torch.arange(0, embeddings.num_positions, dtype=torch.long)

    return model, model_config