import torch
from transformers import PretrainedConfig, BitsAndBytesConfig
import math
from typing import Optional


class VisionProjectorConfig(PretrainedConfig):
    """Configuration for a vision projector.

    Holds the dimensions used to map vision-encoder features
    (``input_dim``) through a hidden layer (``hidden_dim``) into the
    language model's embedding space (``output_dim``), emitting
    ``num_tokens`` projected tokens.

    Args:
        input_dim: Dimensionality of the incoming vision features.
        hidden_dim: Width of the projector's intermediate layer.
        num_tokens: Number of projected tokens produced per image.
        output_dim: Target embedding dimensionality of the language model
            (default 2560 — presumably the LM hidden size; verify against
            the model this pairs with).
        **kwargs: Forwarded to ``PretrainedConfig`` and also stashed on
            ``self.kwargs`` for backward compatibility.
    """

    def __init__(
        self,
        input_dim: int = 768,
        hidden_dim: int = 256,
        num_tokens: int = 1,
        output_dim: int = 2560,
        **kwargs,
    ):
        # BUG FIX: the original left the parent initializer commented out,
        # and the commented attempt was `super.__init__(**kwargs)` — missing
        # parentheses on `super`, which would raise TypeError if uncommented.
        # PretrainedConfig.__init__ must run so the standard config machinery
        # (to_dict / save_pretrained / default attributes) is set up.
        super().__init__(**kwargs)
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.num_tokens = num_tokens
        # Kept verbatim from the original for backward compatibility with
        # any caller that reads `config.kwargs`.
        self.kwargs = kwargs


# Application-level settings used by training/inference scripts.
# NOTE(review): consumers of these keys are outside this file — confirm
# paths (`data_dir`, checkpoint files) against the runtime layout.
app_config = dict(
    max_seqlen=512,
    max_caption_len=100,
    data_dir='../data',
    output_dir="./results",
    vision_model=True,
    vision_projector_file='models/vision_projector/vp_ckpt_0.pth',
    phi_adapter_dir='models/phi_adapter',
)