Update model/openlamm.py
model/openlamm.py CHANGED (+1, −44)
@@ -203,7 +203,7 @@ class LAMMPEFTModel(nn.Module):
             target_modules=self.args['lora_target_modules']
         )
 
-        self.llama_model = LlamaForCausalLM.from_pretrained(vicuna_ckpt_path
+        self.llama_model = LlamaForCausalLM.from_pretrained(vicuna_ckpt_path)
         self.llama_model = get_peft_model(self.llama_model, peft_config)
         self.llama_model.print_trainable_parameters()
 
@@ -221,39 +221,6 @@ class LAMMPEFTModel(nn.Module):
         self.system_header = system_header
         self.device = torch.cuda.current_device()
 
-    # def encode_video(self, video_paths):
-    #     inputs = {ModalityType.VISION: data.load_and_transform_video_data(video_paths, self.device)}
-    #     # convert into visual dtype
-    #     inputs = {key: inputs[key].to(self.llama_model.dtype) for key in inputs}
-    #     with torch.no_grad():
-    #         embeddings = self.visual_encoder(inputs)
-    #         video_embeds = embeddings[ModalityType.VISION] # bsz x 1024
-    #     inputs_llama = self.llama_proj(video_embeds).unsqueeze(1) # bsz x 1 x llama_size
-    #     atts_llama = torch.ones(inputs_llama.size()[:-1], dtype=torch.long).to(self.device) # bsz x 1
-    #     return inputs_llama, atts_llama
-
-    # def encode_audio(self, audio_paths):
-    #     inputs = {ModalityType.AUDIO: data.load_and_transform_audio_data(audio_paths, self.device)}
-    #     # convert into visual dtype
-    #     inputs = {key: inputs[key].to(self.llama_model.dtype) for key in inputs}
-    #     with torch.no_grad():
-    #         embeddings = self.visual_encoder(inputs)
-    #         audio_embeds = embeddings[ModalityType.AUDIO] # bsz x 1024
-    #     inputs_llama = self.llama_proj(audio_embeds).unsqueeze(1) # bsz x 1 x llama_size
-    #     atts_llama = torch.ones(inputs_llama.size()[:-1], dtype=torch.long).to(self.device) # bsz x 1
-    #     return inputs_llama, atts_llama
-
-    # def encode_thermal(self, thermal_paths):
-    #     inputs = {ModalityType.THERMAL: data.load_and_transform_thermal_data(thermal_paths, self.device)}
-    #     # convert into visual dtype
-    #     inputs = {key: inputs[key].to(self.llama_model.dtype) for key in inputs}
-    #     with torch.no_grad():
-    #         embeddings = self.visual_encoder(inputs)
-    #         image_embeds = embeddings['thermal'] # bsz x 1024
-    #     inputs_llama = self.llama_proj(image_embeds).unsqueeze(1) # bsz x 1 x llama_size
-    #     atts_llama = torch.ones(inputs_llama.size()[:-1], dtype=torch.long).to(self.device) # bsz x 1
-    #     return inputs_llama, atts_llama
-
     def encode_image(self, image_paths):
         """encode images to llama inputs
 
@@ -279,16 +246,6 @@ class LAMMPEFTModel(nn.Module):
 
     def my_encode_image(self, images):
         """encoder loaded image objects"""
-        # if self.encoder_pretrain == 'imagebind':
-        #     inputs = {ModalityType.VISION: data.transform_vision_data(images, self.device)}
-        #     # convert into visual dtype
-        #     inputs = {key: inputs[key].to(self.llama_model.dtype) for key in inputs}
-        #     with torch.no_grad():
-        #         embeddings = self.visual_encoder(inputs)
-        #         image_embeds = embeddings['vision'] # bsz x 1024
-        #     inputs_llama = self.llama_proj(image_embeds).unsqueeze(1) # bsz x 1 x llama_size
-        #     atts_llama = torch.ones(inputs_llama.size()[:-1], dtype=torch.long).to(self.device) # bsz x 1
-        #     return inputs_llama, atts_llama
         if self.encoder_pretrain == 'clip':
             inputs = data.transform_vision_data(images, self.device) # bsz x 3 x 224 x 224
             inputs_llama = self.clip_encode_image(inputs) # bsz x 1/256 x llama_size
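For context, the line corrected in the first hunk loads the base Vicuna checkpoint and then wraps it with a PEFT LoRA adapter built from the config just above it. The sketch below shows that pattern in isolation; the checkpoint path and the LoRA hyperparameters are illustrative assumptions, not values taken from this repository.

# Minimal sketch of the base-model + LoRA wrapping pattern in the first hunk.
# The checkpoint path and LoRA hyperparameters are assumed for illustration.
from peft import LoraConfig, TaskType, get_peft_model
from transformers import LlamaForCausalLM

vicuna_ckpt_path = "path/to/vicuna-7b"  # hypothetical checkpoint path

peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=8,                      # assumed LoRA rank
    lora_alpha=16,            # assumed scaling factor
    lora_dropout=0.05,        # assumed dropout
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # assumed target modules
)

# Load the base model (note the closing parenthesis that this commit restores),
# then wrap it so only the LoRA adapter weights remain trainable.
llama_model = LlamaForCausalLM.from_pretrained(vicuna_ckpt_path)
llama_model = get_peft_model(llama_model, peft_config)
llama_model.print_trainable_parameters()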