jw2yang committed
Commit b3e6b8c
1 Parent(s): e8befcf
Files changed (1)
  1. app.py +3 -2
app.py CHANGED
@@ -24,11 +24,12 @@ from transformers import AutoProcessor
 repo_id = "microsoft/OmniParser-v2.0" # HF repo
 local_dir = "weights" # Target local directory
 
+dtype = torch.bfloat16
 som_generator = MarkHelper()
 magma_som_prompt = "<image>\nIn this view I need to click a button to \"{}\"? Provide the coordinates and the mark index of the containing bounding box if applicable."
 magma_qa_prompt = "<image>\n{} Answer the question briefly."
 magma_model_id = "microsoft/Magma-8B"
-magam_model = AutoModelForCausalLM.from_pretrained(magma_model_id, trust_remote_code=True)
+magam_model = AutoModelForCausalLM.from_pretrained(magma_model_id, trust_remote_code=True, torch_dtype=dtype)
 magma_processor = AutoProcessor.from_pretrained(magma_model_id, trust_remote_code=True)
 magam_model.to("cuda")
 
@@ -91,7 +92,7 @@ def get_som_response(instruction, image_som):
     inputs = magma_processor(images=[image_som], texts=prompt, return_tensors="pt")
     inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
     inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
-    inputs = inputs.to("cuda")
+    inputs = inputs.to("cuda").to(dtype)
 
     magam_model.generation_config.pad_token_id = magma_processor.tokenizer.pad_token_id
     with torch.inference_mode():
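
For context, the change loads Magma-8B directly in bfloat16 and keeps the processed inputs in the same dtype before generation, so the vision tensors match the model weights on GPU. The snippet below is a minimal standalone sketch of that pattern, assuming a CUDA device; the dummy image, prompt string, and generation settings are illustrative placeholders rather than code taken from app.py.

import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

magma_model_id = "microsoft/Magma-8B"
dtype = torch.bfloat16  # load weights in bfloat16 instead of the float32 default

magam_model = AutoModelForCausalLM.from_pretrained(
    magma_model_id, trust_remote_code=True, torch_dtype=dtype
)
magma_processor = AutoProcessor.from_pretrained(magma_model_id, trust_remote_code=True)
magam_model.to("cuda")

# Placeholder inputs; in app.py the image is the SoM-annotated screenshot.
image = Image.new("RGB", (448, 448))
prompt = "<image>\nWhat is shown in this view? Answer the question briefly."

inputs = magma_processor(images=[image], texts=prompt, return_tensors="pt")
inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
# Cast the floating-point tensors (pixel_values) to bfloat16 so they match the
# model weights; integer token ids are left untouched by BatchFeature.to(dtype).
inputs = inputs.to("cuda").to(dtype)

magam_model.generation_config.pad_token_id = magma_processor.tokenizer.pad_token_id
with torch.inference_mode():
    output_ids = magam_model.generate(**inputs, max_new_tokens=64)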