jw2yang committed
Commit a62634a
1 Parent(s): 2cf629f
Files changed (1)
  1. app.py +5 -5
app.py CHANGED
@@ -28,13 +28,15 @@ repo_id = "microsoft/OmniParser-v2.0" # HF repo
 local_dir = "weights" # Target local directory
 
 dtype = torch.bfloat16
+DEVICE = torch.device('cuda')
+
 som_generator = MarkHelper()
 magma_som_prompt = "<image>\nIn this view I need to click a button to \"{}\"? Provide the coordinates and the mark index of the containing bounding box if applicable."
 magma_qa_prompt = "<image>\n{} Answer the question briefly."
 magma_model_id = "microsoft/Magma-8B"
 magam_model = AutoModelForCausalLM.from_pretrained(magma_model_id, trust_remote_code=True, torch_dtype=dtype)
 magma_processor = AutoProcessor.from_pretrained(magma_model_id, trust_remote_code=True)
-magam_model.to("cuda")
+magam_model.to(DEVICE)
 
 # Download the entire repository
 # snapshot_download(repo_id=repo_id, local_dir=local_dir)
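
Note for reviewers: the new DEVICE constant is hard-coded to torch.device('cuda'), so importing app.py still assumes a visible GPU. A minimal sketch of a CPU-fallback variant, hypothetical and not part of this commit:

import torch

# Hypothetical fallback (not in this commit): use the GPU when one is visible,
# otherwise stay on CPU so the module can still be imported for local testing.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# bfloat16 is a GPU-oriented dtype; float32 is the safer CPU default.
dtype = torch.bfloat16 if DEVICE.type == "cuda" else torch.float32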
@@ -71,8 +73,6 @@ This demo is powered by [Gradio](https://gradio.app/) and uses [OmniParserv2](ht
 </div>
 """
 
-DEVICE = torch.device('cuda')
-
 @spaces.GPU
 @torch.inference_mode()
 def get_som_response(instruction, image_som):
@@ -92,7 +92,7 @@ def get_som_response(instruction, image_som):
     inputs = magma_processor(images=[image_som], texts=prompt, return_tensors="pt")
     inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
     inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
-    inputs = inputs.to(dtype)
+    inputs = inputs.to(dtype).to(DEVICE)
 
     magam_model.generation_config.pad_token_id = magma_processor.tokenizer.pad_token_id
     with torch.inference_mode():
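
The added .to(DEVICE) keeps the processor's CPU tensors in step with the relocated model; without it, generation fails with PyTorch's usual "Expected all tensors to be on the same device" error. The two chained calls could also be collapsed into one pass, assuming a transformers release whose BatchFeature.to accepts device and dtype together:

# Hypothetical one-pass equivalent of .to(dtype).to(DEVICE); BatchFeature.to
# casts only floating-point tensors to dtype, so integer ids are left intact.
inputs = inputs.to(device=DEVICE, dtype=dtype)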
@@ -129,7 +129,7 @@ def get_qa_response(instruction, image):
    inputs = magma_processor(images=[image], texts=prompt, return_tensors="pt")
    inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
    inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
-   inputs = inputs.to(dtype)
+   inputs = inputs.to(dtype).to(DEVICE)
 
    magam_model.generation_config.pad_token_id = magma_processor.tokenizer.pad_token_id
    with torch.inference_mode():
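
get_qa_response now mirrors get_som_response, so a single sanity check, hypothetical and not in the commit, can confirm the module-level placement before either function calls generate():

# Hypothetical sanity check: after magam_model.to(DEVICE) above, the weights
# should sit on the CUDA device in the module-level dtype (bfloat16).
param = next(magam_model.parameters())
assert param.device.type == DEVICE.type and param.dtype == dtype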