jw2yang committed
Commit a62634a
1 Parent(s): 2cf629f
Files changed (1)
  1. app.py +5 -5
app.py CHANGED
@@ -28,13 +28,15 @@ repo_id = "microsoft/OmniParser-v2.0" # HF repo
 local_dir = "weights" # Target local directory
 
 dtype = torch.bfloat16
+DEVICE = torch.device('cuda')
+
 som_generator = MarkHelper()
 magma_som_prompt = "<image>\nIn this view I need to click a button to \"{}\"? Provide the coordinates and the mark index of the containing bounding box if applicable."
 magma_qa_prompt = "<image>\n{} Answer the question briefly."
 magma_model_id = "microsoft/Magma-8B"
 magam_model = AutoModelForCausalLM.from_pretrained(magma_model_id, trust_remote_code=True, torch_dtype=dtype)
 magma_processor = AutoProcessor.from_pretrained(magma_model_id, trust_remote_code=True)
-magam_model.to("cuda")
+magam_model.to(DEVICE)
 
 # Download the entire repository
 # snapshot_download(repo_id=repo_id, local_dir=local_dir)
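
Note for reviewers: the new DEVICE constant is hard-coded to torch.device('cuda'), so importing app.py still assumes a visible GPU. A minimal sketch of a CPU-fallback variant, hypothetical and not part of this commit:

import torch

# Hypothetical fallback (not in this commit): use the GPU when one is visible,
# otherwise stay on CPU so the module can still be imported for local testing.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# bfloat16 is a GPU-oriented dtype; float32 is the safer CPU default.
dtype = torch.bfloat16 if DEVICE.type == "cuda" else torch.float32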
@@ -71,8 +73,6 @@ This demo is powered by [Gradio](https://gradio.app/) and uses [OmniParserv2](ht
 </div>
 """
 
-DEVICE = torch.device('cuda')
-
 @spaces.GPU
 @torch.inference_mode()
 def get_som_response(instruction, image_som):
@@ -92,7 +92,7 @@ def get_som_response(instruction, image_som):
     inputs = magma_processor(images=[image_som], texts=prompt, return_tensors="pt")
     inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
     inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
-    inputs = inputs.to(dtype)
+    inputs = inputs.to(dtype).to(DEVICE)
 
     magam_model.generation_config.pad_token_id = magma_processor.tokenizer.pad_token_id
     with torch.inference_mode():
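
The added .to(DEVICE) keeps the processor's CPU tensors in step with the relocated model; without it, generation fails with PyTorch's usual "Expected all tensors to be on the same device" error. The two chained calls could also be collapsed into one pass, assuming a transformers release whose BatchFeature.to accepts device and dtype together:

# Hypothetical one-pass equivalent of .to(dtype).to(DEVICE); BatchFeature.to
# casts only floating-point tensors to dtype, so integer ids are left intact.
inputs = inputs.to(device=DEVICE, dtype=dtype)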
@@ -129,7 +129,7 @@ def get_qa_response(instruction, image):
    inputs = magma_processor(images=[image], texts=prompt, return_tensors="pt")
    inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
    inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
-   inputs = inputs.to(dtype)
+   inputs = inputs.to(dtype).to(DEVICE)
 
    magam_model.generation_config.pad_token_id = magma_processor.tokenizer.pad_token_id
    with torch.inference_mode():
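
get_qa_response now mirrors get_som_response, so a single sanity check, hypothetical and not in the commit, can confirm the module-level placement before either function calls generate():

# Hypothetical sanity check: after magam_model.to(DEVICE) above, the weights
# should sit on the CUDA device in the module-level dtype (bfloat16).
param = next(magam_model.parameters())
assert param.device.type == DEVICE.type and param.dtype == dtype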