Commit: update
app.py CHANGED
@@ -24,11 +24,12 @@ from transformers import AutoProcessor
 repo_id = "microsoft/OmniParser-v2.0" # HF repo
 local_dir = "weights" # Target local directory
 
+dtype = torch.bfloat16
 som_generator = MarkHelper()
 magma_som_prompt = "<image>\nIn this view I need to click a button to \"{}\"? Provide the coordinates and the mark index of the containing bounding box if applicable."
 magma_qa_prompt = "<image>\n{} Answer the question briefly."
 magma_model_id = "microsoft/Magma-8B"
-magam_model = AutoModelForCausalLM.from_pretrained(magma_model_id, trust_remote_code=True)
+magam_model = AutoModelForCausalLM.from_pretrained(magma_model_id, trust_remote_code=True, torch_dtype=dtype)
 magma_processor = AutoProcessor.from_pretrained(magma_model_id, trust_remote_code=True)
 magam_model.to("cuda")
 
@@ -91,7 +92,7 @@ def get_som_response(instruction, image_som):
     inputs = magma_processor(images=[image_som], texts=prompt, return_tensors="pt")
     inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
     inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
-    inputs = inputs.to("cuda")
+    inputs = inputs.to("cuda").to(dtype)
 
     magam_model.generation_config.pad_token_id = magma_processor.tokenizer.pad_token_id
     with torch.inference_mode():
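
The net effect of this commit is to load Magma-8B in bfloat16 and cast the processor outputs to the same dtype before generation, which roughly halves GPU memory compared to float32 weights. Below is a minimal, self-contained sketch of that pattern under stated assumptions: it uses a placeholder screenshot path and prompt instead of the Space's MarkHelper/OmniParser set-of-marks pipeline, and generation arguments such as max_new_tokens are illustrative, not taken from the app.

import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

model_id = "microsoft/Magma-8B"
dtype = torch.bfloat16  # half-precision load, as in the commit

# Load the model directly in bfloat16 and move it to the GPU.
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, torch_dtype=dtype)
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
model.to("cuda")

# Placeholder inputs (assumption: any RGB screenshot and instruction).
image = Image.open("screenshot.png").convert("RGB")
prompt = '<image>\nIn this view I need to click a button to "Submit"? Provide the coordinates.'

inputs = processor(images=[image], texts=prompt, return_tensors="pt")
inputs["pixel_values"] = inputs["pixel_values"].unsqueeze(0)
inputs["image_sizes"] = inputs["image_sizes"].unsqueeze(0)
# Move to GPU, then cast floating-point tensors to bfloat16 to match the weights.
inputs = inputs.to("cuda").to(dtype)

model.generation_config.pad_token_id = processor.tokenizer.pad_token_id
with torch.inference_mode():
    output_ids = model.generate(**inputs, max_new_tokens=128, do_sample=False)

print(processor.tokenizer.decode(output_ids[0], skip_special_tokens=True))

Casting the inputs as well as the weights matters: a bfloat16 model fed float32 pixel_values will raise a dtype mismatch inside the vision tower, so the .to(dtype) on the processor output is what makes the half-precision load actually usable.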