jw2yang committed on
Commit
2f72390
·
1 Parent(s): 28c8be9
app.py CHANGED
@@ -16,8 +16,8 @@ import random
16
  pygame.mixer.quit() # Disable sound
17
 
18
  # Constants
19
- WIDTH, HEIGHT = 800, 800
20
- GRID_SIZE = 80
21
  WHITE = (255, 255, 255)
22
  GREEN = (34, 139, 34) # Forest green - more like an apple
23
  RED = (200, 50, 50)
@@ -42,9 +42,12 @@ magma_processor = AutoProcessor.from_pretrained(magma_model_id, trust_remote_cod
42
  magam_model.to("cuda")
43
 
44
  # Load magma image
45
- magma_img = pygame.image.load("./assets/images/magma_game.png")
46
  magma_img = pygame.transform.scale(magma_img, (GRID_SIZE, GRID_SIZE))
47
 
 
 
 
48
  class MagmaFindGPU:
49
  def __init__(self):
50
  self.reset()
@@ -106,8 +109,10 @@ class MagmaFindGPU:
106
  surface.blit(magma_img, (head_x * GRID_SIZE, head_y * GRID_SIZE))
107
 
108
  # pygame.draw.rect(surface, RED, (self.snake[0][0] * GRID_SIZE, self.snake[0][1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))
109
- pygame.draw.rect(surface, GREEN, (self.target[0] * GRID_SIZE, self.target[1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))
110
-
 
 
111
  # Draw four surrounding squares with labels
112
  head_x, head_y = self.snake[0]
113
  neighbors = [(head_x, head_y - 1), (head_x, head_y + 1), (head_x - 1, head_y), (head_x + 1, head_y)]
@@ -137,7 +142,7 @@ def play_game():
137
  pil_img = Image.fromarray(state_som)
138
  convs = [
139
  {"role": "system", "content": "You are an agent that can see, talk, and act."},
140
- {"role": "user", "content": "<image_start><image><image_end>\nWhich mark is closer to green block? Answer with a single number."},
141
  ]
142
  prompt = magma_processor.tokenizer.apply_chat_template(convs, tokenize=False, add_generation_prompt=True)
143
  inputs = magma_processor(images=[pil_img], texts=prompt, return_tensors="pt")
@@ -183,10 +188,10 @@ MARKDOWN = """
183
  <div align="center">
184
  <h2>Magma: A Foundation Model for Multimodal AI Agents</h2>
185
 
186
- Game: Magma finds the apple by moving up, down, left and right.
187
-
188
  \[[arXiv Paper](https://www.arxiv.org/pdf/2502.13130)\] &nbsp; \[[Project Page](https://microsoft.github.io/Magma/)\] &nbsp; \[[Github Repo](https://github.com/microsoft/Magma)\] &nbsp; \[[Hugging Face Model](https://huggingface.co/microsoft/Magma-8B)\] &nbsp;
189
 
 
 
190
  This demo is powered by [Gradio](https://gradio.app/).
191
  </div>
192
  """
 
16
  pygame.mixer.quit() # Disable sound
17
 
18
  # Constants
19
+ WIDTH, HEIGHT = 640, 640
20
+ GRID_SIZE = 64
21
  WHITE = (255, 255, 255)
22
  GREEN = (34, 139, 34) # Forest green - more like an apple
23
  RED = (200, 50, 50)
 
42
  magam_model.to("cuda")
43
 
44
  # Load magma image
45
+ magma_img = pygame.image.load("./assets/images/magma_game_thin.png")
46
  magma_img = pygame.transform.scale(magma_img, (GRID_SIZE, GRID_SIZE))
47
 
48
+ target_img = pygame.image.load("./assets/images/apple.png")
49
+ target_img = pygame.transform.scale(target_img, (GRID_SIZE, GRID_SIZE))
50
+
51
  class MagmaFindGPU:
52
  def __init__(self):
53
  self.reset()
 
109
  surface.blit(magma_img, (head_x * GRID_SIZE, head_y * GRID_SIZE))
110
 
111
  # pygame.draw.rect(surface, RED, (self.snake[0][0] * GRID_SIZE, self.snake[0][1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))
112
+ # pygame.draw.rect(surface, GREEN, (self.target[0] * GRID_SIZE, self.target[1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))
113
+ # Draw green apple target
114
+ surface.blit(target_img, (self.target[0] * GRID_SIZE, self.target[1] * GRID_SIZE))
115
+
116
  # Draw four surrounding squares with labels
117
  head_x, head_y = self.snake[0]
118
  neighbors = [(head_x, head_y - 1), (head_x, head_y + 1), (head_x - 1, head_y), (head_x + 1, head_y)]
 
142
  pil_img = Image.fromarray(state_som)
143
  convs = [
144
  {"role": "system", "content": "You are an agent that can see, talk, and act."},
145
+ {"role": "user", "content": "<image_start><image><image_end>\nWhich mark is closer to green apple? Answer with a single number."},
146
  ]
147
  prompt = magma_processor.tokenizer.apply_chat_template(convs, tokenize=False, add_generation_prompt=True)
148
  inputs = magma_processor(images=[pil_img], texts=prompt, return_tensors="pt")
 
188
  <div align="center">
189
  <h2>Magma: A Foundation Model for Multimodal AI Agents</h2>
190
 
 
 
191
  \[[arXiv Paper](https://www.arxiv.org/pdf/2502.13130)\] &nbsp; \[[Project Page](https://microsoft.github.io/Magma/)\] &nbsp; \[[Github Repo](https://github.com/microsoft/Magma)\] &nbsp; \[[Hugging Face Model](https://huggingface.co/microsoft/Magma-8B)\] &nbsp;
192
 
193
+ Game: Magma collects apple by moving up, down, left and right.
194
+
195
  This demo is powered by [Gradio](https://gradio.app/).
196
  </div>
197
  """
assets/images/apple.png ADDED
assets/images/magma_game_thin.png ADDED