Hayloo9838 committed
Commit c3b28d7 · verified · 1 Parent(s): a3d0c64

Update app.py

Files changed (1)
  1. app.py +77 -1
app.py CHANGED
@@ -9,7 +9,62 @@ import requests
  import matplotlib.pyplot as plt
  from huggingface_hub import hf_hub_download

- # ... (rest of your code remains the same)
+ MODEL_PATH = "pytorch_model.bin"
+ REPO_ID = "Hayloo9838/uno-recognizer"
+ MAPANDSTUFF = "mapandstuff.pth"
+
+ class CLIPVisionClassifier(nn.Module):
+     def __init__(self, num_labels):
+         super().__init__()
+         self.vision_model = CLIPVisionModel.from_pretrained('openai/clip-vit-large-patch14',
+                                                              attn_implementation="eager")
+         self.classifier = nn.Linear(self.vision_model.config.hidden_size, num_labels, bias=False)
+         self.dropout = nn.Dropout(0.1)
+
+     def forward(self, pixel_values, output_attentions=False):
+         outputs = self.vision_model(pixel_values, output_attentions=output_attentions)
+         pooled_output = outputs.pooler_output
+         logits = self.classifier(pooled_output)
+
+         if output_attentions:
+             return logits, outputs.attentions
+         return logits
+
+ def get_attention_map(attentions):
+     attention = attentions[-1]
+     attention = attention.mean(dim=1)
+     attention = attention[0, 0, 1:]
+
+     num_patches = int(np.sqrt(attention.shape[0]))
+
+     attention_map = attention.reshape(num_patches, num_patches)
+
+     attention_map = attention_map.cpu().numpy()
+
+     attention_map = (attention_map - attention_map.min()) / (attention_map.max() - attention_map.min())
+     return attention_map
+
+ def apply_heatmap(image, attention_map, new_size=None):
+     heatmap = cv2.applyColorMap(np.uint8(255 * attention_map), cv2.COLORMAP_JET)
+
+     if isinstance(image, Image.Image):
+         image = np.array(image)
+         image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+
+     if new_size is not None:
+         image_resized = cv2.resize(image, new_size)
+         attention_map_resized = cv2.resize(attention_map, image_resized.shape[:2][::-1], interpolation=cv2.INTER_LINEAR)
+         attention_map_resized = (attention_map_resized - attention_map_resized.min()) / (attention_map_resized.max() - attention_map_resized.min())
+         heatmap_resized = cv2.applyColorMap(np.uint8(255 * attention_map_resized), cv2.COLORMAP_JET)
+         output = cv2.addWeighted(image_resized, 0.7, heatmap_resized, 0.3, 0)
+     else:
+         attention_map_resized = cv2.resize(attention_map, image.shape[:2][::-1], interpolation=cv2.INTER_LINEAR)
+         attention_map_resized = (attention_map_resized - attention_map_resized.min()) / (attention_map_resized.max() - attention_map_resized.min())
+         heatmap_resized = cv2.applyColorMap(np.uint8(255 * attention_map_resized), cv2.COLORMAP_JET)
+         output = cv2.addWeighted(image, 0.7, heatmap_resized, 0.3, 0)
+
+
+     return output

  def process_image_classification(image):
      model, processor, reverse_mapping, device = load_model()
@@ -38,6 +93,27 @@ def process_image_classification(image):

      return visualization_rgb, card_name, confidence

+ def load_model():
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+     # Download model weights and label mapping from Hugging Face Hub
+     model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_PATH)
+     #mapandstuff_path = hf_hub_download(repo_id=REPO_ID, filename=MAPANDSTUFF)
+     checkpoint = torch.load(model_path, map_location=device)
+     label_mapping = checkpoint['label_mapping']
+     reverse_mapping = {v: k for k, v in label_mapping.items()}
+     model = CLIPVisionClassifier(len(label_mapping))
+
+     model_state_dict = checkpoint["model_state_dict"]
+     model.load_state_dict(model_state_dict)
+
+     model = model.to(device)
+     model.eval()
+
+     processor = CLIPProcessor.from_pretrained('openai/clip-vit-large-patch14')
+
+     return model, processor, reverse_mapping, device
+
  def gradio_interface():
      gr_interface = gr.Interface(
          fn=process_image_classification,
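
For reference, a minimal usage sketch of the helpers added in this commit (not part of the diff itself). It assumes app.py's top-level imports (torch, numpy, cv2, PIL, transformers) are already in place; the input file "card.jpg" is a hypothetical example, and since the body of process_image_classification is not shown in this diff, this only approximates that flow.

    # Hedged sketch, not the actual app.py code: load the checkpoint, classify one
    # image, and overlay the CLS-token attention map as a heatmap.
    import torch
    from PIL import Image

    model, processor, reverse_mapping, device = load_model()

    image = Image.open("card.jpg").convert("RGB")            # hypothetical input file
    inputs = processor(images=image, return_tensors="pt")
    pixel_values = inputs["pixel_values"].to(device)

    with torch.no_grad():
        # output_attentions=True makes CLIPVisionClassifier return (logits, attentions)
        logits, attentions = model(pixel_values, output_attentions=True)

    probs = torch.softmax(logits, dim=-1)[0]
    pred = int(torch.argmax(probs))
    card_name = reverse_mapping[pred]
    confidence = float(probs[pred])

    attention_map = get_attention_map(attentions)            # CLS attention over image patches
    visualization_bgr = apply_heatmap(image, attention_map)  # heatmap overlay, BGR (cv2 convention)

    print(card_name, confidence)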