Kaushik066 committed on
Commit
151b2c7
·
verified ·
1 Parent(s): fc176df

change to insightFace model

Browse files
Files changed (1) hide show
  1. app.py +164 -125
app.py CHANGED
@@ -6,6 +6,8 @@ from transformers import ViTImageProcessor
6
 
7
  # For Model
8
  from transformers import ViTModel, ViTConfig, pipeline
 
 
9
 
10
  # For data augmentation
11
  from torchvision import transforms, datasets
@@ -25,6 +27,7 @@ from torch.utils.data import Dataset, DataLoader
25
  # Other Generic Libraries
26
  import torch
27
  from PIL import Image
 
28
  import os
29
  import streamlit as st
30
  import gc
@@ -48,134 +51,147 @@ data_path = 'employees'
48
  model_path = 'vit_pytorch_GPU_1.pt'
49
  webcam_path = 'captured_image.jpg'
50
 
 
 
51
  # Set Title
52
  st.title("Employee Attendance System")
53
- #pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
54
 
55
  # Define Image Processor
56
- image_processor_prod = ViTImageProcessor.from_pretrained(MODEL_TRANSFORMER, attn_implementation="sdpa", torch_dtype=torch.float16)
57
 
58
  # Define ML Model
59
- class FaceEmbeddingModel(torch.nn.Module):
60
- def __init__(self, model_name, embedding_size):
61
- super(FaceEmbeddingModel, self).__init__()
62
- self.config = ViTConfig.from_pretrained(model_name, id2label=idx_to_label, label2id=label_to_idx, return_dict=True)
63
- self.backbone = ViTModel.from_pretrained(model_name, config=self.config) # Load ViT model
64
- self.fc = torch.nn.Linear(self.backbone.config.hidden_size, embedding_size) # Convert to 512D feature vector
65
-
66
- def forward(self, images):
67
- x = self.backbone(images).last_hidden_state[:, 0] # Extract embeddings
68
- x = self.fc(x) # Convert to 512D embedding
69
- return torch.nn.functional.normalize(x) # Normalize for cosine similarity
70
-
 
71
  # Load the model
72
- model_pretrained = torch.load(model_path, map_location=device, weights_only=False)
73
 
74
  # Define the ML model - Evaluation function
75
- def prod_function(transformer_model, prod_dl, webcam_dl):
76
- # Initialize accelerator
77
- accelerator = Accelerator()
78
-
79
- # to INFO for the main process only.
80
- if accelerator.is_main_process:
81
- datasets.utils.logging.set_verbosity_warning()
82
- transformers.utils.logging.set_verbosity_info()
83
- else:
84
- datasets.utils.logging.set_verbosity_error()
85
- transformers.utils.logging.set_verbosity_error()
86
-
87
- # The seed need to be set before we instantiate the model, as it will determine the random head.
88
- set_seed(42)
89
-
90
- # There is no specific order to remember, we just need to unpack the objects in the same order we gave them to the prepare method.
91
- accelerated_model, acclerated_prod_dl, acclerated_webcam_dl = accelerator.prepare(transformer_model, prod_dl, webcam_dl)
92
-
93
- # Evaluate at the end of the epoch
94
- accelerated_model.eval()
95
-
96
- # Find Embedding of the image to be evaluated
97
- for batch in acclerated_webcam_dl:
98
- with torch.no_grad():
99
- #img_prod = acclerated_prod_data['pixel_values']
100
- emb_prod = accelerated_model(batch['pixel_values'])
101
-
102
- prod_preds = []
103
-
104
- for batch in acclerated_prod_dl:
105
- #img = batch['pixel_values']
106
- with torch.no_grad():
107
- emb = accelerated_model(batch['pixel_values'])
108
- distance = F.pairwise_distance(emb, emb_prod)
109
-
110
- prod_preds.append(distance)
111
- return prod_preds
112
-
113
  # Creation of Dataloader
114
- class CustomDatasetProd(Dataset):
115
- def __init__(self, pixel_values):
116
- self.pixel_values = pixel_values
117
-
118
- def __len__(self):
119
- return len(self.pixel_values)
120
-
121
- def __getitem__(self, idx):
122
- item = {
123
- 'pixel_values': self.pixel_values[idx].squeeze(0),
124
- }
125
- return item
 
 
 
 
 
 
 
 
 
 
 
126
 
127
  # Creation of Dataset
128
- class CreateDatasetProd():
129
- def __init__(self, image_processor):
130
- super().__init__()
131
- self.image_processor = image_processor
132
- # Define a transformation pipeline
133
- self.transform_prod = transforms.v2.Compose([
134
- transforms.v2.ToImage(),
135
- transforms.v2.ToDtype(torch.uint8, scale=False)
136
- ])
137
-
138
- def get_pixels(self, img_paths):
139
- pixel_values = []
140
- for path in img_paths:
141
- # Read and process Images
142
- img = Image.open(path)
143
- img = self.transform_prod(img)
144
-
145
- # Scaling the video to ML model's desired format
146
- img = self.image_processor(img, return_tensors='pt') #, input_data_format='channels_first')
147
-
148
- pixel_values.append(img['pixel_values'].squeeze(0))
149
-
150
- # Force garbage collection
151
- del img
152
- gc.collect()
153
- return pixel_values
154
-
155
- def get_pixel(self, img_path):
156
- # Read and process Images
157
- img = Image.open(img_path)
158
- img = self.transform_prod(img)
159
-
160
- # Scaling the video to ML model's desired format
161
- img = self.image_processor(img, return_tensors='pt') #, input_data_format='channels_first')
162
-
163
- pixel_values = img['pixel_values'] #.squeeze(0)
164
-
165
- # Force garbage collection
166
- del img
167
- gc.collect()
168
-
169
- return pixel_values
170
-
171
- def create_dataset(self, image_paths, webcam=False):
172
- if webcam == True:
173
- pixel_values = self.get_pixel(image_paths)
174
- else:
175
- pixel_values = torch.stack(self.get_pixels(image_paths))
176
-
177
- return CustomDatasetProd(pixel_values=pixel_values)
178
-
179
  # Read images from directory
180
  image_paths = []
181
  image_file = glob(os.path.join(data_path, '*.jpg'))
@@ -184,15 +200,38 @@ image_paths.extend(image_file)
184
  #st.write('input path size:', len(image_paths))
185
  #st.write(image_paths)
186
 
 
 
 
 
187
  # Create DataLoader for Employees image
188
- dataset_prod_obj = CreateDatasetProd(image_processor_prod)
189
- prod_ds = dataset_prod_obj.create_dataset(image_paths, webcam=False)
190
- prod_dl = DataLoader(prod_ds, batch_size=BATCH_SIZE)
191
 
192
  ## Testing the dataloader
193
  #prod_inputs = next(iter(prod_dl))
194
  #st.write(prod_inputs['pixel_values'].shape)
195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  about_tab, app_tab = st.tabs(["About the app", "Face Recognition"])
197
  # About the app Tab
198
  with about_tab:
@@ -231,8 +270,8 @@ with app_tab:
231
  #st.write('Image saved as:',webcam_path)
232
 
233
  ## Create DataLoader for Webcam Image
234
- webcam_ds = dataset_prod_obj.create_dataset(picture, webcam=True)
235
- webcam_dl = DataLoader(webcam_ds, batch_size=BATCH_SIZE)
236
 
237
  ## Testing the dataloader
238
  #prod_inputs = next(iter(webcam_dl))
@@ -240,14 +279,14 @@ with app_tab:
240
 
241
  with st.spinner("Wait for it...", show_time=True):
242
  # Run the predictions
243
- prediction = prod_function(model_pretrained, prod_dl, webcam_dl)
244
- predictions = torch.cat(prediction, 0).to(device)
245
- match_idx = torch.argmin(predictions)
246
  st.write(predictions)
247
  st.write(image_paths)
248
 
249
  # Display the results
250
- if predictions[match_idx] <= 0.3:
251
  st.write('Welcome: ',image_paths[match_idx].split('/')[-1].split('.')[0])
252
  else:
253
  st.write("Match not found")
 
6
 
7
  # For Model
8
  from transformers import ViTModel, ViTConfig, pipeline
9
+ import insightface
10
+ from insightface.app import FaceAnalysis
11
 
12
  # For data augmentation
13
  from torchvision import transforms, datasets
 
27
  # Other Generic Libraries
28
  import torch
29
  from PIL import Image
30
+ import cv2
31
  import os
32
  import streamlit as st
33
  import gc
 
51
  model_path = 'vit_pytorch_GPU_1.pt'
52
  webcam_path = 'captured_image.jpg'
53
 
54
+ IMAGE_SHAPE = 640
55
+
56
  # Set Title
57
  st.title("Employee Attendance System")
 
58
 
59
  # Define Image Processor
60
+ #image_processor_prod = ViTImageProcessor.from_pretrained(MODEL_TRANSFORMER, attn_implementation="sdpa", torch_dtype=torch.float16)
61
 
62
  # Define ML Model
63
+ #class FaceEmbeddingModel(torch.nn.Module):
64
+ # def __init__(self, model_name, embedding_size):
65
+ # super(FaceEmbeddingModel, self).__init__()
66
+ # self.config = ViTConfig.from_pretrained(model_name, id2label=idx_to_label, label2id=label_to_idx, return_dict=True)
67
+ # self.backbone = ViTModel.from_pretrained(model_name, config=self.config) # Load ViT model
68
+ # self.fc = torch.nn.Linear(self.backbone.config.hidden_size, embedding_size) # Convert to 512D feature vector
69
+ #
70
+ # def forward(self, images):
71
+ # x = self.backbone(images).last_hidden_state[:, 0] # Extract embeddings
72
+ # x = self.fc(x) # Convert to 512D embedding
73
+ # return torch.nn.functional.normalize(x) # Normalize for cosine similarity
74
+
75
+
76
  # Load the model
77
+ #model_pretrained = torch.load(model_path, map_location=device, weights_only=False)
78
 
79
  # Define the ML model - Evaluation function
80
+ #def prod_function(transformer_model, prod_dl, webcam_dl):
81
+ # # Initialize accelerator
82
+ # accelerator = Accelerator()
83
+ #
84
+ # # to INFO for the main process only.
85
+ # #if accelerator.is_main_process:
86
+ # # datasets.utils.logging.set_verbosity_warning()
87
+ # # transformers.utils.logging.set_verbosity_info()
88
+ # #else:
89
+ # # datasets.utils.logging.set_verbosity_error()
90
+ # # transformers.utils.logging.set_verbosity_error()
91
+ #
92
+ # # The seed need to be set before we instantiate the model, as it will determine the random head.
93
+ # set_seed(42)
94
+ #
95
+ # # There is no specific order to remember, we just need to unpack the objects in the same order we gave them to the prepare method.
96
+ # accelerated_model, acclerated_prod_dl, acclerated_webcam_dl = accelerator.prepare(transformer_model, prod_dl, webcam_dl)
97
+ #
98
+ # # Evaluate at the end of the epoch
99
+ # accelerated_model.eval()
100
+ #
101
+ # # Find Embedding of the image to be evaluated
102
+ # for batch in acclerated_webcam_dl:
103
+ # with torch.no_grad():
104
+ # #img_prod = acclerated_prod_data['pixel_values']
105
+ # emb_prod = accelerated_model(batch['pixel_values'])
106
+ #
107
+ # prod_preds = []
108
+ #
109
+ # for batch in acclerated_prod_dl:
110
+ # #img = batch['pixel_values']
111
+ # with torch.no_grad():
112
+ # emb = accelerated_model(batch['pixel_values'])
113
+ # distance = F.pairwise_distance(emb, emb_prod)
114
+ #
115
+ # prod_preds.append(distance)
116
+ # return prod_preds
117
+
118
  # Creation of Dataloader
119
+ #class CustomDatasetProd(Dataset):
120
+ # def __init__(self, image_path, webcam):
121
+ # self.image_path = image_path
122
+ # self.webcam = webcam
123
+ #
124
+ # def __len__(self):
125
+ # return len(self.image_path)
126
+ #
127
+ # def __getitem__(self, idx):
128
+ # if webcam == False:
129
+ # img = cv2.imread(image_path[idx])
130
+ # else:
131
+ # img = image_path
132
+ # faces = app.get(img)
133
+ #
134
+ # if not faces:
135
+ # raise Exception("No face detected")
136
+ #
137
+ # pixel_values = faces[0].embedding # embedding is a 512-dimensional vector
138
+ # item = {
139
+ # 'pixel_values': pixel_values.squeeze(0),
140
+ # }
141
+ # return item
142
 
143
  # Creation of Dataset
144
+ #class CreateDatasetProd():
145
+ # def __init__(self, image_processor):
146
+ # super().__init__()
147
+ # self.image_processor = image_processor
148
+ # # Define a transformation pipeline
149
+ # self.transform_prod = transforms.v2.Compose([
150
+ # transforms.v2.ToImage(),
151
+ # transforms.v2.ToDtype(torch.uint8, scale=False)
152
+ # ])
153
+ #
154
+ # def get_pixels(self, img_paths):
155
+ # pixel_values = []
156
+ # for path in img_paths:
157
+ # # Read and process Images
158
+ # img = Image.open(path)
159
+ # img = self.transform_prod(img)
160
+ #
161
+ # # Scaling the video to ML model's desired format
162
+ # img = self.image_processor(img, return_tensors='pt') #, input_data_format='channels_first')
163
+ #
164
+ # pixel_values.append(img['pixel_values'].squeeze(0))
165
+ #
166
+ # # Force garbage collection
167
+ # del img
168
+ # gc.collect()
169
+ # return pixel_values
170
+ #
171
+ # def get_pixel(self, img_path):
172
+ # # Read and process Images
173
+ # img = Image.open(img_path)
174
+ # img = self.transform_prod(img)
175
+ #
176
+ # # Scaling the video to ML model's desired format
177
+ # img = self.image_processor(img, return_tensors='pt') #, input_data_format='channels_first')
178
+ #
179
+ # pixel_values = img['pixel_values'] #.squeeze(0)
180
+ #
181
+ # # Force garbage collection
182
+ # del img
183
+ # gc.collect()
184
+ #
185
+ # return pixel_values
186
+ #
187
+ # def create_dataset(self, image_paths, webcam=False):
188
+ # if webcam == True:
189
+ # pixel_values = self.get_pixel(image_paths)
190
+ # else:
191
+ # pixel_values = torch.stack(self.get_pixels(image_paths))
192
+ #
193
+ # return CustomDatasetProd(pixel_values=pixel_values)
194
+
195
  # Read images from directory
196
  image_paths = []
197
  image_file = glob(os.path.join(data_path, '*.jpg'))
 
200
  #st.write('input path size:', len(image_paths))
201
  #st.write(image_paths)
202
 
203
+ # Initialize the app
204
+ app = FaceAnalysis(name="buffalo_l") # buffalo_l includes ArcFace model
205
+ app.prepare(ctx_id=-1, det_size=(IMAGE_SHAPE, IMAGE_SHAPE)) # Use ctx_id=-1 if you want CPU, and ctx_id=0 for GPU
206
+
207
  # Create DataLoader for Employees image
208
+ #dataset_prod_obj = CreateDatasetProd(image_processor_prod)
209
+ #prod_ds = dataset_prod_obj.create_dataset(image_paths, webcam=False)
210
+ #prod_dl = DataLoader(prod_ds, webcam=False, batch_size=BATCH_SIZE)
211
 
212
  ## Testing the dataloader
213
  #prod_inputs = next(iter(prod_dl))
214
  #st.write(prod_inputs['pixel_values'].shape)
215
 
216
+
217
+ # Define the ML model - Evaluation function
218
def _face_embedding(app, image_path):
    """Return the first detected face's embedding as a float tensor, or None.

    None is returned when the image cannot be read or no face is detected.
    """
    img = cv2.imread(image_path)
    if img is None:  # cv2.imread reports an unreadable file by returning None
        return None

    faces = app.get(img, max_num=1)
    if not faces:
        return None

    # The embedding comes back as a NumPy array, while F.cosine_similarity
    # only accepts tensors, so convert it here.
    return torch.as_tensor(faces[0].embedding, dtype=torch.float32)


def prod_function(app, prod_path, webcam_path):
    """Score every employee image against the webcam capture.

    Each image is read with ``cv2.imread``, ``app.get`` is asked for at most
    one face, and the face embeddings are compared with cosine similarity.

    Args:
        app: Object exposing ``get(img, max_num=1)`` that returns a sequence
            of detected faces, each carrying an ``embedding`` attribute
            (the ``FaceAnalysis`` instance created in this file).
        prod_path: Iterable of employee image paths.
        webcam_path: Path of the captured webcam image.
            NOTE(review): the visible caller passes ``picture``; confirm that
            it is a filesystem path that ``cv2.imread`` can open.

    Returns:
        list of scalar ``torch.Tensor``: one cosine similarity per entry of
        ``prod_path``, in the same order. An employee image that cannot be
        read, or in which no face is found, scores -1.0 (the minimum cosine
        similarity) so the list stays index-aligned with ``prod_path``.

    Raises:
        ValueError: If the webcam image cannot be read or contains no face.
    """
    webcam_emb = _face_embedding(app, webcam_path)
    if webcam_emb is None:
        raise ValueError("No face detected in the webcam image")

    similarity_score = []
    for path in prod_path:
        face_embedding = _face_embedding(app, path)
        if face_embedding is None:
            # Keep the slot so callers can still index image paths by position.
            similarity_score.append(torch.tensor(-1.0))
            continue
        similarity_score.append(F.cosine_similarity(face_embedding, webcam_emb, dim=0))

    return similarity_score
234
+
235
  about_tab, app_tab = st.tabs(["About the app", "Face Recognition"])
236
  # About the app Tab
237
  with about_tab:
 
270
  #st.write('Image saved as:',webcam_path)
271
 
272
  ## Create DataLoader for Webcam Image
273
+ #webcam_ds = dataset_prod_obj.create_dataset(picture, webcam=True)
274
+ #webcam_dl = DataLoader(picture, webcam=True, batch_size=BATCH_SIZE)
275
 
276
  ## Testing the dataloader
277
  #prod_inputs = next(iter(webcam_dl))
 
279
 
280
  with st.spinner("Wait for it...", show_time=True):
281
  # Run the predictions
282
# prod_function returns one scalar cosine-similarity tensor per employee image.
prediction = prod_function(app, image_paths, picture)
# Stack the scalars into a 1-D tensor; `predictions` and `match_idx` are used
# by the display code below, so they must be defined here.
predictions = torch.stack(prediction)
# Higher cosine similarity means a closer match, so pick the maximum.
match_idx = torch.argmax(predictions)
285
  st.write(predictions)
286
  st.write(image_paths)
287
 
288
  # Display the results
289
+ if predictions[match_idx] >= 0.9:
290
  st.write('Welcome: ',image_paths[match_idx].split('/')[-1].split('.')[0])
291
  else:
292
  st.write("Match not found")