Spaces:
Sleeping
Sleeping
change to insightFace model
Browse files
app.py
CHANGED
@@ -6,6 +6,8 @@ from transformers import ViTImageProcessor
|
|
6 |
|
7 |
# For Model
|
8 |
from transformers import ViTModel, ViTConfig, pipeline
|
|
|
|
|
9 |
|
10 |
# For data augmentation
|
11 |
from torchvision import transforms, datasets
|
@@ -25,6 +27,7 @@ from torch.utils.data import Dataset, DataLoader
|
|
25 |
# Other Generic Libraries
|
26 |
import torch
|
27 |
from PIL import Image
|
|
|
28 |
import os
|
29 |
import streamlit as st
|
30 |
import gc
|
@@ -48,134 +51,147 @@ data_path = 'employees'
|
|
48 |
model_path = 'vit_pytorch_GPU_1.pt'
|
49 |
webcam_path = 'captured_image.jpg'
|
50 |
|
|
|
|
|
51 |
# Set Title
|
52 |
st.title("Employee Attendance System")
|
53 |
-
#pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
|
54 |
|
55 |
# Define Image Processor
|
56 |
-
image_processor_prod = ViTImageProcessor.from_pretrained(MODEL_TRANSFORMER, attn_implementation="sdpa", torch_dtype=torch.float16)
|
57 |
|
58 |
# Define ML Model
|
59 |
-
class FaceEmbeddingModel(torch.nn.Module):
|
60 |
-
def __init__(self, model_name, embedding_size):
|
61 |
-
super(FaceEmbeddingModel, self).__init__()
|
62 |
-
self.config = ViTConfig.from_pretrained(model_name, id2label=idx_to_label, label2id=label_to_idx, return_dict=True)
|
63 |
-
self.backbone = ViTModel.from_pretrained(model_name, config=self.config) # Load ViT model
|
64 |
-
self.fc = torch.nn.Linear(self.backbone.config.hidden_size, embedding_size) # Convert to 512D feature vector
|
65 |
-
|
66 |
-
def forward(self, images):
|
67 |
-
x = self.backbone(images).last_hidden_state[:, 0] # Extract embeddings
|
68 |
-
x = self.fc(x) # Convert to 512D embedding
|
69 |
-
return torch.nn.functional.normalize(x) # Normalize for cosine similarity
|
70 |
-
|
|
|
71 |
# Load the model
|
72 |
-
model_pretrained = torch.load(model_path, map_location=device, weights_only=False)
|
73 |
|
74 |
# Define the ML model - Evaluation function
|
75 |
-
def prod_function(transformer_model, prod_dl, webcam_dl):
|
76 |
-
# Initialize accelerator
|
77 |
-
accelerator = Accelerator()
|
78 |
-
|
79 |
-
# to INFO for the main process only.
|
80 |
-
if accelerator.is_main_process:
|
81 |
-
|
82 |
-
|
83 |
-
else:
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
# The seed need to be set before we instantiate the model, as it will determine the random head.
|
88 |
-
set_seed(42)
|
89 |
-
|
90 |
-
# There is no specific order to remember, we just need to unpack the objects in the same order we gave them to the prepare method.
|
91 |
-
accelerated_model, acclerated_prod_dl, acclerated_webcam_dl = accelerator.prepare(transformer_model, prod_dl, webcam_dl)
|
92 |
-
|
93 |
-
# Evaluate at the end of the epoch
|
94 |
-
accelerated_model.eval()
|
95 |
-
|
96 |
-
# Find Embedding of the image to be evaluated
|
97 |
-
for batch in acclerated_webcam_dl:
|
98 |
-
with torch.no_grad():
|
99 |
-
#img_prod = acclerated_prod_data['pixel_values']
|
100 |
-
emb_prod = accelerated_model(batch['pixel_values'])
|
101 |
-
|
102 |
-
prod_preds = []
|
103 |
-
|
104 |
-
for batch in acclerated_prod_dl:
|
105 |
-
#img = batch['pixel_values']
|
106 |
-
with torch.no_grad():
|
107 |
-
emb = accelerated_model(batch['pixel_values'])
|
108 |
-
distance = F.pairwise_distance(emb, emb_prod)
|
109 |
-
|
110 |
-
prod_preds.append(distance)
|
111 |
-
return prod_preds
|
112 |
-
|
113 |
# Creation of Dataloader
|
114 |
-
class CustomDatasetProd(Dataset):
|
115 |
-
def __init__(self,
|
116 |
-
self.
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
|
127 |
# Creation of Dataset
|
128 |
-
class CreateDatasetProd():
|
129 |
-
def __init__(self, image_processor):
|
130 |
-
super().__init__()
|
131 |
-
self.image_processor = image_processor
|
132 |
-
# Define a transformation pipeline
|
133 |
-
self.transform_prod = transforms.v2.Compose([
|
134 |
-
transforms.v2.ToImage(),
|
135 |
-
transforms.v2.ToDtype(torch.uint8, scale=False)
|
136 |
-
])
|
137 |
-
|
138 |
-
def get_pixels(self, img_paths):
|
139 |
-
pixel_values = []
|
140 |
-
for path in img_paths:
|
141 |
-
# Read and process Images
|
142 |
-
img = Image.open(path)
|
143 |
-
img = self.transform_prod(img)
|
144 |
-
|
145 |
-
# Scaling the video to ML model's desired format
|
146 |
-
img = self.image_processor(img, return_tensors='pt') #, input_data_format='channels_first')
|
147 |
-
|
148 |
-
pixel_values.append(img['pixel_values'].squeeze(0))
|
149 |
-
|
150 |
-
# Force garbage collection
|
151 |
-
del img
|
152 |
-
gc.collect()
|
153 |
-
return pixel_values
|
154 |
-
|
155 |
-
def get_pixel(self, img_path):
|
156 |
-
# Read and process Images
|
157 |
-
img = Image.open(img_path)
|
158 |
-
img = self.transform_prod(img)
|
159 |
-
|
160 |
-
# Scaling the video to ML model's desired format
|
161 |
-
img = self.image_processor(img, return_tensors='pt') #, input_data_format='channels_first')
|
162 |
-
|
163 |
-
pixel_values = img['pixel_values'] #.squeeze(0)
|
164 |
-
|
165 |
-
# Force garbage collection
|
166 |
-
del img
|
167 |
-
gc.collect()
|
168 |
-
|
169 |
-
return pixel_values
|
170 |
-
|
171 |
-
def create_dataset(self, image_paths, webcam=False):
|
172 |
-
if webcam == True:
|
173 |
-
pixel_values = self.get_pixel(image_paths)
|
174 |
-
else:
|
175 |
-
pixel_values = torch.stack(self.get_pixels(image_paths))
|
176 |
-
|
177 |
-
return CustomDatasetProd(pixel_values=pixel_values)
|
178 |
-
|
179 |
# Read images from directory
|
180 |
image_paths = []
|
181 |
image_file = glob(os.path.join(data_path, '*.jpg'))
|
@@ -184,15 +200,38 @@ image_paths.extend(image_file)
|
|
184 |
#st.write('input path size:', len(image_paths))
|
185 |
#st.write(image_paths)
|
186 |
|
|
|
|
|
|
|
|
|
187 |
# Create DataLoader for Employees image
|
188 |
-
dataset_prod_obj = CreateDatasetProd(image_processor_prod)
|
189 |
-
prod_ds = dataset_prod_obj.create_dataset(image_paths, webcam=False)
|
190 |
-
prod_dl = DataLoader(prod_ds, batch_size=BATCH_SIZE)
|
191 |
|
192 |
## Testing the dataloader
|
193 |
#prod_inputs = next(iter(prod_dl))
|
194 |
#st.write(prod_inputs['pixel_values'].shape)
|
195 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
196 |
about_tab, app_tab = st.tabs(["About the app", "Face Recognition"])
|
197 |
# About the app Tab
|
198 |
with about_tab:
|
@@ -231,8 +270,8 @@ with app_tab:
|
|
231 |
#st.write('Image saved as:',webcam_path)
|
232 |
|
233 |
## Create DataLoader for Webcam Image
|
234 |
-
webcam_ds = dataset_prod_obj.create_dataset(picture, webcam=True)
|
235 |
-
webcam_dl = DataLoader(
|
236 |
|
237 |
## Testing the dataloader
|
238 |
#prod_inputs = next(iter(webcam_dl))
|
@@ -240,14 +279,14 @@ with app_tab:
|
|
240 |
|
241 |
with st.spinner("Wait for it...", show_time=True):
|
242 |
# Run the predictions
|
243 |
-
prediction = prod_function(
|
244 |
-
predictions = torch.cat(prediction, 0).to(device)
|
245 |
-
match_idx = torch.argmin(predictions)
|
246 |
st.write(predictions)
|
247 |
st.write(image_paths)
|
248 |
|
249 |
# Display the results
|
250 |
-
if predictions[match_idx]
|
251 |
st.write('Welcome: ',image_paths[match_idx].split('/')[-1].split('.')[0])
|
252 |
else:
|
253 |
st.write("Match not found")
|
|
|
6 |
|
7 |
# For Model
|
8 |
from transformers import ViTModel, ViTConfig, pipeline
|
9 |
+
import insightface
|
10 |
+
from insightface.app import FaceAnalysis
|
11 |
|
12 |
# For data augmentation
|
13 |
from torchvision import transforms, datasets
|
|
|
27 |
# Other Generic Libraries
|
28 |
import torch
|
29 |
from PIL import Image
|
30 |
+
import cv2
|
31 |
import os
|
32 |
import streamlit as st
|
33 |
import gc
|
|
|
51 |
model_path = 'vit_pytorch_GPU_1.pt'
|
52 |
webcam_path = 'captured_image.jpg'
|
53 |
|
54 |
+
IMAGE_SHAPE = 640
|
55 |
+
|
56 |
# Set Title
|
57 |
st.title("Employee Attendance System")
|
|
|
58 |
|
59 |
# Define Image Processor
|
60 |
+
#image_processor_prod = ViTImageProcessor.from_pretrained(MODEL_TRANSFORMER, attn_implementation="sdpa", torch_dtype=torch.float16)
|
61 |
|
62 |
# Define ML Model
|
63 |
+
#class FaceEmbeddingModel(torch.nn.Module):
|
64 |
+
# def __init__(self, model_name, embedding_size):
|
65 |
+
# super(FaceEmbeddingModel, self).__init__()
|
66 |
+
# self.config = ViTConfig.from_pretrained(model_name, id2label=idx_to_label, label2id=label_to_idx, return_dict=True)
|
67 |
+
# self.backbone = ViTModel.from_pretrained(model_name, config=self.config) # Load ViT model
|
68 |
+
# self.fc = torch.nn.Linear(self.backbone.config.hidden_size, embedding_size) # Convert to 512D feature vector
|
69 |
+
#
|
70 |
+
# def forward(self, images):
|
71 |
+
# x = self.backbone(images).last_hidden_state[:, 0] # Extract embeddings
|
72 |
+
# x = self.fc(x) # Convert to 512D embedding
|
73 |
+
# return torch.nn.functional.normalize(x) # Normalize for cosine similarity
|
74 |
+
|
75 |
+
|
76 |
# Load the model
|
77 |
+
#model_pretrained = torch.load(model_path, map_location=device, weights_only=False)
|
78 |
|
79 |
# Define the ML model - Evaluation function
|
80 |
+
#def prod_function(transformer_model, prod_dl, webcam_dl):
|
81 |
+
# # Initialize accelerator
|
82 |
+
# accelerator = Accelerator()
|
83 |
+
#
|
84 |
+
# # to INFO for the main process only.
|
85 |
+
# #if accelerator.is_main_process:
|
86 |
+
# # datasets.utils.logging.set_verbosity_warning()
|
87 |
+
# # transformers.utils.logging.set_verbosity_info()
|
88 |
+
# #else:
|
89 |
+
# # datasets.utils.logging.set_verbosity_error()
|
90 |
+
# # transformers.utils.logging.set_verbosity_error()
|
91 |
+
#
|
92 |
+
# # The seed need to be set before we instantiate the model, as it will determine the random head.
|
93 |
+
# set_seed(42)
|
94 |
+
#
|
95 |
+
# # There is no specific order to remember, we just need to unpack the objects in the same order we gave them to the prepare method.
|
96 |
+
# accelerated_model, acclerated_prod_dl, acclerated_webcam_dl = accelerator.prepare(transformer_model, prod_dl, webcam_dl)
|
97 |
+
#
|
98 |
+
# # Evaluate at the end of the epoch
|
99 |
+
# accelerated_model.eval()
|
100 |
+
#
|
101 |
+
# # Find Embedding of the image to be evaluated
|
102 |
+
# for batch in acclerated_webcam_dl:
|
103 |
+
# with torch.no_grad():
|
104 |
+
# #img_prod = acclerated_prod_data['pixel_values']
|
105 |
+
# emb_prod = accelerated_model(batch['pixel_values'])
|
106 |
+
#
|
107 |
+
# prod_preds = []
|
108 |
+
#
|
109 |
+
# for batch in acclerated_prod_dl:
|
110 |
+
# #img = batch['pixel_values']
|
111 |
+
# with torch.no_grad():
|
112 |
+
# emb = accelerated_model(batch['pixel_values'])
|
113 |
+
# distance = F.pairwise_distance(emb, emb_prod)
|
114 |
+
#
|
115 |
+
# prod_preds.append(distance)
|
116 |
+
# return prod_preds
|
117 |
+
|
118 |
# Creation of Dataloader
|
119 |
+
#class CustomDatasetProd(Dataset):
|
120 |
+
# def __init__(self, image_path, webcam):
|
121 |
+
# self.image_path = image_path
|
122 |
+
# self.webcam = webcam
|
123 |
+
#
|
124 |
+
# def __len__(self):
|
125 |
+
# return len(self.image_path)
|
126 |
+
#
|
127 |
+
# def __getitem__(self, idx):
|
128 |
+
# if webcam == False:
|
129 |
+
# img = cv2.imread(image_path[idx])
|
130 |
+
# else:
|
131 |
+
# img = image_path
|
132 |
+
# faces = app.get(img)
|
133 |
+
#
|
134 |
+
# if not faces:
|
135 |
+
# raise Exception("No face detected")
|
136 |
+
#
|
137 |
+
# pixel_values = faces[0].embedding # embedding is a 512-dimensional vector
|
138 |
+
# item = {
|
139 |
+
# 'pixel_values': pixel_values.squeeze(0),
|
140 |
+
# }
|
141 |
+
# return item
|
142 |
|
143 |
# Creation of Dataset
|
144 |
+
#class CreateDatasetProd():
|
145 |
+
# def __init__(self, image_processor):
|
146 |
+
# super().__init__()
|
147 |
+
# self.image_processor = image_processor
|
148 |
+
# # Define a transformation pipeline
|
149 |
+
# self.transform_prod = transforms.v2.Compose([
|
150 |
+
# transforms.v2.ToImage(),
|
151 |
+
# transforms.v2.ToDtype(torch.uint8, scale=False)
|
152 |
+
# ])
|
153 |
+
#
|
154 |
+
# def get_pixels(self, img_paths):
|
155 |
+
# pixel_values = []
|
156 |
+
# for path in img_paths:
|
157 |
+
# # Read and process Images
|
158 |
+
# img = Image.open(path)
|
159 |
+
# img = self.transform_prod(img)
|
160 |
+
#
|
161 |
+
# # Scaling the video to ML model's desired format
|
162 |
+
# img = self.image_processor(img, return_tensors='pt') #, input_data_format='channels_first')
|
163 |
+
#
|
164 |
+
# pixel_values.append(img['pixel_values'].squeeze(0))
|
165 |
+
#
|
166 |
+
# # Force garbage collection
|
167 |
+
# del img
|
168 |
+
# gc.collect()
|
169 |
+
# return pixel_values
|
170 |
+
#
|
171 |
+
# def get_pixel(self, img_path):
|
172 |
+
# # Read and process Images
|
173 |
+
# img = Image.open(img_path)
|
174 |
+
# img = self.transform_prod(img)
|
175 |
+
#
|
176 |
+
# # Scaling the video to ML model's desired format
|
177 |
+
# img = self.image_processor(img, return_tensors='pt') #, input_data_format='channels_first')
|
178 |
+
#
|
179 |
+
# pixel_values = img['pixel_values'] #.squeeze(0)
|
180 |
+
#
|
181 |
+
# # Force garbage collection
|
182 |
+
# del img
|
183 |
+
# gc.collect()
|
184 |
+
#
|
185 |
+
# return pixel_values
|
186 |
+
#
|
187 |
+
# def create_dataset(self, image_paths, webcam=False):
|
188 |
+
# if webcam == True:
|
189 |
+
# pixel_values = self.get_pixel(image_paths)
|
190 |
+
# else:
|
191 |
+
# pixel_values = torch.stack(self.get_pixels(image_paths))
|
192 |
+
#
|
193 |
+
# return CustomDatasetProd(pixel_values=pixel_values)
|
194 |
+
|
195 |
# Read images from directory
|
196 |
image_paths = []
|
197 |
image_file = glob(os.path.join(data_path, '*.jpg'))
|
|
|
200 |
#st.write('input path size:', len(image_paths))
|
201 |
#st.write(image_paths)
|
202 |
|
203 |
+
# Initialize the app
@st.cache_resource
def _load_face_analysis():
    """Build and prepare the InsightFace analysis pipeline a single time.

    Streamlit re-executes this script on every user interaction; caching the
    prepared pipeline as a resource avoids reloading the model pack on each
    rerun.

    Returns:
        The prepared ``FaceAnalysis`` instance.
    """
    face_app = FaceAnalysis(name="buffalo_l")  # buffalo_l includes ArcFace model
    # Use ctx_id=-1 if you want CPU, and ctx_id=0 for GPU
    face_app.prepare(ctx_id=-1, det_size=(IMAGE_SHAPE, IMAGE_SHAPE))
    return face_app


app = _load_face_analysis()
|
206 |
+
|
207 |
# Create DataLoader for Employees image
|
208 |
+
#dataset_prod_obj = CreateDatasetProd(image_processor_prod)
|
209 |
+
#prod_ds = dataset_prod_obj.create_dataset(image_paths, webcam=False)
|
210 |
+
#prod_dl = DataLoader(prod_ds, webcam=False, batch_size=BATCH_SIZE)
|
211 |
|
212 |
## Testing the dataloader
|
213 |
#prod_inputs = next(iter(prod_dl))
|
214 |
#st.write(prod_inputs['pixel_values'].shape)
|
215 |
|
216 |
+
|
217 |
+
# Define the ML model - Evaluation function
|
218 |
+
def _read_bgr_image(source):
    """Load an image for face analysis from a path, an upload, or an array.

    Args:
        source: A filesystem path (``str`` / ``os.PathLike``), an already
            decoded image array, raw encoded bytes, or a file-like object
            exposing ``getvalue()`` or ``read()`` (presumably what a Streamlit
            camera/upload widget returns - TODO confirm against the caller).

    Returns:
        The decoded image array, or ``None`` when it cannot be read/decoded.
    """
    if isinstance(source, (str, os.PathLike)):
        return cv2.imread(os.fspath(source))

    # Local import: numpy is not visibly imported at module level in this file.
    import numpy as np

    if isinstance(source, np.ndarray):
        # Already a decoded image; nothing to do.
        return source

    if hasattr(source, "getvalue"):
        raw = source.getvalue()
    elif hasattr(source, "read"):
        raw = source.read()
    else:
        raw = source

    encoded = np.frombuffer(raw, dtype=np.uint8)
    if encoded.size == 0:
        # cv2.imdecode rejects empty buffers; report "unreadable" instead.
        return None
    return cv2.imdecode(encoded, cv2.IMREAD_COLOR)


def _first_face_embedding(app, image):
    """Return the first detected face's embedding as a float32 tensor, or None."""
    if image is None:
        return None
    faces = app.get(image, max_num=1)
    if not faces:
        return None
    # F.cosine_similarity operates on tensors, so convert whatever array type
    # the face-analysis app returns (presumably a NumPy vector).
    return torch.as_tensor(faces[0].embedding, dtype=torch.float32)


def prod_function(app, prod_path, webcam_path):
    """Score every gallery image against the captured webcam image.

    Args:
        app: Face-analysis object exposing ``get(image, max_num=...)`` whose
            results carry an ``embedding`` attribute.
        prod_path: Iterable of gallery images (paths or any source accepted
            by ``_read_bgr_image``).
        webcam_path: The captured image to identify (path or any source
            accepted by ``_read_bgr_image``).

    Returns:
        A list with one 0-dim tensor per entry of ``prod_path``, in the same
        order, holding the cosine similarity to the webcam face. Gallery
        entries that cannot be read or contain no detectable face score
        ``-1.0`` (the minimum cosine similarity), so they can never be the
        best match while list positions stay aligned with ``prod_path``.

    Raises:
        ValueError: If the webcam image cannot be read or has no face.
    """
    webcam_emb = _first_face_embedding(app, _read_bgr_image(webcam_path))
    if webcam_emb is None:
        raise ValueError("No face detected in the webcam image")

    similarity_score = []
    for path in prod_path:
        face_embedding = _first_face_embedding(app, _read_bgr_image(path))
        if face_embedding is None:
            similarity_score.append(torch.tensor(-1.0))
            continue
        similarity_score.append(
            F.cosine_similarity(face_embedding, webcam_emb, dim=0)
        )

    return similarity_score
|
234 |
+
|
235 |
about_tab, app_tab = st.tabs(["About the app", "Face Recognition"])
|
236 |
# About the app Tab
|
237 |
with about_tab:
|
|
|
270 |
#st.write('Image saved as:',webcam_path)
|
271 |
|
272 |
## Create DataLoader for Webcam Image
|
273 |
+
#webcam_ds = dataset_prod_obj.create_dataset(picture, webcam=True)
|
274 |
+
#webcam_dl = DataLoader(picture, webcam=True, batch_size=BATCH_SIZE)
|
275 |
|
276 |
## Testing the dataloader
|
277 |
#prod_inputs = next(iter(webcam_dl))
|
|
|
279 |
|
280 |
with st.spinner("Wait for it...", show_time=True):
    # Run the predictions: one similarity score per entry of image_paths
    prediction = prod_function(app, image_paths, picture)

    # Collapse the per-image scores into a single 1-D tensor for display and
    # for selecting the best candidate.
    predictions = torch.as_tensor([float(score) for score in prediction])

    # Scores are similarities, so the best match is the largest value.
    # An empty gallery has no candidate at all.
    match_idx = int(torch.argmax(predictions)) if len(prediction) else None

    st.write(predictions)
    st.write(image_paths)

    # Display the results
    # NOTE(review): 0.9 is a strict cut-off for a cosine similarity - confirm
    # it against real enrolment photos.
    if match_idx is not None and predictions[match_idx] >= 0.9:
        st.write('Welcome: ',image_paths[match_idx].split('/')[-1].split('.')[0])
    else:
        st.write("Match not found")
|