Commit f02fce3
1 Parent(s): 91e037a
Update app.py

app.py CHANGED
@@ -71,7 +71,190 @@ image_path_to_id = {im['file_name']: im['id']
                                         for im in test_metadata['images']}
 
 faster_rcnn_model = load_model('../../Faster RCNN/saved_model')
-
+print(faster_rcnn_model.inputs)
+print(faster_rcnn_model.outputs)
+
+def run_inference_for_single_image(model, image):
+  '''Run single image through tensorflow object detection saved_model.
+
+  This function runs a saved_model on a (single) provided image and returns
+  inference results in numpy arrays.
+
+  Args:
+    model: tensorflow saved_model. This model can be obtained using
+      export_inference_graph.py.
+    image: uint8 numpy array with shape (img_height, img_width, 3)
+
+  Returns:
+    output_dict: a dictionary holding the following entries:
+      `num_detections`: an integer
+      `detection_boxes`: a numpy (float32) array of shape [N, 4]
+      `detection_classes`: a numpy (uint8) array of shape [N]
+      `detection_scores`: a numpy (float32) array of shape [N]
+      `detection_features`: a numpy (float32) array of shape [N, 7, 7, 2048]
+  '''
+  image = np.asarray(image)
+  # The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
+  input_tensor = tf.convert_to_tensor(image)
+  # The model expects a batch of images, so add an axis with `tf.newaxis`.
+  input_tensor = input_tensor[tf.newaxis, ...]
+
+  # Run inference
+  output_dict = model(input_tensor)
+  # All outputs are batch tensors.
+  # Convert to numpy arrays, and take index [0] to remove the batch dimension.
+  # We're only interested in the first num_detections.
+  num_dets = output_dict.pop('num_detections')
+  num_detections = int(num_dets)
+  for key, value in output_dict.items():
+    output_dict[key] = value[0, :num_detections].numpy()
+  output_dict['num_detections'] = num_detections
+
+  # detection_classes should be ints.
+  output_dict['detection_classes'] = output_dict['detection_classes'].astype(
+      np.int64)
+  return output_dict
+
+def embed_date_captured(date_captured):
+  """Encodes the datetime of the image.
+
+  Takes a datetime object and encodes it into a normalized embedding of shape
+  [5], using hard-coded normalization factors for year, month, day, hour,
+  minute.
+
+  Args:
+    date_captured: A datetime object.
+
+  Returns:
+    A numpy float32 embedding of shape [5].
+  """
+  embedded_date_captured = []
+  month_max = 12.0
+  day_max = 31.0
+  hour_max = 24.0
+  minute_max = 60.0
+  min_year = 1990.0
+  max_year = 2030.0
+
+  year = (date_captured.year - min_year) / float(max_year - min_year)
+  embedded_date_captured.append(year)
+
+  month = (date_captured.month - 1) / month_max
+  embedded_date_captured.append(month)
+
+  day = (date_captured.day - 1) / day_max
+  embedded_date_captured.append(day)
+
+  hour = date_captured.hour / hour_max
+  embedded_date_captured.append(hour)
+
+  minute = date_captured.minute / minute_max
+  embedded_date_captured.append(minute)
+
+  return np.asarray(embedded_date_captured)
+
+def embed_position_and_size(box):
+  """Encodes the bounding box of the object of interest.
+
+  Takes a bounding box and encodes it into a normalized embedding of shape
+  [4] - the center point (x, y) and width and height of the box.
+
+  Args:
+    box: A bounding box, formatted as [ymin, xmin, ymax, xmax].
+
+  Returns:
+    A numpy float32 embedding of shape [4].
+  """
+  ymin = box[0]
+  xmin = box[1]
+  ymax = box[2]
+  xmax = box[3]
+  w = xmax - xmin
+  h = ymax - ymin
+  x = xmin + w / 2.0
+  y = ymin + h / 2.0
+  return np.asarray([x, y, w, h])
+
+def get_context_feature_embedding(date_captured, detection_boxes,
+                                  detection_features, detection_scores):
+  """Extracts a representative feature embedding for a given input image.
+
+  Takes outputs of a detection model and focuses on the highest-confidence
+  detected object. Starts with detection_features and uses average pooling to
+  remove the spatial dimensions, then appends an embedding of the box position
+  and size, and an embedding of the date and time the image was captured,
+  returning a one-dimensional representation of the object.
+
+  Args:
+    date_captured: A datetime string of format '%Y-%m-%d %H:%M:%S'.
+    detection_boxes: A numpy (float32) array of shape [N, 4].
+    detection_features: A numpy (float32) array of shape [N, 7, 7, 2048].
+    detection_scores: A numpy (float32) array of shape [N].
+
+  Returns:
+    A numpy float32 embedding of shape [1, 2057], and the detection score.
+  """
+  date_captured = datetime.strptime(date_captured, '%Y-%m-%d %H:%M:%S')
+  temporal_embedding = embed_date_captured(date_captured)
+  embedding = detection_features[0]
+  pooled_embedding = np.mean(np.mean(embedding, axis=1), axis=0)
+  box = detection_boxes[0]
+  position_embedding = embed_position_and_size(box)
+  bb_embedding = np.concatenate((pooled_embedding, position_embedding))
+  embedding = np.expand_dims(np.concatenate((bb_embedding, temporal_embedding)),
+                             axis=0)
+  score = detection_scores[0]
+  return embedding, score
+
+def run_inference(model, image_path, date_captured, resize_image=True):
+  """Runs inference over a single input image and extracts contextual features.
+
+  Args:
+    model: A tensorflow saved_model object.
+    image_path: Absolute path to the input image.
+    date_captured: A datetime string of format '%Y-%m-%d %H:%M:%S'.
+    resize_image: Whether to resize the input image before running inference.
+
+  Returns:
+    context_feature: A numpy float32 array of shape [1, 2057].
+    score: A numpy float32 object score for the embedded object.
+    output_dict: The saved_model output dictionary for the image.
+  """
+  with open(image_path, 'rb') as f:
+    image = Image.open(f)
+    if resize_image:
+      image.thumbnail((640, 640), Image.ANTIALIAS)
+    image_np = np.array(image)
+
+  # Actual detection.
+  output_dict = run_inference_for_single_image(model, image_np)
+
+  context_feature, score = get_context_feature_embedding(
+      date_captured, output_dict['detection_boxes'],
+      output_dict['detection_features'], output_dict['detection_scores'])
+  return context_feature, score, output_dict
+
+import posixpath
+context_features = []
+scores = []
+faster_rcnn_results = {}
+for image_path in TEST_IMAGE_PATHS:
+  head, tail = posixpath.split(image_path)
+  image_id = image_path_to_id[str(tail)]
+  date_captured = image_id_to_datetime[image_id]
+  context_feature, score, results = run_inference(
+      faster_rcnn_model, image_path, date_captured)
+  faster_rcnn_results[image_id] = results
+  context_features.append(context_feature)
+  scores.append(score)
+
+# Concatenate all extracted context embeddings into a contextual memory bank.
+context_features_matrix = np.concatenate(context_features, axis=0)
+context_rcnn_model = load_model('../../Context RCNN/saved_model')
+context_padding_size = 2000
+print(context_rcnn_model.inputs)
+print(context_rcnn_model.outputs)
+
 def segment(image):
     pass # Implement your image segmentation model here...
 
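The hunk stops right after the Context R-CNN saved_model is loaded and its input/output signature printed, so the contextual memory bank assembled above is not yet consumed. Each row of context_features_matrix is a [2057]-dim embedding (2048-d average-pooled detection feature + 4-d box embedding + 5-d datetime embedding). Below is a minimal sketch of the step this commit appears to set up: zero-padding the memory bank to context_padding_size rows and batching it for the model call. The variable names here are illustrative assumptions, and the real argument names would have to match the signature printed by the two print() calls.

import numpy as np
import tensorflow as tf

# Stand-ins for names defined earlier in app.py (shapes per the docstrings).
context_padding_size = 2000
context_features_matrix = np.zeros((8, 2057), dtype=np.float32)

# Zero-pad the memory bank to a fixed number of rows, remembering how many
# rows hold real embeddings, then add a batch dimension for the model call.
valid_context_size = context_features_matrix.shape[0]
padded = np.pad(
    context_features_matrix,
    ((0, context_padding_size - valid_context_size), (0, 0)),
    mode='constant')
context_tensor = tf.convert_to_tensor(padded[np.newaxis, ...], tf.float32)
valid_size_tensor = tf.convert_to_tensor([valid_context_size], tf.int32)

Padding to a fixed size is what makes context_padding_size = 2000 useful: every query image then sees a context tensor of identical shape regardless of how many images contributed embeddings.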