Commit f02fce3
1 Parent(s): 91e037a
Update app.py

app.py CHANGED
@@ -71,7 +71,190 @@ image_path_to_id = {im['file_name']: im['id']
                                         for im in test_metadata['images']}
 
 faster_rcnn_model = load_model('../../Faster RCNN/saved_model')
-
+print(faster_rcnn_model.inputs)
+print(faster_rcnn_model.outputs)
+
+def run_inference_for_single_image(model, image):
+  '''Run single image through tensorflow object detection saved_model.
+
+  This function runs a saved_model on a (single) provided image and returns
+  inference results in numpy arrays.
+
+  Args:
+    model: tensorflow saved_model. This model can be obtained using
+      export_inference_graph.py.
+    image: uint8 numpy array with shape (img_height, img_width, 3)
+
+  Returns:
+    output_dict: a dictionary holding the following entries:
+      `num_detections`: an integer
+      `detection_boxes`: a numpy (float32) array of shape [N, 4]
+      `detection_classes`: a numpy (uint8) array of shape [N]
+      `detection_scores`: a numpy (float32) array of shape [N]
+      `detection_features`: a numpy (float32) array of shape [N, 7, 7, 2048]
+  '''
+  image = np.asarray(image)
+  # The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
+  input_tensor = tf.convert_to_tensor(image)
+  # The model expects a batch of images, so add an axis with `tf.newaxis`.
+  input_tensor = input_tensor[tf.newaxis, ...]
+
+  # Run inference
+  output_dict = model(input_tensor)
+  # All outputs are batch tensors.
+  # Convert to numpy arrays, and take index [0] to remove the batch dimension.
+  # We're only interested in the first num_detections.
+  num_dets = output_dict.pop('num_detections')
+  num_detections = int(num_dets)
+  for key, value in output_dict.items():
+    output_dict[key] = value[0, :num_detections].numpy()
+  output_dict['num_detections'] = num_detections
+
+  # detection_classes should be ints.
+  output_dict['detection_classes'] = output_dict['detection_classes'].astype(
+      np.int64)
+  return output_dict
+
+def embed_date_captured(date_captured):
+  """Encodes the datetime of the image.
+
+  Takes a datetime object and encodes it into a normalized embedding of shape
+  [5], using hard-coded normalization factors for year, month, day, hour,
+  minute.
+
+  Args:
+    date_captured: A datetime object.
+
+  Returns:
+    A numpy float32 embedding of shape [5].
+  """
+  embedded_date_captured = []
+  month_max = 12.0
+  day_max = 31.0
+  hour_max = 24.0
+  minute_max = 60.0
+  min_year = 1990.0
+  max_year = 2030.0
+
+  year = (date_captured.year - min_year) / float(max_year - min_year)
+  embedded_date_captured.append(year)
+
+  month = (date_captured.month - 1) / month_max
+  embedded_date_captured.append(month)
+
+  day = (date_captured.day - 1) / day_max
+  embedded_date_captured.append(day)
+
+  hour = date_captured.hour / hour_max
+  embedded_date_captured.append(hour)
+
+  minute = date_captured.minute / minute_max
+  embedded_date_captured.append(minute)
+
+  return np.asarray(embedded_date_captured)
+
+def embed_position_and_size(box):
+  """Encodes the bounding box of the object of interest.
+
+  Takes a bounding box and encodes it into a normalized embedding of shape
+  [4] - the center point (x, y) and width and height of the box.
+
+  Args:
+    box: A bounding box, formatted as [ymin, xmin, ymax, xmax].
+
+  Returns:
+    A numpy float32 embedding of shape [4].
+  """
+  ymin = box[0]
+  xmin = box[1]
+  ymax = box[2]
+  xmax = box[3]
+  w = xmax - xmin
+  h = ymax - ymin
+  x = xmin + w / 2.0
+  y = ymin + h / 2.0
+  return np.asarray([x, y, w, h])
+
+def get_context_feature_embedding(date_captured, detection_boxes,
+                                  detection_features, detection_scores):
+  """Extracts a representative feature embedding for a given input image.
+
+  Takes outputs of a detection model and focuses on the highest-confidence
+  detected object. Starts with detection_features and uses average pooling to
+  remove the spatial dimensions, then appends an embedding of the box position
+  and size, and an embedding of the date and time the image was captured,
+  returning a one-dimensional representation of the object.
+
+  Args:
+    date_captured: A datetime string of format '%Y-%m-%d %H:%M:%S'.
+    detection_boxes: A numpy (float32) array of shape [N, 4].
+    detection_features: A numpy (float32) array of shape [N, 7, 7, 2048].
+    detection_scores: A numpy (float32) array of shape [N].
+
+  Returns:
+    A numpy float32 embedding of shape [1, 2057], and the detection score.
+  """
+  date_captured = datetime.strptime(date_captured, '%Y-%m-%d %H:%M:%S')
+  temporal_embedding = embed_date_captured(date_captured)
+  embedding = detection_features[0]
+  pooled_embedding = np.mean(np.mean(embedding, axis=1), axis=0)
+  box = detection_boxes[0]
+  position_embedding = embed_position_and_size(box)
+  bb_embedding = np.concatenate((pooled_embedding, position_embedding))
+  embedding = np.expand_dims(np.concatenate((bb_embedding, temporal_embedding)),
+                             axis=0)
+  score = detection_scores[0]
+  return embedding, score
+
+def run_inference(model, image_path, date_captured, resize_image=True):
+  """Runs inference over a single input image and extracts contextual features.
+
+  Args:
+    model: A tensorflow saved_model object.
+    image_path: Absolute path to the input image.
+    date_captured: A datetime string of format '%Y-%m-%d %H:%M:%S'.
+    resize_image: Whether to resize the input image before running inference.
+
+  Returns:
+    context_feature: A numpy float32 array of shape [1, 2057].
+    score: A numpy float32 object score for the embedded object.
+    output_dict: The saved_model output dictionary for the image.
+  """
+  with open(image_path, 'rb') as f:
+    image = Image.open(f)
+    if resize_image:
+      image.thumbnail((640, 640), Image.ANTIALIAS)
+    image_np = np.array(image)
+
+  # Actual detection.
+  output_dict = run_inference_for_single_image(model, image_np)
+
+  context_feature, score = get_context_feature_embedding(
+      date_captured, output_dict['detection_boxes'],
+      output_dict['detection_features'], output_dict['detection_scores'])
+  return context_feature, score, output_dict
+
+import posixpath
+context_features = []
+scores = []
+faster_rcnn_results = {}
+for image_path in TEST_IMAGE_PATHS:
+  head, tail = posixpath.split(image_path)
+  image_id = image_path_to_id[str(tail)]
+  date_captured = image_id_to_datetime[image_id]
+  context_feature, score, results = run_inference(
+      faster_rcnn_model, image_path, date_captured)
+  faster_rcnn_results[image_id] = results
+  context_features.append(context_feature)
+  scores.append(score)
+
+# Concatenate all extracted context embeddings into a contextual memory bank.
+context_features_matrix = np.concatenate(context_features, axis=0)
+context_rcnn_model = load_model('../../Context RCNN/saved_model')
+context_padding_size = 2000
+print(context_rcnn_model.inputs)
+print(context_rcnn_model.outputs)
+
 def segment(image):
     pass # Implement your image segmentation model here...
 
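The hunk stops right after the Context R-CNN saved_model is loaded and its input/output signature printed, so the contextual memory bank assembled above is not yet consumed. Each row of context_features_matrix is a [2057]-dim embedding (2048-d average-pooled detection feature + 4-d box embedding + 5-d datetime embedding). Below is a minimal sketch of the step this commit appears to set up: zero-padding the memory bank to context_padding_size rows and batching it for the model call. The variable names here are illustrative assumptions, and the real argument names would have to match the signature printed by the two print() calls.

import numpy as np
import tensorflow as tf

# Stand-ins for names defined earlier in app.py (shapes per the docstrings).
context_padding_size = 2000
context_features_matrix = np.zeros((8, 2057), dtype=np.float32)

# Zero-pad the memory bank to a fixed number of rows, remembering how many
# rows hold real embeddings, then add a batch dimension for the model call.
valid_context_size = context_features_matrix.shape[0]
padded = np.pad(
    context_features_matrix,
    ((0, context_padding_size - valid_context_size), (0, 0)),
    mode='constant')
context_tensor = tf.convert_to_tensor(padded[np.newaxis, ...], tf.float32)
valid_size_tensor = tf.convert_to_tensor([valid_context_size], tf.int32)

Padding to a fixed size is what makes context_padding_size = 2000 useful: every query image then sees a context tensor of identical shape regardless of how many images contributed embeddings.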