# coding=utf-8
# Copyright 2021 The Deeplab2 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains code to create an evaluator runner. | |
Note that the evaluator is not well-optimized for inference speed. There are | |
some redundant outputs, e.g., visualization results, evaluation loss, and so | |
on. We still compute them in this implementation with the goal to provide more | |
detailed information for research development. One should remove those | |
redundant outputs for a faster inference speed. | |
""" | |

import os

import orbit
import tensorflow as tf

from deeplab2 import common
from deeplab2.data import dataset
from deeplab2.evaluation import coco_instance_ap as instance_ap
from deeplab2.evaluation import panoptic_quality
from deeplab2.evaluation import segmentation_and_tracking_quality as stq
from deeplab2.evaluation import video_panoptic_quality as vpq
from deeplab2.model import utils
from deeplab2.trainer import runner_utils
from deeplab2.trainer import vis

_PANOPTIC_METRIC_OFFSET = 256 * 256
# Video Panoptic Segmentation requires a larger offset value for accommodating
# more instance IDs.
_VIDEO_PANOPTIC_METRIC_OFFSET = _PANOPTIC_METRIC_OFFSET * 256
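# Both offsets are consumed by the panoptic/tracking metrics, which pack a
# (groundtruth, prediction) label pair into a single integer ID; an offset
# therefore has to exceed the largest possible encoded panoptic label.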
_PREDICTIONS_KEY = 'unique_key_for_storing_predictions'
_LABELS_KEY = 'unique_key_for_storing_labels'


class Evaluator(orbit.StandardEvaluator):
  """Implements an evaluator for DeepLab models."""

  def __init__(self, config, model, loss, global_step, model_dir):
    """Initializes the Evaluator.

    Args:
      config: A config_pb2.ExperimentOptions configuration.
      model: A tf.keras.Model.
      loss: A tf.keras.losses.Loss.
      global_step: A tf.Variable that records the global training step.
      model_dir: A path to store all experimental artifacts.
    """
    self._strategy = tf.distribute.get_strategy()
    self._supported_tasks = utils.get_supported_tasks(config)
    eval_dataset = runner_utils.create_dataset(
        config.eval_dataset_options,
        is_training=False,
        only_semantic_annotations=(
            common.TASK_PANOPTIC_SEGMENTATION not in self._supported_tasks))
    eval_dataset = orbit.utils.make_distributed_dataset(self._strategy,
                                                        eval_dataset)
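    # use_tf_function controls whether Orbit wraps the evaluation step in a
    # tf.function (graph mode) or runs it eagerly.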
    evaluator_options_override = orbit.StandardEvaluatorOptions(
        config.evaluator_options.use_tf_function)
    super(Evaluator, self).__init__(eval_dataset, evaluator_options_override)

    self._config = config
    self._model = model
    self._loss = loss
    self._global_step = global_step
    self._sample_counter = 0
    self._enable_visualization = config.evaluator_options.save_predictions
    self._num_vis_samples = config.evaluator_options.num_vis_samples
    self._save_raw_predictions = config.evaluator_options.save_raw_predictions
    self._decode_groundtruth_label = (
        config.eval_dataset_options.decode_groundtruth_label)

    if config.evaluator_options.HasField('override_save_dir'):
      self._vis_dir = config.evaluator_options.override_save_dir
    else:
      self._vis_dir = os.path.join(model_dir, 'vis')

    self._dataset_info = dataset.MAP_NAME_TO_DATASET_INFO[
        config.eval_dataset_options.dataset]

    # Create eval loss metrics.
    self._eval_loss_metric_dict = runner_utils.create_loss_metric_dict(
        loss.get_loss_names(), prefix='eval_')

    # Create metrics (PQ, IoU).
    self._ignore_label = self._dataset_info.ignore_label
    self._eval_iou_metric = tf.keras.metrics.MeanIoU(
        self._dataset_info.num_classes, 'IoU')
    if common.TASK_PANOPTIC_SEGMENTATION in self._supported_tasks:
      self._eval_pq_metric = panoptic_quality.PanopticQuality(
          self._dataset_info.num_classes,
          self._dataset_info.ignore_label,
          self._dataset_info.panoptic_label_divisor,
          offset=_PANOPTIC_METRIC_OFFSET)
    if common.TASK_INSTANCE_SEGMENTATION in self._supported_tasks:
      self._eval_ap_metric = instance_ap.PanopticInstanceAveragePrecision(
          self._dataset_info.num_classes,
          self._dataset_info.class_has_instances_list,
          self._dataset_info.panoptic_label_divisor,
          self._dataset_info.ignore_label)
    if common.TASK_VIDEO_PANOPTIC_SEGMENTATION in self._supported_tasks:
      self._eval_tracking_metric = stq.STQuality(
          self._dataset_info.num_classes,
          self._dataset_info.class_has_instances_list,
          self._dataset_info.ignore_label,
          self._dataset_info.panoptic_label_divisor,
          offset=_VIDEO_PANOPTIC_METRIC_OFFSET)
    if (common.TASK_DEPTH_AWARE_VIDEO_PANOPTIC_SEGMENTATION
        in self._supported_tasks):
      # We compute two-frame video panoptic quality as an additional metric
      # for the task of depth-aware video panoptic segmentation.
      self._eval_vpq_metric = vpq.VideoPanopticQuality(
          self._dataset_info.num_classes,
          self._dataset_info.ignore_label,
          self._dataset_info.panoptic_label_divisor,
          offset=_VIDEO_PANOPTIC_METRIC_OFFSET)

  def _reset(self):
    for metric in self._eval_loss_metric_dict.values():
      metric.reset_states()
    self._eval_iou_metric.reset_states()
    if common.TASK_PANOPTIC_SEGMENTATION in self._supported_tasks:
      self._eval_pq_metric.reset_states()
    if common.TASK_INSTANCE_SEGMENTATION in self._supported_tasks:
      self._eval_ap_metric.reset_states()
    if common.TASK_VIDEO_PANOPTIC_SEGMENTATION in self._supported_tasks:
      self._eval_tracking_metric.reset_states()
    if (common.TASK_DEPTH_AWARE_VIDEO_PANOPTIC_SEGMENTATION
        in self._supported_tasks):
      self._eval_vpq_metric.reset_states()
    self._sample_counter = 0

  def eval_begin(self):
    """Called once at the beginning of the evaluation.

    This method is called before the dataset iterators are created.
    """
    self._reset()
    tf.io.gfile.makedirs(self._vis_dir)
    if self._save_raw_predictions:
      tf.io.gfile.makedirs(
          os.path.join(self._vis_dir, 'raw_semantic'))
      if common.TASK_PANOPTIC_SEGMENTATION in self._supported_tasks:
        tf.io.gfile.makedirs(
            os.path.join(self._vis_dir, 'raw_panoptic'))

  def eval_step(self, iterator):
    """Implements one step of evaluation.

    Runs one step of evaluation with respect to the chosen strategy. In case of
    a distributed strategy, the replica results are gathered and returned.

    Note that all operations within `_eval_step` are tf.function compatible, as
    they will be traced with tf.function. Any other/numpy operations are put in
    `eval_begin`, `eval_end` or `eval_reduce` functions.

    Args:
      iterator: A tf.nest-compatible structure of tf.data Iterator or
        DistributedIterator.

    Returns:
      An output which is passed as `step_outputs` argument into `eval_reduce`
      function.
    """

    def step_fn(inputs):
      step_outputs = self._eval_step(inputs)
      return step_outputs

    distributed_outputs = self._strategy.run(step_fn, args=(next(iterator),))
    return tf.nest.map_structure(self._strategy.experimental_local_results,
                                 distributed_outputs)

  def _eval_step(self, inputs):
    tf.assert_equal(
        tf.shape(inputs[common.IMAGE])[0], 1,
        'Currently only a batch size of 1 is supported in evaluation due to '
        'resizing.')
    outputs = self._model(inputs[common.IMAGE], training=False)
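    # raw_size is the original (pre-preprocessing) spatial size and
    # resized_size is the model input size; both are used below to undo the
    # preprocessing and map outputs back to the original resolution.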
    raw_size = [
        inputs[common.GT_SIZE_RAW][0, 0], inputs[common.GT_SIZE_RAW][0, 1]
    ]
    resized_size = [
        tf.shape(inputs[common.RESIZED_IMAGE])[1],
        tf.shape(inputs[common.RESIZED_IMAGE])[2],
    ]

    step_outputs = {}
    if self._decode_groundtruth_label:
      loss_dict = self._loss(inputs, outputs)
      # Average over the batch.
      average_loss_dict = {
          key: tf.reduce_mean(value) for key, value in loss_dict.items()}
      for name, value in average_loss_dict.items():
        self._eval_loss_metric_dict[name].update_state(value)

      # We only undo-preprocess for those defined in tuples in model/utils.py.
      outputs = utils.undo_preprocessing(outputs, resized_size,
                                         raw_size)
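      # Pixels whose groundtruth equals the ignore label are mapped to class 0
      # and given zero sample weight, so they do not contribute to the IoU.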
      self._eval_iou_metric.update_state(
          tf.where(
              tf.equal(inputs[common.GT_SEMANTIC_RAW], self._ignore_label),
              0,
              inputs[common.GT_SEMANTIC_RAW]),
          outputs[common.PRED_SEMANTIC_KEY],
          tf.where(
              tf.equal(inputs[common.GT_SEMANTIC_RAW], self._ignore_label),
              0.0,
              1.0))
      if common.TASK_PANOPTIC_SEGMENTATION in self._supported_tasks:
        step_outputs[self._eval_pq_metric.name] = (
            inputs[common.GT_PANOPTIC_RAW], outputs[common.PRED_PANOPTIC_KEY])
      if common.TASK_INSTANCE_SEGMENTATION in self._supported_tasks:
        step_outputs[self._eval_ap_metric.name] = (
            inputs[common.GT_PANOPTIC_RAW], outputs[common.PRED_PANOPTIC_KEY],
            outputs[common.PRED_SEMANTIC_PROBS_KEY],
            outputs[common.PRED_INSTANCE_SCORES_KEY],
            inputs[common.GT_IS_CROWD_RAW])
      if (common.TASK_DEPTH_AWARE_VIDEO_PANOPTIC_SEGMENTATION
          in self._supported_tasks):
        step_outputs[self._eval_vpq_metric.name] = (
            inputs[common.GT_PANOPTIC_RAW],
            inputs[common.GT_NEXT_PANOPTIC_RAW],
            outputs[common.PRED_PANOPTIC_KEY],
            outputs[common.PRED_NEXT_PANOPTIC_KEY])
    else:
      # We only undo-preprocess for those defined in tuples in model/utils.py.
      outputs = utils.undo_preprocessing(outputs, resized_size,
                                         raw_size)
    # We only undo-preprocess for those defined in tuples in model/utils.py.
    inputs = utils.undo_preprocessing(inputs, resized_size,
                                      raw_size)
    if common.SEQUENCE_ID in inputs:
      step_outputs[common.SEQUENCE_ID] = inputs[common.SEQUENCE_ID]

    if self._enable_visualization or self._save_raw_predictions:
      step_outputs[_PREDICTIONS_KEY] = outputs
      step_outputs[_LABELS_KEY] = inputs
    return step_outputs

  def eval_end(self, state=None):
    """Called at the end of the evaluation.

    Args:
      state: The outputs from `eval_reduce` after the last eval step.

    Returns:
      A dictionary of `Tensors`, which will be written to logs and as
      TensorBoard summaries.
    """
    if not self._decode_groundtruth_label:
      return {}
    eval_logs = {}
    for loss_metric in self._eval_loss_metric_dict.values():
      eval_logs['losses/' + loss_metric.name] = loss_metric.result()
    eval_logs['evaluation/iou/' + self._eval_iou_metric.name] = (
        self._eval_iou_metric.result())
    if common.TASK_PANOPTIC_SEGMENTATION in self._supported_tasks:
      pq_results = self._eval_pq_metric.result()
      eval_logs['evaluation/pq/PQ'] = pq_results[0]
      eval_logs['evaluation/pq/SQ'] = pq_results[1]
      eval_logs['evaluation/pq/RQ'] = pq_results[2]
      eval_logs['evaluation/pq/TP'] = pq_results[3]
      eval_logs['evaluation/pq/FN'] = pq_results[4]
      eval_logs['evaluation/pq/FP'] = pq_results[5]
    if common.TASK_INSTANCE_SEGMENTATION in self._supported_tasks:
      ap_results = self._eval_ap_metric.result()
      eval_logs['evaluation/ap/AP_Mask'] = ap_results[0]
      if self._config.evaluator_options.detailed_ap_metrics:
        eval_logs['evaluation/ap/AP_Mask_@IoU=0.5'] = ap_results[1]
        eval_logs['evaluation/ap/AP_Mask_@IoU=0.75'] = ap_results[2]
        eval_logs['evaluation/ap/AP_Mask_small'] = ap_results[3]
        eval_logs['evaluation/ap/AP_Mask_medium'] = ap_results[4]
        eval_logs['evaluation/ap/AP_Mask_large'] = ap_results[5]
        eval_logs['evaluation/ap/AR_Mask_maxdets=1'] = ap_results[6]
        eval_logs['evaluation/ap/AR_Mask_maxdets=10'] = ap_results[7]
        eval_logs['evaluation/ap/AR_Mask_maxdets=100'] = ap_results[8]
        eval_logs['evaluation/ap/AR_Mask_small'] = ap_results[9]
        eval_logs['evaluation/ap/AR_Mask_medium'] = ap_results[10]
        eval_logs['evaluation/ap/AR_Mask_large'] = ap_results[11]
    if common.TASK_VIDEO_PANOPTIC_SEGMENTATION in self._supported_tasks:
      tracking_results = self._eval_tracking_metric.result()
      eval_logs['evaluation/step/STQ'] = tracking_results['STQ']
      eval_logs['evaluation/step/AQ'] = tracking_results['AQ']
      eval_logs['evaluation/step/IoU'] = tracking_results['IoU']
    if (common.TASK_DEPTH_AWARE_VIDEO_PANOPTIC_SEGMENTATION
        in self._supported_tasks):
      vpq_results = self._eval_vpq_metric.result()
      eval_logs['evaluation/vpq_2frames/PQ'] = vpq_results[0]
      eval_logs['evaluation/vpq_2frames/SQ'] = vpq_results[1]
      eval_logs['evaluation/vpq_2frames/RQ'] = vpq_results[2]
      eval_logs['evaluation/vpq_2frames/TP'] = vpq_results[3]
      eval_logs['evaluation/vpq_2frames/FN'] = vpq_results[4]
      eval_logs['evaluation/vpq_2frames/FP'] = vpq_results[5]
    return eval_logs

  def eval_reduce(self, state=None, step_outputs=None):
    """A function to do the reduction on the evaluation outputs per step.

    Args:
      state: A maintained state throughout the evaluation.
      step_outputs: Outputs from the current evaluation step.

    Returns:
      An output which is passed as the `state` argument into `eval_reduce` for
      the next step. After evaluation is finished, the output from the last
      step is passed into `eval_end`.
    """
    if self._save_raw_predictions:
      sequence = None
      if self._dataset_info.is_video_dataset:
        sequence = step_outputs[_LABELS_KEY][common.SEQUENCE_ID][0][0]
      vis.store_raw_predictions(
          step_outputs[_PREDICTIONS_KEY],
          step_outputs[_LABELS_KEY][common.IMAGE_NAME][0][0],
          self._dataset_info,
          self._vis_dir,
          sequence,
          raw_panoptic_format=(
              self._config.evaluator_options.raw_panoptic_format),
          convert_to_eval=self._config.evaluator_options.convert_raw_to_eval_ids
      )
    if not self._decode_groundtruth_label:
      # The following operations all require decoded groundtruth labels, so we
      # simply return if decode_groundtruth_label is False.
      return state
    if (self._enable_visualization and
        (self._sample_counter < self._num_vis_samples)):
      predictions = step_outputs[_PREDICTIONS_KEY]
      inputs = step_outputs[_LABELS_KEY]
      if self._dataset_info.is_video_dataset:
        inputs[common.IMAGE] = tf.expand_dims(inputs[common.IMAGE][0][..., :3],
                                              axis=0)
      vis.store_predictions(
          predictions,
          inputs,
          self._sample_counter,
          self._dataset_info,
          self._vis_dir)
      self._sample_counter += 1

    # Accumulates PQ, AP_Mask and STQ.
    if common.TASK_PANOPTIC_SEGMENTATION in self._supported_tasks:
      for gt_panoptic, pred_panoptic in zip(
          step_outputs[self._eval_pq_metric.name][0],
          step_outputs[self._eval_pq_metric.name][1]):
        batch_size = tf.shape(gt_panoptic)[0]
        for i in range(batch_size):
          self._eval_pq_metric.update_state(gt_panoptic[i], pred_panoptic[i])
          # STQ.
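          # The sequence ID lets STQ group frames by video when associating
          # instance tracks across the sequence.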
          if common.TASK_VIDEO_PANOPTIC_SEGMENTATION in self._supported_tasks:
            self._eval_tracking_metric.update_state(
                gt_panoptic[i], pred_panoptic[i],
                step_outputs[common.SEQUENCE_ID][0][0].numpy())
    if common.TASK_INSTANCE_SEGMENTATION in self._supported_tasks:
      # AP_Mask.
      for ap_result in zip(*tuple(step_outputs[self._eval_ap_metric.name])):
        (gt_panoptic, pred_panoptic, pred_semantic_probs, pred_instance_scores,
         gt_is_crowd) = ap_result
        batch_size = tf.shape(gt_panoptic)[0]
        for i in range(batch_size):
          self._eval_ap_metric.update_state(gt_panoptic[i], pred_panoptic[i],
                                            pred_semantic_probs[i],
                                            pred_instance_scores[i],
                                            gt_is_crowd[i])
    if (common.TASK_DEPTH_AWARE_VIDEO_PANOPTIC_SEGMENTATION
        in self._supported_tasks):
      for vpq_result in zip(*tuple(step_outputs[self._eval_vpq_metric.name])):
        (gt_panoptic, gt_next_panoptic, pred_panoptic,
         pred_next_panoptic) = vpq_result
        batch_size = tf.shape(gt_panoptic)[0]
        for i in range(batch_size):
          self._eval_vpq_metric.update_state(
              [gt_panoptic[i], gt_next_panoptic[i]],
              [pred_panoptic[i], pred_next_panoptic[i]])
    # We simply return state as it is, since our current implementation does
    # not keep track of state between steps.
    return state
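
# Example usage (an illustrative sketch, not part of the DeepLab2 training
# binaries): because Evaluator is an orbit.StandardEvaluator, it can be driven
# by an orbit.Controller. The names `config`, `deeplab_model`, `losses`,
# `global_step`, `model_dir`, and `ckpt_manager` below are placeholders the
# caller is assumed to have created elsewhere.
#
#   evaluator = Evaluator(config, deeplab_model, losses, global_step, model_dir)
#   controller = orbit.Controller(
#       evaluator=evaluator,
#       global_step=global_step,
#       checkpoint_manager=ckpt_manager)
#   # steps=-1 evaluates until the evaluation dataset is exhausted; the results
#   # returned by `eval_end` are logged and written as TensorBoard summaries.
#   controller.evaluate(steps=-1)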