From 7f105fa7ed02d77bf7bd7bfa026f6e6801563288 Mon Sep 17 00:00:00 2001 From: ryan Date: Sun, 15 Mar 2020 23:52:46 -0400 Subject: [PATCH] Initial commit --- LICENSE | 201 + README.md | 73 + .../flexible_grid_anchor_generator.py | 134 + .../flexible_grid_anchor_generator_test.py | 294 + anchor_generators/grid_anchor_generator.py | 213 + .../grid_anchor_generator_test.py | 104 + .../multiple_grid_anchor_generator.py | 342 + .../multiple_grid_anchor_generator_test.py | 289 + .../multiscale_grid_anchor_generator.py | 145 + .../multiscale_grid_anchor_generator_test.py | 302 + box_coders/faster_rcnn_box_coder.py | 118 + box_coders/faster_rcnn_box_coder_test.py | 94 + box_coders/keypoint_box_coder.py | 173 + box_coders/keypoint_box_coder_test.py | 140 + box_coders/mean_stddev_box_coder_test.py | 54 + box_coders/square_box_coder.py | 126 + box_coders/square_box_coder_test.py | 97 + builders/anchor_generator_builder.py | 109 + builders/anchor_generator_builder_test.py | 332 + builders/box_coder_builder.py | 66 + builders/box_coder_builder_test.py | 136 + builders/box_predictor_builder.py | 975 ++ builders/box_predictor_builder_test.py | 661 ++ builders/calibration_builder.py | 250 + builders/calibration_builder_test.py | 224 + builders/dataset_builder.py | 162 + builders/dataset_builder_test.py | 356 + builders/graph_rewriter_builder.py | 45 + builders/graph_rewriter_builder_test.py | 58 + builders/hyperparams_builder.py | 418 + builders/hyperparams_builder_test.py | 865 ++ builders/image_resizer_builder.py | 174 + builders/image_resizer_builder_test.py | 216 + builders/input_reader_builder.py | 76 + builders/input_reader_builder_test.py | 129 + builders/losses_builder.py | 252 + builders/losses_builder_test.py | 561 ++ builders/matcher_builder.py | 53 + builders/matcher_builder_test.py | 99 + builders/model_builder.py | 636 ++ builders/model_builder_test.py | 346 + builders/optimizer_builder.py | 201 + builders/optimizer_builder_test.py | 208 + builders/post_processing_builder.py | 181 + builders/post_processing_builder_test.py | 185 + builders/preprocessor_builder.py | 403 + builders/preprocessor_builder_test.py | 728 ++ .../region_similarity_calculator_builder.py | 59 + ...gion_similarity_calculator_builder_test.py | 67 + builders/target_assigner_builder.py | 40 + builders/target_assigner_builder_test.py | 50 + core/__init__.py | 1 + core/anchor_generator.py | 171 + core/balanced_positive_negative_sampler.py | 266 + ...balanced_positive_negative_sampler_test.py | 204 + core/batch_multiclass_nms_test.py | 721 ++ core/batcher.py | 141 + core/batcher_test.py | 163 + core/box_coder.py | 158 + core/box_coder_test.py | 61 + core/box_list.py | 210 + core/box_list_ops.py | 1141 +++ core/box_list_ops_test.py | 1108 +++ core/box_list_test.py | 134 + core/box_predictor.py | 227 + core/class_agnostic_nms_test.py | 155 + core/data_decoder.py | 44 + core/data_parser.py | 45 + core/freezable_batch_norm.py | 68 + core/freezable_batch_norm_test.py | 189 + core/keypoint_ops.py | 282 + core/keypoint_ops_test.py | 200 + core/losses.py | 686 ++ core/losses_test.py | 1281 +++ core/matcher.py | 270 + core/matcher_test.py | 197 + core/minibatch_sampler.py | 94 + core/minibatch_sampler_test.py | 82 + core/model.py | 375 + core/multiclass_nms_test.py | 526 ++ core/post_processing.py | 1223 +++ core/prefetcher.py | 61 + core/prefetcher_test.py | 106 + core/preprocessor.py | 4008 +++++++++ core/preprocessor_cache.py | 107 + core/preprocessor_test.py | 3585 ++++++++ core/region_similarity_calculator.py | 159 + 
core/region_similarity_calculator_test.py | 95 + core/standard_fields.py | 263 + core/target_assigner.py | 707 ++ core/target_assigner_test.py | 1232 +++ data/coco/coco-2017/readme.txt | 1 + data/coco/coco-ovic/annotations/readme.txt | 1 + data/coco/coco-ovic/coco.py | 345 + data/coco/coco-ovic/genMinval8000.py | 99 + data/coco/coco-ovic/ovic_val_2017_list.txt | 7991 +++++++++++++++++ data/coco/coco-ovic/readme.txt | 8 + data/coco/coco-ovic/removeSmallObj.py | 72 + data/coco/coco-ovic/splitImage.py | 65 + data/coco/coco-ovic/tflite_test.py | 75 + data/coco/coco-ovic/train-val_statistics.py | 82 + data_decoders/tf_example_decoder.py | 492 + data_decoders/tf_example_decoder_test.py | 997 ++ eval_util.py | 978 ++ eval_util_test.py | 310 + export_inference_graph.py | 162 + export_tflite_ssd_graph.py | 143 + export_tflite_ssd_graph_lib.py | 332 + export_tflite_ssd_graph_lib_bak.py | 330 + export_tflite_ssd_graph_lib_test.py | 414 + exporter.py | 543 ++ exporter_test.py | 1144 +++ inputs.py | 780 ++ inputs_test.py | 1277 +++ metrics/calibration_evaluation.py | 228 + metrics/calibration_evaluation_test.py | 200 + metrics/calibration_metrics.py | 118 + metrics/calibration_metrics_test.py | 109 + metrics/coco_evaluation.py | 762 ++ metrics/coco_evaluation_test.py | 951 ++ metrics/coco_tools.py | 856 ++ metrics/coco_tools_test.py | 295 + metrics/io_utils.py | 34 + metrics/offline_eval_map_corloc.py | 171 + metrics/offline_eval_map_corloc_test.py | 58 + metrics/oid_challenge_evaluation.py | 149 + metrics/oid_challenge_evaluation_utils.py | 197 + .../oid_challenge_evaluation_utils_test.py | 306 + metrics/oid_vrd_challenge_evaluation.py | 154 + metrics/oid_vrd_challenge_evaluation_utils.py | 125 + ...oid_vrd_challenge_evaluation_utils_test.py | 149 + metrics/tf_example_parser.py | 159 + metrics/tf_example_parser_test.py | 197 + model_hparams.py | 44 + model_lib.py | 879 ++ model_lib_test.py | 430 + model_lib_v2.py | 803 ++ model_lib_v2_test.py | 104 + model_main.py | 111 + ...n_inception_resnet_v2_feature_extractor.py | 214 + ...eption_resnet_v2_feature_extractor_test.py | 109 + ..._resnet_v2_keras_feature_extractor_test.py | 109 + ...ter_rcnn_inception_v2_feature_extractor.py | 255 + ...ter_rcnn_mobilenet_v1_feature_extractor.py | 195 + ...cnn_mobilenet_v1_feature_extractor_test.py | 126 + ...r_rcnn_resnet_v1_feature_extractor_test.py | 165 + models/feature_map_generators.py | 823 ++ .../base_models/original_mobilenet_v2.py | 479 + models/keras_models/inception_resnet_v2.py | 244 + .../keras_models/inception_resnet_v2_test.py | 223 + models/keras_models/mobilenet_v1.py | 347 + models/keras_models/mobilenet_v1_test.py | 258 + models/keras_models/mobilenet_v2.py | 334 + models/keras_models/mobilenet_v2_test.py | 247 + models/keras_models/model_utils.py | 53 + models/keras_models/resnet_v1.py | 397 + models/keras_models/resnet_v1_test.py | 183 + models/keras_models/test_utils.py | 214 + models/ssd_feature_extractor_test.py | 226 + models/ssd_inception_v2_feature_extractor.py | 138 + ...ssd_inception_v2_feature_extractor_test.py | 157 + ...ssd_inception_v3_feature_extractor_test.py | 157 + models/ssd_mobilenet_v1_feature_extractor.py | 139 + ...ssd_mobilenet_v1_feature_extractor_test.py | 296 + .../ssd_mobilenet_v1_fpn_feature_extractor.py | 198 + .../ssd_mobilenet_v1_ppn_feature_extractor.py | 85 + ...mobilenet_v1_ppn_feature_extractor_test.py | 186 + models/ssd_mobilenet_v2_feature_extractor.py | 141 + ...ssd_mobilenet_v2_feature_extractor_test.py | 227 + ...mobilenet_v2_fpn_feature_extractor_test.py | 
435 + ...sd_mobilenet_v2_keras_feature_extractor.py | 167 + models/ssd_mobilenet_v3_feature_extractor.py | 220 + ...ssd_mobilenet_v3_feature_extractor_test.py | 106 + ...mobilenet_v3_feature_extractor_testbase.py | 116 + models/ssd_pnasnet_feature_extractor.py | 180 + models/ssd_pnasnet_feature_extractor_test.py | 108 + models/ssd_resnet_v1_fpn_feature_extractor.py | 387 + ...esnet_v1_fpn_feature_extractor_testbase.py | 184 + ...sd_resnet_v1_ppn_feature_extractor_test.py | 88 + ...esnet_v1_ppn_feature_extractor_testbase.py | 82 + predictors/convolutional_box_predictor.py | 416 + .../convolutional_box_predictor_test.py | 922 ++ .../convolutional_keras_box_predictor.py | 476 + .../convolutional_keras_box_predictor_test.py | 908 ++ predictors/heads/box_head.py | 282 + predictors/heads/box_head_test.py | 127 + predictors/heads/class_head.py | 316 + predictors/heads/class_head_test.py | 194 + predictors/heads/head.py | 81 + predictors/heads/keras_box_head.py | 333 + predictors/heads/keras_box_head_test.py | 184 + predictors/heads/keras_class_head.py | 351 + predictors/heads/keras_class_head_test.py | 191 + predictors/heads/keras_mask_head.py | 441 + predictors/heads/keras_mask_head_test.py | 229 + predictors/heads/keypoint_head.py | 110 + predictors/heads/keypoint_head_test.py | 57 + predictors/heads/mask_head.py | 356 + predictors/heads/mask_head_test.py | 185 + predictors/mask_rcnn_box_predictor.py | 144 + predictors/mask_rcnn_box_predictor_test.py | 151 + predictors/mask_rcnn_keras_box_predictor.py | 139 + .../mask_rcnn_keras_box_predictor_test.py | 140 + predictors/rfcn_box_predictor.py | 159 + predictors/rfcn_box_predictor_test.py | 77 + predictors/rfcn_keras_box_predictor.py | 204 + predictors/rfcn_keras_box_predictor_test.py | 77 + protos/anchor_generator.proto | 19 + protos/anchor_generator_pb2.py | 114 + protos/argmax_matcher.proto | 29 + protos/argmax_matcher_pb2.py | 104 + protos/bipartite_matcher.proto | 11 + protos/bipartite_matcher_pb2.py | 69 + protos/box_coder.proto | 19 + protos/box_coder_pb2.py | 114 + protos/box_predictor.proto | 200 + protos/box_predictor_pb2.py | 678 ++ protos/calibration.proto | 90 + protos/calibration_pb2.py | 589 ++ protos/eval.proto | 94 + protos/eval_pb2.py | 248 + protos/faster_rcnn.proto | 195 + protos/faster_rcnn_box_coder.proto | 17 + protos/faster_rcnn_box_coder_pb2.py | 90 + protos/faster_rcnn_pb2.py | 403 + protos/flexible_grid_anchor_generator_pb2.py | 151 + protos/graph_rewriter.proto | 27 + protos/graph_rewriter_pb2.py | 130 + protos/grid_anchor_generator.proto | 34 + protos/grid_anchor_generator_pb2.py | 118 + protos/hyperparams.proto | 127 + protos/hyperparams_pb2.py | 654 ++ protos/image_resizer.proto | 92 + protos/image_resizer_pb2.py | 404 + protos/input_reader.proto | 126 + protos/input_reader_pb2.py | 353 + protos/keypoint_box_coder.proto | 19 + protos/keypoint_box_coder_pb2.py | 97 + protos/losses.proto | 202 + protos/losses_pb2.py | 859 ++ protos/matcher.proto | 15 + protos/matcher_pb2.py | 90 + protos/mean_stddev_box_coder.proto | 10 + protos/mean_stddev_box_coder_pb2.py | 69 + protos/model.proto | 24 + protos/model_pb2.py | 140 + protos/multiscale_anchor_generator.proto | 26 + protos/multiscale_anchor_generator_pb2.py | 104 + protos/optimizer.proto | 92 + protos/optimizer_pb2.py | 626 ++ protos/pipeline.proto | 22 + protos/pipeline_pb2.py | 116 + protos/post_processing.proto | 77 + protos/post_processing_pb2.py | 225 + protos/preprocessor.proto | 535 ++ protos/preprocessor_pb2.py | 2618 ++++++ 
protos/region_similarity_calculator.proto | 33 + protos/region_similarity_calculator_pb2.py | 244 + protos/square_box_coder.proto | 14 + protos/square_box_coder_pb2.py | 83 + protos/ssd.proto | 226 + protos/ssd_anchor_generator.proto | 55 + protos/ssd_anchor_generator_pb2.py | 153 + protos/ssd_pb2.py | 503 ++ protos/string_int_label_map.proto | 24 + protos/string_int_label_map_pb2.py | 123 + protos/train.proto | 122 + protos/train_pb2.py | 266 + ..._v2_quantized_300x300_coco_check_19.config | 206 + ..._v2_quantized_300x300_coco_check_43.config | 205 + ...t_v2_quantized_300x300_coco_check_9.config | 205 + tmp/model_pretrained/readme.txt | 1 + tmp/model_trained/readme.txt | 1 + utils/autoaugment_utils.py | 1639 ++++ utils/category_util.py | 77 + utils/category_util_test.py | 59 + utils/config_util.py | 999 +++ utils/config_util_test.py | 933 ++ utils/context_manager.py | 39 + utils/context_manager_test.py | 33 + utils/dataset_util.py | 90 + utils/dataset_util_test.py | 41 + utils/json_utils.py | 87 + utils/json_utils_test.py | 97 + utils/label_map_util.py | 249 + utils/label_map_util_test.py | 356 + utils/learning_schedules_test.py | 161 + utils/metrics.py | 193 + utils/metrics_test.py | 147 + utils/model_util.py | 92 + utils/model_util_test.py | 58 + utils/np_box_list.py | 137 + utils/np_box_list_ops.py | 563 ++ utils/np_box_list_ops_test.py | 418 + utils/np_box_list_test.py | 139 + utils/np_box_mask_list.py | 66 + utils/np_box_mask_list_ops.py | 404 + utils/np_box_mask_list_ops_test.py | 195 + utils/np_box_mask_list_test.py | 186 + utils/np_box_ops.py | 102 + utils/np_box_ops_test.py | 72 + utils/np_mask_ops.py | 124 + utils/np_mask_ops_test.py | 92 + utils/object_detection_evaluation.py | 1335 +++ utils/object_detection_evaluation_test.py | 1110 +++ utils/ops.py | 1095 +++ utils/ops_test.py | 1440 +++ utils/patch_ops.py | 85 + utils/patch_ops_test.py | 139 + utils/per_image_evaluation_test.py | 718 ++ utils/per_image_vrd_evaluation.py | 229 + utils/per_image_vrd_evaluation_test.py | 99 + utils/shape_utils.py | 498 + utils/shape_utils_test.py | 415 + utils/spatial_transform_ops.py | 481 + utils/spatial_transform_ops_test.py | 554 ++ utils/static_shape.py | 90 + utils/static_shape_test.py | 54 + utils/test_case.py | 108 + utils/test_utils.py | 235 + utils/test_utils_test.py | 93 + utils/variables_helper.py | 179 + utils/variables_helper_test.py | 244 + utils/visualization_utils.py | 1098 +++ utils/visualization_utils_test.py | 451 + utils/vrd_evaluation.py | 588 ++ utils/vrd_evaluation_test.py | 261 + 327 files changed, 106620 insertions(+) create mode 100644 LICENSE create mode 100644 README.md create mode 100644 anchor_generators/flexible_grid_anchor_generator.py create mode 100644 anchor_generators/flexible_grid_anchor_generator_test.py create mode 100644 anchor_generators/grid_anchor_generator.py create mode 100644 anchor_generators/grid_anchor_generator_test.py create mode 100644 anchor_generators/multiple_grid_anchor_generator.py create mode 100644 anchor_generators/multiple_grid_anchor_generator_test.py create mode 100644 anchor_generators/multiscale_grid_anchor_generator.py create mode 100644 anchor_generators/multiscale_grid_anchor_generator_test.py create mode 100644 box_coders/faster_rcnn_box_coder.py create mode 100644 box_coders/faster_rcnn_box_coder_test.py create mode 100644 box_coders/keypoint_box_coder.py create mode 100644 box_coders/keypoint_box_coder_test.py create mode 100644 box_coders/mean_stddev_box_coder_test.py create mode 100644 box_coders/square_box_coder.py 
create mode 100644 box_coders/square_box_coder_test.py create mode 100644 builders/anchor_generator_builder.py create mode 100644 builders/anchor_generator_builder_test.py create mode 100644 builders/box_coder_builder.py create mode 100644 builders/box_coder_builder_test.py create mode 100644 builders/box_predictor_builder.py create mode 100644 builders/box_predictor_builder_test.py create mode 100644 builders/calibration_builder.py create mode 100644 builders/calibration_builder_test.py create mode 100644 builders/dataset_builder.py create mode 100644 builders/dataset_builder_test.py create mode 100644 builders/graph_rewriter_builder.py create mode 100644 builders/graph_rewriter_builder_test.py create mode 100644 builders/hyperparams_builder.py create mode 100644 builders/hyperparams_builder_test.py create mode 100644 builders/image_resizer_builder.py create mode 100644 builders/image_resizer_builder_test.py create mode 100644 builders/input_reader_builder.py create mode 100644 builders/input_reader_builder_test.py create mode 100644 builders/losses_builder.py create mode 100644 builders/losses_builder_test.py create mode 100644 builders/matcher_builder.py create mode 100644 builders/matcher_builder_test.py create mode 100644 builders/model_builder.py create mode 100644 builders/model_builder_test.py create mode 100644 builders/optimizer_builder.py create mode 100644 builders/optimizer_builder_test.py create mode 100644 builders/post_processing_builder.py create mode 100644 builders/post_processing_builder_test.py create mode 100644 builders/preprocessor_builder.py create mode 100644 builders/preprocessor_builder_test.py create mode 100644 builders/region_similarity_calculator_builder.py create mode 100644 builders/region_similarity_calculator_builder_test.py create mode 100644 builders/target_assigner_builder.py create mode 100644 builders/target_assigner_builder_test.py create mode 100644 core/__init__.py create mode 100644 core/anchor_generator.py create mode 100644 core/balanced_positive_negative_sampler.py create mode 100644 core/balanced_positive_negative_sampler_test.py create mode 100644 core/batch_multiclass_nms_test.py create mode 100644 core/batcher.py create mode 100644 core/batcher_test.py create mode 100644 core/box_coder.py create mode 100644 core/box_coder_test.py create mode 100644 core/box_list.py create mode 100644 core/box_list_ops.py create mode 100644 core/box_list_ops_test.py create mode 100644 core/box_list_test.py create mode 100644 core/box_predictor.py create mode 100644 core/class_agnostic_nms_test.py create mode 100644 core/data_decoder.py create mode 100644 core/data_parser.py create mode 100644 core/freezable_batch_norm.py create mode 100644 core/freezable_batch_norm_test.py create mode 100644 core/keypoint_ops.py create mode 100644 core/keypoint_ops_test.py create mode 100644 core/losses.py create mode 100644 core/losses_test.py create mode 100644 core/matcher.py create mode 100644 core/matcher_test.py create mode 100644 core/minibatch_sampler.py create mode 100644 core/minibatch_sampler_test.py create mode 100644 core/model.py create mode 100644 core/multiclass_nms_test.py create mode 100644 core/post_processing.py create mode 100644 core/prefetcher.py create mode 100644 core/prefetcher_test.py create mode 100644 core/preprocessor.py create mode 100644 core/preprocessor_cache.py create mode 100644 core/preprocessor_test.py create mode 100644 core/region_similarity_calculator.py create mode 100644 core/region_similarity_calculator_test.py create mode 100644 
core/standard_fields.py create mode 100644 core/target_assigner.py create mode 100644 core/target_assigner_test.py create mode 100644 data/coco/coco-2017/readme.txt create mode 100644 data/coco/coco-ovic/annotations/readme.txt create mode 100644 data/coco/coco-ovic/coco.py create mode 100644 data/coco/coco-ovic/genMinval8000.py create mode 100644 data/coco/coco-ovic/ovic_val_2017_list.txt create mode 100644 data/coco/coco-ovic/readme.txt create mode 100644 data/coco/coco-ovic/removeSmallObj.py create mode 100644 data/coco/coco-ovic/splitImage.py create mode 100644 data/coco/coco-ovic/tflite_test.py create mode 100644 data/coco/coco-ovic/train-val_statistics.py create mode 100644 data_decoders/tf_example_decoder.py create mode 100644 data_decoders/tf_example_decoder_test.py create mode 100644 eval_util.py create mode 100644 eval_util_test.py create mode 100644 export_inference_graph.py create mode 100644 export_tflite_ssd_graph.py create mode 100644 export_tflite_ssd_graph_lib.py create mode 100644 export_tflite_ssd_graph_lib_bak.py create mode 100644 export_tflite_ssd_graph_lib_test.py create mode 100644 exporter.py create mode 100644 exporter_test.py create mode 100644 inputs.py create mode 100644 inputs_test.py create mode 100644 metrics/calibration_evaluation.py create mode 100644 metrics/calibration_evaluation_test.py create mode 100644 metrics/calibration_metrics.py create mode 100644 metrics/calibration_metrics_test.py create mode 100644 metrics/coco_evaluation.py create mode 100644 metrics/coco_evaluation_test.py create mode 100644 metrics/coco_tools.py create mode 100644 metrics/coco_tools_test.py create mode 100644 metrics/io_utils.py create mode 100644 metrics/offline_eval_map_corloc.py create mode 100644 metrics/offline_eval_map_corloc_test.py create mode 100644 metrics/oid_challenge_evaluation.py create mode 100644 metrics/oid_challenge_evaluation_utils.py create mode 100644 metrics/oid_challenge_evaluation_utils_test.py create mode 100644 metrics/oid_vrd_challenge_evaluation.py create mode 100644 metrics/oid_vrd_challenge_evaluation_utils.py create mode 100644 metrics/oid_vrd_challenge_evaluation_utils_test.py create mode 100644 metrics/tf_example_parser.py create mode 100644 metrics/tf_example_parser_test.py create mode 100644 model_hparams.py create mode 100644 model_lib.py create mode 100644 model_lib_test.py create mode 100644 model_lib_v2.py create mode 100644 model_lib_v2_test.py create mode 100644 model_main.py create mode 100644 models/faster_rcnn_inception_resnet_v2_feature_extractor.py create mode 100644 models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py create mode 100644 models/faster_rcnn_inception_resnet_v2_keras_feature_extractor_test.py create mode 100644 models/faster_rcnn_inception_v2_feature_extractor.py create mode 100644 models/faster_rcnn_mobilenet_v1_feature_extractor.py create mode 100644 models/faster_rcnn_mobilenet_v1_feature_extractor_test.py create mode 100644 models/faster_rcnn_resnet_v1_feature_extractor_test.py create mode 100644 models/feature_map_generators.py create mode 100644 models/keras_models/base_models/original_mobilenet_v2.py create mode 100644 models/keras_models/inception_resnet_v2.py create mode 100644 models/keras_models/inception_resnet_v2_test.py create mode 100644 models/keras_models/mobilenet_v1.py create mode 100644 models/keras_models/mobilenet_v1_test.py create mode 100644 models/keras_models/mobilenet_v2.py create mode 100644 models/keras_models/mobilenet_v2_test.py create mode 100644 
models/keras_models/model_utils.py create mode 100644 models/keras_models/resnet_v1.py create mode 100644 models/keras_models/resnet_v1_test.py create mode 100644 models/keras_models/test_utils.py create mode 100644 models/ssd_feature_extractor_test.py create mode 100644 models/ssd_inception_v2_feature_extractor.py create mode 100644 models/ssd_inception_v2_feature_extractor_test.py create mode 100644 models/ssd_inception_v3_feature_extractor_test.py create mode 100644 models/ssd_mobilenet_v1_feature_extractor.py create mode 100644 models/ssd_mobilenet_v1_feature_extractor_test.py create mode 100644 models/ssd_mobilenet_v1_fpn_feature_extractor.py create mode 100644 models/ssd_mobilenet_v1_ppn_feature_extractor.py create mode 100644 models/ssd_mobilenet_v1_ppn_feature_extractor_test.py create mode 100644 models/ssd_mobilenet_v2_feature_extractor.py create mode 100644 models/ssd_mobilenet_v2_feature_extractor_test.py create mode 100644 models/ssd_mobilenet_v2_fpn_feature_extractor_test.py create mode 100644 models/ssd_mobilenet_v2_keras_feature_extractor.py create mode 100644 models/ssd_mobilenet_v3_feature_extractor.py create mode 100644 models/ssd_mobilenet_v3_feature_extractor_test.py create mode 100644 models/ssd_mobilenet_v3_feature_extractor_testbase.py create mode 100644 models/ssd_pnasnet_feature_extractor.py create mode 100644 models/ssd_pnasnet_feature_extractor_test.py create mode 100644 models/ssd_resnet_v1_fpn_feature_extractor.py create mode 100644 models/ssd_resnet_v1_fpn_feature_extractor_testbase.py create mode 100644 models/ssd_resnet_v1_ppn_feature_extractor_test.py create mode 100644 models/ssd_resnet_v1_ppn_feature_extractor_testbase.py create mode 100644 predictors/convolutional_box_predictor.py create mode 100644 predictors/convolutional_box_predictor_test.py create mode 100644 predictors/convolutional_keras_box_predictor.py create mode 100644 predictors/convolutional_keras_box_predictor_test.py create mode 100644 predictors/heads/box_head.py create mode 100644 predictors/heads/box_head_test.py create mode 100644 predictors/heads/class_head.py create mode 100644 predictors/heads/class_head_test.py create mode 100644 predictors/heads/head.py create mode 100644 predictors/heads/keras_box_head.py create mode 100644 predictors/heads/keras_box_head_test.py create mode 100644 predictors/heads/keras_class_head.py create mode 100644 predictors/heads/keras_class_head_test.py create mode 100644 predictors/heads/keras_mask_head.py create mode 100644 predictors/heads/keras_mask_head_test.py create mode 100644 predictors/heads/keypoint_head.py create mode 100644 predictors/heads/keypoint_head_test.py create mode 100644 predictors/heads/mask_head.py create mode 100644 predictors/heads/mask_head_test.py create mode 100644 predictors/mask_rcnn_box_predictor.py create mode 100644 predictors/mask_rcnn_box_predictor_test.py create mode 100644 predictors/mask_rcnn_keras_box_predictor.py create mode 100644 predictors/mask_rcnn_keras_box_predictor_test.py create mode 100644 predictors/rfcn_box_predictor.py create mode 100644 predictors/rfcn_box_predictor_test.py create mode 100644 predictors/rfcn_keras_box_predictor.py create mode 100644 predictors/rfcn_keras_box_predictor_test.py create mode 100644 protos/anchor_generator.proto create mode 100644 protos/anchor_generator_pb2.py create mode 100644 protos/argmax_matcher.proto create mode 100644 protos/argmax_matcher_pb2.py create mode 100644 protos/bipartite_matcher.proto create mode 100644 protos/bipartite_matcher_pb2.py create mode 100644 
protos/box_coder.proto create mode 100644 protos/box_coder_pb2.py create mode 100644 protos/box_predictor.proto create mode 100644 protos/box_predictor_pb2.py create mode 100644 protos/calibration.proto create mode 100644 protos/calibration_pb2.py create mode 100644 protos/eval.proto create mode 100644 protos/eval_pb2.py create mode 100644 protos/faster_rcnn.proto create mode 100644 protos/faster_rcnn_box_coder.proto create mode 100644 protos/faster_rcnn_box_coder_pb2.py create mode 100644 protos/faster_rcnn_pb2.py create mode 100644 protos/flexible_grid_anchor_generator_pb2.py create mode 100644 protos/graph_rewriter.proto create mode 100644 protos/graph_rewriter_pb2.py create mode 100644 protos/grid_anchor_generator.proto create mode 100644 protos/grid_anchor_generator_pb2.py create mode 100644 protos/hyperparams.proto create mode 100644 protos/hyperparams_pb2.py create mode 100644 protos/image_resizer.proto create mode 100644 protos/image_resizer_pb2.py create mode 100644 protos/input_reader.proto create mode 100644 protos/input_reader_pb2.py create mode 100644 protos/keypoint_box_coder.proto create mode 100644 protos/keypoint_box_coder_pb2.py create mode 100644 protos/losses.proto create mode 100644 protos/losses_pb2.py create mode 100644 protos/matcher.proto create mode 100644 protos/matcher_pb2.py create mode 100644 protos/mean_stddev_box_coder.proto create mode 100644 protos/mean_stddev_box_coder_pb2.py create mode 100644 protos/model.proto create mode 100644 protos/model_pb2.py create mode 100644 protos/multiscale_anchor_generator.proto create mode 100644 protos/multiscale_anchor_generator_pb2.py create mode 100644 protos/optimizer.proto create mode 100644 protos/optimizer_pb2.py create mode 100644 protos/pipeline.proto create mode 100644 protos/pipeline_pb2.py create mode 100644 protos/post_processing.proto create mode 100644 protos/post_processing_pb2.py create mode 100644 protos/preprocessor.proto create mode 100644 protos/preprocessor_pb2.py create mode 100644 protos/region_similarity_calculator.proto create mode 100644 protos/region_similarity_calculator_pb2.py create mode 100644 protos/square_box_coder.proto create mode 100644 protos/square_box_coder_pb2.py create mode 100644 protos/ssd.proto create mode 100644 protos/ssd_anchor_generator.proto create mode 100644 protos/ssd_anchor_generator_pb2.py create mode 100644 protos/ssd_pb2.py create mode 100644 protos/string_int_label_map.proto create mode 100644 protos/string_int_label_map_pb2.py create mode 100644 protos/train.proto create mode 100644 protos/train_pb2.py create mode 100644 samples/configs/ssd_mobilenet_v2_quantized_300x300_coco_check_19.config create mode 100644 samples/configs/ssd_mobilenet_v2_quantized_300x300_coco_check_43.config create mode 100644 samples/configs/ssd_mobilenet_v2_quantized_300x300_coco_check_9.config create mode 100644 tmp/model_pretrained/readme.txt create mode 100644 tmp/model_trained/readme.txt create mode 100644 utils/autoaugment_utils.py create mode 100644 utils/category_util.py create mode 100644 utils/category_util_test.py create mode 100644 utils/config_util.py create mode 100644 utils/config_util_test.py create mode 100644 utils/context_manager.py create mode 100644 utils/context_manager_test.py create mode 100644 utils/dataset_util.py create mode 100644 utils/dataset_util_test.py create mode 100644 utils/json_utils.py create mode 100644 utils/json_utils_test.py create mode 100644 utils/label_map_util.py create mode 100644 utils/label_map_util_test.py create mode 100644 
utils/learning_schedules_test.py create mode 100644 utils/metrics.py create mode 100644 utils/metrics_test.py create mode 100644 utils/model_util.py create mode 100644 utils/model_util_test.py create mode 100644 utils/np_box_list.py create mode 100644 utils/np_box_list_ops.py create mode 100644 utils/np_box_list_ops_test.py create mode 100644 utils/np_box_list_test.py create mode 100644 utils/np_box_mask_list.py create mode 100644 utils/np_box_mask_list_ops.py create mode 100644 utils/np_box_mask_list_ops_test.py create mode 100644 utils/np_box_mask_list_test.py create mode 100644 utils/np_box_ops.py create mode 100644 utils/np_box_ops_test.py create mode 100644 utils/np_mask_ops.py create mode 100644 utils/np_mask_ops_test.py create mode 100644 utils/object_detection_evaluation.py create mode 100644 utils/object_detection_evaluation_test.py create mode 100644 utils/ops.py create mode 100644 utils/ops_test.py create mode 100644 utils/patch_ops.py create mode 100644 utils/patch_ops_test.py create mode 100644 utils/per_image_evaluation_test.py create mode 100644 utils/per_image_vrd_evaluation.py create mode 100644 utils/per_image_vrd_evaluation_test.py create mode 100644 utils/shape_utils.py create mode 100644 utils/shape_utils_test.py create mode 100644 utils/spatial_transform_ops.py create mode 100644 utils/spatial_transform_ops_test.py create mode 100644 utils/static_shape.py create mode 100644 utils/static_shape_test.py create mode 100644 utils/test_case.py create mode 100644 utils/test_utils.py create mode 100644 utils/test_utils_test.py create mode 100644 utils/variables_helper.py create mode 100644 utils/variables_helper_test.py create mode 100644 utils/visualization_utils.py create mode 100644 utils/visualization_utils_test.py create mode 100644 utils/vrd_evaluation.py create mode 100644 utils/vrd_evaluation_test.py diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..b655120 --- /dev/null +++ b/README.md @@ -0,0 +1,73 @@ +# LPCVC-2019 +LPCVC-2019 object-detection track by Orange-Control, implemented in TensorFlow (TF>=1.12). + +### Installation +The installation requirements are essentially the same as those of the official TensorFlow object-detection API; please refer to [Installation.md](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md). + +### Dataset Preparation + 1. Download the [COCO-2017 dataset](http://cocodataset.org/#download) for the object detection task into ./data/coco/coco-2017. + + 2. Run genMinval8000.py in ./data/coco/coco-ovic to generate the annotation files (i.e., ./annotations/instances_train_L.json and ./annotations/instances_minval_L.json) for training and validation according to ovic_val_2017_list.txt, as required by [OVIC](https://github.com/tensorflow/models/blob/master/research/object_detection/data/mscoco_minival_ids.txt). + + 3. Run splitImage.py in ./data/coco/coco-ovic to split the images according to ovic_val_2017_list.txt. The split images are stored in ./data/coco/coco-ovic/val_ovic and ./data/coco/coco-ovic/train_ovic. + + 4. Run removeSmallObj.py in ./data/coco/coco-ovic to generate an annotation file (i.e., ./annotations/instances_train_L_256.json) that removes small objects, keeping only annotations whose area is at least 256. A quick sanity check of the generated files is sketched below.
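The following sketch is not part of the repository; it assumes pycocotools is installed and that the three annotation files above were generated at the listed paths.

    # Hypothetical check: count images and annotations in each generated split.
    from pycocotools.coco import COCO

    for name in ['instances_train_L.json', 'instances_minval_L.json',
                 'instances_train_L_256.json']:
        coco = COCO('./data/coco/coco-ovic/annotations/' + name)
        print(name, len(coco.getImgIds()), 'images,',
              len(coco.getAnnIds()), 'annotations')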
+### Training +##### Stage1: Load the pretrained model ([ssd_mobilenet_v2_quantized_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_quantized_300x300_coco_2019_01_03.tar.gz)) and train for 200K steps with lr 0.0004 and input size 320x320 --> AP 20.7 + + CUDA_VISIBLE_DEVICES=0 python ./LPCVC-2019/model_main.py \ + --pipeline_config_path=./LPCVC-2019/samples/configs/ssd_mobilenet_v2_quantized_300x300_coco_check_9.config \ + --model_dir=./LPCVC-2019/tmp/model_trained/lpcvc4_check_9/ \ + --num_train_steps=200000 \ + --sample_1_of_n_eval_examples=8 \ + --alsologtostderr + +##### Stage2: Load the model from Stage1 and train for 70K steps with lr 0.00005 --> AP 21.1 + + CUDA_VISIBLE_DEVICES=0 python ./LPCVC-2019/model_main.py \ + --pipeline_config_path=./LPCVC-2019/samples/configs/ssd_mobilenet_v2_quantized_300x300_coco_check_19.config \ + --model_dir=./LPCVC-2019/tmp/model_trained/lpcvc4_check_19/ \ + --num_train_steps=1000000 \ + --sample_1_of_n_eval_examples=8 \ + --alsologtostderr + +##### Stage3: Load the model from Stage2, change min_scale of the anchor_generator to 0.15, and train with instances_train_L_256.json: 150K steps with lr 0.0004 (AP 21.1), then 150K steps with lr 0.00005 (AP 21.6), then 250K steps with lr 0.00001 --> AP 21.7 + + CUDA_VISIBLE_DEVICES=0 python ./LPCVC-2019/model_main.py \ + --pipeline_config_path=./LPCVC-2019/samples/configs/ssd_mobilenet_v2_quantized_300x300_coco_check_43.config \ + --model_dir=./LPCVC-2019/tmp/model_trained/lpcvc4_check_43/ \ + --num_train_steps=1000000 \ + --sample_1_of_n_eval_examples=8 \ + --alsologtostderr + + + +### Testing +##### Convert .ckpt to .pb for tflite + + python ./LPCVC-2019/export_tflite_ssd_graph.py \ + --pipeline_config_path=./LPCVC-2019/samples/configs/ssd_mobilenet_v2_quantized_300x300_coco_check_43.config \ + --trained_checkpoint_prefix=./LPCVC-2019/tmp/model_trained/lpcvc4_check_43/model.ckpt-560197 \ + --output_directory=./LPCVC-2019/tmp/model_trained/lpcvc4_check_43/tflite/560197/non_34_convert/ \ + --max_detections=100 \ + --add_postprocessing_op=true + +##### Convert .pb to .tflite + + tflite_convert --graph_def_file=./LPCVC-2019/tmp/model_trained/lpcvc4_check_43/tflite/560197/non_34_convert/tflite_graph.pb \ + --output_format=TFLITE \ + --output_file=./LPCVC-2019/tmp/model_trained/lpcvc4_check_43/tflite/560197/non_34_convert/model.tflite \ + --input_shapes=1,320,320,3 \ + --input_arrays=normalized_input_image_tensor \ + --output_arrays='TFLite_Detection_PostProcess','TFLite_Detection_PostProcess:1','TFLite_Detection_PostProcess:2','TFLite_Detection_PostProcess:3' \ + --inference_type=QUANTIZED_UINT8 \ + --mean_values=128 \ + --std_dev_values=128 \ + --allow_custom_ops + +##### Calculate the AP of the .tflite model locally + + Run tflite_test.py in ./data/coco/coco-ovic to test on the 7991 images in ./data/coco/coco-ovic/val_ovic. A minimal interpreter-level sketch of this step is shown below.
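tflite_test.py is the authoritative evaluation script; the sketch below only illustrates the interpreter-level loop. The model path and the uint8 input format are assumptions taken from the conversion flags above, and tf.lite.Interpreter lives at tf.contrib.lite in the oldest supported TF 1.x releases.

    import numpy as np
    import tensorflow as tf

    interpreter = tf.lite.Interpreter(
        model_path='./LPCVC-2019/tmp/model_trained/lpcvc4_check_43/'
                   'tflite/560197/non_34_convert/model.tflite')
    interpreter.allocate_tensors()
    inputs = interpreter.get_input_details()
    outputs = interpreter.get_output_details()

    # The converter ran with inference_type=QUANTIZED_UINT8 and
    # input_shapes=1,320,320,3, so feed raw uint8 pixels; replace the
    # zeros with a real 320x320 RGB image.
    image = np.zeros((1, 320, 320, 3), dtype=np.uint8)
    interpreter.set_tensor(inputs[0]['index'], image)
    interpreter.invoke()

    # TFLite_Detection_PostProcess emits boxes, classes, scores and
    # num_detections, in that order.
    boxes = interpreter.get_tensor(outputs[0]['index'])
    classes = interpreter.get_tensor(outputs[1]['index'])
    scores = interpreter.get_tensor(outputs[2]['index'])
    num_detections = interpreter.get_tensor(outputs[3]['index'])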
diff --git a/anchor_generators/flexible_grid_anchor_generator.py b/anchor_generators/flexible_grid_anchor_generator.py new file mode 100644 index 0000000..352b4a4 --- /dev/null +++ b/anchor_generators/flexible_grid_anchor_generator.py @@ -0,0 +1,134 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Generates grid anchors on the fly corresponding to multiple CNN layers.""" + +import tensorflow as tf + +from object_detection.anchor_generators import grid_anchor_generator +from object_detection.core import anchor_generator +from object_detection.core import box_list_ops + + +class FlexibleGridAnchorGenerator(anchor_generator.AnchorGenerator): + """Generate a grid of anchors for multiple CNN layers of different scale.""" + + def __init__(self, base_sizes, aspect_ratios, anchor_strides, anchor_offsets, + normalize_coordinates=True): + """Constructs a FlexibleGridAnchorGenerator. + + This generator is more flexible than the multiple_grid_anchor_generator + and multiscale_grid_anchor_generator, and can generate any of the anchors + that they can generate, plus additional anchor configurations. In + particular, it allows the explicit specification of scales and aspect ratios + at each layer without making any assumptions about the relationship + between scales and aspect ratios across layers. + + Args: + base_sizes: list of tuples of anchor base sizes. For example, setting + base_sizes=[(1, 2, 3), (4, 5)] means that we want 3 anchors at each + grid point on the first layer with the base sizes of 1, 2, and 3, and 2 + anchors at each grid point on the second layer with the base sizes of + 4 and 5. + aspect_ratios: list or tuple of aspect ratios. For example, setting + aspect_ratios=[(1.0, 2.0, 0.5), (1.0, 2.0)] means that we want 3 anchors + at each grid point on the first layer with aspect ratios of 1.0, 2.0, + and 0.5, and 2 anchors at each grid point on the second layer with + aspect ratios of 1.0 and 2.0. + anchor_strides: list of pairs of strides in pixels (in y and x directions + respectively). For example, setting anchor_strides=[(25, 25), (50, 50)] + means that we want the anchors corresponding to the first layer to be + strided by 25 pixels and those in the second layer to be strided by 50 + pixels in both y and x directions. + anchor_offsets: list of pairs of offsets in pixels (in y and x directions + respectively). The offset specifies where we want the center of the + (0, 0)-th anchor to lie for each layer. For example, setting + anchor_offsets=[(10, 10), (20, 20)] means that we want the + (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space + and likewise that we want the (0, 0)-th anchor of the second layer to + lie at (20, 20) in pixel space. + normalize_coordinates: whether to produce anchors in normalized + coordinates (defaults to True). + """ + self._base_sizes = base_sizes + self._aspect_ratios = aspect_ratios + self._anchor_strides = anchor_strides + self._anchor_offsets = anchor_offsets + self._normalize_coordinates = normalize_coordinates + + def name_scope(self): + return 'FlexibleGridAnchorGenerator' + + def num_anchors_per_location(self): + """Returns the number of anchors per spatial location.
+ + Returns: + a list of integers, one for each expected feature map to be passed to + the `generate` function. + """ + return [len(size) for size in self._base_sizes] + + def _generate(self, feature_map_shape_list, im_height=1, im_width=1): + """Generates a collection of bounding boxes to be used as anchors. + + Currently we require the input image shape to be statically defined. That + is, im_height and im_width should be integers rather than tensors. + + Args: + feature_map_shape_list: list of pairs of convnet layer resolutions in the + format [(height_0, width_0), (height_1, width_1), ...]. For example, + setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that + correspond to an 8x8 layer followed by a 7x7 layer. + im_height: the height of the image to generate the grid for. If both + im_height and im_width are 1, anchors can only be generated in + absolute coordinates. + im_width: the width of the image to generate the grid for. If both + im_height and im_width are 1, anchors can only be generated in + absolute coordinates. + + Returns: + boxes_list: a list of BoxLists each holding anchor boxes corresponding to + the input feature map shapes. + Raises: + ValueError: if im_height or im_width is 1, but normalized coordinates + were requested. + """ + anchor_grid_list = [] + for (feat_shape, base_sizes, aspect_ratios, anchor_stride, anchor_offset + ) in zip(feature_map_shape_list, self._base_sizes, self._aspect_ratios, + self._anchor_strides, self._anchor_offsets): + anchor_grid = grid_anchor_generator.tile_anchors( + feat_shape[0], + feat_shape[1], + tf.cast(tf.convert_to_tensor(base_sizes), dtype=tf.float32), + tf.cast(tf.convert_to_tensor(aspect_ratios), dtype=tf.float32), + tf.constant([1.0, 1.0]), + tf.cast(tf.convert_to_tensor(anchor_stride), dtype=tf.float32), + tf.cast(tf.convert_to_tensor(anchor_offset), dtype=tf.float32)) + num_anchors = anchor_grid.num_boxes_static() + if num_anchors is None: + num_anchors = anchor_grid.num_boxes() + anchor_indices = tf.zeros([num_anchors]) + anchor_grid.add_field('feature_map_index', anchor_indices) + if self._normalize_coordinates: + if im_height == 1 or im_width == 1: + raise ValueError( + 'Normalized coordinates were requested upon construction of the ' + 'FlexibleGridAnchorGenerator, but a subsequent call to ' + 'generate did not supply dimension information.') + anchor_grid = box_list_ops.to_normalized_coordinates( + anchor_grid, im_height, im_width, check_range=False) + anchor_grid_list.append(anchor_grid) + + return anchor_grid_list
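As a usage illustration (a minimal sketch, not part of the patch; the feature-map shapes, base sizes, and strides below are arbitrary), the generator can be driven directly:

    import tensorflow as tf
    from object_detection.anchor_generators import flexible_grid_anchor_generator as fg

    # Two feature maps: 3 anchors per location on the first, 2 on the second.
    gen = fg.FlexibleGridAnchorGenerator(
        base_sizes=[(32.0, 64.0, 96.0), (128.0, 256.0)],
        aspect_ratios=[(1.0, 2.0, 0.5), (1.0, 2.0)],
        anchor_strides=[(8, 8), (16, 16)],
        anchor_offsets=[(4, 4), (8, 8)],
        normalize_coordinates=False)
    print(gen.num_anchors_per_location())  # [3, 2]

    anchors_list = gen.generate(feature_map_shape_list=[(40, 40), (20, 20)],
                                im_height=320, im_width=320)
    with tf.Session() as sess:
        corners = sess.run([a.get() for a in anchors_list])
        # corners[0] has shape (40 * 40 * 3, 4); rows are [ymin, xmin, ymax, xmax].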
diff --git a/anchor_generators/flexible_grid_anchor_generator_test.py b/anchor_generators/flexible_grid_anchor_generator_test.py new file mode 100644 index 0000000..2f09771 --- /dev/null +++ b/anchor_generators/flexible_grid_anchor_generator_test.py @@ -0,0 +1,294 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for anchor_generators.flexible_grid_anchor_generator.py.""" +import numpy as np +import tensorflow as tf + +from object_detection.anchor_generators import flexible_grid_anchor_generator as fg +from object_detection.utils import test_case + + +class FlexibleGridAnchorGeneratorTest(test_case.TestCase): + + def test_construct_single_anchor(self): + anchor_strides = [(32, 32),] + anchor_offsets = [(16, 16),] + base_sizes = [(128.0,)] + aspect_ratios = [(1.0,)] + im_height = 64 + im_width = 64 + feature_map_shape_list = [(2, 2)] + exp_anchor_corners = [[-48, -48, 80, 80], + [-48, -16, 80, 112], + [-16, -48, 112, 80], + [-16, -16, 112, 112]] + anchor_generator = fg.FlexibleGridAnchorGenerator( + base_sizes, aspect_ratios, anchor_strides, anchor_offsets, + normalize_coordinates=False) + anchors_list = anchor_generator.generate( + feature_map_shape_list, im_height=im_height, im_width=im_width) + anchor_corners = anchors_list[0].get() + + with self.test_session(): + anchor_corners_out = anchor_corners.eval() + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_single_anchor_unit_dimensions(self): + anchor_strides = [(32, 32),] + anchor_offsets = [(16, 16),] + base_sizes = [(32.0,)] + aspect_ratios = [(1.0,)] + im_height = 1 + im_width = 1 + feature_map_shape_list = [(2, 2)] + # Positive offsets are produced. + exp_anchor_corners = [[0, 0, 32, 32], + [0, 32, 32, 64], + [32, 0, 64, 32], + [32, 32, 64, 64]] + + anchor_generator = fg.FlexibleGridAnchorGenerator( + base_sizes, aspect_ratios, anchor_strides, anchor_offsets, + normalize_coordinates=False) + anchors_list = anchor_generator.generate( + feature_map_shape_list, im_height=im_height, im_width=im_width) + anchor_corners = anchors_list[0].get() + + with self.test_session(): + anchor_corners_out = anchor_corners.eval() + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_normalized_anchors_fails_with_unit_dimensions(self): + anchor_generator = fg.FlexibleGridAnchorGenerator( + [(32.0,)], [(1.0,)], [(32, 32),], [(16, 16),], + normalize_coordinates=True) + with self.assertRaisesRegexp(ValueError, 'Normalized coordinates'): + anchor_generator.generate( + feature_map_shape_list=[(2, 2)], im_height=1, im_width=1) + + def test_construct_single_anchor_in_normalized_coordinates(self): + anchor_strides = [(32, 32),] + anchor_offsets = [(16, 16),] + base_sizes = [(128.0,)] + aspect_ratios = [(1.0,)] + im_height = 64 + im_width = 128 + feature_map_shape_list = [(2, 2)] + exp_anchor_corners = [[-48./64, -48./128, 80./64, 80./128], + [-48./64, -16./128, 80./64, 112./128], + [-16./64, -48./128, 112./64, 80./128], + [-16./64, -16./128, 112./64, 112./128]] + anchor_generator = fg.FlexibleGridAnchorGenerator( + base_sizes, aspect_ratios, anchor_strides, anchor_offsets, + normalize_coordinates=True) + anchors_list = anchor_generator.generate( + feature_map_shape_list, im_height=im_height, im_width=im_width) + anchor_corners = anchors_list[0].get() + + with self.test_session(): + anchor_corners_out = anchor_corners.eval() + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_num_anchors_per_location(self): + anchor_strides = [(32, 32), (64, 64)] + anchor_offsets = [(16, 16), (32, 32)] + base_sizes = [(32.0, 64.0, 96.0, 32.0, 64.0, 96.0), + (64.0, 128.0, 172.0, 64.0, 128.0, 172.0)] + aspect_ratios = [(1.0, 1.0, 1.0, 2.0, 2.0, 2.0), + (1.0, 1.0, 1.0, 2.0, 2.0, 2.0)] + anchor_generator =
fg.FlexibleGridAnchorGenerator( + base_sizes, aspect_ratios, anchor_strides, anchor_offsets, + normalize_coordinates=False) + self.assertEqual(anchor_generator.num_anchors_per_location(), [6, 6]) + + def test_construct_single_anchor_dynamic_size(self): + anchor_strides = [(32, 32),] + anchor_offsets = [(0, 0),] + base_sizes = [(128.0,)] + aspect_ratios = [(1.0,)] + im_height = tf.constant(64) + im_width = tf.constant(64) + feature_map_shape_list = [(2, 2)] + # Zero offsets are used. + exp_anchor_corners = [[-64, -64, 64, 64], + [-64, -32, 64, 96], + [-32, -64, 96, 64], + [-32, -32, 96, 96]] + + anchor_generator = fg.FlexibleGridAnchorGenerator( + base_sizes, aspect_ratios, anchor_strides, anchor_offsets, + normalize_coordinates=False) + anchors_list = anchor_generator.generate( + feature_map_shape_list, im_height=im_height, im_width=im_width) + anchor_corners = anchors_list[0].get() + + with self.test_session(): + anchor_corners_out = anchor_corners.eval() + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_single_anchor_with_odd_input_dimension(self): + + def graph_fn(): + anchor_strides = [(32, 32),] + anchor_offsets = [(0, 0),] + base_sizes = [(128.0,)] + aspect_ratios = [(1.0,)] + im_height = 65 + im_width = 65 + feature_map_shape_list = [(3, 3)] + anchor_generator = fg.FlexibleGridAnchorGenerator( + base_sizes, aspect_ratios, anchor_strides, anchor_offsets, + normalize_coordinates=False) + anchors_list = anchor_generator.generate( + feature_map_shape_list, im_height=im_height, im_width=im_width) + anchor_corners = anchors_list[0].get() + return (anchor_corners,) + anchor_corners_out = self.execute(graph_fn, []) + exp_anchor_corners = [[-64, -64, 64, 64], + [-64, -32, 64, 96], + [-64, 0, 64, 128], + [-32, -64, 96, 64], + [-32, -32, 96, 96], + [-32, 0, 96, 128], + [0, -64, 128, 64], + [0, -32, 128, 96], + [0, 0, 128, 128]] + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_single_anchor_on_two_feature_maps(self): + + def graph_fn(): + anchor_strides = [(32, 32), (64, 64)] + anchor_offsets = [(16, 16), (32, 32)] + base_sizes = [(128.0,), (256.0,)] + aspect_ratios = [(1.0,), (1.0,)] + im_height = 64 + im_width = 64 + feature_map_shape_list = [(2, 2), (1, 1)] + anchor_generator = fg.FlexibleGridAnchorGenerator( + base_sizes, aspect_ratios, anchor_strides, anchor_offsets, + normalize_coordinates=False) + anchors_list = anchor_generator.generate(feature_map_shape_list, + im_height=im_height, + im_width=im_width) + anchor_corners = [anchors.get() for anchors in anchors_list] + return anchor_corners + + anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0) + exp_anchor_corners = [[-48, -48, 80, 80], + [-48, -16, 80, 112], + [-16, -48, 112, 80], + [-16, -16, 112, 112], + [-96, -96, 160, 160]] + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_single_anchor_with_two_scales_per_octave(self): + + def graph_fn(): + anchor_strides = [(64, 64),] + anchor_offsets = [(32, 32),] + base_sizes = [(256.0, 362.03867)] + aspect_ratios = [(1.0, 1.0)] + im_height = 64 + im_width = 64 + feature_map_shape_list = [(1, 1)] + + anchor_generator = fg.FlexibleGridAnchorGenerator( + base_sizes, aspect_ratios, anchor_strides, anchor_offsets, + normalize_coordinates=False) + anchors_list = anchor_generator.generate(feature_map_shape_list, + im_height=im_height, + im_width=im_width) + anchor_corners = [anchors.get() for anchors in anchors_list] + return anchor_corners + # There are 2 sets of anchors in
this configuration. The order is: + # [[2**0.0 intermediate scale + 1.0 aspect], + # [2**0.5 intermediate scale + 1.0 aspect]] + exp_anchor_corners = [[-96., -96., 160., 160.], + [-149.0193, -149.0193, 213.0193, 213.0193]] + + anchor_corners_out = self.execute(graph_fn, []) + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_single_anchor_with_two_scales_per_octave_and_aspect(self): + def graph_fn(): + anchor_strides = [(64, 64),] + anchor_offsets = [(32, 32),] + base_sizes = [(256.0, 362.03867, 256.0, 362.03867)] + aspect_ratios = [(1.0, 1.0, 2.0, 2.0)] + im_height = 64 + im_width = 64 + feature_map_shape_list = [(1, 1)] + anchor_generator = fg.FlexibleGridAnchorGenerator( + base_sizes, aspect_ratios, anchor_strides, anchor_offsets, + normalize_coordinates=False) + anchors_list = anchor_generator.generate(feature_map_shape_list, + im_height=im_height, + im_width=im_width) + anchor_corners = [anchors.get() for anchors in anchors_list] + return anchor_corners + # There are 4 set of anchors in this configuration. The order is: + # [[2**0.0 intermediate scale + 1.0 aspect], + # [2**0.5 intermediate scale + 1.0 aspect], + # [2**0.0 intermediate scale + 2.0 aspect], + # [2**0.5 intermediate scale + 2.0 aspect]] + + exp_anchor_corners = [[-96., -96., 160., 160.], + [-149.0193, -149.0193, 213.0193, 213.0193], + [-58.50967, -149.0193, 122.50967, 213.0193], + [-96., -224., 160., 288.]] + anchor_corners_out = self.execute(graph_fn, []) + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_single_anchors_on_feature_maps_with_dynamic_shape(self): + + def graph_fn(feature_map1_height, feature_map1_width, feature_map2_height, + feature_map2_width): + anchor_strides = [(32, 32), (64, 64)] + anchor_offsets = [(16, 16), (32, 32)] + base_sizes = [(128.0,), (256.0,)] + aspect_ratios = [(1.0,), (1.0,)] + im_height = 64 + im_width = 64 + feature_map_shape_list = [(feature_map1_height, feature_map1_width), + (feature_map2_height, feature_map2_width)] + anchor_generator = fg.FlexibleGridAnchorGenerator( + base_sizes, aspect_ratios, anchor_strides, anchor_offsets, + normalize_coordinates=False) + anchors_list = anchor_generator.generate(feature_map_shape_list, + im_height=im_height, + im_width=im_width) + anchor_corners = [anchors.get() for anchors in anchors_list] + return anchor_corners + + anchor_corners_out = np.concatenate( + self.execute_cpu(graph_fn, [ + np.array(2, dtype=np.int32), + np.array(2, dtype=np.int32), + np.array(1, dtype=np.int32), + np.array(1, dtype=np.int32) + ]), + axis=0) + exp_anchor_corners = [[-48, -48, 80, 80], + [-48, -16, 80, 112], + [-16, -48, 112, 80], + [-16, -16, 112, 112], + [-96, -96, 160, 160]] + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + +if __name__ == '__main__': + tf.test.main() diff --git a/anchor_generators/grid_anchor_generator.py b/anchor_generators/grid_anchor_generator.py new file mode 100644 index 0000000..4289256 --- /dev/null +++ b/anchor_generators/grid_anchor_generator.py @@ -0,0 +1,213 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Generates grid anchors on the fly as used in Faster RCNN.
+
+Generates grid anchors on the fly as described in:
+"Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"
+Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun.
+"""
+
+import tensorflow as tf
+
+from object_detection.core import anchor_generator
+from object_detection.core import box_list
+from object_detection.utils import ops
+
+
+class GridAnchorGenerator(anchor_generator.AnchorGenerator):
+  """Generates a grid of anchors at given scales and aspect ratios."""
+
+  def __init__(self,
+               scales=(0.5, 1.0, 2.0),
+               aspect_ratios=(0.5, 1.0, 2.0),
+               base_anchor_size=None,
+               anchor_stride=None,
+               anchor_offset=None):
+    """Constructs a GridAnchorGenerator.
+
+    Args:
+      scales: a list of (float) scales, default=(0.5, 1.0, 2.0)
+      aspect_ratios: a list of (float) aspect ratios, default=(0.5, 1.0, 2.0)
+      base_anchor_size: base anchor size as [height, width]
+        (length-2 float32 list or tensor, default=[256, 256])
+      anchor_stride: difference in centers between base anchors for adjacent
+        grid positions (length-2 float32 list or tensor, default=[16, 16])
+      anchor_offset: center of the anchor with scale and aspect ratio 1 for the
+        upper left element of the grid; this should be zero for
+        feature networks with only VALID padding and even receptive
+        field size, but may need additional calculation if other
+        padding is used (length-2 float32 list or tensor, default=[0, 0])
+    """
+    # Handle argument defaults
+    if base_anchor_size is None:
+      base_anchor_size = [256, 256]
+    if anchor_stride is None:
+      anchor_stride = [16, 16]
+    if anchor_offset is None:
+      anchor_offset = [0, 0]
+
+    self._scales = scales
+    self._aspect_ratios = aspect_ratios
+    self._base_anchor_size = base_anchor_size
+    self._anchor_stride = anchor_stride
+    self._anchor_offset = anchor_offset
+
+  def name_scope(self):
+    return 'GridAnchorGenerator'
+
+  def num_anchors_per_location(self):
+    """Returns the number of anchors per spatial location.
+
+    Returns:
+      a list of integers, one for each expected feature map to be passed to
+      the `generate` function.
+    """
+    return [len(self._scales) * len(self._aspect_ratios)]
+
+  def _generate(self, feature_map_shape_list):
+    """Generates a collection of bounding boxes to be used as anchors.
+
+    Args:
+      feature_map_shape_list: list of pairs of convnet layer resolutions in the
+        format [(height_0, width_0)]. For example, setting
+        feature_map_shape_list=[(8, 8)] asks for anchors that correspond
+        to an 8x8 layer. For this anchor generator, only lists of length 1 are
+        allowed.
+
+    Returns:
+      boxes_list: a list of BoxLists each holding anchor boxes corresponding to
+        the input feature map shapes.
+
+    Raises:
+      ValueError: if feature_map_shape_list is not a list of length 1.
+      ValueError: if feature_map_shape_list does not consist of pairs of
+        integers.
+    """
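+    # Illustrative example (a sketch, not part of the original code): with the
+    # default scales (0.5, 1.0, 2.0) and aspect_ratios (0.5, 1.0, 2.0),
+    # feature_map_shape_list=[(8, 8)] yields a single BoxList holding
+    # 3 * 3 * 8 * 8 = 576 anchors.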
+    if not (isinstance(feature_map_shape_list, list)
+            and len(feature_map_shape_list) == 1):
+      raise ValueError('feature_map_shape_list must be a list of length 1.')
+    if not all([isinstance(list_item, tuple) and len(list_item) == 2
+                for list_item in feature_map_shape_list]):
+      raise ValueError('feature_map_shape_list must be a list of pairs.')
+
+    # Create constants in init_scope so they can be created in tf.functions
+    # and accessed from outside of the function.
+    with tf.init_scope():
+      self._base_anchor_size = tf.cast(tf.convert_to_tensor(
+          self._base_anchor_size), dtype=tf.float32)
+      self._anchor_stride = tf.cast(tf.convert_to_tensor(
+          self._anchor_stride), dtype=tf.float32)
+      self._anchor_offset = tf.cast(tf.convert_to_tensor(
+          self._anchor_offset), dtype=tf.float32)
+
+    grid_height, grid_width = feature_map_shape_list[0]
+    scales_grid, aspect_ratios_grid = ops.meshgrid(self._scales,
+                                                   self._aspect_ratios)
+    scales_grid = tf.reshape(scales_grid, [-1])
+    aspect_ratios_grid = tf.reshape(aspect_ratios_grid, [-1])
+    anchors = tile_anchors(grid_height,
+                           grid_width,
+                           scales_grid,
+                           aspect_ratios_grid,
+                           self._base_anchor_size,
+                           self._anchor_stride,
+                           self._anchor_offset)
+
+    num_anchors = anchors.num_boxes_static()
+    if num_anchors is None:
+      num_anchors = anchors.num_boxes()
+    anchor_indices = tf.zeros([num_anchors])
+    anchors.add_field('feature_map_index', anchor_indices)
+    return [anchors]
+
+
+def tile_anchors(grid_height,
+                 grid_width,
+                 scales,
+                 aspect_ratios,
+                 base_anchor_size,
+                 anchor_stride,
+                 anchor_offset):
+  """Create a tiled set of anchors strided along a grid in image space.
+
+  This op creates a set of anchor boxes by placing a "basis" collection of
+  boxes with user-specified scales and aspect ratios centered at evenly
+  distributed points along a grid. The basis collection is specified via the
+  scales and aspect_ratios arguments. For example, setting scales=[.1, .2, .2]
+  and aspect_ratios=[2, 2, 1/2] means that we create three boxes: one with
+  scale .1 and aspect ratio 2, one with scale .2 and aspect ratio 2, and one
+  with scale .2 and aspect ratio 1/2. Each box is multiplied by
+  "base_anchor_size" before being placed over its respective center.
+
+  Grid points are specified via the grid_height and grid_width parameters as
+  well as the anchor_stride and anchor_offset parameters.
+
+  Args:
+    grid_height: size of the grid in the y direction (int or int scalar tensor)
+    grid_width: size of the grid in the x direction (int or int scalar tensor)
+    scales: a 1-d (float) tensor representing the scale of each box in the
+      basis set.
+    aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each
+      box in the basis set. The length of the scales and aspect_ratios tensors
+      must be equal.
+ base_anchor_size: base anchor size as [height, width] + (float tensor of shape [2]) + anchor_stride: difference in centers between base anchors for adjacent grid + positions (float tensor of shape [2]) + anchor_offset: center of the anchor with scale and aspect ratio 1 for the + upper left element of the grid, this should be zero for + feature networks with only VALID padding and even receptive + field size, but may need some additional calculation if other + padding is used (float tensor of shape [2]) + Returns: + a BoxList holding a collection of N anchor boxes + """ + ratio_sqrts = tf.sqrt(aspect_ratios) + heights = scales / ratio_sqrts * base_anchor_size[0] + widths = scales * ratio_sqrts * base_anchor_size[1] + + # Get a grid of box centers + y_centers = tf.cast(tf.range(grid_height), dtype=tf.float32) + y_centers = y_centers * anchor_stride[0] + anchor_offset[0] + x_centers = tf.cast(tf.range(grid_width), dtype=tf.float32) + x_centers = x_centers * anchor_stride[1] + anchor_offset[1] + x_centers, y_centers = ops.meshgrid(x_centers, y_centers) + + widths_grid, x_centers_grid = ops.meshgrid(widths, x_centers) + heights_grid, y_centers_grid = ops.meshgrid(heights, y_centers) + bbox_centers = tf.stack([y_centers_grid, x_centers_grid], axis=3) + bbox_sizes = tf.stack([heights_grid, widths_grid], axis=3) + bbox_centers = tf.reshape(bbox_centers, [-1, 2]) + bbox_sizes = tf.reshape(bbox_sizes, [-1, 2]) + bbox_corners = _center_size_bbox_to_corners_bbox(bbox_centers, bbox_sizes) + return box_list.BoxList(bbox_corners) + + +def _center_size_bbox_to_corners_bbox(centers, sizes): + """Converts bbox center-size representation to corners representation. + + Args: + centers: a tensor with shape [N, 2] representing bounding box centers + sizes: a tensor with shape [N, 2] representing bounding boxes + + Returns: + corners: tensor with shape [N, 4] representing bounding boxes in corners + representation + """ + return tf.concat([centers - .5 * sizes, centers + .5 * sizes], 1) diff --git a/anchor_generators/grid_anchor_generator_test.py b/anchor_generators/grid_anchor_generator_test.py new file mode 100644 index 0000000..8de74aa --- /dev/null +++ b/anchor_generators/grid_anchor_generator_test.py @@ -0,0 +1,104 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for object_detection.grid_anchor_generator.""" +import numpy as np +import tensorflow as tf + +from object_detection.anchor_generators import grid_anchor_generator +from object_detection.utils import test_case + + +class GridAnchorGeneratorTest(test_case.TestCase): + + def test_construct_single_anchor(self): + """Builds a 1x1 anchor grid to test the size of the output boxes.""" + def graph_fn(): + scales = [0.5, 1.0, 2.0] + aspect_ratios = [0.25, 1.0, 4.0] + anchor_offset = [7, -3] + anchor_generator = grid_anchor_generator.GridAnchorGenerator( + scales, aspect_ratios, anchor_offset=anchor_offset) + anchors_list = anchor_generator.generate(feature_map_shape_list=[(1, 1)]) + anchor_corners = anchors_list[0].get() + return (anchor_corners,) + exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61], + [-505, -131, 519, 125], [-57, -67, 71, 61], + [-121, -131, 135, 125], [-249, -259, 263, 253], + [-25, -131, 39, 125], [-57, -259, 71, 253], + [-121, -515, 135, 509]] + anchor_corners_out = self.execute(graph_fn, []) + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_anchor_grid(self): + def graph_fn(): + base_anchor_size = [10, 10] + anchor_stride = [19, 19] + anchor_offset = [0, 0] + scales = [0.5, 1.0, 2.0] + aspect_ratios = [1.0] + + anchor_generator = grid_anchor_generator.GridAnchorGenerator( + scales, + aspect_ratios, + base_anchor_size=base_anchor_size, + anchor_stride=anchor_stride, + anchor_offset=anchor_offset) + + anchors_list = anchor_generator.generate(feature_map_shape_list=[(2, 2)]) + anchor_corners = anchors_list[0].get() + return (anchor_corners,) + exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.], + [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5], + [-5., 14., 5, 24], [-10., 9., 10, 29], + [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5], + [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5], + [14., 14., 24, 24], [9., 9., 29, 29]] + anchor_corners_out = self.execute(graph_fn, []) + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_anchor_grid_with_dynamic_feature_map_shapes(self): + def graph_fn(feature_map_height, feature_map_width): + base_anchor_size = [10, 10] + anchor_stride = [19, 19] + anchor_offset = [0, 0] + scales = [0.5, 1.0, 2.0] + aspect_ratios = [1.0] + anchor_generator = grid_anchor_generator.GridAnchorGenerator( + scales, + aspect_ratios, + base_anchor_size=base_anchor_size, + anchor_stride=anchor_stride, + anchor_offset=anchor_offset) + + anchors_list = anchor_generator.generate( + feature_map_shape_list=[(feature_map_height, feature_map_width)]) + anchor_corners = anchors_list[0].get() + return (anchor_corners,) + + exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.], + [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5], + [-5., 14., 5, 24], [-10., 9., 10, 29], + [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5], + [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5], + [14., 14., 24, 24], [9., 9., 29, 29]] + anchor_corners_out = self.execute_cpu(graph_fn, + [np.array(2, dtype=np.int32), + np.array(2, dtype=np.int32)]) + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + +if __name__ == '__main__': + tf.test.main() diff --git a/anchor_generators/multiple_grid_anchor_generator.py b/anchor_generators/multiple_grid_anchor_generator.py new file mode 100644 index 0000000..86007c9 --- /dev/null +++ b/anchor_generators/multiple_grid_anchor_generator.py @@ -0,0 +1,342 @@ +# Copyright 2017 
The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Generates grid anchors on the fly corresponding to multiple CNN layers.
+
+Generates grid anchors on the fly corresponding to multiple CNN layers as
+described in:
+"SSD: Single Shot MultiBox Detector"
+Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
+Cheng-Yang Fu, Alexander C. Berg
+(see Section 2.2: Choosing scales and aspect ratios for default boxes)
+"""
+
+import numpy as np
+
+import tensorflow as tf
+
+from object_detection.anchor_generators import grid_anchor_generator
+from object_detection.core import anchor_generator
+from object_detection.core import box_list_ops
+
+
+class MultipleGridAnchorGenerator(anchor_generator.AnchorGenerator):
+  """Generate a grid of anchors for multiple CNN layers."""
+
+  def __init__(self,
+               box_specs_list,
+               base_anchor_size=None,
+               anchor_strides=None,
+               anchor_offsets=None,
+               clip_window=None):
+    """Constructs a MultipleGridAnchorGenerator.
+
+    To construct anchors at multiple grid resolutions, one must provide a
+    list of feature_map_shape_list (e.g., [(8, 8), (4, 4)]), and for each grid
+    size, a corresponding list of (scale, aspect ratio) box specifications.
+
+    For example:
+    box_specs_list = [[(.1, 1.0), (.1, 2.0)],  # for 8x8 grid
+                      [(.2, 1.0), (.3, 1.0), (.2, 2.0)]]  # for 4x4 grid
+
+    To support the fully convolutional setting, we pass grid sizes in at
+    generation time, while scale and aspect ratios are fixed at construction
+    time.
+
+    Args:
+      box_specs_list: list of list of (scale, aspect ratio) pairs with the
+        outside list having the same number of entries as
+        feature_map_shape_list (which is passed in at generation time).
+      base_anchor_size: base anchor size as [height, width]
+        (length-2 float numpy array or Tensor, default=[256, 256]).
+        The height and width values are normalized to the
+        minimum dimension of the input height and width, so that
+        when the base anchor height equals the base anchor
+        width, the resulting anchor is square even if the input
+        image is not square.
+      anchor_strides: list of pairs of strides in pixels (in y and x directions
+        respectively). For example, setting anchor_strides=[(25, 25), (50, 50)]
+        means that we want the anchors corresponding to the first layer to be
+        strided by 25 pixels and those in the second layer to be strided by 50
+        pixels in both y and x directions. If anchor_strides=None, they are set
+        to be the reciprocal of the corresponding feature map shapes.
+      anchor_offsets: list of pairs of offsets in pixels (in y and x directions
+        respectively). The offset specifies where we want the center of the
+        (0, 0)-th anchor to lie for each layer.
+        For example, setting anchor_offsets=[(10, 10), (20, 20)] means that we
+        want the (0, 0)-th anchor of the first layer to lie at (10, 10) in
+        pixel space and likewise that we want the (0, 0)-th anchor of the
+        second layer to lie at (20, 20) in pixel space. If anchor_offsets=None,
+        then they are set to be half of the corresponding anchor stride.
+      clip_window: a tensor of shape [4] specifying a window to which all
+        anchors should be clipped. If clip_window is None, then no clipping
+        is performed.
+
+    Raises:
+      ValueError: if box_specs_list is not a list of list of pairs
+      ValueError: if clip_window is not either None or a tensor of shape [4]
+    """
+    if isinstance(box_specs_list, list) and all(
+        [isinstance(list_item, list) for list_item in box_specs_list]):
+      self._box_specs = box_specs_list
+    else:
+      raise ValueError('box_specs_list is expected to be a '
+                       'list of lists of pairs')
+    if base_anchor_size is None:
+      base_anchor_size = [256, 256]
+    self._base_anchor_size = base_anchor_size
+    self._anchor_strides = anchor_strides
+    self._anchor_offsets = anchor_offsets
+    if clip_window is not None and clip_window.get_shape().as_list() != [4]:
+      raise ValueError('clip_window must either be None or a shape [4] tensor')
+    self._clip_window = clip_window
+    self._scales = []
+    self._aspect_ratios = []
+    for box_spec in self._box_specs:
+      if not all([isinstance(entry, tuple) and len(entry) == 2
+                  for entry in box_spec]):
+        raise ValueError('box_specs_list is expected to be a '
+                         'list of lists of pairs')
+      scales, aspect_ratios = zip(*box_spec)
+      self._scales.append(scales)
+      self._aspect_ratios.append(aspect_ratios)
+
+    for arg, arg_name in zip([self._anchor_strides, self._anchor_offsets],
+                             ['anchor_strides', 'anchor_offsets']):
+      if arg and not (isinstance(arg, list) and
+                      len(arg) == len(self._box_specs)):
+        raise ValueError('%s must be a list with the same length '
+                         'as self._box_specs' % arg_name)
+      if arg and not all([
+          isinstance(list_item, tuple) and len(list_item) == 2
+          for list_item in arg
+      ]):
+        raise ValueError('%s must be a list of pairs.' % arg_name)
+
+  def name_scope(self):
+    return 'MultipleGridAnchorGenerator'
+
+  def num_anchors_per_location(self):
+    """Returns the number of anchors per spatial location.
+
+    Returns:
+      a list of integers, one for each expected feature map to be passed to
+      the `generate` function.
+    """
+    return [len(box_specs) for box_specs in self._box_specs]
+
+  def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
+    """Generates a collection of bounding boxes to be used as anchors.
+
+    The number of anchors generated for a single grid with shape MxM where we
+    place k boxes over each grid center is k*M^2 and thus the total number of
+    anchors is the sum over all grids. In our box_specs_list example
+    (see the constructor docstring), we would place two boxes over each grid
+    point on an 8x8 grid and three boxes over each grid point on a 4x4 grid and
+    thus end up with 2*8^2 + 3*4^2 = 176 anchors in total. The layout of the
+    output anchors follows the order of how the grid sizes and box_specs are
+    specified (with box_spec index varying the fastest, followed by width
+    index, then height index, then grid index).
+
+    Args:
+      feature_map_shape_list: list of pairs of convnet layer resolutions in the
+        format [(height_0, width_0), (height_1, width_1), ...]. For example,
+        setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
+        correspond to an 8x8 layer followed by a 7x7 layer.
+      im_height: the height of the image to generate the grid for. If both
+        im_height and im_width are 1, the generated anchors default to
+        absolute coordinates, otherwise normalized coordinates are produced.
+      im_width: the width of the image to generate the grid for. If both
+        im_height and im_width are 1, the generated anchors default to
+        absolute coordinates, otherwise normalized coordinates are produced.
+
+    Returns:
+      boxes_list: a list of BoxLists each holding anchor boxes corresponding to
+        the input feature map shapes.
+
+    Raises:
+      ValueError: if feature_map_shape_list, box_specs_list do not have the same
+        length.
+      ValueError: if feature_map_shape_list does not consist of pairs of
+        integers
+    """
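+    # Illustrative note (not from the original code): for a single 8x8
+    # feature map with anchor_strides and anchor_offsets left as None, the
+    # code below lays anchors out in normalized coordinates with stride
+    # (1/8, 1/8) and offset (1/16, 1/16), i.e. centered on the feature cells.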
+    if not (isinstance(feature_map_shape_list, list)
+            and len(feature_map_shape_list) == len(self._box_specs)):
+      raise ValueError('feature_map_shape_list must be a list with the same '
+                       'length as self._box_specs')
+    if not all([isinstance(list_item, tuple) and len(list_item) == 2
+                for list_item in feature_map_shape_list]):
+      raise ValueError('feature_map_shape_list must be a list of pairs.')
+
+    im_height = tf.cast(im_height, dtype=tf.float32)
+    im_width = tf.cast(im_width, dtype=tf.float32)
+
+    if not self._anchor_strides:
+      anchor_strides = [(1.0 / tf.cast(pair[0], dtype=tf.float32),
+                         1.0 / tf.cast(pair[1], dtype=tf.float32))
+                        for pair in feature_map_shape_list]
+    else:
+      anchor_strides = [(tf.cast(stride[0], dtype=tf.float32) / im_height,
+                         tf.cast(stride[1], dtype=tf.float32) / im_width)
+                        for stride in self._anchor_strides]
+    if not self._anchor_offsets:
+      anchor_offsets = [(0.5 * stride[0], 0.5 * stride[1])
+                        for stride in anchor_strides]
+    else:
+      anchor_offsets = [(tf.cast(offset[0], dtype=tf.float32) / im_height,
+                         tf.cast(offset[1], dtype=tf.float32) / im_width)
+                        for offset in self._anchor_offsets]
+
+    for arg, arg_name in zip([anchor_strides, anchor_offsets],
+                             ['anchor_strides', 'anchor_offsets']):
+      if not (isinstance(arg, list) and len(arg) == len(self._box_specs)):
+        raise ValueError('%s must be a list with the same length '
+                         'as self._box_specs' % arg_name)
+      if not all([isinstance(list_item, tuple) and len(list_item) == 2
+                  for list_item in arg]):
+        raise ValueError('%s must be a list of pairs.' % arg_name)
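+
+    # Each feature map gets its own anchor grid, built by rescaling the
+    # per-layer strides and offsets into normalized image coordinates and
+    # delegating to grid_anchor_generator.tile_anchors below.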
+    anchor_grid_list = []
+    min_im_shape = tf.minimum(im_height, im_width)
+    scale_height = min_im_shape / im_height
+    scale_width = min_im_shape / im_width
+    if not tf.contrib.framework.is_tensor(self._base_anchor_size):
+      base_anchor_size = [
+          scale_height * tf.constant(self._base_anchor_size[0],
+                                     dtype=tf.float32),
+          scale_width * tf.constant(self._base_anchor_size[1],
+                                    dtype=tf.float32)
+      ]
+    else:
+      base_anchor_size = [
+          scale_height * self._base_anchor_size[0],
+          scale_width * self._base_anchor_size[1]
+      ]
+    for feature_map_index, (grid_size, scales, aspect_ratios, stride,
+                            offset) in enumerate(
+                                zip(feature_map_shape_list, self._scales,
+                                    self._aspect_ratios, anchor_strides,
+                                    anchor_offsets)):
+      tiled_anchors = grid_anchor_generator.tile_anchors(
+          grid_height=grid_size[0],
+          grid_width=grid_size[1],
+          scales=scales,
+          aspect_ratios=aspect_ratios,
+          base_anchor_size=base_anchor_size,
+          anchor_stride=stride,
+          anchor_offset=offset)
+      if self._clip_window is not None:
+        tiled_anchors = box_list_ops.clip_to_window(
+            tiled_anchors, self._clip_window, filter_nonoverlapping=False)
+      num_anchors_in_layer = tiled_anchors.num_boxes_static()
+      if num_anchors_in_layer is None:
+        num_anchors_in_layer = tiled_anchors.num_boxes()
+      anchor_indices = feature_map_index * tf.ones([num_anchors_in_layer])
+      tiled_anchors.add_field('feature_map_index', anchor_indices)
+      anchor_grid_list.append(tiled_anchors)
+
+    return anchor_grid_list
+
+
+def create_ssd_anchors(num_layers=6,
+                       min_scale=0.2,
+                       max_scale=0.95,
+                       scales=None,
+                       aspect_ratios=(1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3),
+                       interpolated_scale_aspect_ratio=1.0,
+                       base_anchor_size=None,
+                       anchor_strides=None,
+                       anchor_offsets=None,
+                       reduce_boxes_in_lowest_layer=True):
+  """Creates MultipleGridAnchorGenerator for SSD anchors.
+
+  This function instantiates a MultipleGridAnchorGenerator that reproduces
+  ``default box`` construction proposed by Liu et al in the SSD paper.
+  See Section 2.2 for details. Grid sizes are assumed to be passed in
+  at generation time from finest resolution to coarsest resolution --- this is
+  used to (linearly) interpolate scales of anchor boxes corresponding to the
+  intermediate grid sizes.
+
+  Anchors that are returned by calling the `generate` method on the returned
+  MultipleGridAnchorGenerator object are always in normalized coordinates
+  and clipped to the unit square: (i.e. all coordinates lie in [0, 1]x[0, 1]).
+
+  Args:
+    num_layers: integer number of grid layers to create anchors for (actual
+      grid sizes passed in at generation time)
+    min_scale: scale of anchors corresponding to finest resolution (float)
+    max_scale: scale of anchors corresponding to coarsest resolution (float)
+    scales: a list of anchor scales to use. When not None and not empty,
+      min_scale and max_scale are not used.
+    aspect_ratios: list or tuple of (float) aspect ratios to place on each
+      grid point.
+    interpolated_scale_aspect_ratio: An additional anchor is added with this
+      aspect ratio and a scale interpolated between the scale for a layer
+      and the scale for the next layer (1.0 for the last layer).
+      This anchor is not included if this value is 0.
+    base_anchor_size: base anchor size as [height, width].
+      The height and width values are normalized to the minimum dimension of
+      the input height and width, so that when the base anchor height equals
+      the base anchor width, the resulting anchor is square even if the input
+      image is not square.
+    anchor_strides: list of pairs of strides in pixels (in y and x directions
+      respectively). For example, setting anchor_strides=[(25, 25), (50, 50)]
+      means that we want the anchors corresponding to the first layer to be
+      strided by 25 pixels and those in the second layer to be strided by 50
+      pixels in both y and x directions. If anchor_strides=None, they are set
+      to be the reciprocal of the corresponding feature map shapes.
+    anchor_offsets: list of pairs of offsets in pixels (in y and x directions
+      respectively). The offset specifies where we want the center of the
+      (0, 0)-th anchor to lie for each layer. For example, setting
+      anchor_offsets=[(10, 10), (20, 20)] means that we want the
+      (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space
+      and likewise that we want the (0, 0)-th anchor of the second layer to
+      lie at (20, 20) in pixel space. If anchor_offsets=None, then they are
+      set to be half of the corresponding anchor stride.
+    reduce_boxes_in_lowest_layer: a boolean to indicate whether the fixed 3
+      boxes per location is used in the lowest layer.
+
+  Returns:
+    a MultipleGridAnchorGenerator
+  """
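+  # Worked example (illustrative): with the defaults num_layers=6,
+  # min_scale=0.2 and max_scale=0.95, the linear interpolation below yields
+  # scales = [0.2, 0.35, 0.5, 0.65, 0.8, 0.95], plus a trailing 1.0 that is
+  # only used as scale_next when interpolating the coarsest layer's extra
+  # anchor scale.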
+  if base_anchor_size is None:
+    base_anchor_size = [1.0, 1.0]
+  box_specs_list = []
+  if scales is None or not scales:
+    scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1)
+              for i in range(num_layers)] + [1.0]
+  else:
+    # Add 1.0 to the end, which will only be used in scale_next below to
+    # compute an interpolated scale for the largest scale in the list.
+    scales += [1.0]
+
+  for layer, scale, scale_next in zip(
+      range(num_layers), scales[:-1], scales[1:]):
+    layer_box_specs = []
+    if layer == 0 and reduce_boxes_in_lowest_layer:
+      layer_box_specs = [(0.1, 1.0), (scale, 2.0), (scale, 0.5)]
+    else:
+      for aspect_ratio in aspect_ratios:
+        layer_box_specs.append((scale, aspect_ratio))
+      # Add one more anchor, with a scale between the current scale and the
+      # scale for the next layer, with a specified aspect ratio (1.0 by
+      # default).
+      if interpolated_scale_aspect_ratio > 0.0:
+        layer_box_specs.append((np.sqrt(scale*scale_next),
+                                interpolated_scale_aspect_ratio))
+    box_specs_list.append(layer_box_specs)
+
+  return MultipleGridAnchorGenerator(box_specs_list, base_anchor_size,
+                                     anchor_strides, anchor_offsets)
diff --git a/anchor_generators/multiple_grid_anchor_generator_test.py b/anchor_generators/multiple_grid_anchor_generator_test.py
new file mode 100644
index 0000000..070d81d
--- /dev/null
+++ b/anchor_generators/multiple_grid_anchor_generator_test.py
@@ -0,0 +1,289 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================== + +"""Tests for anchor_generators.multiple_grid_anchor_generator_test.py.""" + +import numpy as np + +import tensorflow as tf + +from object_detection.anchor_generators import multiple_grid_anchor_generator as ag +from object_detection.utils import test_case + + +class MultipleGridAnchorGeneratorTest(test_case.TestCase): + + def test_construct_single_anchor_grid(self): + """Builds a 1x1 anchor grid to test the size of the output boxes.""" + def graph_fn(): + + box_specs_list = [[(.5, .25), (1.0, .25), (2.0, .25), + (.5, 1.0), (1.0, 1.0), (2.0, 1.0), + (.5, 4.0), (1.0, 4.0), (2.0, 4.0)]] + anchor_generator = ag.MultipleGridAnchorGenerator( + box_specs_list, + base_anchor_size=tf.constant([256, 256], dtype=tf.float32), + anchor_strides=[(16, 16)], + anchor_offsets=[(7, -3)]) + anchors_list = anchor_generator.generate(feature_map_shape_list=[(1, 1)]) + return anchors_list[0].get() + exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61], + [-505, -131, 519, 125], [-57, -67, 71, 61], + [-121, -131, 135, 125], [-249, -259, 263, 253], + [-25, -131, 39, 125], [-57, -259, 71, 253], + [-121, -515, 135, 509]] + + anchor_corners_out = self.execute(graph_fn, []) + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_anchor_grid(self): + def graph_fn(): + box_specs_list = [[(0.5, 1.0), (1.0, 1.0), (2.0, 1.0)]] + + anchor_generator = ag.MultipleGridAnchorGenerator( + box_specs_list, + base_anchor_size=tf.constant([10, 10], dtype=tf.float32), + anchor_strides=[(19, 19)], + anchor_offsets=[(0, 0)]) + anchors_list = anchor_generator.generate(feature_map_shape_list=[(2, 2)]) + return anchors_list[0].get() + exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.], + [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5], + [-5., 14., 5, 24], [-10., 9., 10, 29], + [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5], + [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5], + [14., 14., 24, 24], [9., 9., 29, 29]] + + anchor_corners_out = self.execute(graph_fn, []) + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_anchor_grid_non_square(self): + + def graph_fn(): + box_specs_list = [[(1.0, 1.0)]] + anchor_generator = ag.MultipleGridAnchorGenerator( + box_specs_list, base_anchor_size=tf.constant([1, 1], + dtype=tf.float32)) + anchors_list = anchor_generator.generate(feature_map_shape_list=[( + tf.constant(1, dtype=tf.int32), tf.constant(2, dtype=tf.int32))]) + return anchors_list[0].get() + + exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]] + anchor_corners_out = self.execute(graph_fn, []) + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_dynamic_size_anchor_grid(self): + + def graph_fn(height, width): + box_specs_list = [[(1.0, 1.0)]] + anchor_generator = ag.MultipleGridAnchorGenerator( + box_specs_list, base_anchor_size=tf.constant([1, 1], + dtype=tf.float32)) + anchors_list = anchor_generator.generate(feature_map_shape_list=[(height, + width)]) + return anchors_list[0].get() + + exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]] + + anchor_corners_out = self.execute_cpu(graph_fn, + [np.array(1, dtype=np.int32), + np.array(2, dtype=np.int32)]) + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_anchor_grid_normalized(self): + def graph_fn(): + box_specs_list = [[(1.0, 1.0)]] + + anchor_generator = ag.MultipleGridAnchorGenerator( + box_specs_list, 
base_anchor_size=tf.constant([1, 1], + dtype=tf.float32)) + anchors_list = anchor_generator.generate( + feature_map_shape_list=[(tf.constant(1, dtype=tf.int32), tf.constant( + 2, dtype=tf.int32))], + im_height=320, + im_width=640) + return anchors_list[0].get() + + exp_anchor_corners = [[0., 0., 1., 0.5], [0., 0.5, 1., 1.]] + anchor_corners_out = self.execute(graph_fn, []) + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_multiple_grids(self): + + def graph_fn(): + box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)], + [(1.0, 1.0), (1.0, 0.5)]] + + anchor_generator = ag.MultipleGridAnchorGenerator( + box_specs_list, + base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), + anchor_strides=[(.25, .25), (.5, .5)], + anchor_offsets=[(.125, .125), (.25, .25)]) + anchors_list = anchor_generator.generate(feature_map_shape_list=[(4, 4), ( + 2, 2)]) + return [anchors.get() for anchors in anchors_list] + # height and width of box with .5 aspect ratio + h = np.sqrt(2) + w = 1.0/np.sqrt(2) + exp_small_grid_corners = [[-.25, -.25, .75, .75], + [.25-.5*h, .25-.5*w, .25+.5*h, .25+.5*w], + [-.25, .25, .75, 1.25], + [.25-.5*h, .75-.5*w, .25+.5*h, .75+.5*w], + [.25, -.25, 1.25, .75], + [.75-.5*h, .25-.5*w, .75+.5*h, .25+.5*w], + [.25, .25, 1.25, 1.25], + [.75-.5*h, .75-.5*w, .75+.5*h, .75+.5*w]] + # only test first entry of larger set of anchors + exp_big_grid_corners = [[.125-.5, .125-.5, .125+.5, .125+.5], + [.125-1.0, .125-1.0, .125+1.0, .125+1.0], + [.125-.5*h, .125-.5*w, .125+.5*h, .125+.5*w],] + + anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0) + self.assertEquals(anchor_corners_out.shape, (56, 4)) + big_grid_corners = anchor_corners_out[0:3, :] + small_grid_corners = anchor_corners_out[48:, :] + self.assertAllClose(small_grid_corners, exp_small_grid_corners) + self.assertAllClose(big_grid_corners, exp_big_grid_corners) + + def test_construct_multiple_grids_with_clipping(self): + + def graph_fn(): + box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)], + [(1.0, 1.0), (1.0, 0.5)]] + + clip_window = tf.constant([0, 0, 1, 1], dtype=tf.float32) + anchor_generator = ag.MultipleGridAnchorGenerator( + box_specs_list, + base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), + clip_window=clip_window) + anchors_list = anchor_generator.generate(feature_map_shape_list=[(4, 4), ( + 2, 2)]) + return [anchors.get() for anchors in anchors_list] + # height and width of box with .5 aspect ratio + h = np.sqrt(2) + w = 1.0/np.sqrt(2) + exp_small_grid_corners = [[0, 0, .75, .75], + [0, 0, .25+.5*h, .25+.5*w], + [0, .25, .75, 1], + [0, .75-.5*w, .25+.5*h, 1], + [.25, 0, 1, .75], + [.75-.5*h, 0, 1, .25+.5*w], + [.25, .25, 1, 1], + [.75-.5*h, .75-.5*w, 1, 1]] + + anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0) + small_grid_corners = anchor_corners_out[48:, :] + self.assertAllClose(small_grid_corners, exp_small_grid_corners) + + def test_invalid_box_specs(self): + # not all box specs are pairs + box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)], + [(1.0, 1.0), (1.0, 0.5, .3)]] + with self.assertRaises(ValueError): + ag.MultipleGridAnchorGenerator(box_specs_list) + + # box_specs_list is not a list of lists + box_specs_list = [(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)] + with self.assertRaises(ValueError): + ag.MultipleGridAnchorGenerator(box_specs_list) + + def test_invalid_generate_arguments(self): + box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)], + [(1.0, 1.0), (1.0, 0.5)]] + + # incompatible lengths with box_specs_list 
+ with self.assertRaises(ValueError): + anchor_generator = ag.MultipleGridAnchorGenerator( + box_specs_list, + base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), + anchor_strides=[(.25, .25)], + anchor_offsets=[(.125, .125), (.25, .25)]) + anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)]) + with self.assertRaises(ValueError): + anchor_generator = ag.MultipleGridAnchorGenerator( + box_specs_list, + base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), + anchor_strides=[(.25, .25), (.5, .5)], + anchor_offsets=[(.125, .125), (.25, .25)]) + anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2), (1, 1)]) + with self.assertRaises(ValueError): + anchor_generator = ag.MultipleGridAnchorGenerator( + box_specs_list, + base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), + anchor_strides=[(.5, .5)], + anchor_offsets=[(.25, .25)]) + anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)]) + + # not pairs + with self.assertRaises(ValueError): + anchor_generator = ag.MultipleGridAnchorGenerator( + box_specs_list, + base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), + anchor_strides=[(.25, .25), (.5, .5)], + anchor_offsets=[(.125, .125), (.25, .25)]) + anchor_generator.generate(feature_map_shape_list=[(4, 4, 4), (2, 2)]) + with self.assertRaises(ValueError): + anchor_generator = ag.MultipleGridAnchorGenerator( + box_specs_list, + base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), + anchor_strides=[(.25, .25, .1), (.5, .5)], + anchor_offsets=[(.125, .125), (.25, .25)]) + anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)]) + with self.assertRaises(ValueError): + anchor_generator = ag.MultipleGridAnchorGenerator( + box_specs_list, + base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), + anchor_strides=[(.25, .25), (.5, .5)], + anchor_offsets=[(.125, .125), (.25, .25)]) + anchor_generator.generate(feature_map_shape_list=[(4), (2, 2)]) + + +class CreateSSDAnchorsTest(test_case.TestCase): + + def test_create_ssd_anchors_returns_correct_shape(self): + + def graph_fn1(): + anchor_generator = ag.create_ssd_anchors( + num_layers=6, + min_scale=0.2, + max_scale=0.95, + aspect_ratios=(1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3), + reduce_boxes_in_lowest_layer=True) + + feature_map_shape_list = [(38, 38), (19, 19), (10, 10), + (5, 5), (3, 3), (1, 1)] + anchors_list = anchor_generator.generate( + feature_map_shape_list=feature_map_shape_list) + return [anchors.get() for anchors in anchors_list] + anchor_corners_out = np.concatenate(self.execute(graph_fn1, []), axis=0) + self.assertEquals(anchor_corners_out.shape, (7308, 4)) + + def graph_fn2(): + anchor_generator = ag.create_ssd_anchors( + num_layers=6, min_scale=0.2, max_scale=0.95, + aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3), + reduce_boxes_in_lowest_layer=False) + + feature_map_shape_list = [(38, 38), (19, 19), (10, 10), + (5, 5), (3, 3), (1, 1)] + anchors_list = anchor_generator.generate( + feature_map_shape_list=feature_map_shape_list) + return [anchors.get() for anchors in anchors_list] + anchor_corners_out = np.concatenate(self.execute(graph_fn2, []), axis=0) + self.assertEquals(anchor_corners_out.shape, (11640, 4)) + + +if __name__ == '__main__': + tf.test.main() diff --git a/anchor_generators/multiscale_grid_anchor_generator.py b/anchor_generators/multiscale_grid_anchor_generator.py new file mode 100644 index 0000000..cd2440a --- /dev/null +++ b/anchor_generators/multiscale_grid_anchor_generator.py @@ -0,0 +1,145 @@ +# Copyright 2017 The TensorFlow Authors. 
All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Generates grid anchors on the fly corresponding to multiple CNN layers.
+
+Generates grid anchors on the fly corresponding to multiple CNN layers as
+described in:
+"Focal Loss for Dense Object Detection" (https://arxiv.org/abs/1708.02002)
+T.-Y. Lin, P. Goyal, R. Girshick, K. He, P. Dollar
+"""
+
+from object_detection.anchor_generators import grid_anchor_generator
+from object_detection.core import anchor_generator
+from object_detection.core import box_list_ops
+
+
+class MultiscaleGridAnchorGenerator(anchor_generator.AnchorGenerator):
+  """Generate a grid of anchors for multiple CNN layers of different scale."""
+
+  def __init__(self, min_level, max_level, anchor_scale, aspect_ratios,
+               scales_per_octave, normalize_coordinates=True):
+    """Constructs a MultiscaleGridAnchorGenerator.
+
+    To construct anchors at multiple scale resolutions, one must provide the
+    minimum and maximum levels of a scale pyramid. To define the anchor size,
+    an anchor scale is provided that sets the size relative to the stride of
+    the corresponding feature map. The generator allows one pixel location on
+    a feature map to map to multiple anchors that have different aspect
+    ratios and intermediate scales.
+
+    Args:
+      min_level: minimum level in feature pyramid.
+      max_level: maximum level in feature pyramid.
+      anchor_scale: anchor scale and feature stride define the size of the base
+        anchor on an image. For example, given a feature pyramid with strides
+        [2^3, ..., 2^7] and anchor scale 4, the base anchor sizes are
+        4 * [2^3, ..., 2^7].
+      aspect_ratios: list or tuple of (float) aspect ratios to place on each
+        grid point.
+      scales_per_octave: integer number of intermediate scales per scale
+        octave.
+      normalize_coordinates: whether to produce anchors in normalized
+        coordinates. (defaults to True).
+    """
+    self._anchor_grid_info = []
+    self._aspect_ratios = aspect_ratios
+    self._scales_per_octave = scales_per_octave
+    self._normalize_coordinates = normalize_coordinates
+
+    scales = [2**(float(scale) / scales_per_octave)
+              for scale in range(scales_per_octave)]
+    aspects = list(aspect_ratios)
+
+    for level in range(min_level, max_level + 1):
+      anchor_stride = [2**level, 2**level]
+      base_anchor_size = [2**level * anchor_scale, 2**level * anchor_scale]
+      self._anchor_grid_info.append({
+          'level': level,
+          'info': [scales, aspects, base_anchor_size, anchor_stride]
+      })
+
+  def name_scope(self):
+    return 'MultiscaleGridAnchorGenerator'
+
+  def num_anchors_per_location(self):
+    """Returns the number of anchors per spatial location.
+
+    Returns:
+      a list of integers, one for each expected feature map to be passed to
+      the `generate` function.
+    """
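+    # Illustrative example (mirrors the unit tests below): with
+    # aspect_ratios=[1.0, 2.0] and scales_per_octave=3, each feature map
+    # contributes 2 * 3 = 6 anchors per spatial location.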
+ """ + return len(self._anchor_grid_info) * [ + len(self._aspect_ratios) * self._scales_per_octave] + + def _generate(self, feature_map_shape_list, im_height=1, im_width=1): + """Generates a collection of bounding boxes to be used as anchors. + + Currently we require the input image shape to be statically defined. That + is, im_height and im_width should be integers rather than tensors. + + Args: + feature_map_shape_list: list of pairs of convnet layer resolutions in the + format [(height_0, width_0), (height_1, width_1), ...]. For example, + setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that + correspond to an 8x8 layer followed by a 7x7 layer. + im_height: the height of the image to generate the grid for. If both + im_height and im_width are 1, anchors can only be generated in + absolute coordinates. + im_width: the width of the image to generate the grid for. If both + im_height and im_width are 1, anchors can only be generated in + absolute coordinates. + + Returns: + boxes_list: a list of BoxLists each holding anchor boxes corresponding to + the input feature map shapes. + Raises: + ValueError: if im_height and im_width are not integers. + ValueError: if im_height and im_width are 1, but normalized coordinates + were requested. + """ + anchor_grid_list = [] + for feat_shape, grid_info in zip(feature_map_shape_list, + self._anchor_grid_info): + # TODO(rathodv) check the feature_map_shape_list is consistent with + # self._anchor_grid_info + level = grid_info['level'] + stride = 2**level + scales, aspect_ratios, base_anchor_size, anchor_stride = grid_info['info'] + feat_h = feat_shape[0] + feat_w = feat_shape[1] + anchor_offset = [0, 0] + if isinstance(im_height, int) and isinstance(im_width, int): + if im_height % 2.0**level == 0 or im_height == 1: + anchor_offset[0] = stride / 2.0 + if im_width % 2.0**level == 0 or im_width == 1: + anchor_offset[1] = stride / 2.0 + ag = grid_anchor_generator.GridAnchorGenerator( + scales, + aspect_ratios, + base_anchor_size=base_anchor_size, + anchor_stride=anchor_stride, + anchor_offset=anchor_offset) + (anchor_grid,) = ag.generate(feature_map_shape_list=[(feat_h, feat_w)]) + + if self._normalize_coordinates: + if im_height == 1 or im_width == 1: + raise ValueError( + 'Normalized coordinates were requested upon construction of the ' + 'MultiscaleGridAnchorGenerator, but a subsequent call to ' + 'generate did not supply dimension information.') + anchor_grid = box_list_ops.to_normalized_coordinates( + anchor_grid, im_height, im_width, check_range=False) + anchor_grid_list.append(anchor_grid) + + return anchor_grid_list diff --git a/anchor_generators/multiscale_grid_anchor_generator_test.py b/anchor_generators/multiscale_grid_anchor_generator_test.py new file mode 100644 index 0000000..178705c --- /dev/null +++ b/anchor_generators/multiscale_grid_anchor_generator_test.py @@ -0,0 +1,302 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for anchor_generators.multiscale_grid_anchor_generator_test.py.""" +import numpy as np +import tensorflow as tf + +from object_detection.anchor_generators import multiscale_grid_anchor_generator as mg +from object_detection.utils import test_case + + +class MultiscaleGridAnchorGeneratorTest(test_case.TestCase): + + def test_construct_single_anchor(self): + min_level = 5 + max_level = 5 + anchor_scale = 4.0 + aspect_ratios = [1.0] + scales_per_octave = 1 + im_height = 64 + im_width = 64 + feature_map_shape_list = [(2, 2)] + exp_anchor_corners = [[-48, -48, 80, 80], + [-48, -16, 80, 112], + [-16, -48, 112, 80], + [-16, -16, 112, 112]] + anchor_generator = mg.MultiscaleGridAnchorGenerator( + min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave, + normalize_coordinates=False) + anchors_list = anchor_generator.generate( + feature_map_shape_list, im_height=im_height, im_width=im_width) + anchor_corners = anchors_list[0].get() + + with self.test_session(): + anchor_corners_out = anchor_corners.eval() + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_single_anchor_unit_dimensions(self): + min_level = 5 + max_level = 5 + anchor_scale = 1.0 + aspect_ratios = [1.0] + scales_per_octave = 1 + im_height = 1 + im_width = 1 + feature_map_shape_list = [(2, 2)] + # Positive offsets are produced. + exp_anchor_corners = [[0, 0, 32, 32], + [0, 32, 32, 64], + [32, 0, 64, 32], + [32, 32, 64, 64]] + + anchor_generator = mg.MultiscaleGridAnchorGenerator( + min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave, + normalize_coordinates=False) + anchors_list = anchor_generator.generate( + feature_map_shape_list, im_height=im_height, im_width=im_width) + anchor_corners = anchors_list[0].get() + + with self.test_session(): + anchor_corners_out = anchor_corners.eval() + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_normalized_anchors_fails_with_unit_dimensions(self): + anchor_generator = mg.MultiscaleGridAnchorGenerator( + min_level=5, max_level=5, anchor_scale=1.0, aspect_ratios=[1.0], + scales_per_octave=1, normalize_coordinates=True) + with self.assertRaisesRegexp(ValueError, 'Normalized coordinates'): + anchor_generator.generate( + feature_map_shape_list=[(2, 2)], im_height=1, im_width=1) + + def test_construct_single_anchor_in_normalized_coordinates(self): + min_level = 5 + max_level = 5 + anchor_scale = 4.0 + aspect_ratios = [1.0] + scales_per_octave = 1 + im_height = 64 + im_width = 128 + feature_map_shape_list = [(2, 2)] + exp_anchor_corners = [[-48./64, -48./128, 80./64, 80./128], + [-48./64, -16./128, 80./64, 112./128], + [-16./64, -48./128, 112./64, 80./128], + [-16./64, -16./128, 112./64, 112./128]] + anchor_generator = mg.MultiscaleGridAnchorGenerator( + min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave, + normalize_coordinates=True) + anchors_list = anchor_generator.generate( + feature_map_shape_list, im_height=im_height, im_width=im_width) + anchor_corners = anchors_list[0].get() + + with self.test_session(): + anchor_corners_out = anchor_corners.eval() + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_num_anchors_per_location(self): + min_level = 5 + max_level = 6 + anchor_scale = 4.0 + aspect_ratios = [1.0, 2.0] + scales_per_octave = 3 + anchor_generator = mg.MultiscaleGridAnchorGenerator( + min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave, + 
normalize_coordinates=False) + self.assertEqual(anchor_generator.num_anchors_per_location(), [6, 6]) + + def test_construct_single_anchor_dynamic_size(self): + min_level = 5 + max_level = 5 + anchor_scale = 4.0 + aspect_ratios = [1.0] + scales_per_octave = 1 + im_height = tf.constant(64) + im_width = tf.constant(64) + feature_map_shape_list = [(2, 2)] + # Zero offsets are used. + exp_anchor_corners = [[-64, -64, 64, 64], + [-64, -32, 64, 96], + [-32, -64, 96, 64], + [-32, -32, 96, 96]] + + anchor_generator = mg.MultiscaleGridAnchorGenerator( + min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave, + normalize_coordinates=False) + anchors_list = anchor_generator.generate( + feature_map_shape_list, im_height=im_height, im_width=im_width) + anchor_corners = anchors_list[0].get() + + with self.test_session(): + anchor_corners_out = anchor_corners.eval() + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_single_anchor_with_odd_input_dimension(self): + + def graph_fn(): + min_level = 5 + max_level = 5 + anchor_scale = 4.0 + aspect_ratios = [1.0] + scales_per_octave = 1 + im_height = 65 + im_width = 65 + feature_map_shape_list = [(3, 3)] + anchor_generator = mg.MultiscaleGridAnchorGenerator( + min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave, + normalize_coordinates=False) + anchors_list = anchor_generator.generate( + feature_map_shape_list, im_height=im_height, im_width=im_width) + anchor_corners = anchors_list[0].get() + return (anchor_corners,) + anchor_corners_out = self.execute(graph_fn, []) + exp_anchor_corners = [[-64, -64, 64, 64], + [-64, -32, 64, 96], + [-64, 0, 64, 128], + [-32, -64, 96, 64], + [-32, -32, 96, 96], + [-32, 0, 96, 128], + [0, -64, 128, 64], + [0, -32, 128, 96], + [0, 0, 128, 128]] + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_single_anchor_on_two_feature_maps(self): + + def graph_fn(): + min_level = 5 + max_level = 6 + anchor_scale = 4.0 + aspect_ratios = [1.0] + scales_per_octave = 1 + im_height = 64 + im_width = 64 + feature_map_shape_list = [(2, 2), (1, 1)] + anchor_generator = mg.MultiscaleGridAnchorGenerator( + min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave, + normalize_coordinates=False) + anchors_list = anchor_generator.generate(feature_map_shape_list, + im_height=im_height, + im_width=im_width) + anchor_corners = [anchors.get() for anchors in anchors_list] + return anchor_corners + + anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0) + exp_anchor_corners = [[-48, -48, 80, 80], + [-48, -16, 80, 112], + [-16, -48, 112, 80], + [-16, -16, 112, 112], + [-96, -96, 160, 160]] + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_single_anchor_with_two_scales_per_octave(self): + + def graph_fn(): + min_level = 6 + max_level = 6 + anchor_scale = 4.0 + aspect_ratios = [1.0] + scales_per_octave = 2 + im_height = 64 + im_width = 64 + feature_map_shape_list = [(1, 1)] + + anchor_generator = mg.MultiscaleGridAnchorGenerator( + min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave, + normalize_coordinates=False) + anchors_list = anchor_generator.generate(feature_map_shape_list, + im_height=im_height, + im_width=im_width) + anchor_corners = [anchors.get() for anchors in anchors_list] + return anchor_corners + # There are 4 set of anchors in this configuration. 
The order is: + # [[2**0.0 intermediate scale + 1.0 aspect], + # [2**0.5 intermediate scale + 1.0 aspect]] + exp_anchor_corners = [[-96., -96., 160., 160.], + [-149.0193, -149.0193, 213.0193, 213.0193]] + + anchor_corners_out = self.execute(graph_fn, []) + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_single_anchor_with_two_scales_per_octave_and_aspect(self): + def graph_fn(): + min_level = 6 + max_level = 6 + anchor_scale = 4.0 + aspect_ratios = [1.0, 2.0] + scales_per_octave = 2 + im_height = 64 + im_width = 64 + feature_map_shape_list = [(1, 1)] + anchor_generator = mg.MultiscaleGridAnchorGenerator( + min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave, + normalize_coordinates=False) + anchors_list = anchor_generator.generate(feature_map_shape_list, + im_height=im_height, + im_width=im_width) + anchor_corners = [anchors.get() for anchors in anchors_list] + return anchor_corners + # There are 4 set of anchors in this configuration. The order is: + # [[2**0.0 intermediate scale + 1.0 aspect], + # [2**0.5 intermediate scale + 1.0 aspect], + # [2**0.0 intermediate scale + 2.0 aspect], + # [2**0.5 intermediate scale + 2.0 aspect]] + + exp_anchor_corners = [[-96., -96., 160., 160.], + [-149.0193, -149.0193, 213.0193, 213.0193], + [-58.50967, -149.0193, 122.50967, 213.0193], + [-96., -224., 160., 288.]] + anchor_corners_out = self.execute(graph_fn, []) + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + def test_construct_single_anchors_on_feature_maps_with_dynamic_shape(self): + + def graph_fn(feature_map1_height, feature_map1_width, feature_map2_height, + feature_map2_width): + min_level = 5 + max_level = 6 + anchor_scale = 4.0 + aspect_ratios = [1.0] + scales_per_octave = 1 + im_height = 64 + im_width = 64 + feature_map_shape_list = [(feature_map1_height, feature_map1_width), + (feature_map2_height, feature_map2_width)] + anchor_generator = mg.MultiscaleGridAnchorGenerator( + min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave, + normalize_coordinates=False) + anchors_list = anchor_generator.generate(feature_map_shape_list, + im_height=im_height, + im_width=im_width) + anchor_corners = [anchors.get() for anchors in anchors_list] + return anchor_corners + + anchor_corners_out = np.concatenate( + self.execute_cpu(graph_fn, [ + np.array(2, dtype=np.int32), + np.array(2, dtype=np.int32), + np.array(1, dtype=np.int32), + np.array(1, dtype=np.int32) + ]), + axis=0) + exp_anchor_corners = [[-48, -48, 80, 80], + [-48, -16, 80, 112], + [-16, -48, 112, 80], + [-16, -16, 112, 112], + [-96, -96, 160, 160]] + self.assertAllClose(anchor_corners_out, exp_anchor_corners) + + +if __name__ == '__main__': + tf.test.main() diff --git a/box_coders/faster_rcnn_box_coder.py b/box_coders/faster_rcnn_box_coder.py new file mode 100644 index 0000000..af25e21 --- /dev/null +++ b/box_coders/faster_rcnn_box_coder.py @@ -0,0 +1,118 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Faster RCNN box coder. + +Faster RCNN box coder follows the coding schema described below: + ty = (y - ya) / ha + tx = (x - xa) / wa + th = log(h / ha) + tw = log(w / wa) + where x, y, w, h denote the box's center coordinates, width and height + respectively. Similarly, xa, ya, wa, ha denote the anchor's center + coordinates, width and height. tx, ty, tw and th denote the anchor-encoded + center, width and height respectively. + + See http://arxiv.org/abs/1506.01497 for details. +""" + +import tensorflow as tf + +from object_detection.core import box_coder +from object_detection.core import box_list + +EPSILON = 1e-8 + + +class FasterRcnnBoxCoder(box_coder.BoxCoder): + """Faster RCNN box coder.""" + + def __init__(self, scale_factors=None): + """Constructor for FasterRcnnBoxCoder. + + Args: + scale_factors: List of 4 positive scalars to scale ty, tx, th and tw. + If set to None, does not perform scaling. For Faster RCNN, + the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0]. + """ + if scale_factors: + assert len(scale_factors) == 4 + for scalar in scale_factors: + assert scalar > 0 + self._scale_factors = scale_factors + + @property + def code_size(self): + return 4 + + def _encode(self, boxes, anchors): + """Encode a box collection with respect to anchor collection. + + Args: + boxes: BoxList holding N boxes to be encoded. + anchors: BoxList of anchors. + + Returns: + a tensor representing N anchor-encoded boxes of the format + [ty, tx, th, tw]. + """ + # Convert anchors to the center coordinate representation. + ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() + ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes() + # Avoid NaN in division and log below. + ha += EPSILON + wa += EPSILON + h += EPSILON + w += EPSILON + + tx = (xcenter - xcenter_a) / wa + ty = (ycenter - ycenter_a) / ha + tw = tf.log(w / wa) + th = tf.log(h / ha) + # Scales location targets as used in paper for joint training. + if self._scale_factors: + ty *= self._scale_factors[0] + tx *= self._scale_factors[1] + th *= self._scale_factors[2] + tw *= self._scale_factors[3] + return tf.transpose(tf.stack([ty, tx, th, tw])) + + def _decode(self, rel_codes, anchors): + """Decode relative codes to boxes. + + Args: + rel_codes: a tensor representing N anchor-encoded boxes. + anchors: BoxList of anchors. + + Returns: + boxes: BoxList holding N bounding boxes. + """ + ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() + + ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes)) + if self._scale_factors: + ty /= self._scale_factors[0] + tx /= self._scale_factors[1] + th /= self._scale_factors[2] + tw /= self._scale_factors[3] + w = tf.exp(tw) * wa + h = tf.exp(th) * ha + ycenter = ty * ha + ycenter_a + xcenter = tx * wa + xcenter_a + ymin = ycenter - h / 2. + xmin = xcenter - w / 2. + ymax = ycenter + h / 2. + xmax = xcenter + w / 2. + return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax]))) diff --git a/box_coders/faster_rcnn_box_coder_test.py b/box_coders/faster_rcnn_box_coder_test.py new file mode 100644 index 0000000..b2135f0 --- /dev/null +++ b/box_coders/faster_rcnn_box_coder_test.py @@ -0,0 +1,94 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for object_detection.box_coder.faster_rcnn_box_coder.""" + +import tensorflow as tf + +from object_detection.box_coders import faster_rcnn_box_coder +from object_detection.core import box_list + + +class FasterRcnnBoxCoderTest(tf.test.TestCase): + + def test_get_correct_relative_codes_after_encoding(self): + boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] + anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] + expected_rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321], + [-0.083333, -0.222222, -0.693147, -1.098612]] + boxes = box_list.BoxList(tf.constant(boxes)) + anchors = box_list.BoxList(tf.constant(anchors)) + coder = faster_rcnn_box_coder.FasterRcnnBoxCoder() + rel_codes = coder.encode(boxes, anchors) + with self.test_session() as sess: + rel_codes_out, = sess.run([rel_codes]) + self.assertAllClose(rel_codes_out, expected_rel_codes) + + def test_get_correct_relative_codes_after_encoding_with_scaling(self): + boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] + anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] + scale_factors = [2, 3, 4, 5] + expected_rel_codes = [[-1., -1.25, -1.62186, -0.911608], + [-0.166667, -0.666667, -2.772588, -5.493062]] + boxes = box_list.BoxList(tf.constant(boxes)) + anchors = box_list.BoxList(tf.constant(anchors)) + coder = faster_rcnn_box_coder.FasterRcnnBoxCoder( + scale_factors=scale_factors) + rel_codes = coder.encode(boxes, anchors) + with self.test_session() as sess: + rel_codes_out, = sess.run([rel_codes]) + self.assertAllClose(rel_codes_out, expected_rel_codes) + + def test_get_correct_boxes_after_decoding(self): + anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] + rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321], + [-0.083333, -0.222222, -0.693147, -1.098612]] + expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] + anchors = box_list.BoxList(tf.constant(anchors)) + coder = faster_rcnn_box_coder.FasterRcnnBoxCoder() + boxes = coder.decode(rel_codes, anchors) + with self.test_session() as sess: + boxes_out, = sess.run([boxes.get()]) + self.assertAllClose(boxes_out, expected_boxes) + + def test_get_correct_boxes_after_decoding_with_scaling(self): + anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] + rel_codes = [[-1., -1.25, -1.62186, -0.911608], + [-0.166667, -0.666667, -2.772588, -5.493062]] + scale_factors = [2, 3, 4, 5] + expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] + anchors = box_list.BoxList(tf.constant(anchors)) + coder = faster_rcnn_box_coder.FasterRcnnBoxCoder( + scale_factors=scale_factors) + boxes = coder.decode(rel_codes, anchors) + with self.test_session() as sess: + boxes_out, = sess.run([boxes.get()]) + self.assertAllClose(boxes_out, expected_boxes) + + def test_very_small_width_nan_after_encoding(self): + boxes = [[10.0, 10.0, 10.0000001, 20.0]] + anchors = [[15.0, 12.0, 30.0, 18.0]] + expected_rel_codes =
[[-0.833333, 0., -21.128731, 0.510826]] + boxes = box_list.BoxList(tf.constant(boxes)) + anchors = box_list.BoxList(tf.constant(anchors)) + coder = faster_rcnn_box_coder.FasterRcnnBoxCoder() + rel_codes = coder.encode(boxes, anchors) + with self.test_session() as sess: + rel_codes_out, = sess.run([rel_codes]) + self.assertAllClose(rel_codes_out, expected_rel_codes) + + +if __name__ == '__main__': + tf.test.main() diff --git a/box_coders/keypoint_box_coder.py b/box_coders/keypoint_box_coder.py new file mode 100644 index 0000000..fabcc5a --- /dev/null +++ b/box_coders/keypoint_box_coder.py @@ -0,0 +1,173 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Keypoint box coder. + +The keypoint box coder follows the coding schema described below (this is +similar to the FasterRcnnBoxCoder, except that it encodes keypoints in addition +to box coordinates): + ty = (y - ya) / ha + tx = (x - xa) / wa + th = log(h / ha) + tw = log(w / wa) + tky0 = (ky0 - ya) / ha + tkx0 = (kx0 - xa) / wa + tky1 = (ky1 - ya) / ha + tkx1 = (kx1 - xa) / wa + ... + where x, y, w, h denote the box's center coordinates, width and height + respectively. Similarly, xa, ya, wa, ha denote the anchor's center + coordinates, width and height. tx, ty, tw and th denote the anchor-encoded + center, width and height respectively. ky0, kx0, ky1, kx1, ... denote the + keypoints' coordinates, and tky0, tkx0, tky1, tkx1, ... denote the + anchor-encoded keypoint coordinates. +""" + +import tensorflow as tf + +from object_detection.core import box_coder +from object_detection.core import box_list +from object_detection.core import standard_fields as fields + +EPSILON = 1e-8 + + +class KeypointBoxCoder(box_coder.BoxCoder): + """Keypoint box coder.""" + + def __init__(self, num_keypoints, scale_factors=None): + """Constructor for KeypointBoxCoder. + + Args: + num_keypoints: Number of keypoints to encode/decode. + scale_factors: List of 4 positive scalars to scale ty, tx, th and tw. + In addition to scaling ty and tx, the first 2 scalars are used to scale + the y and x coordinates of the keypoints as well. If set to None, does + not perform scaling. + """ + self._num_keypoints = num_keypoints + + if scale_factors: + assert len(scale_factors) == 4 + for scalar in scale_factors: + assert scalar > 0 + self._scale_factors = scale_factors + self._keypoint_scale_factors = None + if scale_factors is not None: + self._keypoint_scale_factors = tf.expand_dims( + tf.tile([ + tf.cast(scale_factors[0], dtype=tf.float32), + tf.cast(scale_factors[1], dtype=tf.float32) + ], [num_keypoints]), 1) + + @property + def code_size(self): + return 4 + self._num_keypoints * 2 + + def _encode(self, boxes, anchors): + """Encode a box and keypoint collection with respect to anchor collection. + + Args: + boxes: BoxList holding N boxes and keypoints to be encoded. 
Boxes are + tensors with the shape [N, 4], and keypoints are tensors with the shape + [N, num_keypoints, 2]. + anchors: BoxList of anchors. + + Returns: + a tensor representing N anchor-encoded boxes of the format + [ty, tx, th, tw, tky0, tkx0, tky1, tkx1, ...] where tky0 and tkx0 + represent the y and x coordinates of the first keypoint, tky1 and tkx1 + represent the y and x coordinates of the second keypoint, and so on. + """ + # Convert anchors to the center coordinate representation. + ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() + ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes() + keypoints = boxes.get_field(fields.BoxListFields.keypoints) + keypoints = tf.transpose(tf.reshape(keypoints, + [-1, self._num_keypoints * 2])) + num_boxes = boxes.num_boxes() + + # Avoid NaN in division and log below. + ha += EPSILON + wa += EPSILON + h += EPSILON + w += EPSILON + + tx = (xcenter - xcenter_a) / wa + ty = (ycenter - ycenter_a) / ha + tw = tf.log(w / wa) + th = tf.log(h / ha) + + tiled_anchor_centers = tf.tile( + tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1]) + tiled_anchor_sizes = tf.tile( + tf.stack([ha, wa]), [self._num_keypoints, 1]) + tkeypoints = (keypoints - tiled_anchor_centers) / tiled_anchor_sizes + + # Scales location targets as used in paper for joint training. + if self._scale_factors: + ty *= self._scale_factors[0] + tx *= self._scale_factors[1] + th *= self._scale_factors[2] + tw *= self._scale_factors[3] + tkeypoints *= tf.tile(self._keypoint_scale_factors, [1, num_boxes]) + + tboxes = tf.stack([ty, tx, th, tw]) + return tf.transpose(tf.concat([tboxes, tkeypoints], 0)) + + def _decode(self, rel_codes, anchors): + """Decode relative codes to boxes and keypoints. + + Args: + rel_codes: a tensor with shape [N, 4 + 2 * num_keypoints] representing N + anchor-encoded boxes and keypoints + anchors: BoxList of anchors. + + Returns: + boxes: BoxList holding N bounding boxes and keypoints. + """ + ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() + + num_codes = tf.shape(rel_codes)[0] + result = tf.unstack(tf.transpose(rel_codes)) + ty, tx, th, tw = result[:4] + tkeypoints = result[4:] + if self._scale_factors: + ty /= self._scale_factors[0] + tx /= self._scale_factors[1] + th /= self._scale_factors[2] + tw /= self._scale_factors[3] + tkeypoints /= tf.tile(self._keypoint_scale_factors, [1, num_codes]) + + w = tf.exp(tw) * wa + h = tf.exp(th) * ha + ycenter = ty * ha + ycenter_a + xcenter = tx * wa + xcenter_a + ymin = ycenter - h / 2. + xmin = xcenter - w / 2. + ymax = ycenter + h / 2. + xmax = xcenter + w / 2. + decoded_boxes_keypoints = box_list.BoxList( + tf.transpose(tf.stack([ymin, xmin, ymax, xmax]))) + + tiled_anchor_centers = tf.tile( + tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1]) + tiled_anchor_sizes = tf.tile( + tf.stack([ha, wa]), [self._num_keypoints, 1]) + keypoints = tkeypoints * tiled_anchor_sizes + tiled_anchor_centers + keypoints = tf.reshape(tf.transpose(keypoints), + [-1, self._num_keypoints, 2]) + decoded_boxes_keypoints.add_field(fields.BoxListFields.keypoints, keypoints) + return decoded_boxes_keypoints diff --git a/box_coders/keypoint_box_coder_test.py b/box_coders/keypoint_box_coder_test.py new file mode 100644 index 0000000..330641e --- /dev/null +++ b/box_coders/keypoint_box_coder_test.py @@ -0,0 +1,140 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for object_detection.box_coder.keypoint_box_coder.""" + +import tensorflow as tf + +from object_detection.box_coders import keypoint_box_coder +from object_detection.core import box_list +from object_detection.core import standard_fields as fields + + +class KeypointBoxCoderTest(tf.test.TestCase): + + def test_get_correct_relative_codes_after_encoding(self): + boxes = [[10., 10., 20., 15.], + [0.2, 0.1, 0.5, 0.4]] + keypoints = [[[15., 12.], [10., 15.]], + [[0.5, 0.3], [0.2, 0.4]]] + num_keypoints = len(keypoints[0]) + anchors = [[15., 12., 30., 18.], + [0.1, 0.0, 0.7, 0.9]] + expected_rel_codes = [ + [-0.5, -0.416666, -0.405465, -0.182321, + -0.5, -0.5, -0.833333, 0.], + [-0.083333, -0.222222, -0.693147, -1.098612, + 0.166667, -0.166667, -0.333333, -0.055556] + ] + boxes = box_list.BoxList(tf.constant(boxes)) + boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints)) + anchors = box_list.BoxList(tf.constant(anchors)) + coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints) + rel_codes = coder.encode(boxes, anchors) + with self.test_session() as sess: + rel_codes_out, = sess.run([rel_codes]) + self.assertAllClose(rel_codes_out, expected_rel_codes) + + def test_get_correct_relative_codes_after_encoding_with_scaling(self): + boxes = [[10., 10., 20., 15.], + [0.2, 0.1, 0.5, 0.4]] + keypoints = [[[15., 12.], [10., 15.]], + [[0.5, 0.3], [0.2, 0.4]]] + num_keypoints = len(keypoints[0]) + anchors = [[15., 12., 30., 18.], + [0.1, 0.0, 0.7, 0.9]] + scale_factors = [2, 3, 4, 5] + expected_rel_codes = [ + [-1., -1.25, -1.62186, -0.911608, + -1.0, -1.5, -1.666667, 0.], + [-0.166667, -0.666667, -2.772588, -5.493062, + 0.333333, -0.5, -0.666667, -0.166667] + ] + boxes = box_list.BoxList(tf.constant(boxes)) + boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints)) + anchors = box_list.BoxList(tf.constant(anchors)) + coder = keypoint_box_coder.KeypointBoxCoder( + num_keypoints, scale_factors=scale_factors) + rel_codes = coder.encode(boxes, anchors) + with self.test_session() as sess: + rel_codes_out, = sess.run([rel_codes]) + self.assertAllClose(rel_codes_out, expected_rel_codes) + + def test_get_correct_boxes_after_decoding(self): + anchors = [[15., 12., 30., 18.], + [0.1, 0.0, 0.7, 0.9]] + rel_codes = [ + [-0.5, -0.416666, -0.405465, -0.182321, + -0.5, -0.5, -0.833333, 0.], + [-0.083333, -0.222222, -0.693147, -1.098612, + 0.166667, -0.166667, -0.333333, -0.055556] + ] + expected_boxes = [[10., 10., 20., 15.], + [0.2, 0.1, 0.5, 0.4]] + expected_keypoints = [[[15., 12.], [10., 15.]], + [[0.5, 0.3], [0.2, 0.4]]] + num_keypoints = len(expected_keypoints[0]) + anchors = box_list.BoxList(tf.constant(anchors)) + coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints) + boxes = coder.decode(rel_codes, anchors) + with self.test_session() as sess: + boxes_out, keypoints_out = sess.run( + [boxes.get(), 
boxes.get_field(fields.BoxListFields.keypoints)]) + self.assertAllClose(boxes_out, expected_boxes) + self.assertAllClose(keypoints_out, expected_keypoints) + + def test_get_correct_boxes_after_decoding_with_scaling(self): + anchors = [[15., 12., 30., 18.], + [0.1, 0.0, 0.7, 0.9]] + rel_codes = [ + [-1., -1.25, -1.62186, -0.911608, + -1.0, -1.5, -1.666667, 0.], + [-0.166667, -0.666667, -2.772588, -5.493062, + 0.333333, -0.5, -0.666667, -0.166667] + ] + scale_factors = [2, 3, 4, 5] + expected_boxes = [[10., 10., 20., 15.], + [0.2, 0.1, 0.5, 0.4]] + expected_keypoints = [[[15., 12.], [10., 15.]], + [[0.5, 0.3], [0.2, 0.4]]] + num_keypoints = len(expected_keypoints[0]) + anchors = box_list.BoxList(tf.constant(anchors)) + coder = keypoint_box_coder.KeypointBoxCoder( + num_keypoints, scale_factors=scale_factors) + boxes = coder.decode(rel_codes, anchors) + with self.test_session() as sess: + boxes_out, keypoints_out = sess.run( + [boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)]) + self.assertAllClose(boxes_out, expected_boxes) + self.assertAllClose(keypoints_out, expected_keypoints) + + def test_very_small_width_nan_after_encoding(self): + boxes = [[10., 10., 10.0000001, 20.]] + keypoints = [[[10., 10.], [10.0000001, 20.]]] + anchors = [[15., 12., 30., 18.]] + expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826, + -0.833333, -0.833333, -0.833333, 0.833333]] + boxes = box_list.BoxList(tf.constant(boxes)) + boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints)) + anchors = box_list.BoxList(tf.constant(anchors)) + coder = keypoint_box_coder.KeypointBoxCoder(2) + rel_codes = coder.encode(boxes, anchors) + with self.test_session() as sess: + rel_codes_out, = sess.run([rel_codes]) + self.assertAllClose(rel_codes_out, expected_rel_codes) + + +if __name__ == '__main__': + tf.test.main() diff --git a/box_coders/mean_stddev_box_coder_test.py b/box_coders/mean_stddev_box_coder_test.py new file mode 100644 index 0000000..3e0eba9 --- /dev/null +++ b/box_coders/mean_stddev_box_coder_test.py @@ -0,0 +1,54 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
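The box-coder arithmetic above is easy to sanity-check by hand. The following standalone NumPy sketch (an illustration only, not part of the patched files) reproduces the first expected row of faster_rcnn_box_coder_test.py directly from the docstring schema; the keypoint coder applies the same (ty, tx) normalization to every keypoint, which is why its code size is 4 + 2 * num_keypoints:

    import numpy as np

    def encode(box, anchor):
      # Both are [ymin, xmin, ymax, xmax]; convert to center/size form first.
      ycenter, xcenter = (box[0] + box[2]) / 2., (box[1] + box[3]) / 2.
      h, w = box[2] - box[0], box[3] - box[1]
      ycenter_a, xcenter_a = (anchor[0] + anchor[2]) / 2., (anchor[1] + anchor[3]) / 2.
      ha, wa = anchor[2] - anchor[0], anchor[3] - anchor[1]
      # ty = (y - ya) / ha;  tx = (x - xa) / wa;  th = log(h / ha);  tw = log(w / wa)
      return [(ycenter - ycenter_a) / ha, (xcenter - xcenter_a) / wa,
              np.log(h / ha), np.log(w / wa)]

    print(encode([10., 10., 20., 15.], [15., 12., 30., 18.]))
    # -> [-0.5, -0.41666..., -0.40546..., -0.18232...], matching expected_rel_codes.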
+# ============================================================================== + +"""Tests for object_detection.box_coder.mean_stddev_box_coder.""" + +import tensorflow as tf + +from object_detection.box_coders import mean_stddev_box_coder +from object_detection.core import box_list + + +class MeanStddevBoxCoderTest(tf.test.TestCase): + + def testGetCorrectRelativeCodesAfterEncoding(self): + box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]] + boxes = box_list.BoxList(tf.constant(box_corners)) + expected_rel_codes = [[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]] + prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]]) + priors = box_list.BoxList(prior_means) + + coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) + rel_codes = coder.encode(boxes, priors) + with self.test_session() as sess: + rel_codes_out = sess.run(rel_codes) + self.assertAllClose(rel_codes_out, expected_rel_codes) + + def testGetCorrectBoxesAfterDecoding(self): + rel_codes = tf.constant([[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]]) + expected_box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]] + prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]]) + priors = box_list.BoxList(prior_means) + + coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) + decoded_boxes = coder.decode(rel_codes, priors) + decoded_box_corners = decoded_boxes.get() + with self.test_session() as sess: + decoded_out = sess.run(decoded_box_corners) + self.assertAllClose(decoded_out, expected_box_corners) + + +if __name__ == '__main__': + tf.test.main() diff --git a/box_coders/square_box_coder.py b/box_coders/square_box_coder.py new file mode 100644 index 0000000..ee46b68 --- /dev/null +++ b/box_coders/square_box_coder.py @@ -0,0 +1,126 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Square box coder. + +Square box coder follows the coding schema described below: +l = sqrt(h * w) +la = sqrt(ha * wa) +ty = (y - ya) / la +tx = (x - xa) / la +tl = log(l / la) +where x, y, w, h denote the box's center coordinates, width, and height, +respectively. Similarly, xa, ya, wa, ha denote the anchor's center +coordinates, width and height. tx, ty, tl denote the anchor-encoded +center and length, respectively. Because the encoded box is a square, only +one length is encoded. + +This has been shown to provide performance improvements over the Faster RCNN +box coder when the objects being detected tend to be square (e.g. faces) and +when the input images are not distorted via resizing. +""" + +import tensorflow as tf + +from object_detection.core import box_coder +from object_detection.core import box_list + +EPSILON = 1e-8 + + +class SquareBoxCoder(box_coder.BoxCoder): + """Encodes a 3-scalar representation of a square box.""" + + def __init__(self, scale_factors=None): + """Constructor for SquareBoxCoder.
+ + Args: + scale_factors: List of 3 positive scalars to scale ty, tx, and tl. + If set to None, does not perform scaling. For faster RCNN, + the open-source implementation recommends using [10.0, 10.0, 5.0]. + + Raises: + ValueError: If scale_factors is not length 3 or contains values less than + or equal to 0. + """ + if scale_factors: + if len(scale_factors) != 3: + raise ValueError('The argument scale_factors must be a list of length ' + '3.') + if any(scalar <= 0 for scalar in scale_factors): + raise ValueError('The values in scale_factors must all be greater ' + 'than 0.') + self._scale_factors = scale_factors + + @property + def code_size(self): + return 3 + + def _encode(self, boxes, anchors): + """Encodes a box collection with respect to an anchor collection. + + Args: + boxes: BoxList holding N boxes to be encoded. + anchors: BoxList of anchors. + + Returns: + a tensor representing N anchor-encoded boxes of the format + [ty, tx, tl]. + """ + # Convert anchors to the center coordinate representation. + ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() + la = tf.sqrt(ha * wa) + ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes() + l = tf.sqrt(h * w) + # Avoid NaN in division and log below. + la += EPSILON + l += EPSILON + + tx = (xcenter - xcenter_a) / la + ty = (ycenter - ycenter_a) / la + tl = tf.log(l / la) + # Scales location targets for joint training. + if self._scale_factors: + ty *= self._scale_factors[0] + tx *= self._scale_factors[1] + tl *= self._scale_factors[2] + return tf.transpose(tf.stack([ty, tx, tl])) + + def _decode(self, rel_codes, anchors): + """Decodes relative codes to boxes. + + Args: + rel_codes: a tensor representing N anchor-encoded boxes. + anchors: BoxList of anchors. + + Returns: + boxes: BoxList holding N bounding boxes. + """ + ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() + la = tf.sqrt(ha * wa) + + ty, tx, tl = tf.unstack(tf.transpose(rel_codes)) + if self._scale_factors: + ty /= self._scale_factors[0] + tx /= self._scale_factors[1] + tl /= self._scale_factors[2] + l = tf.exp(tl) * la + ycenter = ty * la + ycenter_a + xcenter = tx * la + xcenter_a + ymin = ycenter - l / 2. + xmin = xcenter - l / 2. + ymax = ycenter + l / 2. + xmax = xcenter + l / 2. + return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax]))) diff --git a/box_coders/square_box_coder_test.py b/box_coders/square_box_coder_test.py new file mode 100644 index 0000000..7f739c6 --- /dev/null +++ b/box_coders/square_box_coder_test.py @@ -0,0 +1,97 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
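The square coder admits the same kind of hand check. For the first case in the test below (box [10.0, 10.0, 20.0, 15.0] against anchor [15.0, 12.0, 30.0, 18.0]), h = 10, w = 5, ha = 15, wa = 6, and the centers are (15.0, 12.5) and (22.5, 15.0), so:

    l  = sqrt(10 * 5)        ~  7.071068
    la = sqrt(15 * 6)        ~  9.486833
    ty = (15.0 - 22.5) / la  ~ -0.790569
    tx = (12.5 - 15.0) / la  ~ -0.263523
    tl = log(l / la)         ~ -0.293893

which is exactly the first row of expected_rel_codes in test_correct_relative_codes_with_default_scale.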
+# ============================================================================== + +"""Tests for object_detection.box_coder.square_box_coder.""" + +import tensorflow as tf + +from object_detection.box_coders import square_box_coder +from object_detection.core import box_list + + +class SquareBoxCoderTest(tf.test.TestCase): + + def test_correct_relative_codes_with_default_scale(self): + boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] + anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] + scale_factors = None + expected_rel_codes = [[-0.790569, -0.263523, -0.293893], + [-0.068041, -0.272166, -0.89588]] + + boxes = box_list.BoxList(tf.constant(boxes)) + anchors = box_list.BoxList(tf.constant(anchors)) + coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors) + rel_codes = coder.encode(boxes, anchors) + with self.test_session() as sess: + (rel_codes_out,) = sess.run([rel_codes]) + self.assertAllClose(rel_codes_out, expected_rel_codes) + + def test_correct_relative_codes_with_non_default_scale(self): + boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] + anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] + scale_factors = [2, 3, 4] + expected_rel_codes = [[-1.581139, -0.790569, -1.175573], + [-0.136083, -0.816497, -3.583519]] + boxes = box_list.BoxList(tf.constant(boxes)) + anchors = box_list.BoxList(tf.constant(anchors)) + coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors) + rel_codes = coder.encode(boxes, anchors) + with self.test_session() as sess: + (rel_codes_out,) = sess.run([rel_codes]) + self.assertAllClose(rel_codes_out, expected_rel_codes) + + def test_correct_relative_codes_with_small_width(self): + boxes = [[10.0, 10.0, 10.0000001, 20.0]] + anchors = [[15.0, 12.0, 30.0, 18.0]] + scale_factors = None + expected_rel_codes = [[-1.317616, 0., -20.670586]] + boxes = box_list.BoxList(tf.constant(boxes)) + anchors = box_list.BoxList(tf.constant(anchors)) + coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors) + rel_codes = coder.encode(boxes, anchors) + with self.test_session() as sess: + (rel_codes_out,) = sess.run([rel_codes]) + self.assertAllClose(rel_codes_out, expected_rel_codes) + + def test_correct_boxes_with_default_scale(self): + anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] + rel_codes = [[-0.5, -0.416666, -0.405465], + [-0.083333, -0.222222, -0.693147]] + scale_factors = None + expected_boxes = [[14.594306, 7.884875, 20.918861, 14.209432], + [0.155051, 0.102989, 0.522474, 0.470412]] + anchors = box_list.BoxList(tf.constant(anchors)) + coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors) + boxes = coder.decode(rel_codes, anchors) + with self.test_session() as sess: + (boxes_out,) = sess.run([boxes.get()]) + self.assertAllClose(boxes_out, expected_boxes) + + def test_correct_boxes_with_non_default_scale(self): + anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] + rel_codes = [[-1., -1.25, -1.62186], [-0.166667, -0.666667, -2.772588]] + scale_factors = [2, 3, 4] + expected_boxes = [[14.594306, 7.884875, 20.918861, 14.209432], + [0.155051, 0.102989, 0.522474, 0.470412]] + anchors = box_list.BoxList(tf.constant(anchors)) + coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors) + boxes = coder.decode(rel_codes, anchors) + with self.test_session() as sess: + (boxes_out,) = sess.run([boxes.get()]) + self.assertAllClose(boxes_out, expected_boxes) + + +if __name__ == '__main__': + tf.test.main() diff --git a/builders/anchor_generator_builder.py 
b/builders/anchor_generator_builder.py new file mode 100644 index 0000000..81219a1 --- /dev/null +++ b/builders/anchor_generator_builder.py @@ -0,0 +1,109 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""A function to build an object detection anchor generator from config.""" + +from object_detection.anchor_generators import flexible_grid_anchor_generator +from object_detection.anchor_generators import grid_anchor_generator +from object_detection.anchor_generators import multiple_grid_anchor_generator +from object_detection.anchor_generators import multiscale_grid_anchor_generator +from object_detection.protos import anchor_generator_pb2 + + +def build(anchor_generator_config): + """Builds an anchor generator based on the config. + + Args: + anchor_generator_config: An anchor_generator.proto object containing the + config for the desired anchor generator. + + Returns: + Anchor generator based on the config. + + Raises: + ValueError: On empty anchor generator proto. + """ + if not isinstance(anchor_generator_config, + anchor_generator_pb2.AnchorGenerator): + raise ValueError('anchor_generator_config not of type ' + 'anchor_generator_pb2.AnchorGenerator') + if anchor_generator_config.WhichOneof( + 'anchor_generator_oneof') == 'grid_anchor_generator': + grid_anchor_generator_config = anchor_generator_config.grid_anchor_generator + return grid_anchor_generator.GridAnchorGenerator( + scales=[float(scale) for scale in grid_anchor_generator_config.scales], + aspect_ratios=[float(aspect_ratio) + for aspect_ratio + in grid_anchor_generator_config.aspect_ratios], + base_anchor_size=[grid_anchor_generator_config.height, + grid_anchor_generator_config.width], + anchor_stride=[grid_anchor_generator_config.height_stride, + grid_anchor_generator_config.width_stride], + anchor_offset=[grid_anchor_generator_config.height_offset, + grid_anchor_generator_config.width_offset]) + elif anchor_generator_config.WhichOneof( + 'anchor_generator_oneof') == 'ssd_anchor_generator': + ssd_anchor_generator_config = anchor_generator_config.ssd_anchor_generator + anchor_strides = None + if ssd_anchor_generator_config.height_stride: + anchor_strides = zip(ssd_anchor_generator_config.height_stride, + ssd_anchor_generator_config.width_stride) + anchor_offsets = None + if ssd_anchor_generator_config.height_offset: + anchor_offsets = zip(ssd_anchor_generator_config.height_offset, + ssd_anchor_generator_config.width_offset) + return multiple_grid_anchor_generator.create_ssd_anchors( + num_layers=ssd_anchor_generator_config.num_layers, + min_scale=ssd_anchor_generator_config.min_scale, + max_scale=ssd_anchor_generator_config.max_scale, + scales=[float(scale) for scale in ssd_anchor_generator_config.scales], + aspect_ratios=ssd_anchor_generator_config.aspect_ratios, + interpolated_scale_aspect_ratio=( + ssd_anchor_generator_config.interpolated_scale_aspect_ratio), + 
base_anchor_size=[ + ssd_anchor_generator_config.base_anchor_height, + ssd_anchor_generator_config.base_anchor_width + ], + anchor_strides=anchor_strides, + anchor_offsets=anchor_offsets, + reduce_boxes_in_lowest_layer=( + ssd_anchor_generator_config.reduce_boxes_in_lowest_layer)) + elif anchor_generator_config.WhichOneof( + 'anchor_generator_oneof') == 'multiscale_anchor_generator': + cfg = anchor_generator_config.multiscale_anchor_generator + return multiscale_grid_anchor_generator.MultiscaleGridAnchorGenerator( + cfg.min_level, + cfg.max_level, + cfg.anchor_scale, + [float(aspect_ratio) for aspect_ratio in cfg.aspect_ratios], + cfg.scales_per_octave, + cfg.normalize_coordinates + ) + elif anchor_generator_config.WhichOneof( + 'anchor_generator_oneof') == 'flexible_grid_anchor_generator': + cfg = anchor_generator_config.flexible_grid_anchor_generator + base_sizes = [] + aspect_ratios = [] + strides = [] + offsets = [] + for anchor_grid in cfg.anchor_grid: + base_sizes.append(tuple(anchor_grid.base_sizes)) + aspect_ratios.append(tuple(anchor_grid.aspect_ratios)) + strides.append((anchor_grid.height_stride, anchor_grid.width_stride)) + offsets.append((anchor_grid.height_offset, anchor_grid.width_offset)) + return flexible_grid_anchor_generator.FlexibleGridAnchorGenerator( + base_sizes, aspect_ratios, strides, offsets, cfg.normalize_coordinates) + else: + raise ValueError('Empty anchor generator.') diff --git a/builders/anchor_generator_builder_test.py b/builders/anchor_generator_builder_test.py new file mode 100644 index 0000000..4b2cf5d --- /dev/null +++ b/builders/anchor_generator_builder_test.py @@ -0,0 +1,332 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
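Before the builder tests, it helps to make the multiscale arithmetic concrete: each level l contributes anchors with stride 2**l and base size anchor_scale * 2**l, which is what the multiscale builder test below asserts through anchor_grid_info. A simplified pure-NumPy sketch of the single-scale, aspect-ratio-1.0 case (an illustration, not the library implementation, which also handles offsets, extra scales and aspect ratios, and coordinate normalization):

    import numpy as np

    def level_anchors(level, anchor_scale, feat_h, feat_w):
      # Stride and base anchor size are both implied by the pyramid level.
      stride = 2.0 ** level
      base = anchor_scale * stride
      corners = []
      for y in range(feat_h):
        for x in range(feat_w):
          # Anchor centers sit at half-stride offsets on the image grid.
          cy, cx = stride / 2 + y * stride, stride / 2 + x * stride
          corners.append([cy - base / 2, cx - base / 2,
                          cy + base / 2, cx + base / 2])
      return np.array(corners)

    print(level_anchors(5, 4.0, 2, 2))
    # Reproduces the level-5 rows of test_construct_single_anchor_on_two_feature_maps:
    # [[-48. -48.  80.  80.] [-48. -16.  80. 112.] [-16. -48. 112.  80.] [-16. -16. 112. 112.]]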
+# ============================================================================== + +"""Tests for anchor_generator_builder.""" + +import math + +import tensorflow as tf + +from google.protobuf import text_format +from object_detection.anchor_generators import flexible_grid_anchor_generator +from object_detection.anchor_generators import grid_anchor_generator +from object_detection.anchor_generators import multiple_grid_anchor_generator +from object_detection.anchor_generators import multiscale_grid_anchor_generator +from object_detection.builders import anchor_generator_builder +from object_detection.protos import anchor_generator_pb2 + + +class AnchorGeneratorBuilderTest(tf.test.TestCase): + + def assert_almost_list_equal(self, expected_list, actual_list, delta=None): + self.assertEqual(len(expected_list), len(actual_list)) + for expected_item, actual_item in zip(expected_list, actual_list): + self.assertAlmostEqual(expected_item, actual_item, delta=delta) + + def test_build_grid_anchor_generator_with_defaults(self): + anchor_generator_text_proto = """ + grid_anchor_generator { + } + """ + anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() + text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) + anchor_generator_object = anchor_generator_builder.build( + anchor_generator_proto) + self.assertIsInstance(anchor_generator_object, + grid_anchor_generator.GridAnchorGenerator) + self.assertListEqual(anchor_generator_object._scales, []) + self.assertListEqual(anchor_generator_object._aspect_ratios, []) + self.assertAllEqual(anchor_generator_object._anchor_offset, [0, 0]) + self.assertAllEqual(anchor_generator_object._anchor_stride, [16, 16]) + self.assertAllEqual(anchor_generator_object._base_anchor_size, [256, 256]) + + def test_build_grid_anchor_generator_with_non_default_parameters(self): + anchor_generator_text_proto = """ + grid_anchor_generator { + height: 128 + width: 512 + height_stride: 10 + width_stride: 20 + height_offset: 30 + width_offset: 40 + scales: [0.4, 2.2] + aspect_ratios: [0.3, 4.5] + } + """ + anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() + text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) + anchor_generator_object = anchor_generator_builder.build( + anchor_generator_proto) + self.assertIsInstance(anchor_generator_object, + grid_anchor_generator.GridAnchorGenerator) + self.assert_almost_list_equal(anchor_generator_object._scales, + [0.4, 2.2]) + self.assert_almost_list_equal(anchor_generator_object._aspect_ratios, + [0.3, 4.5]) + self.assertAllEqual(anchor_generator_object._anchor_offset, [30, 40]) + self.assertAllEqual(anchor_generator_object._anchor_stride, [10, 20]) + self.assertAllEqual(anchor_generator_object._base_anchor_size, [128, 512]) + + def test_build_ssd_anchor_generator_with_defaults(self): + anchor_generator_text_proto = """ + ssd_anchor_generator { + aspect_ratios: [1.0] + } + """ + anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() + text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) + anchor_generator_object = anchor_generator_builder.build( + anchor_generator_proto) + self.assertIsInstance(anchor_generator_object, + multiple_grid_anchor_generator. 
+ MultipleGridAnchorGenerator) + for actual_scales, expected_scales in zip( + list(anchor_generator_object._scales), + [(0.1, 0.2, 0.2), + (0.35, 0.418), + (0.499, 0.570), + (0.649, 0.721), + (0.799, 0.871), + (0.949, 0.974)]): + self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2) + for actual_aspect_ratio, expected_aspect_ratio in zip( + list(anchor_generator_object._aspect_ratios), + [(1.0, 2.0, 0.5)] + 5 * [(1.0, 1.0)]): + self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio) + self.assertAllClose(anchor_generator_object._base_anchor_size, [1.0, 1.0]) + + def test_build_ssd_anchor_generator_with_custom_scales(self): + anchor_generator_text_proto = """ + ssd_anchor_generator { + aspect_ratios: [1.0] + scales: [0.1, 0.15, 0.2, 0.4, 0.6, 0.8] + reduce_boxes_in_lowest_layer: false + } + """ + anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() + text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) + anchor_generator_object = anchor_generator_builder.build( + anchor_generator_proto) + self.assertIsInstance(anchor_generator_object, + multiple_grid_anchor_generator. + MultipleGridAnchorGenerator) + for actual_scales, expected_scales in zip( + list(anchor_generator_object._scales), + [(0.1, math.sqrt(0.1 * 0.15)), + (0.15, math.sqrt(0.15 * 0.2)), + (0.2, math.sqrt(0.2 * 0.4)), + (0.4, math.sqrt(0.4 * 0.6)), + (0.6, math.sqrt(0.6 * 0.8)), + (0.8, math.sqrt(0.8 * 1.0))]): + self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2) + + def test_build_ssd_anchor_generator_with_custom_interpolated_scale(self): + anchor_generator_text_proto = """ + ssd_anchor_generator { + aspect_ratios: [0.5] + interpolated_scale_aspect_ratio: 0.5 + reduce_boxes_in_lowest_layer: false + } + """ + anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() + text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) + anchor_generator_object = anchor_generator_builder.build( + anchor_generator_proto) + self.assertIsInstance(anchor_generator_object, + multiple_grid_anchor_generator. + MultipleGridAnchorGenerator) + for actual_aspect_ratio, expected_aspect_ratio in zip( + list(anchor_generator_object._aspect_ratios), + 6 * [(0.5, 0.5)]): + self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio) + + def test_build_ssd_anchor_generator_without_reduced_boxes(self): + anchor_generator_text_proto = """ + ssd_anchor_generator { + aspect_ratios: [1.0] + reduce_boxes_in_lowest_layer: false + } + """ + anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() + text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) + anchor_generator_object = anchor_generator_builder.build( + anchor_generator_proto) + self.assertIsInstance(anchor_generator_object, + multiple_grid_anchor_generator. 
+ MultipleGridAnchorGenerator) + + for actual_scales, expected_scales in zip( + list(anchor_generator_object._scales), + [(0.2, 0.264), + (0.35, 0.418), + (0.499, 0.570), + (0.649, 0.721), + (0.799, 0.871), + (0.949, 0.974)]): + self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2) + + for actual_aspect_ratio, expected_aspect_ratio in zip( + list(anchor_generator_object._aspect_ratios), + 6 * [(1.0, 1.0)]): + self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio) + + self.assertAllClose(anchor_generator_object._base_anchor_size, [1.0, 1.0]) + + def test_build_ssd_anchor_generator_with_non_default_parameters(self): + anchor_generator_text_proto = """ + ssd_anchor_generator { + num_layers: 2 + min_scale: 0.3 + max_scale: 0.8 + aspect_ratios: [2.0] + height_stride: 16 + height_stride: 32 + width_stride: 20 + width_stride: 30 + height_offset: 8 + height_offset: 16 + width_offset: 0 + width_offset: 10 + } + """ + anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() + text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) + anchor_generator_object = anchor_generator_builder.build( + anchor_generator_proto) + self.assertIsInstance(anchor_generator_object, + multiple_grid_anchor_generator. + MultipleGridAnchorGenerator) + + for actual_scales, expected_scales in zip( + list(anchor_generator_object._scales), + [(0.1, 0.3, 0.3), (0.8, 0.894)]): + self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2) + + for actual_aspect_ratio, expected_aspect_ratio in zip( + list(anchor_generator_object._aspect_ratios), + [(1.0, 2.0, 0.5), (2.0, 1.0)]): + self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio) + + for actual_strides, expected_strides in zip( + list(anchor_generator_object._anchor_strides), [(16, 20), (32, 30)]): + self.assert_almost_list_equal(expected_strides, actual_strides) + + for actual_offsets, expected_offsets in zip( + list(anchor_generator_object._anchor_offsets), [(8, 0), (16, 10)]): + self.assert_almost_list_equal(expected_offsets, actual_offsets) + + self.assertAllClose(anchor_generator_object._base_anchor_size, [1.0, 1.0]) + + def test_raise_value_error_on_empty_anchor_generator(self): + anchor_generator_text_proto = """ + """ + anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() + text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) + with self.assertRaises(ValueError): + anchor_generator_builder.build(anchor_generator_proto) + + def test_build_multiscale_anchor_generator_custom_aspect_ratios(self): + anchor_generator_text_proto = """ + multiscale_anchor_generator { + aspect_ratios: [1.0] + } + """ + anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() + text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) + anchor_generator_object = anchor_generator_builder.build( + anchor_generator_proto) + self.assertIsInstance(anchor_generator_object, + multiscale_grid_anchor_generator.
+ MultiscaleGridAnchorGenerator) + for level, anchor_grid_info in zip( + range(3, 8), anchor_generator_object._anchor_grid_info): + self.assertEqual(set(anchor_grid_info.keys()), set(['level', 'info'])) + self.assertEqual(level, anchor_grid_info['level']) + self.assertEqual(len(anchor_grid_info['info']), 4) + self.assertAllClose(anchor_grid_info['info'][0], [2**0, 2**0.5]) + self.assertAllClose(anchor_grid_info['info'][1], [1.0]) + self.assertAllClose(anchor_grid_info['info'][2], + [4.0 * 2**level, 4.0 * 2**level]) + self.assertAllClose(anchor_grid_info['info'][3], [2**level, 2**level]) + self.assertTrue(anchor_generator_object._normalize_coordinates) + + def test_build_multiscale_anchor_generator_with_anchors_in_pixel_coordinates( + self): + anchor_generator_text_proto = """ + multiscale_anchor_generator { + aspect_ratios: [1.0] + normalize_coordinates: false + } + """ + anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() + text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) + anchor_generator_object = anchor_generator_builder.build( + anchor_generator_proto) + self.assertIsInstance(anchor_generator_object, + multiscale_grid_anchor_generator. + MultiscaleGridAnchorGenerator) + self.assertFalse(anchor_generator_object._normalize_coordinates) + + def test_build_flexible_anchor_generator(self): + anchor_generator_text_proto = """ + flexible_grid_anchor_generator { + anchor_grid { + base_sizes: [1.5] + aspect_ratios: [1.0] + height_stride: 16 + width_stride: 20 + height_offset: 8 + width_offset: 9 + } + anchor_grid { + base_sizes: [1.0, 2.0] + aspect_ratios: [1.0, 0.5] + height_stride: 32 + width_stride: 30 + height_offset: 10 + width_offset: 11 + } + } + """ + anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() + text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) + anchor_generator_object = anchor_generator_builder.build( + anchor_generator_proto) + self.assertIsInstance(anchor_generator_object, + flexible_grid_anchor_generator. + FlexibleGridAnchorGenerator) + + for actual_base_sizes, expected_base_sizes in zip( + list(anchor_generator_object._base_sizes), [(1.5,), (1.0, 2.0)]): + self.assert_almost_list_equal(expected_base_sizes, actual_base_sizes) + + for actual_aspect_ratios, expected_aspect_ratios in zip( + list(anchor_generator_object._aspect_ratios), [(1.0,), (1.0, 0.5)]): + self.assert_almost_list_equal(expected_aspect_ratios, + actual_aspect_ratios) + + for actual_strides, expected_strides in zip( + list(anchor_generator_object._anchor_strides), [(16, 20), (32, 30)]): + self.assert_almost_list_equal(expected_strides, actual_strides) + + for actual_offsets, expected_offsets in zip( + list(anchor_generator_object._anchor_offsets), [(8, 9), (10, 11)]): + self.assert_almost_list_equal(expected_offsets, actual_offsets) + + self.assertTrue(anchor_generator_object._normalize_coordinates) + + +if __name__ == '__main__': + tf.test.main() diff --git a/builders/box_coder_builder.py b/builders/box_coder_builder.py new file mode 100644 index 0000000..cc13d5a --- /dev/null +++ b/builders/box_coder_builder.py @@ -0,0 +1,66 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""A function to build an object detection box coder from configuration.""" +from object_detection.box_coders import faster_rcnn_box_coder +from object_detection.box_coders import keypoint_box_coder +from object_detection.box_coders import mean_stddev_box_coder +from object_detection.box_coders import square_box_coder +from object_detection.protos import box_coder_pb2 + + +def build(box_coder_config): + """Builds a box coder object based on the box coder config. + + Args: + box_coder_config: A box_coder.proto object containing the config for the + desired box coder. + + Returns: + BoxCoder based on the config. + + Raises: + ValueError: On empty box coder proto. + """ + if not isinstance(box_coder_config, box_coder_pb2.BoxCoder): + raise ValueError('box_coder_config not of type box_coder_pb2.BoxCoder.') + + if box_coder_config.WhichOneof('box_coder_oneof') == 'faster_rcnn_box_coder': + return faster_rcnn_box_coder.FasterRcnnBoxCoder(scale_factors=[ + box_coder_config.faster_rcnn_box_coder.y_scale, + box_coder_config.faster_rcnn_box_coder.x_scale, + box_coder_config.faster_rcnn_box_coder.height_scale, + box_coder_config.faster_rcnn_box_coder.width_scale + ]) + if box_coder_config.WhichOneof('box_coder_oneof') == 'keypoint_box_coder': + return keypoint_box_coder.KeypointBoxCoder( + box_coder_config.keypoint_box_coder.num_keypoints, + scale_factors=[ + box_coder_config.keypoint_box_coder.y_scale, + box_coder_config.keypoint_box_coder.x_scale, + box_coder_config.keypoint_box_coder.height_scale, + box_coder_config.keypoint_box_coder.width_scale + ]) + if (box_coder_config.WhichOneof('box_coder_oneof') == + 'mean_stddev_box_coder'): + return mean_stddev_box_coder.MeanStddevBoxCoder( + stddev=box_coder_config.mean_stddev_box_coder.stddev) + if box_coder_config.WhichOneof('box_coder_oneof') == 'square_box_coder': + return square_box_coder.SquareBoxCoder(scale_factors=[ + box_coder_config.square_box_coder.y_scale, + box_coder_config.square_box_coder.x_scale, + box_coder_config.square_box_coder.length_scale + ]) + raise ValueError('Empty box coder.') diff --git a/builders/box_coder_builder_test.py b/builders/box_coder_builder_test.py new file mode 100644 index 0000000..286012e --- /dev/null +++ b/builders/box_coder_builder_test.py @@ -0,0 +1,136 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
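All four coder branches are exercised the same way; a minimal usage sketch (mirroring the tests that follow, with an arbitrary override value):

    from google.protobuf import text_format
    from object_detection.builders import box_coder_builder
    from object_detection.protos import box_coder_pb2

    box_coder_proto = box_coder_pb2.BoxCoder()
    text_format.Merge('faster_rcnn_box_coder { y_scale: 6.0 }', box_coder_proto)
    coder = box_coder_builder.build(box_coder_proto)
    # coder is a FasterRcnnBoxCoder with scale_factors [6.0, 10.0, 5.0, 5.0];
    # unset fields fall back to the proto defaults asserted in
    # test_build_faster_rcnn_box_coder_with_defaults.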
+# ============================================================================== + +"""Tests for box_coder_builder.""" + +import tensorflow as tf + +from google.protobuf import text_format +from object_detection.box_coders import faster_rcnn_box_coder +from object_detection.box_coders import keypoint_box_coder +from object_detection.box_coders import mean_stddev_box_coder +from object_detection.box_coders import square_box_coder +from object_detection.builders import box_coder_builder +from object_detection.protos import box_coder_pb2 + + +class BoxCoderBuilderTest(tf.test.TestCase): + + def test_build_faster_rcnn_box_coder_with_defaults(self): + box_coder_text_proto = """ + faster_rcnn_box_coder { + } + """ + box_coder_proto = box_coder_pb2.BoxCoder() + text_format.Merge(box_coder_text_proto, box_coder_proto) + box_coder_object = box_coder_builder.build(box_coder_proto) + self.assertIsInstance(box_coder_object, + faster_rcnn_box_coder.FasterRcnnBoxCoder) + self.assertEqual(box_coder_object._scale_factors, [10.0, 10.0, 5.0, 5.0]) + + def test_build_faster_rcnn_box_coder_with_non_default_parameters(self): + box_coder_text_proto = """ + faster_rcnn_box_coder { + y_scale: 6.0 + x_scale: 3.0 + height_scale: 7.0 + width_scale: 8.0 + } + """ + box_coder_proto = box_coder_pb2.BoxCoder() + text_format.Merge(box_coder_text_proto, box_coder_proto) + box_coder_object = box_coder_builder.build(box_coder_proto) + self.assertIsInstance(box_coder_object, + faster_rcnn_box_coder.FasterRcnnBoxCoder) + self.assertEqual(box_coder_object._scale_factors, [6.0, 3.0, 7.0, 8.0]) + + def test_build_keypoint_box_coder_with_defaults(self): + box_coder_text_proto = """ + keypoint_box_coder { + } + """ + box_coder_proto = box_coder_pb2.BoxCoder() + text_format.Merge(box_coder_text_proto, box_coder_proto) + box_coder_object = box_coder_builder.build(box_coder_proto) + self.assertIsInstance(box_coder_object, keypoint_box_coder.KeypointBoxCoder) + self.assertEqual(box_coder_object._scale_factors, [10.0, 10.0, 5.0, 5.0]) + + def test_build_keypoint_box_coder_with_non_default_parameters(self): + box_coder_text_proto = """ + keypoint_box_coder { + num_keypoints: 6 + y_scale: 6.0 + x_scale: 3.0 + height_scale: 7.0 + width_scale: 8.0 + } + """ + box_coder_proto = box_coder_pb2.BoxCoder() + text_format.Merge(box_coder_text_proto, box_coder_proto) + box_coder_object = box_coder_builder.build(box_coder_proto) + self.assertIsInstance(box_coder_object, keypoint_box_coder.KeypointBoxCoder) + self.assertEqual(box_coder_object._num_keypoints, 6) + self.assertEqual(box_coder_object._scale_factors, [6.0, 3.0, 7.0, 8.0]) + + def test_build_mean_stddev_box_coder(self): + box_coder_text_proto = """ + mean_stddev_box_coder { + } + """ + box_coder_proto = box_coder_pb2.BoxCoder() + text_format.Merge(box_coder_text_proto, box_coder_proto) + box_coder_object = box_coder_builder.build(box_coder_proto) + self.assertTrue( + isinstance(box_coder_object, + mean_stddev_box_coder.MeanStddevBoxCoder)) + + def test_build_square_box_coder_with_defaults(self): + box_coder_text_proto = """ + square_box_coder { + } + """ + box_coder_proto = box_coder_pb2.BoxCoder() + text_format.Merge(box_coder_text_proto, box_coder_proto) + box_coder_object = box_coder_builder.build(box_coder_proto) + self.assertTrue( + isinstance(box_coder_object, square_box_coder.SquareBoxCoder)) + self.assertEqual(box_coder_object._scale_factors, [10.0, 10.0, 5.0]) + + def test_build_square_box_coder_with_non_default_parameters(self): + box_coder_text_proto = """ + square_box_coder { 
+ y_scale: 6.0 + x_scale: 3.0 + length_scale: 7.0 + } + """ + box_coder_proto = box_coder_pb2.BoxCoder() + text_format.Merge(box_coder_text_proto, box_coder_proto) + box_coder_object = box_coder_builder.build(box_coder_proto) + self.assertTrue( + isinstance(box_coder_object, square_box_coder.SquareBoxCoder)) + self.assertEqual(box_coder_object._scale_factors, [6.0, 3.0, 7.0]) + + def test_raise_error_on_empty_box_coder(self): + box_coder_text_proto = """ + """ + box_coder_proto = box_coder_pb2.BoxCoder() + text_format.Merge(box_coder_text_proto, box_coder_proto) + with self.assertRaises(ValueError): + box_coder_builder.build(box_coder_proto) + + +if __name__ == '__main__': + tf.test.main() diff --git a/builders/box_predictor_builder.py b/builders/box_predictor_builder.py new file mode 100644 index 0000000..439efff --- /dev/null +++ b/builders/box_predictor_builder.py @@ -0,0 +1,975 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Function to build box predictor from configuration.""" + +import collections +import tensorflow as tf +from object_detection.predictors import convolutional_box_predictor +from object_detection.predictors import convolutional_keras_box_predictor +from object_detection.predictors import mask_rcnn_box_predictor +from object_detection.predictors import mask_rcnn_keras_box_predictor +from object_detection.predictors import rfcn_box_predictor +from object_detection.predictors import rfcn_keras_box_predictor +from object_detection.predictors.heads import box_head +from object_detection.predictors.heads import class_head +from object_detection.predictors.heads import keras_box_head +from object_detection.predictors.heads import keras_class_head +from object_detection.predictors.heads import keras_mask_head +from object_detection.predictors.heads import mask_head +from object_detection.protos import box_predictor_pb2 + + +def build_convolutional_box_predictor(is_training, + num_classes, + conv_hyperparams_fn, + min_depth, + max_depth, + num_layers_before_predictor, + use_dropout, + dropout_keep_prob, + kernel_size, + box_code_size, + apply_sigmoid_to_scores=False, + add_background_class=True, + class_prediction_bias_init=0.0, + use_depthwise=False, + box_encodings_clip_range=None): + """Builds the ConvolutionalBoxPredictor from the arguments. + + Args: + is_training: Indicates whether the BoxPredictor is in training mode. + num_classes: number of classes. Note that num_classes *does not* + include the background category, so if groundtruth labels take values + in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the + assigned classification targets can range from {0,... K}). + conv_hyperparams_fn: A function to generate tf-slim arg_scope with + hyperparameters for convolution ops. + min_depth: Minimum feature depth prior to predicting box encodings + and class predictions. 
+ max_depth: Maximum feature depth prior to predicting box encodings + and class predictions. If max_depth is set to 0, no additional + feature map will be inserted before location and class predictions. + num_layers_before_predictor: Number of the additional conv layers before + the predictor. + use_dropout: Option to use dropout or not. Note that a single dropout + op is applied here prior to both box and class predictions, which stands + in contrast to the ConvolutionalBoxPredictor below. + dropout_keep_prob: Keep probability for dropout. + This is only used if use_dropout is True. + kernel_size: Size of final convolution kernel. If the + spatial resolution of the feature map is smaller than the kernel size, + then the kernel size is automatically set to be + min(feature_width, feature_height). + box_code_size: Size of encoding for each box. + apply_sigmoid_to_scores: If True, apply the sigmoid on the output + class_predictions. + add_background_class: Whether to add an implicit background class. + class_prediction_bias_init: Constant value to initialize bias of the last + conv2d layer before class prediction. + use_depthwise: Whether to use depthwise convolutions for prediction + steps. Default is False. + box_encodings_clip_range: Min and max values for clipping the box_encodings. + + Returns: + A ConvolutionalBoxPredictor class. + """ + box_prediction_head = box_head.ConvolutionalBoxHead( + is_training=is_training, + box_code_size=box_code_size, + kernel_size=kernel_size, + use_depthwise=use_depthwise, + box_encodings_clip_range=box_encodings_clip_range) + class_prediction_head = class_head.ConvolutionalClassHead( + is_training=is_training, + num_class_slots=num_classes + 1 if add_background_class else num_classes, + use_dropout=use_dropout, + dropout_keep_prob=dropout_keep_prob, + kernel_size=kernel_size, + apply_sigmoid_to_scores=apply_sigmoid_to_scores, + class_prediction_bias_init=class_prediction_bias_init, + use_depthwise=use_depthwise) + other_heads = {} + return convolutional_box_predictor.ConvolutionalBoxPredictor( + is_training=is_training, + num_classes=num_classes, + box_prediction_head=box_prediction_head, + class_prediction_head=class_prediction_head, + other_heads=other_heads, + conv_hyperparams_fn=conv_hyperparams_fn, + num_layers_before_predictor=num_layers_before_predictor, + min_depth=min_depth, + max_depth=max_depth) + + +def build_convolutional_keras_box_predictor(is_training, + num_classes, + conv_hyperparams, + freeze_batchnorm, + inplace_batchnorm_update, + num_predictions_per_location_list, + min_depth, + max_depth, + num_layers_before_predictor, + use_dropout, + dropout_keep_prob, + kernel_size, + box_code_size, + add_background_class=True, + class_prediction_bias_init=0.0, + use_depthwise=False, + box_encodings_clip_range=None, + name='BoxPredictor'): + """Builds the Keras ConvolutionalBoxPredictor from the arguments. + + Args: + is_training: Indicates whether the BoxPredictor is in training mode. + num_classes: number of classes. Note that num_classes *does not* + include the background category, so if groundtruth labels take values + in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the + assigned classification targets can range from {0,... K}). + conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object + containing hyperparameters for convolution ops. + freeze_batchnorm: Whether to freeze batch norm parameters during + training or not. When training with a small batch size (e.g. 
1), it is
+      desirable to freeze batch norm update and use pretrained batch norm
+      params.
+    inplace_batchnorm_update: Whether to update batch norm moving average
+      values inplace. When this is false, the train op must add a control
+      dependency on the tf.GraphKeys.UPDATE_OPS collection in order to update
+      batch norm statistics.
+    num_predictions_per_location_list: A list of integers representing the
+      number of box predictions to be made per spatial location for each
+      feature map.
+    min_depth: Minimum feature depth prior to predicting box encodings
+      and class predictions.
+    max_depth: Maximum feature depth prior to predicting box encodings
+      and class predictions. If max_depth is set to 0, no additional
+      feature map will be inserted before location and class predictions.
+    num_layers_before_predictor: Number of the additional conv layers before
+      the predictor.
+    use_dropout: Option to use dropout or not. Note that a single dropout
+      op is applied here prior to both box and class predictions, which stands
+      in contrast to the ConvolutionalBoxPredictor below.
+    dropout_keep_prob: Keep probability for dropout.
+      This is only used if use_dropout is True.
+    kernel_size: Size of final convolution kernel. If the
+      spatial resolution of the feature map is smaller than the kernel size,
+      then the kernel size is automatically set to be
+      min(feature_width, feature_height).
+    box_code_size: Size of encoding for each box.
+    add_background_class: Whether to add an implicit background class.
+    class_prediction_bias_init: Constant value to initialize bias of the last
+      conv2d layer before class prediction.
+    use_depthwise: Whether to use depthwise convolutions for prediction
+      steps. Default is False.
+    box_encodings_clip_range: Min and max values for clipping the box_encodings.
+    name: A string name scope to assign to the box predictor. If `None`, Keras
+      will auto-generate one from the class name.
+
+  Returns:
+    A Keras ConvolutionalBoxPredictor class.
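+
+  A minimal invocation sketch (illustrative values; `conv_hyperparams` is
+  assumed to be a `hyperparams_builder.KerasLayerHyperparams` built
+  elsewhere):
+
+    box_predictor = build_convolutional_keras_box_predictor(
+        is_training=True,
+        num_classes=90,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=False,
+        inplace_batchnorm_update=False,
+        num_predictions_per_location_list=[3, 6],
+        min_depth=0,
+        max_depth=0,
+        num_layers_before_predictor=0,
+        use_dropout=True,
+        dropout_keep_prob=0.8,
+        kernel_size=3,
+        box_code_size=4)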
+ """ + box_prediction_heads = [] + class_prediction_heads = [] + other_heads = {} + + for stack_index, num_predictions_per_location in enumerate( + num_predictions_per_location_list): + box_prediction_heads.append( + keras_box_head.ConvolutionalBoxHead( + is_training=is_training, + box_code_size=box_code_size, + kernel_size=kernel_size, + conv_hyperparams=conv_hyperparams, + freeze_batchnorm=freeze_batchnorm, + num_predictions_per_location=num_predictions_per_location, + use_depthwise=use_depthwise, + box_encodings_clip_range=box_encodings_clip_range, + name='ConvolutionalBoxHead_%d' % stack_index)) + class_prediction_heads.append( + keras_class_head.ConvolutionalClassHead( + is_training=is_training, + num_class_slots=( + num_classes + 1 if add_background_class else num_classes), + use_dropout=use_dropout, + dropout_keep_prob=dropout_keep_prob, + kernel_size=kernel_size, + conv_hyperparams=conv_hyperparams, + freeze_batchnorm=freeze_batchnorm, + num_predictions_per_location=num_predictions_per_location, + class_prediction_bias_init=class_prediction_bias_init, + use_depthwise=use_depthwise, + name='ConvolutionalClassHead_%d' % stack_index)) + + return convolutional_keras_box_predictor.ConvolutionalBoxPredictor( + is_training=is_training, + num_classes=num_classes, + box_prediction_heads=box_prediction_heads, + class_prediction_heads=class_prediction_heads, + other_heads=other_heads, + conv_hyperparams=conv_hyperparams, + num_layers_before_predictor=num_layers_before_predictor, + min_depth=min_depth, + max_depth=max_depth, + freeze_batchnorm=freeze_batchnorm, + inplace_batchnorm_update=inplace_batchnorm_update, + name=name) + + +def build_weight_shared_convolutional_box_predictor( + is_training, + num_classes, + conv_hyperparams_fn, + depth, + num_layers_before_predictor, + box_code_size, + kernel_size=3, + add_background_class=True, + class_prediction_bias_init=0.0, + use_dropout=False, + dropout_keep_prob=0.8, + share_prediction_tower=False, + apply_batch_norm=True, + use_depthwise=False, + score_converter_fn=tf.identity, + box_encodings_clip_range=None, + keyword_args=None): + """Builds and returns a WeightSharedConvolutionalBoxPredictor class. + + Args: + is_training: Indicates whether the BoxPredictor is in training mode. + num_classes: number of classes. Note that num_classes *does not* + include the background category, so if groundtruth labels take values + in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the + assigned classification targets can range from {0,... K}). + conv_hyperparams_fn: A function to generate tf-slim arg_scope with + hyperparameters for convolution ops. + depth: depth of conv layers. + num_layers_before_predictor: Number of the additional conv layers before + the predictor. + box_code_size: Size of encoding for each box. + kernel_size: Size of final convolution kernel. + add_background_class: Whether to add an implicit background class. + class_prediction_bias_init: constant value to initialize bias of the last + conv2d layer before class prediction. + use_dropout: Whether to apply dropout to class prediction head. + dropout_keep_prob: Probability of keeping activiations. + share_prediction_tower: Whether to share the multi-layer tower between box + prediction and class prediction heads. + apply_batch_norm: Whether to apply batch normalization to conv layers in + this predictor. + use_depthwise: Whether to use depthwise separable conv2d instead of conv2d. + score_converter_fn: Callable score converter to perform elementwise op on + class scores. 
+    box_encodings_clip_range: Min and max values for clipping the box_encodings.
+    keyword_args: A dictionary with additional args.
+
+  Returns:
+    A WeightSharedConvolutionalBoxPredictor class.
+  """
+  box_prediction_head = box_head.WeightSharedConvolutionalBoxHead(
+      box_code_size=box_code_size,
+      kernel_size=kernel_size,
+      use_depthwise=use_depthwise,
+      box_encodings_clip_range=box_encodings_clip_range)
+  class_prediction_head = (
+      class_head.WeightSharedConvolutionalClassHead(
+          num_class_slots=(
+              num_classes + 1 if add_background_class else num_classes),
+          kernel_size=kernel_size,
+          class_prediction_bias_init=class_prediction_bias_init,
+          use_dropout=use_dropout,
+          dropout_keep_prob=dropout_keep_prob,
+          use_depthwise=use_depthwise,
+          score_converter_fn=score_converter_fn))
+  other_heads = {}
+  return convolutional_box_predictor.WeightSharedConvolutionalBoxPredictor(
+      is_training=is_training,
+      num_classes=num_classes,
+      box_prediction_head=box_prediction_head,
+      class_prediction_head=class_prediction_head,
+      other_heads=other_heads,
+      conv_hyperparams_fn=conv_hyperparams_fn,
+      depth=depth,
+      num_layers_before_predictor=num_layers_before_predictor,
+      kernel_size=kernel_size,
+      apply_batch_norm=apply_batch_norm,
+      share_prediction_tower=share_prediction_tower,
+      use_depthwise=use_depthwise)
+
+
+def build_weight_shared_convolutional_keras_box_predictor(
+    is_training,
+    num_classes,
+    conv_hyperparams,
+    freeze_batchnorm,
+    inplace_batchnorm_update,
+    num_predictions_per_location_list,
+    depth,
+    num_layers_before_predictor,
+    box_code_size,
+    kernel_size=3,
+    add_background_class=True,
+    class_prediction_bias_init=0.0,
+    use_dropout=False,
+    dropout_keep_prob=0.8,
+    share_prediction_tower=False,
+    apply_batch_norm=True,
+    use_depthwise=False,
+    score_converter_fn=tf.identity,
+    box_encodings_clip_range=None,
+    name='WeightSharedConvolutionalBoxPredictor',
+    keyword_args=None):
+  """Builds the Keras WeightSharedConvolutionalBoxPredictor from the arguments.
+
+  Args:
+    is_training: Indicates whether the BoxPredictor is in training mode.
+    num_classes: number of classes. Note that num_classes *does not*
+      include the background category, so if groundtruth labels take values
+      in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
+      assigned classification targets can range from {0,... K}).
+    conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
+      containing hyperparameters for convolution ops.
+    freeze_batchnorm: Whether to freeze batch norm parameters during
+      training or not. When training with a small batch size (e.g. 1), it is
+      desirable to freeze batch norm update and use pretrained batch norm
+      params.
+    inplace_batchnorm_update: Whether to update batch norm moving average
+      values inplace. When this is false, the train op must add a control
+      dependency on the tf.GraphKeys.UPDATE_OPS collection in order to update
+      batch norm statistics.
+    num_predictions_per_location_list: A list of integers representing the
+      number of box predictions to be made per spatial location for each
+      feature map.
+    depth: depth of conv layers.
+    num_layers_before_predictor: Number of the additional conv layers before
+      the predictor.
+    box_code_size: Size of encoding for each box.
+    kernel_size: Size of final convolution kernel.
+    add_background_class: Whether to add an implicit background class.
+    class_prediction_bias_init: Constant value to initialize bias of the last
+      conv2d layer before class prediction.
+    use_dropout: Whether to apply dropout to class prediction head.
+    dropout_keep_prob: Probability of keeping activations.
+    share_prediction_tower: Whether to share the multi-layer tower between box
+      prediction and class prediction heads.
+    apply_batch_norm: Whether to apply batch normalization to conv layers in
+      this predictor.
+    use_depthwise: Whether to use depthwise separable conv2d instead of conv2d.
+    score_converter_fn: Callable score converter to perform elementwise op on
+      class scores.
+    box_encodings_clip_range: Min and max values for clipping the box_encodings.
+    name: A string name scope to assign to the box predictor. If `None`, Keras
+      will auto-generate one from the class name.
+    keyword_args: A dictionary with additional args.
+
+  Returns:
+    A Keras WeightSharedConvolutionalBoxPredictor class.
+  """
+  if len(set(num_predictions_per_location_list)) > 1:
+    raise ValueError('num predictions per location must be the same for all '
+                     'feature maps, found: {}'.format(
+                         num_predictions_per_location_list))
+  num_predictions_per_location = num_predictions_per_location_list[0]
+
+  box_prediction_head = keras_box_head.WeightSharedConvolutionalBoxHead(
+      box_code_size=box_code_size,
+      kernel_size=kernel_size,
+      conv_hyperparams=conv_hyperparams,
+      num_predictions_per_location=num_predictions_per_location,
+      use_depthwise=use_depthwise,
+      box_encodings_clip_range=box_encodings_clip_range,
+      name='WeightSharedConvolutionalBoxHead')
+  class_prediction_head = keras_class_head.WeightSharedConvolutionalClassHead(
+      num_class_slots=(
+          num_classes + 1 if add_background_class else num_classes),
+      use_dropout=use_dropout,
+      dropout_keep_prob=dropout_keep_prob,
+      kernel_size=kernel_size,
+      conv_hyperparams=conv_hyperparams,
+      num_predictions_per_location=num_predictions_per_location,
+      class_prediction_bias_init=class_prediction_bias_init,
+      use_depthwise=use_depthwise,
+      score_converter_fn=score_converter_fn,
+      name='WeightSharedConvolutionalClassHead')
+  other_heads = {}
+
+  return (
+      convolutional_keras_box_predictor.WeightSharedConvolutionalBoxPredictor(
+          is_training=is_training,
+          num_classes=num_classes,
+          box_prediction_head=box_prediction_head,
+          class_prediction_head=class_prediction_head,
+          other_heads=other_heads,
+          conv_hyperparams=conv_hyperparams,
+          depth=depth,
+          num_layers_before_predictor=num_layers_before_predictor,
+          freeze_batchnorm=freeze_batchnorm,
+          inplace_batchnorm_update=inplace_batchnorm_update,
+          kernel_size=kernel_size,
+          apply_batch_norm=apply_batch_norm,
+          share_prediction_tower=share_prediction_tower,
+          use_depthwise=use_depthwise,
+          name=name))
+
+
+def build_mask_rcnn_keras_box_predictor(is_training,
+                                        num_classes,
+                                        fc_hyperparams,
+                                        freeze_batchnorm,
+                                        use_dropout,
+                                        dropout_keep_prob,
+                                        box_code_size,
+                                        add_background_class=True,
+                                        share_box_across_classes=False,
+                                        predict_instance_masks=False,
+                                        conv_hyperparams=None,
+                                        mask_height=14,
+                                        mask_width=14,
+                                        mask_prediction_num_conv_layers=2,
+                                        mask_prediction_conv_depth=256,
+                                        masks_are_class_agnostic=False,
+                                        convolve_then_upsample_masks=False):
+  """Builds and returns a MaskRCNNKerasBoxPredictor class.
+
+  Args:
+    is_training: Indicates whether the BoxPredictor is in training mode.
+    num_classes: number of classes. Note that num_classes *does not*
+      include the background category, so if groundtruth labels take values
+      in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
+      assigned classification targets can range from {0,... K}).
+ fc_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object + containing hyperparameters for fully connected dense ops. + freeze_batchnorm: Whether to freeze batch norm parameters during + training or not. When training with a small batch size (e.g. 1), it is + desirable to freeze batch norm update and use pretrained batch norm + params. + use_dropout: Option to use dropout or not. Note that a single dropout + op is applied here prior to both box and class predictions, which stands + in contrast to the ConvolutionalBoxPredictor below. + dropout_keep_prob: Keep probability for dropout. + This is only used if use_dropout is True. + box_code_size: Size of encoding for each box. + add_background_class: Whether to add an implicit background class. + share_box_across_classes: Whether to share boxes across classes rather + than use a different box for each class. + predict_instance_masks: If True, will add a third stage mask prediction + to the returned class. + conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object + containing hyperparameters for convolution ops. + mask_height: Desired output mask height. The default value is 14. + mask_width: Desired output mask width. The default value is 14. + mask_prediction_num_conv_layers: Number of convolution layers applied to + the image_features in mask prediction branch. + mask_prediction_conv_depth: The depth for the first conv2d_transpose op + applied to the image_features in the mask prediction branch. If set + to 0, the depth of the convolution layers will be automatically chosen + based on the number of object classes and the number of channels in the + image features. + masks_are_class_agnostic: Boolean determining if the mask-head is + class-agnostic or not. + convolve_then_upsample_masks: Whether to apply convolutions on mask + features before upsampling using nearest neighbor resizing. Otherwise, + mask features are resized to [`mask_height`, `mask_width`] using + bilinear resizing before applying convolutions. + + Returns: + A MaskRCNNKerasBoxPredictor class. + """ + box_prediction_head = keras_box_head.MaskRCNNBoxHead( + is_training=is_training, + num_classes=num_classes, + fc_hyperparams=fc_hyperparams, + freeze_batchnorm=freeze_batchnorm, + use_dropout=use_dropout, + dropout_keep_prob=dropout_keep_prob, + box_code_size=box_code_size, + share_box_across_classes=share_box_across_classes) + class_prediction_head = keras_class_head.MaskRCNNClassHead( + is_training=is_training, + num_class_slots=num_classes + 1 if add_background_class else num_classes, + fc_hyperparams=fc_hyperparams, + freeze_batchnorm=freeze_batchnorm, + use_dropout=use_dropout, + dropout_keep_prob=dropout_keep_prob) + third_stage_heads = {} + if predict_instance_masks: + third_stage_heads[ + mask_rcnn_box_predictor. 
+ MASK_PREDICTIONS] = keras_mask_head.MaskRCNNMaskHead( + is_training=is_training, + num_classes=num_classes, + conv_hyperparams=conv_hyperparams, + freeze_batchnorm=freeze_batchnorm, + mask_height=mask_height, + mask_width=mask_width, + mask_prediction_num_conv_layers=mask_prediction_num_conv_layers, + mask_prediction_conv_depth=mask_prediction_conv_depth, + masks_are_class_agnostic=masks_are_class_agnostic, + convolve_then_upsample=convolve_then_upsample_masks) + return mask_rcnn_keras_box_predictor.MaskRCNNKerasBoxPredictor( + is_training=is_training, + num_classes=num_classes, + freeze_batchnorm=freeze_batchnorm, + box_prediction_head=box_prediction_head, + class_prediction_head=class_prediction_head, + third_stage_heads=third_stage_heads) + + +def build_mask_rcnn_box_predictor(is_training, + num_classes, + fc_hyperparams_fn, + use_dropout, + dropout_keep_prob, + box_code_size, + add_background_class=True, + share_box_across_classes=False, + predict_instance_masks=False, + conv_hyperparams_fn=None, + mask_height=14, + mask_width=14, + mask_prediction_num_conv_layers=2, + mask_prediction_conv_depth=256, + masks_are_class_agnostic=False, + convolve_then_upsample_masks=False): + """Builds and returns a MaskRCNNBoxPredictor class. + + Args: + is_training: Indicates whether the BoxPredictor is in training mode. + num_classes: number of classes. Note that num_classes *does not* + include the background category, so if groundtruth labels take values + in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the + assigned classification targets can range from {0,... K}). + fc_hyperparams_fn: A function to generate tf-slim arg_scope with + hyperparameters for fully connected ops. + use_dropout: Option to use dropout or not. Note that a single dropout + op is applied here prior to both box and class predictions, which stands + in contrast to the ConvolutionalBoxPredictor below. + dropout_keep_prob: Keep probability for dropout. + This is only used if use_dropout is True. + box_code_size: Size of encoding for each box. + add_background_class: Whether to add an implicit background class. + share_box_across_classes: Whether to share boxes across classes rather + than use a different box for each class. + predict_instance_masks: If True, will add a third stage mask prediction + to the returned class. + conv_hyperparams_fn: A function to generate tf-slim arg_scope with + hyperparameters for convolution ops. + mask_height: Desired output mask height. The default value is 14. + mask_width: Desired output mask width. The default value is 14. + mask_prediction_num_conv_layers: Number of convolution layers applied to + the image_features in mask prediction branch. + mask_prediction_conv_depth: The depth for the first conv2d_transpose op + applied to the image_features in the mask prediction branch. If set + to 0, the depth of the convolution layers will be automatically chosen + based on the number of object classes and the number of channels in the + image features. + masks_are_class_agnostic: Boolean determining if the mask-head is + class-agnostic or not. + convolve_then_upsample_masks: Whether to apply convolutions on mask + features before upsampling using nearest neighbor resizing. Otherwise, + mask features are resized to [`mask_height`, `mask_width`] using + bilinear resizing before applying convolutions. + + Returns: + A MaskRCNNBoxPredictor class. 
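+
+  A minimal call sketch (illustrative; `fc_hyperparams_fn` is assumed to be
+  an arg_scope function produced elsewhere, e.g. by an argscope builder):
+
+    box_predictor = build_mask_rcnn_box_predictor(
+        is_training=True,
+        num_classes=90,
+        fc_hyperparams_fn=fc_hyperparams_fn,
+        use_dropout=False,
+        dropout_keep_prob=0.5,
+        box_code_size=4)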
+ """ + box_prediction_head = box_head.MaskRCNNBoxHead( + is_training=is_training, + num_classes=num_classes, + fc_hyperparams_fn=fc_hyperparams_fn, + use_dropout=use_dropout, + dropout_keep_prob=dropout_keep_prob, + box_code_size=box_code_size, + share_box_across_classes=share_box_across_classes) + class_prediction_head = class_head.MaskRCNNClassHead( + is_training=is_training, + num_class_slots=num_classes + 1 if add_background_class else num_classes, + fc_hyperparams_fn=fc_hyperparams_fn, + use_dropout=use_dropout, + dropout_keep_prob=dropout_keep_prob) + third_stage_heads = {} + if predict_instance_masks: + third_stage_heads[ + mask_rcnn_box_predictor. + MASK_PREDICTIONS] = mask_head.MaskRCNNMaskHead( + num_classes=num_classes, + conv_hyperparams_fn=conv_hyperparams_fn, + mask_height=mask_height, + mask_width=mask_width, + mask_prediction_num_conv_layers=mask_prediction_num_conv_layers, + mask_prediction_conv_depth=mask_prediction_conv_depth, + masks_are_class_agnostic=masks_are_class_agnostic, + convolve_then_upsample=convolve_then_upsample_masks) + return mask_rcnn_box_predictor.MaskRCNNBoxPredictor( + is_training=is_training, + num_classes=num_classes, + box_prediction_head=box_prediction_head, + class_prediction_head=class_prediction_head, + third_stage_heads=third_stage_heads) + + +def build_score_converter(score_converter_config, is_training): + """Builds score converter based on the config. + + Builds one of [tf.identity, tf.sigmoid] score converters based on the config + and whether the BoxPredictor is for training or inference. + + Args: + score_converter_config: + box_predictor_pb2.WeightSharedConvolutionalBoxPredictor.score_converter. + is_training: Indicates whether the BoxPredictor is in training mode. + + Returns: + Callable score converter op. + + Raises: + ValueError: On unknown score converter. + """ + if score_converter_config == ( + box_predictor_pb2.WeightSharedConvolutionalBoxPredictor.IDENTITY): + return tf.identity + if score_converter_config == ( + box_predictor_pb2.WeightSharedConvolutionalBoxPredictor.SIGMOID): + return tf.identity if is_training else tf.sigmoid + raise ValueError('Unknown score converter.') + + +BoxEncodingsClipRange = collections.namedtuple('BoxEncodingsClipRange', + ['min', 'max']) + + +def build(argscope_fn, box_predictor_config, is_training, num_classes, + add_background_class=True): + """Builds box predictor based on the configuration. + + Builds box predictor based on the configuration. See box_predictor.proto for + configurable options. Also, see box_predictor.py for more details. + + Args: + argscope_fn: A function that takes the following inputs: + * hyperparams_pb2.Hyperparams proto + * a boolean indicating if the model is in training mode. + and returns a tf slim argscope for Conv and FC hyperparameters. + box_predictor_config: box_predictor_pb2.BoxPredictor proto containing + configuration. + is_training: Whether the models is in training mode. + num_classes: Number of classes to predict. + add_background_class: Whether to add an implicit background class. + + Returns: + box_predictor: box_predictor.BoxPredictor object. + + Raises: + ValueError: On unknown box predictor. 
+ """ + if not isinstance(box_predictor_config, box_predictor_pb2.BoxPredictor): + raise ValueError('box_predictor_config not of type ' + 'box_predictor_pb2.BoxPredictor.') + + box_predictor_oneof = box_predictor_config.WhichOneof('box_predictor_oneof') + + if box_predictor_oneof == 'convolutional_box_predictor': + config_box_predictor = box_predictor_config.convolutional_box_predictor + conv_hyperparams_fn = argscope_fn(config_box_predictor.conv_hyperparams, + is_training) + # Optionally apply clipping to box encodings, when box_encodings_clip_range + # is set. + box_encodings_clip_range = None + if config_box_predictor.HasField('box_encodings_clip_range'): + box_encodings_clip_range = BoxEncodingsClipRange( + min=config_box_predictor.box_encodings_clip_range.min, + max=config_box_predictor.box_encodings_clip_range.max) + return build_convolutional_box_predictor( + is_training=is_training, + num_classes=num_classes, + add_background_class=add_background_class, + conv_hyperparams_fn=conv_hyperparams_fn, + use_dropout=config_box_predictor.use_dropout, + dropout_keep_prob=config_box_predictor.dropout_keep_probability, + box_code_size=config_box_predictor.box_code_size, + kernel_size=config_box_predictor.kernel_size, + num_layers_before_predictor=( + config_box_predictor.num_layers_before_predictor), + min_depth=config_box_predictor.min_depth, + max_depth=config_box_predictor.max_depth, + apply_sigmoid_to_scores=config_box_predictor.apply_sigmoid_to_scores, + class_prediction_bias_init=( + config_box_predictor.class_prediction_bias_init), + use_depthwise=config_box_predictor.use_depthwise, + box_encodings_clip_range=box_encodings_clip_range) + + if box_predictor_oneof == 'weight_shared_convolutional_box_predictor': + config_box_predictor = ( + box_predictor_config.weight_shared_convolutional_box_predictor) + conv_hyperparams_fn = argscope_fn(config_box_predictor.conv_hyperparams, + is_training) + apply_batch_norm = config_box_predictor.conv_hyperparams.HasField( + 'batch_norm') + # During training phase, logits are used to compute the loss. Only apply + # sigmoid at inference to make the inference graph TPU friendly. + score_converter_fn = build_score_converter( + config_box_predictor.score_converter, is_training) + # Optionally apply clipping to box encodings, when box_encodings_clip_range + # is set. 
+ box_encodings_clip_range = None + if config_box_predictor.HasField('box_encodings_clip_range'): + box_encodings_clip_range = BoxEncodingsClipRange( + min=config_box_predictor.box_encodings_clip_range.min, + max=config_box_predictor.box_encodings_clip_range.max) + keyword_args = None + + return build_weight_shared_convolutional_box_predictor( + is_training=is_training, + num_classes=num_classes, + add_background_class=add_background_class, + conv_hyperparams_fn=conv_hyperparams_fn, + depth=config_box_predictor.depth, + num_layers_before_predictor=( + config_box_predictor.num_layers_before_predictor), + box_code_size=config_box_predictor.box_code_size, + kernel_size=config_box_predictor.kernel_size, + class_prediction_bias_init=( + config_box_predictor.class_prediction_bias_init), + use_dropout=config_box_predictor.use_dropout, + dropout_keep_prob=config_box_predictor.dropout_keep_probability, + share_prediction_tower=config_box_predictor.share_prediction_tower, + apply_batch_norm=apply_batch_norm, + use_depthwise=config_box_predictor.use_depthwise, + score_converter_fn=score_converter_fn, + box_encodings_clip_range=box_encodings_clip_range, + keyword_args=keyword_args) + + + if box_predictor_oneof == 'mask_rcnn_box_predictor': + config_box_predictor = box_predictor_config.mask_rcnn_box_predictor + fc_hyperparams_fn = argscope_fn(config_box_predictor.fc_hyperparams, + is_training) + conv_hyperparams_fn = None + if config_box_predictor.HasField('conv_hyperparams'): + conv_hyperparams_fn = argscope_fn( + config_box_predictor.conv_hyperparams, is_training) + return build_mask_rcnn_box_predictor( + is_training=is_training, + num_classes=num_classes, + add_background_class=add_background_class, + fc_hyperparams_fn=fc_hyperparams_fn, + use_dropout=config_box_predictor.use_dropout, + dropout_keep_prob=config_box_predictor.dropout_keep_probability, + box_code_size=config_box_predictor.box_code_size, + share_box_across_classes=( + config_box_predictor.share_box_across_classes), + predict_instance_masks=config_box_predictor.predict_instance_masks, + conv_hyperparams_fn=conv_hyperparams_fn, + mask_height=config_box_predictor.mask_height, + mask_width=config_box_predictor.mask_width, + mask_prediction_num_conv_layers=( + config_box_predictor.mask_prediction_num_conv_layers), + mask_prediction_conv_depth=( + config_box_predictor.mask_prediction_conv_depth), + masks_are_class_agnostic=( + config_box_predictor.masks_are_class_agnostic), + convolve_then_upsample_masks=( + config_box_predictor.convolve_then_upsample_masks)) + + if box_predictor_oneof == 'rfcn_box_predictor': + config_box_predictor = box_predictor_config.rfcn_box_predictor + conv_hyperparams_fn = argscope_fn(config_box_predictor.conv_hyperparams, + is_training) + box_predictor_object = rfcn_box_predictor.RfcnBoxPredictor( + is_training=is_training, + num_classes=num_classes, + conv_hyperparams_fn=conv_hyperparams_fn, + crop_size=[config_box_predictor.crop_height, + config_box_predictor.crop_width], + num_spatial_bins=[config_box_predictor.num_spatial_bins_height, + config_box_predictor.num_spatial_bins_width], + depth=config_box_predictor.depth, + box_code_size=config_box_predictor.box_code_size) + return box_predictor_object + raise ValueError('Unknown box predictor: {}'.format(box_predictor_oneof)) + + +def build_keras(hyperparams_fn, freeze_batchnorm, inplace_batchnorm_update, + num_predictions_per_location_list, box_predictor_config, + is_training, num_classes, add_background_class=True): + """Builds a Keras-based box predictor based on 
the configuration.
+
+  Builds Keras-based box predictor based on the configuration.
+  See box_predictor.proto for configurable options. Also, see box_predictor.py
+  for more details.
+
+  Args:
+    hyperparams_fn: A function that takes a hyperparams_pb2.Hyperparams
+      proto and returns a `hyperparams_builder.KerasLayerHyperparams`
+      for Conv or FC hyperparameters.
+    freeze_batchnorm: Whether to freeze batch norm parameters during
+      training or not. When training with a small batch size (e.g. 1), it is
+      desirable to freeze batch norm update and use pretrained batch norm
+      params.
+    inplace_batchnorm_update: Whether to update batch norm moving average
+      values inplace. When this is false, the train op must add a control
+      dependency on the tf.GraphKeys.UPDATE_OPS collection in order to update
+      batch norm statistics.
+    num_predictions_per_location_list: A list of integers representing the
+      number of box predictions to be made per spatial location for each
+      feature map.
+    box_predictor_config: box_predictor_pb2.BoxPredictor proto containing
+      configuration.
+    is_training: Whether the model is in training mode.
+    num_classes: Number of classes to predict.
+    add_background_class: Whether to add an implicit background class.
+
+  Returns:
+    box_predictor: box_predictor.KerasBoxPredictor object.
+
+  Raises:
+    ValueError: On unknown box predictor, or one with no Keras box predictor.
+  """
+  if not isinstance(box_predictor_config, box_predictor_pb2.BoxPredictor):
+    raise ValueError('box_predictor_config not of type '
+                     'box_predictor_pb2.BoxPredictor.')
+
+  box_predictor_oneof = box_predictor_config.WhichOneof('box_predictor_oneof')
+
+  if box_predictor_oneof == 'convolutional_box_predictor':
+    config_box_predictor = box_predictor_config.convolutional_box_predictor
+    conv_hyperparams = hyperparams_fn(
+        config_box_predictor.conv_hyperparams)
+    # Optionally apply clipping to box encodings, when box_encodings_clip_range
+    # is set.
+    box_encodings_clip_range = None
+    if config_box_predictor.HasField('box_encodings_clip_range'):
+      box_encodings_clip_range = BoxEncodingsClipRange(
+          min=config_box_predictor.box_encodings_clip_range.min,
+          max=config_box_predictor.box_encodings_clip_range.max)
+
+    return build_convolutional_keras_box_predictor(
+        is_training=is_training,
+        num_classes=num_classes,
+        add_background_class=add_background_class,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        num_predictions_per_location_list=num_predictions_per_location_list,
+        use_dropout=config_box_predictor.use_dropout,
+        dropout_keep_prob=config_box_predictor.dropout_keep_probability,
+        box_code_size=config_box_predictor.box_code_size,
+        kernel_size=config_box_predictor.kernel_size,
+        num_layers_before_predictor=(
+            config_box_predictor.num_layers_before_predictor),
+        min_depth=config_box_predictor.min_depth,
+        max_depth=config_box_predictor.max_depth,
+        class_prediction_bias_init=(
+            config_box_predictor.class_prediction_bias_init),
+        use_depthwise=config_box_predictor.use_depthwise,
+        box_encodings_clip_range=box_encodings_clip_range)
+
+  if box_predictor_oneof == 'weight_shared_convolutional_box_predictor':
+    config_box_predictor = (
+        box_predictor_config.weight_shared_convolutional_box_predictor)
+    conv_hyperparams = hyperparams_fn(config_box_predictor.conv_hyperparams)
+    apply_batch_norm = config_box_predictor.conv_hyperparams.HasField(
+        'batch_norm')
+    # During training phase, logits are used to compute the loss.
Only apply + # sigmoid at inference to make the inference graph TPU friendly. This is + # required because during TPU inference, model.postprocess is not called. + score_converter_fn = build_score_converter( + config_box_predictor.score_converter, is_training) + # Optionally apply clipping to box encodings, when box_encodings_clip_range + # is set. + box_encodings_clip_range = None + if config_box_predictor.HasField('box_encodings_clip_range'): + box_encodings_clip_range = BoxEncodingsClipRange( + min=config_box_predictor.box_encodings_clip_range.min, + max=config_box_predictor.box_encodings_clip_range.max) + keyword_args = None + + return build_weight_shared_convolutional_keras_box_predictor( + is_training=is_training, + num_classes=num_classes, + conv_hyperparams=conv_hyperparams, + freeze_batchnorm=freeze_batchnorm, + inplace_batchnorm_update=inplace_batchnorm_update, + num_predictions_per_location_list=num_predictions_per_location_list, + depth=config_box_predictor.depth, + num_layers_before_predictor=( + config_box_predictor.num_layers_before_predictor), + box_code_size=config_box_predictor.box_code_size, + kernel_size=config_box_predictor.kernel_size, + add_background_class=add_background_class, + class_prediction_bias_init=( + config_box_predictor.class_prediction_bias_init), + use_dropout=config_box_predictor.use_dropout, + dropout_keep_prob=config_box_predictor.dropout_keep_probability, + share_prediction_tower=config_box_predictor.share_prediction_tower, + apply_batch_norm=apply_batch_norm, + use_depthwise=config_box_predictor.use_depthwise, + score_converter_fn=score_converter_fn, + box_encodings_clip_range=box_encodings_clip_range, + keyword_args=keyword_args) + + if box_predictor_oneof == 'mask_rcnn_box_predictor': + config_box_predictor = box_predictor_config.mask_rcnn_box_predictor + fc_hyperparams = hyperparams_fn(config_box_predictor.fc_hyperparams) + conv_hyperparams = None + if config_box_predictor.HasField('conv_hyperparams'): + conv_hyperparams = hyperparams_fn( + config_box_predictor.conv_hyperparams) + return build_mask_rcnn_keras_box_predictor( + is_training=is_training, + num_classes=num_classes, + add_background_class=add_background_class, + fc_hyperparams=fc_hyperparams, + freeze_batchnorm=freeze_batchnorm, + use_dropout=config_box_predictor.use_dropout, + dropout_keep_prob=config_box_predictor.dropout_keep_probability, + box_code_size=config_box_predictor.box_code_size, + share_box_across_classes=( + config_box_predictor.share_box_across_classes), + predict_instance_masks=config_box_predictor.predict_instance_masks, + conv_hyperparams=conv_hyperparams, + mask_height=config_box_predictor.mask_height, + mask_width=config_box_predictor.mask_width, + mask_prediction_num_conv_layers=( + config_box_predictor.mask_prediction_num_conv_layers), + mask_prediction_conv_depth=( + config_box_predictor.mask_prediction_conv_depth), + masks_are_class_agnostic=( + config_box_predictor.masks_are_class_agnostic), + convolve_then_upsample_masks=( + config_box_predictor.convolve_then_upsample_masks)) + + if box_predictor_oneof == 'rfcn_box_predictor': + config_box_predictor = box_predictor_config.rfcn_box_predictor + conv_hyperparams = hyperparams_fn(config_box_predictor.conv_hyperparams) + box_predictor_object = rfcn_keras_box_predictor.RfcnKerasBoxPredictor( + is_training=is_training, + num_classes=num_classes, + conv_hyperparams=conv_hyperparams, + freeze_batchnorm=freeze_batchnorm, + crop_size=[config_box_predictor.crop_height, + config_box_predictor.crop_width], + 
num_spatial_bins=[config_box_predictor.num_spatial_bins_height, + config_box_predictor.num_spatial_bins_width], + depth=config_box_predictor.depth, + box_code_size=config_box_predictor.box_code_size) + return box_predictor_object + + raise ValueError( + 'Unknown box predictor for Keras: {}'.format(box_predictor_oneof)) diff --git a/builders/box_predictor_builder_test.py b/builders/box_predictor_builder_test.py new file mode 100644 index 0000000..2494bc3 --- /dev/null +++ b/builders/box_predictor_builder_test.py @@ -0,0 +1,661 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for box_predictor_builder.""" + +import mock +import tensorflow as tf + +from google.protobuf import text_format +from object_detection.builders import box_predictor_builder +from object_detection.builders import hyperparams_builder +from object_detection.predictors import mask_rcnn_box_predictor +from object_detection.protos import box_predictor_pb2 +from object_detection.protos import hyperparams_pb2 + + +class ConvolutionalBoxPredictorBuilderTest(tf.test.TestCase): + + def test_box_predictor_calls_conv_argscope_fn(self): + conv_hyperparams_text_proto = """ + regularizer { + l1_regularizer { + weight: 0.0003 + } + } + initializer { + truncated_normal_initializer { + mean: 0.0 + stddev: 0.3 + } + } + activation: RELU_6 + """ + hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto) + def mock_conv_argscope_builder(conv_hyperparams_arg, is_training): + return (conv_hyperparams_arg, is_training) + + box_predictor_proto = box_predictor_pb2.BoxPredictor() + box_predictor_proto.convolutional_box_predictor.conv_hyperparams.CopyFrom( + hyperparams_proto) + box_predictor = box_predictor_builder.build( + argscope_fn=mock_conv_argscope_builder, + box_predictor_config=box_predictor_proto, + is_training=False, + num_classes=10) + (conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams_fn + self.assertAlmostEqual((hyperparams_proto.regularizer. + l1_regularizer.weight), + (conv_hyperparams_actual.regularizer.l1_regularizer. + weight)) + self.assertAlmostEqual((hyperparams_proto.initializer. + truncated_normal_initializer.stddev), + (conv_hyperparams_actual.initializer. + truncated_normal_initializer.stddev)) + self.assertAlmostEqual((hyperparams_proto.initializer. + truncated_normal_initializer.mean), + (conv_hyperparams_actual.initializer. 
+ truncated_normal_initializer.mean)) + self.assertEqual(hyperparams_proto.activation, + conv_hyperparams_actual.activation) + self.assertFalse(is_training) + + def test_construct_non_default_conv_box_predictor(self): + box_predictor_text_proto = """ + convolutional_box_predictor { + min_depth: 2 + max_depth: 16 + num_layers_before_predictor: 2 + use_dropout: false + dropout_keep_probability: 0.4 + kernel_size: 3 + box_code_size: 3 + apply_sigmoid_to_scores: true + class_prediction_bias_init: 4.0 + use_depthwise: true + } + """ + conv_hyperparams_text_proto = """ + regularizer { + l1_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto) + def mock_conv_argscope_builder(conv_hyperparams_arg, is_training): + return (conv_hyperparams_arg, is_training) + + box_predictor_proto = box_predictor_pb2.BoxPredictor() + text_format.Merge(box_predictor_text_proto, box_predictor_proto) + box_predictor_proto.convolutional_box_predictor.conv_hyperparams.CopyFrom( + hyperparams_proto) + box_predictor = box_predictor_builder.build( + argscope_fn=mock_conv_argscope_builder, + box_predictor_config=box_predictor_proto, + is_training=False, + num_classes=10, + add_background_class=False) + class_head = box_predictor._class_prediction_head + self.assertEqual(box_predictor._min_depth, 2) + self.assertEqual(box_predictor._max_depth, 16) + self.assertEqual(box_predictor._num_layers_before_predictor, 2) + self.assertFalse(class_head._use_dropout) + self.assertAlmostEqual(class_head._dropout_keep_prob, 0.4) + self.assertTrue(class_head._apply_sigmoid_to_scores) + self.assertAlmostEqual(class_head._class_prediction_bias_init, 4.0) + self.assertEqual(class_head._num_class_slots, 10) + self.assertEqual(box_predictor.num_classes, 10) + self.assertFalse(box_predictor._is_training) + self.assertTrue(class_head._use_depthwise) + + def test_construct_default_conv_box_predictor(self): + box_predictor_text_proto = """ + convolutional_box_predictor { + conv_hyperparams { + regularizer { + l1_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + } + }""" + box_predictor_proto = box_predictor_pb2.BoxPredictor() + text_format.Merge(box_predictor_text_proto, box_predictor_proto) + box_predictor = box_predictor_builder.build( + argscope_fn=hyperparams_builder.build, + box_predictor_config=box_predictor_proto, + is_training=True, + num_classes=90) + class_head = box_predictor._class_prediction_head + self.assertEqual(box_predictor._min_depth, 0) + self.assertEqual(box_predictor._max_depth, 0) + self.assertEqual(box_predictor._num_layers_before_predictor, 0) + self.assertTrue(class_head._use_dropout) + self.assertAlmostEqual(class_head._dropout_keep_prob, 0.8) + self.assertFalse(class_head._apply_sigmoid_to_scores) + self.assertEqual(class_head._num_class_slots, 91) + self.assertEqual(box_predictor.num_classes, 90) + self.assertTrue(box_predictor._is_training) + self.assertFalse(class_head._use_depthwise) + + +class WeightSharedConvolutionalBoxPredictorBuilderTest(tf.test.TestCase): + + def test_box_predictor_calls_conv_argscope_fn(self): + conv_hyperparams_text_proto = """ + regularizer { + l1_regularizer { + weight: 0.0003 + } + } + initializer { + truncated_normal_initializer { + mean: 0.0 + stddev: 0.3 + } + } + activation: RELU_6 + """ + hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto) + 
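# The mock below stands in for hyperparams_builder.build: returning its
+    # inputs unchanged lets the test unpack box_predictor._conv_hyperparams_fn
+    # and check exactly what the builder forwarded.
+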
def mock_conv_argscope_builder(conv_hyperparams_arg, is_training): + return (conv_hyperparams_arg, is_training) + + box_predictor_proto = box_predictor_pb2.BoxPredictor() + (box_predictor_proto.weight_shared_convolutional_box_predictor + .conv_hyperparams.CopyFrom(hyperparams_proto)) + box_predictor = box_predictor_builder.build( + argscope_fn=mock_conv_argscope_builder, + box_predictor_config=box_predictor_proto, + is_training=False, + num_classes=10) + (conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams_fn + self.assertAlmostEqual((hyperparams_proto.regularizer. + l1_regularizer.weight), + (conv_hyperparams_actual.regularizer.l1_regularizer. + weight)) + self.assertAlmostEqual((hyperparams_proto.initializer. + truncated_normal_initializer.stddev), + (conv_hyperparams_actual.initializer. + truncated_normal_initializer.stddev)) + self.assertAlmostEqual((hyperparams_proto.initializer. + truncated_normal_initializer.mean), + (conv_hyperparams_actual.initializer. + truncated_normal_initializer.mean)) + self.assertEqual(hyperparams_proto.activation, + conv_hyperparams_actual.activation) + self.assertFalse(is_training) + + def test_construct_non_default_conv_box_predictor(self): + box_predictor_text_proto = """ + weight_shared_convolutional_box_predictor { + depth: 2 + num_layers_before_predictor: 2 + kernel_size: 7 + box_code_size: 3 + class_prediction_bias_init: 4.0 + } + """ + conv_hyperparams_text_proto = """ + regularizer { + l1_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto) + def mock_conv_argscope_builder(conv_hyperparams_arg, is_training): + return (conv_hyperparams_arg, is_training) + + box_predictor_proto = box_predictor_pb2.BoxPredictor() + text_format.Merge(box_predictor_text_proto, box_predictor_proto) + (box_predictor_proto.weight_shared_convolutional_box_predictor. + conv_hyperparams.CopyFrom(hyperparams_proto)) + box_predictor = box_predictor_builder.build( + argscope_fn=mock_conv_argscope_builder, + box_predictor_config=box_predictor_proto, + is_training=False, + num_classes=10, + add_background_class=False) + class_head = box_predictor._class_prediction_head + self.assertEqual(box_predictor._depth, 2) + self.assertEqual(box_predictor._num_layers_before_predictor, 2) + self.assertAlmostEqual(class_head._class_prediction_bias_init, 4.0) + self.assertEqual(box_predictor.num_classes, 10) + self.assertFalse(box_predictor._is_training) + self.assertEqual(box_predictor._apply_batch_norm, False) + + def test_construct_non_default_depthwise_conv_box_predictor(self): + box_predictor_text_proto = """ + weight_shared_convolutional_box_predictor { + depth: 2 + num_layers_before_predictor: 2 + kernel_size: 7 + box_code_size: 3 + class_prediction_bias_init: 4.0 + use_depthwise: true + } + """ + conv_hyperparams_text_proto = """ + regularizer { + l1_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto) + def mock_conv_argscope_builder(conv_hyperparams_arg, is_training): + return (conv_hyperparams_arg, is_training) + + box_predictor_proto = box_predictor_pb2.BoxPredictor() + text_format.Merge(box_predictor_text_proto, box_predictor_proto) + (box_predictor_proto.weight_shared_convolutional_box_predictor. 
+ conv_hyperparams.CopyFrom(hyperparams_proto)) + box_predictor = box_predictor_builder.build( + argscope_fn=mock_conv_argscope_builder, + box_predictor_config=box_predictor_proto, + is_training=False, + num_classes=10, + add_background_class=False) + class_head = box_predictor._class_prediction_head + self.assertEqual(box_predictor._depth, 2) + self.assertEqual(box_predictor._num_layers_before_predictor, 2) + self.assertEqual(box_predictor._apply_batch_norm, False) + self.assertEqual(box_predictor._use_depthwise, True) + self.assertAlmostEqual(class_head._class_prediction_bias_init, 4.0) + self.assertEqual(box_predictor.num_classes, 10) + self.assertFalse(box_predictor._is_training) + + def test_construct_default_conv_box_predictor(self): + box_predictor_text_proto = """ + weight_shared_convolutional_box_predictor { + conv_hyperparams { + regularizer { + l1_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + } + }""" + box_predictor_proto = box_predictor_pb2.BoxPredictor() + text_format.Merge(box_predictor_text_proto, box_predictor_proto) + box_predictor = box_predictor_builder.build( + argscope_fn=hyperparams_builder.build, + box_predictor_config=box_predictor_proto, + is_training=True, + num_classes=90) + self.assertEqual(box_predictor._depth, 0) + self.assertEqual(box_predictor._num_layers_before_predictor, 0) + self.assertEqual(box_predictor.num_classes, 90) + self.assertTrue(box_predictor._is_training) + self.assertEqual(box_predictor._apply_batch_norm, False) + + def test_construct_default_conv_box_predictor_with_batch_norm(self): + box_predictor_text_proto = """ + weight_shared_convolutional_box_predictor { + conv_hyperparams { + regularizer { + l1_regularizer { + } + } + batch_norm { + train: true + } + initializer { + truncated_normal_initializer { + } + } + } + }""" + box_predictor_proto = box_predictor_pb2.BoxPredictor() + text_format.Merge(box_predictor_text_proto, box_predictor_proto) + box_predictor = box_predictor_builder.build( + argscope_fn=hyperparams_builder.build, + box_predictor_config=box_predictor_proto, + is_training=True, + num_classes=90) + self.assertEqual(box_predictor._depth, 0) + self.assertEqual(box_predictor._num_layers_before_predictor, 0) + self.assertEqual(box_predictor.num_classes, 90) + self.assertTrue(box_predictor._is_training) + self.assertEqual(box_predictor._apply_batch_norm, True) + + + + + +class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase): + + def test_box_predictor_builder_calls_fc_argscope_fn(self): + fc_hyperparams_text_proto = """ + regularizer { + l1_regularizer { + weight: 0.0003 + } + } + initializer { + truncated_normal_initializer { + mean: 0.0 + stddev: 0.3 + } + } + activation: RELU_6 + op: FC + """ + hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(fc_hyperparams_text_proto, hyperparams_proto) + box_predictor_proto = box_predictor_pb2.BoxPredictor() + box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.CopyFrom( + hyperparams_proto) + mock_argscope_fn = mock.Mock(return_value='arg_scope') + box_predictor = box_predictor_builder.build( + argscope_fn=mock_argscope_fn, + box_predictor_config=box_predictor_proto, + is_training=False, + num_classes=10) + mock_argscope_fn.assert_called_with(hyperparams_proto, False) + self.assertEqual(box_predictor._box_prediction_head._fc_hyperparams_fn, + 'arg_scope') + self.assertEqual(box_predictor._class_prediction_head._fc_hyperparams_fn, + 'arg_scope') + + def test_non_default_mask_rcnn_box_predictor(self): + fc_hyperparams_text_proto = 
""" + regularizer { + l1_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + activation: RELU_6 + op: FC + """ + box_predictor_text_proto = """ + mask_rcnn_box_predictor { + use_dropout: true + dropout_keep_probability: 0.8 + box_code_size: 3 + share_box_across_classes: true + } + """ + hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(fc_hyperparams_text_proto, hyperparams_proto) + def mock_fc_argscope_builder(fc_hyperparams_arg, is_training): + return (fc_hyperparams_arg, is_training) + + box_predictor_proto = box_predictor_pb2.BoxPredictor() + text_format.Merge(box_predictor_text_proto, box_predictor_proto) + box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.CopyFrom( + hyperparams_proto) + box_predictor = box_predictor_builder.build( + argscope_fn=mock_fc_argscope_builder, + box_predictor_config=box_predictor_proto, + is_training=True, + num_classes=90) + box_head = box_predictor._box_prediction_head + class_head = box_predictor._class_prediction_head + self.assertTrue(box_head._use_dropout) + self.assertTrue(class_head._use_dropout) + self.assertAlmostEqual(box_head._dropout_keep_prob, 0.8) + self.assertAlmostEqual(class_head._dropout_keep_prob, 0.8) + self.assertEqual(box_predictor.num_classes, 90) + self.assertTrue(box_predictor._is_training) + self.assertEqual(box_head._box_code_size, 3) + self.assertEqual(box_head._share_box_across_classes, True) + + def test_build_default_mask_rcnn_box_predictor(self): + box_predictor_proto = box_predictor_pb2.BoxPredictor() + box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.op = ( + hyperparams_pb2.Hyperparams.FC) + box_predictor = box_predictor_builder.build( + argscope_fn=mock.Mock(return_value='arg_scope'), + box_predictor_config=box_predictor_proto, + is_training=True, + num_classes=90) + box_head = box_predictor._box_prediction_head + class_head = box_predictor._class_prediction_head + self.assertFalse(box_head._use_dropout) + self.assertFalse(class_head._use_dropout) + self.assertAlmostEqual(box_head._dropout_keep_prob, 0.5) + self.assertEqual(box_predictor.num_classes, 90) + self.assertTrue(box_predictor._is_training) + self.assertEqual(box_head._box_code_size, 4) + self.assertEqual(len(box_predictor._third_stage_heads.keys()), 0) + + def test_build_box_predictor_with_mask_branch(self): + box_predictor_proto = box_predictor_pb2.BoxPredictor() + box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.op = ( + hyperparams_pb2.Hyperparams.FC) + box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams.op = ( + hyperparams_pb2.Hyperparams.CONV) + box_predictor_proto.mask_rcnn_box_predictor.predict_instance_masks = True + box_predictor_proto.mask_rcnn_box_predictor.mask_prediction_conv_depth = 512 + box_predictor_proto.mask_rcnn_box_predictor.mask_height = 16 + box_predictor_proto.mask_rcnn_box_predictor.mask_width = 16 + mock_argscope_fn = mock.Mock(return_value='arg_scope') + box_predictor = box_predictor_builder.build( + argscope_fn=mock_argscope_fn, + box_predictor_config=box_predictor_proto, + is_training=True, + num_classes=90) + mock_argscope_fn.assert_has_calls( + [mock.call(box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams, + True), + mock.call(box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams, + True)], any_order=True) + box_head = box_predictor._box_prediction_head + class_head = box_predictor._class_prediction_head + third_stage_heads = box_predictor._third_stage_heads + self.assertFalse(box_head._use_dropout) + 
self.assertFalse(class_head._use_dropout)
+    self.assertAlmostEqual(box_head._dropout_keep_prob, 0.5)
+    self.assertAlmostEqual(class_head._dropout_keep_prob, 0.5)
+    self.assertEqual(box_predictor.num_classes, 90)
+    self.assertTrue(box_predictor._is_training)
+    self.assertEqual(box_head._box_code_size, 4)
+    self.assertTrue(
+        mask_rcnn_box_predictor.MASK_PREDICTIONS in third_stage_heads)
+    self.assertEqual(
+        third_stage_heads[mask_rcnn_box_predictor.MASK_PREDICTIONS]
+        ._mask_prediction_conv_depth, 512)
+
+  def test_build_box_predictor_with_convolve_then_upsample_masks(self):
+    box_predictor_proto = box_predictor_pb2.BoxPredictor()
+    box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.op = (
+        hyperparams_pb2.Hyperparams.FC)
+    box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams.op = (
+        hyperparams_pb2.Hyperparams.CONV)
+    box_predictor_proto.mask_rcnn_box_predictor.predict_instance_masks = True
+    box_predictor_proto.mask_rcnn_box_predictor.mask_prediction_conv_depth = 512
+    box_predictor_proto.mask_rcnn_box_predictor.mask_height = 24
+    box_predictor_proto.mask_rcnn_box_predictor.mask_width = 24
+    box_predictor_proto.mask_rcnn_box_predictor.convolve_then_upsample_masks = (
+        True)
+
+    mock_argscope_fn = mock.Mock(return_value='arg_scope')
+    box_predictor = box_predictor_builder.build(
+        argscope_fn=mock_argscope_fn,
+        box_predictor_config=box_predictor_proto,
+        is_training=True,
+        num_classes=90)
+    mock_argscope_fn.assert_has_calls(
+        [mock.call(box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams,
+                   True),
+         mock.call(box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams,
+                   True)], any_order=True)
+    box_head = box_predictor._box_prediction_head
+    class_head = box_predictor._class_prediction_head
+    third_stage_heads = box_predictor._third_stage_heads
+    self.assertFalse(box_head._use_dropout)
+    self.assertFalse(class_head._use_dropout)
+    self.assertAlmostEqual(box_head._dropout_keep_prob, 0.5)
+    self.assertAlmostEqual(class_head._dropout_keep_prob, 0.5)
+    self.assertEqual(box_predictor.num_classes, 90)
+    self.assertTrue(box_predictor._is_training)
+    self.assertEqual(box_head._box_code_size, 4)
+    self.assertTrue(
+        mask_rcnn_box_predictor.MASK_PREDICTIONS in third_stage_heads)
+    self.assertEqual(
+        third_stage_heads[mask_rcnn_box_predictor.MASK_PREDICTIONS]
+        ._mask_prediction_conv_depth, 512)
+    self.assertTrue(third_stage_heads[mask_rcnn_box_predictor.MASK_PREDICTIONS]
+                    ._convolve_then_upsample)
+
+
+class RfcnBoxPredictorBuilderTest(tf.test.TestCase):
+
+  def test_box_predictor_calls_fc_argscope_fn(self):
+    conv_hyperparams_text_proto = """
+      regularizer {
+        l1_regularizer {
+          weight: 0.0003
+        }
+      }
+      initializer {
+        truncated_normal_initializer {
+          mean: 0.0
+          stddev: 0.3
+        }
+      }
+      activation: RELU_6
+    """
+    hyperparams_proto = hyperparams_pb2.Hyperparams()
+    text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
+    def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
+      return (conv_hyperparams_arg, is_training)
+
+    box_predictor_proto = box_predictor_pb2.BoxPredictor()
+    box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom(
+        hyperparams_proto)
+    box_predictor = box_predictor_builder.build(
+        argscope_fn=mock_conv_argscope_builder,
+        box_predictor_config=box_predictor_proto,
+        is_training=False,
+        num_classes=10)
+    (conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams_fn
+    self.assertAlmostEqual((hyperparams_proto.regularizer.
+                            l1_regularizer.weight),
+                           (conv_hyperparams_actual.regularizer.l1_regularizer.
+ weight)) + self.assertAlmostEqual((hyperparams_proto.initializer. + truncated_normal_initializer.stddev), + (conv_hyperparams_actual.initializer. + truncated_normal_initializer.stddev)) + self.assertAlmostEqual((hyperparams_proto.initializer. + truncated_normal_initializer.mean), + (conv_hyperparams_actual.initializer. + truncated_normal_initializer.mean)) + self.assertEqual(hyperparams_proto.activation, + conv_hyperparams_actual.activation) + self.assertFalse(is_training) + + def test_non_default_rfcn_box_predictor(self): + conv_hyperparams_text_proto = """ + regularizer { + l1_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + activation: RELU_6 + """ + box_predictor_text_proto = """ + rfcn_box_predictor { + num_spatial_bins_height: 4 + num_spatial_bins_width: 4 + depth: 4 + box_code_size: 3 + crop_height: 16 + crop_width: 16 + } + """ + hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto) + def mock_conv_argscope_builder(conv_hyperparams_arg, is_training): + return (conv_hyperparams_arg, is_training) + + box_predictor_proto = box_predictor_pb2.BoxPredictor() + text_format.Merge(box_predictor_text_proto, box_predictor_proto) + box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom( + hyperparams_proto) + box_predictor = box_predictor_builder.build( + argscope_fn=mock_conv_argscope_builder, + box_predictor_config=box_predictor_proto, + is_training=True, + num_classes=90) + self.assertEqual(box_predictor.num_classes, 90) + self.assertTrue(box_predictor._is_training) + self.assertEqual(box_predictor._box_code_size, 3) + self.assertEqual(box_predictor._num_spatial_bins, [4, 4]) + self.assertEqual(box_predictor._crop_size, [16, 16]) + + def test_default_rfcn_box_predictor(self): + conv_hyperparams_text_proto = """ + regularizer { + l1_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + activation: RELU_6 + """ + hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto) + def mock_conv_argscope_builder(conv_hyperparams_arg, is_training): + return (conv_hyperparams_arg, is_training) + + box_predictor_proto = box_predictor_pb2.BoxPredictor() + box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom( + hyperparams_proto) + box_predictor = box_predictor_builder.build( + argscope_fn=mock_conv_argscope_builder, + box_predictor_config=box_predictor_proto, + is_training=True, + num_classes=90) + self.assertEqual(box_predictor.num_classes, 90) + self.assertTrue(box_predictor._is_training) + self.assertEqual(box_predictor._box_code_size, 4) + self.assertEqual(box_predictor._num_spatial_bins, [3, 3]) + self.assertEqual(box_predictor._crop_size, [12, 12]) + + +if __name__ == '__main__': + tf.test.main() diff --git a/builders/calibration_builder.py b/builders/calibration_builder.py new file mode 100644 index 0000000..a99d38b --- /dev/null +++ b/builders/calibration_builder.py @@ -0,0 +1,250 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tensorflow ops to calibrate class predictions and background class.""" + +import tensorflow as tf +from object_detection.utils import shape_utils + + +def _find_interval_containing_new_value(x, new_value): + """Find the index of x (ascending-ordered) after which new_value occurs.""" + new_value_shape = shape_utils.combined_static_and_dynamic_shape(new_value)[0] + x_shape = shape_utils.combined_static_and_dynamic_shape(x)[0] + compare = tf.cast(tf.reshape(new_value, shape=(new_value_shape, 1)) >= + tf.reshape(x, shape=(1, x_shape)), + dtype=tf.int32) + diff = compare[:, 1:] - compare[:, :-1] + interval_idx = tf.argmin(diff, axis=1) + return interval_idx + + +def _tf_linear_interp1d(x_to_interpolate, fn_x, fn_y): + """Tensorflow implementation of 1d linear interpolation. + + Args: + x_to_interpolate: tf.float32 Tensor of shape (num_examples,) over which 1d + linear interpolation is performed. + fn_x: Monotonically-increasing, non-repeating tf.float32 Tensor of shape + (length,) used as the domain to approximate a function. + fn_y: tf.float32 Tensor of shape (length,) used as the range to approximate + a function. + + Returns: + tf.float32 Tensor of shape (num_examples,) + """ + x_pad = tf.concat([fn_x[:1] - 1, fn_x, fn_x[-1:] + 1], axis=0) + y_pad = tf.concat([fn_y[:1], fn_y, fn_y[-1:]], axis=0) + interval_idx = _find_interval_containing_new_value(x_pad, x_to_interpolate) + + # Interpolate + alpha = ( + (x_to_interpolate - tf.gather(x_pad, interval_idx)) / + (tf.gather(x_pad, interval_idx + 1) - tf.gather(x_pad, interval_idx))) + interpolation = ((1 - alpha) * tf.gather(y_pad, interval_idx) + + alpha * tf.gather(y_pad, interval_idx + 1)) + + return interpolation + + +def _function_approximation_proto_to_tf_tensors(x_y_pairs_message): + """Extracts (x,y) pairs from an XYPairs message. + + Args: + x_y_pairs_message: calibration_pb2.XYPairs proto + Returns: + tf_x: tf.float32 tensor of shape (number_xy_pairs,) for function domain. + tf_y: tf.float32 tensor of shape (number_xy_pairs,) for function range. + """ + tf_x = tf.convert_to_tensor([x_y_pair.x + for x_y_pair + in x_y_pairs_message.x_y_pair], + dtype=tf.float32) + tf_y = tf.convert_to_tensor([x_y_pair.y + for x_y_pair + in x_y_pairs_message.x_y_pair], + dtype=tf.float32) + return tf_x, tf_y + + +def _get_class_id_function_dict(calibration_config): + """Create a dictionary mapping class id to function approximations. + + Args: + calibration_config: calibration_pb2 proto containing + class_id_function_approximations. + Returns: + Dictionary mapping a class id to a tuple of TF tensors to be used for + function approximation. + """ + class_id_function_dict = {} + class_id_xy_pairs_map = ( + calibration_config.class_id_function_approximations.class_id_xy_pairs_map) + for class_id in class_id_xy_pairs_map: + class_id_function_dict[class_id] = ( + _function_approximation_proto_to_tf_tensors( + class_id_xy_pairs_map[class_id])) + + return class_id_function_dict + + +def build(calibration_config): + """Returns a function that calibrates Tensorflow model scores. + + All returned functions are expected to apply positive monotonic + transformations to inputs (i.e. score ordering is strictly preserved or + adjacent scores are mapped to the same score, but an input of lower value + should never exceed an input of higher value after transformation).
For + class-agnostic calibration, positive monotonicity should hold across all + scores. In class-specific cases, positive monotonicity should hold within each + class. + + Args: + calibration_config: calibration_pb2.CalibrationConfig proto. + Returns: + Function that accepts class_predictions_with_background and calibrates + the output based on calibration_config's parameters. + Raises: + ValueError: No calibration builder defined for "Oneof" in + calibration_config. + """ + + # Linear Interpolation (usually used as a result of calibration via + # isotonic regression). + if calibration_config.WhichOneof('calibrator') == 'function_approximation': + + def calibration_fn(class_predictions_with_background): + """Calibrate predictions via 1-d linear interpolation. + + Prediction scores are linearly interpolated based on a class-agnostic + function approximation. Note that the 0-indexed background class is also + transformed. + + Args: + class_predictions_with_background: tf.float32 tensor of shape + [batch_size, num_anchors, num_classes + 1] containing scores on the + interval [0,1]. This is usually produced by a sigmoid or softmax layer + and the result of calling the `predict` method of a detection model. + + Returns: + tf.float32 tensor of the same shape as the input with values on the + interval [0, 1]. + """ + # Flattening Tensors and then reshaping at the end. + flat_class_predictions_with_background = tf.reshape( + class_predictions_with_background, shape=[-1]) + fn_x, fn_y = _function_approximation_proto_to_tf_tensors( + calibration_config.function_approximation.x_y_pairs) + updated_scores = _tf_linear_interp1d( + flat_class_predictions_with_background, fn_x, fn_y) + + # Un-flatten the scores + original_detections_shape = shape_utils.combined_static_and_dynamic_shape( + class_predictions_with_background) + calibrated_class_predictions_with_background = tf.reshape( + updated_scores, + shape=original_detections_shape, + name='calibrate_scores') + return calibrated_class_predictions_with_background + + elif (calibration_config.WhichOneof('calibrator') == + 'class_id_function_approximations'): + + def calibration_fn(class_predictions_with_background): + """Calibrate predictions per class via 1-d linear interpolation. + + Prediction scores are linearly interpolated with class-specific function + approximations. Note that after calibration, an anchor's class scores will + not necessarily sum to 1, and score ordering may change, depending on each + class' calibration parameters. Classes without calibration parameters are + left unchanged (no error is raised). + + Args: + class_predictions_with_background: tf.float32 tensor of shape + [batch_size, num_anchors, num_classes + 1] containing scores on the + interval [0,1]. This is usually produced by a sigmoid or softmax layer + and the result of calling the `predict` method of a detection model. + + Returns: + tf.float32 tensor of the same shape as the input with values on the + interval [0, 1]. + """ + class_id_function_dict = _get_class_id_function_dict(calibration_config) + + # Tensors are split by class and then recombined at the end to recover + # the input's original shape. If a class id does not have calibration + # parameters, it is left unchanged.
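+ # Illustrative shape walk-through (sizes assumed for this sketch, not + # taken from any config): with batch_size=2, num_anchors=4 and + # num_classes=2, the input is [2, 4, 3]; unstacking the last axis yields + # three [2, 4] tensors, each of which is flattened to [8] and calibrated, + # then the results are stacked into [8, 3] and reshaped back to [2, 4, 3].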
+ class_tensors = tf.unstack(class_predictions_with_background, axis=-1) + calibrated_class_tensors = [] + for class_id, class_tensor in enumerate(class_tensors): + flat_class_tensor = tf.reshape(class_tensor, shape=[-1]) + if class_id in class_id_function_dict: + output_tensor = _tf_linear_interp1d( + x_to_interpolate=flat_class_tensor, + fn_x=class_id_function_dict[class_id][0], + fn_y=class_id_function_dict[class_id][1]) + else: + tf.logging.info( + 'Calibration parameters for class id `%d` not found', + class_id) + output_tensor = flat_class_tensor + calibrated_class_tensors.append(output_tensor) + + combined_calibrated_tensor = tf.stack(calibrated_class_tensors, axis=1) + input_shape = shape_utils.combined_static_and_dynamic_shape( + class_predictions_with_background) + calibrated_class_predictions_with_background = tf.reshape( + combined_calibrated_tensor, + shape=input_shape, + name='calibrate_scores') + return calibrated_class_predictions_with_background + + elif (calibration_config.WhichOneof('calibrator') == + 'temperature_scaling_calibration'): + + def calibration_fn(class_predictions_with_background): + """Calibrate predictions via temperature scaling. + + Prediction logits are scaled by the temperature scaler. Note that + the 0-indexed background class is also transformed. + + Args: + class_predictions_with_background: tf.float32 tensor of shape + [batch_size, num_anchors, num_classes + 1] containing logit scores. + This is usually produced before a sigmoid or softmax layer. + + Returns: + tf.float32 tensor of the same shape as the input. + + Raises: + ValueError: If the temperature scaler is not positive. + """ + scaler = calibration_config.temperature_scaling_calibration.scaler + if scaler <= 0: + raise ValueError('The scaler in temperature scaling must be positive.') + calibrated_class_predictions_with_background = tf.math.divide( + class_predictions_with_background, + scaler, + name='calibrate_score') + return calibrated_class_predictions_with_background + + # TODO(zbeaver): Add sigmoid calibration. + else: + raise ValueError('No calibration builder defined for "Oneof" in ' + 'calibration_config.') + + return calibration_fn diff --git a/builders/calibration_builder_test.py b/builders/calibration_builder_test.py new file mode 100644 index 0000000..05971e6 --- /dev/null +++ b/builders/calibration_builder_test.py @@ -0,0 +1,224 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# ============================================================================== + +"""Tests for calibration_builder.""" + +import numpy as np +from scipy import interpolate +import tensorflow as tf +from object_detection.builders import calibration_builder +from object_detection.protos import calibration_pb2 + + +class CalibrationBuilderTest(tf.test.TestCase): + + def test_tf_linear_interp1d_map(self): + """Tests TF linear interpolation mapping to a single number.""" + with self.test_session() as sess: + tf_x = tf.constant([0., 0.5, 1.]) + tf_y = tf.constant([0.5, 0.5, 0.5]) + new_x = tf.constant([0., 0.25, 0.5, 0.75, 1.]) + tf_map_outputs = calibration_builder._tf_linear_interp1d( + new_x, tf_x, tf_y) + tf_map_outputs_np = sess.run([tf_map_outputs]) + self.assertAllClose(tf_map_outputs_np, [[0.5, 0.5, 0.5, 0.5, 0.5]]) + + def test_tf_linear_interp1d_interpolate(self): + """Tests TF 1d linear interpolation not mapping to a single number.""" + with self.test_session() as sess: + tf_x = tf.constant([0., 0.5, 1.]) + tf_y = tf.constant([0.6, 0.7, 1.0]) + new_x = tf.constant([0., 0.25, 0.5, 0.75, 1.]) + tf_interpolate_outputs = calibration_builder._tf_linear_interp1d( + new_x, tf_x, tf_y) + tf_interpolate_outputs_np = sess.run([tf_interpolate_outputs]) + self.assertAllClose(tf_interpolate_outputs_np, [[0.6, 0.65, 0.7, 0.85, 1.]]) + + @staticmethod + def _get_scipy_interp1d(new_x, x, y): + """Helper performing 1d linear interpolation using SciPy.""" + interpolation1d_fn = interpolate.interp1d(x, y) + return interpolation1d_fn(new_x) + + def _get_tf_interp1d(self, new_x, x, y): + """Helper performing 1d linear interpolation using Tensorflow.""" + with self.test_session() as sess: + tf_interp_outputs = calibration_builder._tf_linear_interp1d( + tf.convert_to_tensor(new_x, dtype=tf.float32), + tf.convert_to_tensor(x, dtype=tf.float32), + tf.convert_to_tensor(y, dtype=tf.float32)) + np_tf_interp_outputs = sess.run(tf_interp_outputs) + return np_tf_interp_outputs + + def test_tf_linear_interp1d_against_scipy_map(self): + """Tests parity of TF linear interpolation with SciPy for simple mapping.""" + length = 10 + np_x = np.linspace(0, 1, length) + + # Mapping all numbers to 0.5 + np_y_map = np.repeat(0.5, length) + + # Scipy and TF interpolations + test_data_np = np.linspace(0, 1, length * 10) + scipy_map_outputs = self._get_scipy_interp1d(test_data_np, np_x, np_y_map) + np_tf_map_outputs = self._get_tf_interp1d(test_data_np, np_x, np_y_map) + self.assertAllClose(scipy_map_outputs, np_tf_map_outputs) + + def test_tf_linear_interp1d_against_scipy_interpolate(self): + """Tests parity of TF linear interpolation with SciPy.""" + length = 10 + np_x = np.linspace(0, 1, length) + + # Requires interpolation over 0.5 to 1 domain + np_y_interp = np.linspace(0.5, 1, length) + + # Scipy interpolation for comparison + test_data_np = np.linspace(0, 1, length * 10) + scipy_interp_outputs = self._get_scipy_interp1d(test_data_np, np_x, + np_y_interp) + np_tf_interp_outputs = self._get_tf_interp1d(test_data_np, np_x, + np_y_interp) + self.assertAllClose(scipy_interp_outputs, np_tf_interp_outputs) + + @staticmethod + def _add_function_approximation_to_calibration_proto(calibration_proto, + x_array, y_array, + class_id): + """Adds a function approximation to calibration proto for a class id.""" + # Per-class calibration. + if class_id is not None: + function_approximation = ( + calibration_proto.class_id_function_approximations + .class_id_xy_pairs_map[class_id]) + # Class-agnostic calibration. 
+ else: + function_approximation = ( + calibration_proto.function_approximation.x_y_pairs) + + for x, y in zip(x_array, y_array): + x_y_pair_message = function_approximation.x_y_pair.add() + x_y_pair_message.x = x + x_y_pair_message.y = y + + def test_class_agnostic_function_approximation(self): + """Tests that calibration produces correct class-agnostic values.""" + # Generate fake calibration proto. For this interpolation, any input on + # [0.0, 0.5] should be divided by 2 and any input on (0.5, 1.0] should have + # 0.25 subtracted from it. + class_agnostic_x = np.asarray([0.0, 0.5, 1.0]) + class_agnostic_y = np.asarray([0.0, 0.25, 0.75]) + calibration_config = calibration_pb2.CalibrationConfig() + self._add_function_approximation_to_calibration_proto( + calibration_config, class_agnostic_x, class_agnostic_y, class_id=None) + + od_graph = tf.Graph() + with self.test_session(graph=od_graph) as sess: + calibration_fn = calibration_builder.build(calibration_config) + # batch_size = 2, num_classes = 2, num_anchors = 2. + class_predictions_with_background = tf.constant( + [[[0.1, 0.2, 0.3], + [0.4, 0.5, 0.0]], + [[0.6, 0.7, 0.8], + [0.9, 1.0, 1.0]]], dtype=tf.float32) + + # The same class-agnostic transformation is applied to every score. + calibrated_scores = calibration_fn(class_predictions_with_background) + calibrated_scores_np = sess.run(calibrated_scores) + self.assertAllClose(calibrated_scores_np, [[[0.05, 0.1, 0.15], + [0.2, 0.25, 0.0]], + [[0.35, 0.45, 0.55], + [0.65, 0.75, 0.75]]]) + + def test_multiclass_function_approximations(self): + """Tests that calibration produces correct multiclass values.""" + # Background class (0-index) maps all predictions to 0.5. + class_0_x = np.asarray([0.0, 0.5, 1.0]) + class_0_y = np.asarray([0.5, 0.5, 0.5]) + calibration_config = calibration_pb2.CalibrationConfig() + self._add_function_approximation_to_calibration_proto( + calibration_config, class_0_x, class_0_y, class_id=0) + + # Class id 1 will interpolate using these values. + class_1_x = np.asarray([0.0, 0.2, 1.0]) + class_1_y = np.asarray([0.0, 0.6, 1.0]) + self._add_function_approximation_to_calibration_proto( + calibration_config, class_1_x, class_1_y, class_id=1) + + od_graph = tf.Graph() + with self.test_session(graph=od_graph) as sess: + calibration_fn = calibration_builder.build(calibration_config) + # batch_size = 2, num_classes = 2, num_anchors = 2. + class_predictions_with_background = tf.constant( + [[[0.1, 0.2], [0.9, 0.1]], + [[0.6, 0.4], [0.08, 0.92]]], + dtype=tf.float32) + calibrated_scores = calibration_fn(class_predictions_with_background) + calibrated_scores_np = sess.run(calibrated_scores) + self.assertAllClose(calibrated_scores_np, [[[0.5, 0.6], [0.5, 0.3]], + [[0.5, 0.7], [0.5, 0.96]]]) + + def test_temperature_scaling(self): + """Tests that calibration produces correct temperature scaling values.""" + calibration_config = calibration_pb2.CalibrationConfig() + calibration_config.temperature_scaling_calibration.scaler = 2.0 + + od_graph = tf.Graph() + with self.test_session(graph=od_graph) as sess: + calibration_fn = calibration_builder.build(calibration_config) + # batch_size = 2, num_classes = 2, num_anchors = 2.
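+ # With scaler = 2.0, every score below is simply halved, e.g. + # 0.1 -> 0.05 and 0.9 -> 0.45, which is what the assertion checks.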
+ class_predictions_with_background = tf.constant( + [[[0.1, 0.2, 0.3], [0.4, 0.5, 0.0]], + [[0.6, 0.7, 0.8], [0.9, 1.0, 1.0]]], + dtype=tf.float32) + calibrated_scores = calibration_fn(class_predictions_with_background) + calibrated_scores_np = sess.run(calibrated_scores) + self.assertAllClose(calibrated_scores_np, + [[[0.05, 0.1, 0.15], [0.2, 0.25, 0.0]], + [[0.3, 0.35, 0.4], [0.45, 0.5, 0.5]]]) + + def test_temperature_scaling_incorrect_value_error(self): + calibration_config = calibration_pb2.CalibrationConfig() + calibration_config.temperature_scaling_calibration.scaler = 0 + + calibration_fn = calibration_builder.build(calibration_config) + class_predictions_with_background = tf.constant( + [[[0.1, 0.2, 0.3]]], dtype=tf.float32) + with self.assertRaises(ValueError): + calibration_fn(class_predictions_with_background) + + def test_skips_class_when_calibration_parameters_not_present(self): + """Tests that classes without calibration parameters are left unchanged.""" + # Only adding calibration parameters for class id = 0, even though class id + # 1 is present in the data. + class_0_x = np.asarray([0.0, 0.5, 1.0]) + class_0_y = np.asarray([0.5, 0.5, 0.5]) + calibration_config = calibration_pb2.CalibrationConfig() + self._add_function_approximation_to_calibration_proto( + calibration_config, class_0_x, class_0_y, class_id=0) + od_graph = tf.Graph() + with self.test_session(graph=od_graph) as sess: + calibration_fn = calibration_builder.build(calibration_config) + # batch_size = 2, num_classes = 2, num_anchors = 2. + class_predictions_with_background = tf.constant( + [[[0.1, 0.2], [0.9, 0.1]], + [[0.6, 0.4], [0.08, 0.92]]], + dtype=tf.float32) + calibrated_scores = calibration_fn(class_predictions_with_background) + calibrated_scores_np = sess.run(calibrated_scores) + self.assertAllClose(calibrated_scores_np, [[[0.5, 0.2], [0.5, 0.1]], + [[0.5, 0.4], [0.5, 0.92]]]) + +if __name__ == '__main__': + tf.test.main() diff --git a/builders/dataset_builder.py b/builders/dataset_builder.py new file mode 100644 index 0000000..158abb1 --- /dev/null +++ b/builders/dataset_builder.py @@ -0,0 +1,162 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""tf.data.Dataset builder. + +Creates data sources for DetectionModels from an InputReader config. See +input_reader.proto for options. + +Note: If users wish to also use their own InputReaders with the Object +Detection configuration framework, they should define their own builder function +that wraps the build function. +""" +import functools +import tensorflow as tf + +from object_detection.data_decoders import tf_example_decoder +from object_detection.protos import input_reader_pb2 + + +def make_initializable_iterator(dataset): + """Creates an iterator, and initializes tables.
+ + This is useful in cases where make_one_shot_iterator wouldn't work because + the graph contains a hash table that needs to be initialized. + + Args: + dataset: A `tf.data.Dataset` object. + + Returns: + A `tf.data.Iterator`. + """ + iterator = dataset.make_initializable_iterator() + tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, iterator.initializer) + return iterator + + +def read_dataset(file_read_func, input_files, config): + """Reads a dataset, and handles repetition and shuffling. + + Args: + file_read_func: Function to use in tf.contrib.data.parallel_interleave, to + read every individual file into a tf.data.Dataset. + input_files: A list of file paths to read. + config: An input_reader_pb2.InputReader object. + + Returns: + A tf.data.Dataset of (undecoded) tf-records based on config. + + Raises: + RuntimeError: If no files are found at the supplied path(s). + """ + # Shard, shuffle, and read files. + filenames = tf.gfile.Glob(input_files) + if not filenames: + raise RuntimeError('Did not find any input files matching the glob pattern ' + '{}'.format(input_files)) + num_readers = config.num_readers + if num_readers > len(filenames): + num_readers = len(filenames) + tf.logging.warning('num_readers has been reduced to %d to match input file ' + 'shards.' % num_readers) + filename_dataset = tf.data.Dataset.from_tensor_slices(filenames) + if config.shuffle: + filename_dataset = filename_dataset.shuffle( + config.filenames_shuffle_buffer_size) + elif num_readers > 1: + tf.logging.warning('`shuffle` is false, but the input data stream is ' + 'still slightly shuffled since `num_readers` > 1.') + filename_dataset = filename_dataset.repeat(config.num_epochs or None) + records_dataset = filename_dataset.apply( + tf.contrib.data.parallel_interleave( + file_read_func, + cycle_length=num_readers, + block_length=config.read_block_length, + sloppy=config.shuffle)) + if config.shuffle: + records_dataset = records_dataset.shuffle(config.shuffle_buffer_size) + return records_dataset + + +def build(input_reader_config, batch_size=None, transform_input_data_fn=None): + """Builds a tf.data.Dataset. + + Builds a tf.data.Dataset by applying the `transform_input_data_fn` on all + records. Applies a padded batch to the resulting dataset. + + Args: + input_reader_config: An input_reader_pb2.InputReader object. + batch_size: Batch size. If batch size is None, no batching is performed. + transform_input_data_fn: Function to apply transformation to all records, + or None if no extra decoding is required. + + Returns: + A tf.data.Dataset based on the input_reader_config. + + Raises: + ValueError: On invalid input reader proto. + ValueError: If no input paths are specified.
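+ + Example (an illustrative sketch; the input path below is a placeholder): + config = input_reader_pb2.InputReader() + config.tf_record_input_reader.input_path.append( + '/data/train.record-?????-of-00010') + dataset = build(config, batch_size=32)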
+ """ + if not isinstance(input_reader_config, input_reader_pb2.InputReader): + raise ValueError('input_reader_config not of type ' + 'input_reader_pb2.InputReader.') + + if input_reader_config.WhichOneof('input_reader') == 'tf_record_input_reader': + config = input_reader_config.tf_record_input_reader + if not config.input_path: + raise ValueError('At least one input path must be specified in ' + '`input_reader_config`.') + + label_map_proto_file = None + if input_reader_config.HasField('label_map_path'): + label_map_proto_file = input_reader_config.label_map_path + decoder = tf_example_decoder.TfExampleDecoder( + load_instance_masks=input_reader_config.load_instance_masks, + load_multiclass_scores=input_reader_config.load_multiclass_scores, + instance_mask_type=input_reader_config.mask_type, + label_map_proto_file=label_map_proto_file, + use_display_name=input_reader_config.use_display_name, + num_additional_channels=input_reader_config.num_additional_channels) + + def process_fn(value): + """Sets up tf graph that decodes, transforms and pads input data.""" + processed_tensors = decoder.decode(value) + if transform_input_data_fn is not None: + processed_tensors = transform_input_data_fn(processed_tensors) + return processed_tensors + + dataset = read_dataset( + functools.partial(tf.data.TFRecordDataset, buffer_size=8 * 1000 * 1000), + config.input_path[:], input_reader_config) + if input_reader_config.sample_1_of_n_examples > 1: + dataset = dataset.shard(input_reader_config.sample_1_of_n_examples, 0) + # TODO(rathodv): make batch size a required argument once the old binaries + # are deleted. + if batch_size: + num_parallel_calls = batch_size * input_reader_config.num_parallel_batches + else: + num_parallel_calls = input_reader_config.num_parallel_map_calls + # TODO(b/123952794): Migrate to V2 function. + if hasattr(dataset, 'map_with_legacy_function'): + data_map_fn = dataset.map_with_legacy_function + else: + data_map_fn = dataset.map + dataset = data_map_fn(process_fn, num_parallel_calls=num_parallel_calls) + if batch_size: + dataset = dataset.apply( + tf.contrib.data.batch_and_drop_remainder(batch_size)) + dataset = dataset.prefetch(input_reader_config.num_prefetch_batches) + return dataset + + raise ValueError('Unsupported input_reader_config.') diff --git a/builders/dataset_builder_test.py b/builders/dataset_builder_test.py new file mode 100644 index 0000000..7867731 --- /dev/null +++ b/builders/dataset_builder_test.py @@ -0,0 +1,356 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for dataset_builder.""" + +import os +import numpy as np +import tensorflow as tf + +from google.protobuf import text_format + +from object_detection.builders import dataset_builder +from object_detection.core import standard_fields as fields +from object_detection.protos import input_reader_pb2 +from object_detection.utils import dataset_util + + +class DatasetBuilderTest(tf.test.TestCase): + + def create_tf_record(self, has_additional_channels=False, num_examples=1): + path = os.path.join(self.get_temp_dir(), 'tfrecord') + writer = tf.python_io.TFRecordWriter(path) + + image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) + additional_channels_tensor = np.random.randint( + 255, size=(4, 5, 1)).astype(np.uint8) + flat_mask = (4 * 5) * [1.0] + with self.test_session(): + encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval() + encoded_additional_channels_jpeg = tf.image.encode_jpeg( + tf.constant(additional_channels_tensor)).eval() + for i in range(num_examples): + features = { + 'image/source_id': dataset_util.bytes_feature(str(i)), + 'image/encoded': dataset_util.bytes_feature(encoded_jpeg), + 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), + 'image/height': dataset_util.int64_feature(4), + 'image/width': dataset_util.int64_feature(5), + 'image/object/bbox/xmin': dataset_util.float_list_feature([0.0]), + 'image/object/bbox/xmax': dataset_util.float_list_feature([1.0]), + 'image/object/bbox/ymin': dataset_util.float_list_feature([0.0]), + 'image/object/bbox/ymax': dataset_util.float_list_feature([1.0]), + 'image/object/class/label': dataset_util.int64_list_feature([2]), + 'image/object/mask': dataset_util.float_list_feature(flat_mask), + } + if has_additional_channels: + additional_channels_key = 'image/additional_channels/encoded' + features[additional_channels_key] = dataset_util.bytes_list_feature( + [encoded_additional_channels_jpeg] * 2) + example = tf.train.Example(features=tf.train.Features(feature=features)) + writer.write(example.SerializeToString()) + writer.close() + + return path + + def test_build_tf_record_input_reader(self): + tf_record_path = self.create_tf_record() + + input_reader_text_proto = """ + shuffle: false + num_readers: 1 + tf_record_input_reader {{ + input_path: '{0}' + }} + """.format(tf_record_path) + input_reader_proto = input_reader_pb2.InputReader() + text_format.Merge(input_reader_text_proto, input_reader_proto) + tensor_dict = dataset_builder.make_initializable_iterator( + dataset_builder.build(input_reader_proto, batch_size=1)).get_next() + + with tf.train.MonitoredSession() as sess: + output_dict = sess.run(tensor_dict) + + self.assertTrue( + fields.InputDataFields.groundtruth_instance_masks not in output_dict) + self.assertEquals((1, 4, 5, 3), + output_dict[fields.InputDataFields.image].shape) + self.assertAllEqual([[2]], + output_dict[fields.InputDataFields.groundtruth_classes]) + self.assertEquals( + (1, 1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape) + self.assertAllEqual( + [0.0, 0.0, 1.0, 1.0], + output_dict[fields.InputDataFields.groundtruth_boxes][0][0]) + + def test_build_tf_record_input_reader_and_load_instance_masks(self): + tf_record_path = self.create_tf_record() + + input_reader_text_proto = """ + shuffle: false + num_readers: 1 + load_instance_masks: true + tf_record_input_reader {{ + input_path: '{0}' + }} + """.format(tf_record_path) + input_reader_proto = 
input_reader_pb2.InputReader() + text_format.Merge(input_reader_text_proto, input_reader_proto) + tensor_dict = dataset_builder.make_initializable_iterator( + dataset_builder.build(input_reader_proto, batch_size=1)).get_next() + + with tf.train.MonitoredSession() as sess: + output_dict = sess.run(tensor_dict) + self.assertAllEqual( + (1, 1, 4, 5), + output_dict[fields.InputDataFields.groundtruth_instance_masks].shape) + + def test_build_tf_record_input_reader_with_batch_size_two(self): + tf_record_path = self.create_tf_record() + + input_reader_text_proto = """ + shuffle: false + num_readers: 1 + tf_record_input_reader {{ + input_path: '{0}' + }} + """.format(tf_record_path) + input_reader_proto = input_reader_pb2.InputReader() + text_format.Merge(input_reader_text_proto, input_reader_proto) + + def one_hot_class_encoding_fn(tensor_dict): + tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot( + tensor_dict[fields.InputDataFields.groundtruth_classes] - 1, depth=3) + return tensor_dict + + tensor_dict = dataset_builder.make_initializable_iterator( + dataset_builder.build( + input_reader_proto, + transform_input_data_fn=one_hot_class_encoding_fn, + batch_size=2)).get_next() + + with tf.train.MonitoredSession() as sess: + output_dict = sess.run(tensor_dict) + + self.assertAllEqual([2, 4, 5, 3], + output_dict[fields.InputDataFields.image].shape) + self.assertAllEqual( + [2, 1, 3], + output_dict[fields.InputDataFields.groundtruth_classes].shape) + self.assertAllEqual( + [2, 1, 4], output_dict[fields.InputDataFields.groundtruth_boxes].shape) + self.assertAllEqual([[[0.0, 0.0, 1.0, 1.0]], [[0.0, 0.0, 1.0, 1.0]]], + output_dict[fields.InputDataFields.groundtruth_boxes]) + + def test_build_tf_record_input_reader_with_batch_size_two_and_masks(self): + tf_record_path = self.create_tf_record() + + input_reader_text_proto = """ + shuffle: false + num_readers: 1 + load_instance_masks: true + tf_record_input_reader {{ + input_path: '{0}' + }} + """.format(tf_record_path) + input_reader_proto = input_reader_pb2.InputReader() + text_format.Merge(input_reader_text_proto, input_reader_proto) + + def one_hot_class_encoding_fn(tensor_dict): + tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot( + tensor_dict[fields.InputDataFields.groundtruth_classes] - 1, depth=3) + return tensor_dict + + tensor_dict = dataset_builder.make_initializable_iterator( + dataset_builder.build( + input_reader_proto, + transform_input_data_fn=one_hot_class_encoding_fn, + batch_size=2)).get_next() + + with tf.train.MonitoredSession() as sess: + output_dict = sess.run(tensor_dict) + + self.assertAllEqual( + [2, 1, 4, 5], + output_dict[fields.InputDataFields.groundtruth_instance_masks].shape) + + def test_raises_error_with_no_input_paths(self): + input_reader_text_proto = """ + shuffle: false + num_readers: 1 + load_instance_masks: true + """ + input_reader_proto = input_reader_pb2.InputReader() + text_format.Merge(input_reader_text_proto, input_reader_proto) + with self.assertRaises(ValueError): + dataset_builder.build(input_reader_proto, batch_size=1) + + def test_sample_all_data(self): + tf_record_path = self.create_tf_record(num_examples=2) + + input_reader_text_proto = """ + shuffle: false + num_readers: 1 + sample_1_of_n_examples: 1 + tf_record_input_reader {{ + input_path: '{0}' + }} + """.format(tf_record_path) + input_reader_proto = input_reader_pb2.InputReader() + text_format.Merge(input_reader_text_proto, input_reader_proto) + tensor_dict = dataset_builder.make_initializable_iterator( + 
dataset_builder.build(input_reader_proto, batch_size=1)).get_next() + + with tf.train.MonitoredSession() as sess: + output_dict = sess.run(tensor_dict) + self.assertAllEqual(['0'], output_dict[fields.InputDataFields.source_id]) + output_dict = sess.run(tensor_dict) + self.assertEquals(['1'], output_dict[fields.InputDataFields.source_id]) + + def test_sample_one_of_n_shards(self): + tf_record_path = self.create_tf_record(num_examples=4) + + input_reader_text_proto = """ + shuffle: false + num_readers: 1 + sample_1_of_n_examples: 2 + tf_record_input_reader {{ + input_path: '{0}' + }} + """.format(tf_record_path) + input_reader_proto = input_reader_pb2.InputReader() + text_format.Merge(input_reader_text_proto, input_reader_proto) + tensor_dict = dataset_builder.make_initializable_iterator( + dataset_builder.build(input_reader_proto, batch_size=1)).get_next() + + with tf.train.MonitoredSession() as sess: + output_dict = sess.run(tensor_dict) + self.assertAllEqual(['0'], output_dict[fields.InputDataFields.source_id]) + output_dict = sess.run(tensor_dict) + self.assertEquals(['2'], output_dict[fields.InputDataFields.source_id]) + + +class ReadDatasetTest(tf.test.TestCase): + + def setUp(self): + self._path_template = os.path.join(self.get_temp_dir(), 'examples_%s.txt') + + for i in range(5): + path = self._path_template % i + with tf.gfile.Open(path, 'wb') as f: + f.write('\n'.join([str(i + 1), str((i + 1) * 10)])) + + self._shuffle_path_template = os.path.join(self.get_temp_dir(), + 'shuffle_%s.txt') + for i in range(2): + path = self._shuffle_path_template % i + with tf.gfile.Open(path, 'wb') as f: + f.write('\n'.join([str(i)] * 5)) + + def _get_dataset_next(self, files, config, batch_size): + + def decode_func(value): + return [tf.string_to_number(value, out_type=tf.int32)] + + dataset = dataset_builder.read_dataset(tf.data.TextLineDataset, files, + config) + dataset = dataset.map(decode_func) + dataset = dataset.batch(batch_size) + return dataset.make_one_shot_iterator().get_next() + + def test_make_initializable_iterator_with_hashTable(self): + keys = [1, 0, -1] + dataset = tf.data.Dataset.from_tensor_slices([[1, 2, -1, 5]]) + table = tf.contrib.lookup.HashTable( + initializer=tf.contrib.lookup.KeyValueTensorInitializer( + keys=keys, values=list(reversed(keys))), + default_value=100) + dataset = dataset.map(table.lookup) + data = dataset_builder.make_initializable_iterator(dataset).get_next() + init = tf.tables_initializer() + + with self.test_session() as sess: + sess.run(init) + self.assertAllEqual(sess.run(data), [-1, 100, 1, 100]) + + def test_read_dataset(self): + config = input_reader_pb2.InputReader() + config.num_readers = 1 + config.shuffle = False + + data = self._get_dataset_next( + [self._path_template % '*'], config, batch_size=20) + with self.test_session() as sess: + self.assertAllEqual( + sess.run(data), [[ + 1, 10, 2, 20, 3, 30, 4, 40, 5, 50, 1, 10, 2, 20, 3, 30, 4, 40, 5, + 50 + ]]) + + def test_reduce_num_reader(self): + config = input_reader_pb2.InputReader() + config.num_readers = 10 + config.shuffle = False + + data = self._get_dataset_next( + [self._path_template % '*'], config, batch_size=20) + with self.test_session() as sess: + self.assertAllEqual( + sess.run(data), [[ + 1, 10, 2, 20, 3, 30, 4, 40, 5, 50, 1, 10, 2, 20, 3, 30, 4, 40, 5, + 50 + ]]) + + def test_enable_shuffle(self): + config = input_reader_pb2.InputReader() + config.num_readers = 1 + config.shuffle = True + + tf.set_random_seed(1) # Set graph level seed. 
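+ # Each shuffle_%s.txt fixture written in setUp holds five copies of str(i), + # so an in-order (unshuffled) read of the two files yields five 0s followed + # by five 1s; with shuffling enabled the batch should differ from that.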
+ data = self._get_dataset_next( + [self._shuffle_path_template % '*'], config, batch_size=10) + expected_non_shuffle_output = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + + with self.test_session() as sess: + self.assertTrue( + np.any(np.not_equal(sess.run(data), expected_non_shuffle_output))) + + def test_disable_shuffle_(self): + config = input_reader_pb2.InputReader() + config.num_readers = 1 + config.shuffle = False + + data = self._get_dataset_next( + [self._shuffle_path_template % '*'], config, batch_size=10) + expected_non_shuffle_output = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + + with self.test_session() as sess: + self.assertAllEqual(sess.run(data), [expected_non_shuffle_output]) + + def test_read_dataset_single_epoch(self): + config = input_reader_pb2.InputReader() + config.num_epochs = 1 + config.num_readers = 1 + config.shuffle = False + + data = self._get_dataset_next( + [self._path_template % '0'], config, batch_size=30) + with self.test_session() as sess: + # First batch will retrieve as much as it can, second batch will fail. + self.assertAllEqual(sess.run(data), [[1, 10]]) + self.assertRaises(tf.errors.OutOfRangeError, sess.run, data) + + +if __name__ == '__main__': + tf.test.main() diff --git a/builders/graph_rewriter_builder.py b/builders/graph_rewriter_builder.py new file mode 100644 index 0000000..53267f3 --- /dev/null +++ b/builders/graph_rewriter_builder.py @@ -0,0 +1,45 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functions for quantized training and evaluation.""" + +import tensorflow as tf + + +def build(graph_rewriter_config, is_training): + """Returns a function that modifies default graph based on options. + + Args: + graph_rewriter_config: graph_rewriter_pb2.GraphRewriter proto. + is_training: whether in training or eval mode. + """ + def graph_rewrite_fn(): + """Function to quantize weights and activation of the default graph.""" + if (graph_rewriter_config.quantization.weight_bits != 8 or + graph_rewriter_config.quantization.activation_bits != 8): + raise ValueError('Only 8bit quantization is supported') + + # Quantize the graph by inserting quantize ops for weights and activations + if is_training: + tf.contrib.quantize.experimental_create_training_graph( + input_graph=tf.get_default_graph(), + quant_delay=graph_rewriter_config.quantization.delay + ) + else: + tf.contrib.quantize.experimental_create_eval_graph( + input_graph=tf.get_default_graph() + ) + + tf.contrib.layers.summarize_collection('quant_vars') + return graph_rewrite_fn diff --git a/builders/graph_rewriter_builder_test.py b/builders/graph_rewriter_builder_test.py new file mode 100644 index 0000000..72730e7 --- /dev/null +++ b/builders/graph_rewriter_builder_test.py @@ -0,0 +1,58 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for graph_rewriter_builder.""" +import mock +import tensorflow as tf +from object_detection.builders import graph_rewriter_builder +from object_detection.protos import graph_rewriter_pb2 + + +class QuantizationBuilderTest(tf.test.TestCase): + + def testQuantizationBuilderSetsUpCorrectTrainArguments(self): + with mock.patch.object( + tf.contrib.quantize, + 'experimental_create_training_graph') as mock_quant_fn: + with mock.patch.object(tf.contrib.layers, + 'summarize_collection') as mock_summarize_col: + graph_rewriter_proto = graph_rewriter_pb2.GraphRewriter() + graph_rewriter_proto.quantization.delay = 10 + graph_rewriter_proto.quantization.weight_bits = 8 + graph_rewriter_proto.quantization.activation_bits = 8 + graph_rewrite_fn = graph_rewriter_builder.build( + graph_rewriter_proto, is_training=True) + graph_rewrite_fn() + _, kwargs = mock_quant_fn.call_args + self.assertEqual(kwargs['input_graph'], tf.get_default_graph()) + self.assertEqual(kwargs['quant_delay'], 10) + mock_summarize_col.assert_called_with('quant_vars') + + def testQuantizationBuilderSetsUpCorrectEvalArguments(self): + with mock.patch.object(tf.contrib.quantize, + 'experimental_create_eval_graph') as mock_quant_fn: + with mock.patch.object(tf.contrib.layers, + 'summarize_collection') as mock_summarize_col: + graph_rewriter_proto = graph_rewriter_pb2.GraphRewriter() + graph_rewriter_proto.quantization.delay = 10 + graph_rewrite_fn = graph_rewriter_builder.build( + graph_rewriter_proto, is_training=False) + graph_rewrite_fn() + _, kwargs = mock_quant_fn.call_args + self.assertEqual(kwargs['input_graph'], tf.get_default_graph()) + mock_summarize_col.assert_called_with('quant_vars') + + +if __name__ == '__main__': + tf.test.main() diff --git a/builders/hyperparams_builder.py b/builders/hyperparams_builder.py new file mode 100644 index 0000000..cd503e2 --- /dev/null +++ b/builders/hyperparams_builder.py @@ -0,0 +1,418 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Builder function to construct tf-slim arg_scope for convolution, fc ops.""" +import tensorflow as tf + +from object_detection.core import freezable_batch_norm +from object_detection.protos import hyperparams_pb2 +from object_detection.utils import context_manager + +slim = tf.contrib.slim + + +class KerasLayerHyperparams(object): + """ + A hyperparameter configuration object for Keras layers used in + Object Detection models. + """ + + def __init__(self, hyperparams_config): + """Builds keras hyperparameter config for layers based on the proto config. + + It automatically converts from Slim layer hyperparameter configs to + Keras layer hyperparameters. Namely, it: + - Builds Keras initializers/regularizers instead of Slim ones + - sets weights_regularizer/initializer to kernel_regularizer/initializer + - converts batchnorm decay to momentum + - converts Slim l2 regularizer weights to the equivalent Keras l2 weights + + Contains a hyperparameter configuration for ops that specifies kernel + initializer, kernel regularizer, activation. Also contains parameters for + batch norm operators based on the configuration. + + Note that if the batch_norm parameters are not specified in the config + (i.e. left to default) then batch norm is excluded from the config. + + Args: + hyperparams_config: hyperparams.proto object containing + hyperparameters. + + Raises: + ValueError: if hyperparams_config is not of type hyperparams.Hyperparams. + """ + if not isinstance(hyperparams_config, + hyperparams_pb2.Hyperparams): + raise ValueError('hyperparams_config not of type ' + 'hyperparams_pb.Hyperparams.') + + self._batch_norm_params = None + if hyperparams_config.HasField('batch_norm'): + self._batch_norm_params = _build_keras_batch_norm_params( + hyperparams_config.batch_norm) + + self._activation_fn = _build_activation_fn(hyperparams_config.activation) + # TODO(kaftan): Unclear if these kwargs apply to separable & depthwise conv + # (Those might use depthwise_* instead of kernel_*) + # We should probably switch to using build_conv2d_layer and + # build_depthwise_conv2d_layer methods instead. + self._op_params = { + 'kernel_regularizer': _build_keras_regularizer( + hyperparams_config.regularizer), + 'kernel_initializer': _build_initializer( + hyperparams_config.initializer, build_for_keras=True), + 'activation': _build_activation_fn(hyperparams_config.activation) + } + + def use_batch_norm(self): + return self._batch_norm_params is not None + + def batch_norm_params(self, **overrides): + """Returns a dict containing batchnorm layer construction hyperparameters. + + Optionally overrides values in the batchnorm hyperparam dict. Overrides + only apply to individual calls of this method, and do not affect + future calls. + + Args: + **overrides: keyword arguments to override in the hyperparams dictionary + + Returns: dict containing the layer construction keyword arguments, with + values overridden by the `overrides` keyword arguments. + """ + if self._batch_norm_params is None: + new_batch_norm_params = dict() + else: + new_batch_norm_params = self._batch_norm_params.copy() + new_batch_norm_params.update(overrides) + return new_batch_norm_params + + def build_batch_norm(self, training=None, **overrides): + """Returns a Batch Normalization layer with the appropriate hyperparams. 
+ + If the hyperparams are configured to not use batch normalization, + this will return a Keras Lambda layer that only applies tf.Identity, + without doing any normalization. + + Optionally overrides values in the batch_norm hyperparam dict. Overrides + only apply to individual calls of this method, and do not affect + future calls. + + Args: + training: if True, the normalization layer will normalize using the batch + statistics. If False, the normalization layer will be frozen and will + act as if it is being used for inference. If None, the layer + will look up the Keras learning phase at `call` time to decide what to + do. + **overrides: batch normalization construction args to override from the + batch_norm hyperparams dictionary. + + Returns: Either a FreezableBatchNorm layer (if use_batch_norm() is True), + or a Keras Lambda layer that applies the identity (if use_batch_norm() + is False) + """ + if self.use_batch_norm(): + return freezable_batch_norm.FreezableBatchNorm( + training=training, + **self.batch_norm_params(**overrides) + ) + else: + return tf.keras.layers.Lambda(tf.identity) + + def build_activation_layer(self, name='activation'): + """Returns a Keras layer that applies the desired activation function. + + Args: + name: The name to assign the Keras layer. + Returns: A Keras lambda layer that applies the activation function + specified in the hyperparam config, or applies the identity if the + activation function is None. + """ + if self._activation_fn: + return tf.keras.layers.Lambda(self._activation_fn, name=name) + else: + return tf.keras.layers.Lambda(tf.identity, name=name) + + def params(self, include_activation=False, **overrides): + """Returns a dict containing the layer construction hyperparameters to use. + + Optionally overrides values in the returned dict. Overrides + only apply to individual calls of this method, and do not affect + future calls. + + Args: + include_activation: If False, activation in the returned dictionary will + be set to `None`, and the activation must be applied via a separate + layer created by `build_activation_layer`. If True, `activation` in the + output param dictionary will be set to the activation function + specified in the hyperparams config. + **overrides: keyword arguments to override in the hyperparams dictionary. + + Returns: dict containing the layer construction keyword arguments, with + values overridden by the `overrides` keyword arguments. + """ + new_params = self._op_params.copy() + new_params['activation'] = None + if include_activation: + new_params['activation'] = self._activation_fn + if self.use_batch_norm() and self.batch_norm_params()['center']: + new_params['use_bias'] = False + else: + new_params['use_bias'] = True + new_params.update(**overrides) + return new_params + + +def build(hyperparams_config, is_training): + """Builds tf-slim arg_scope for convolution ops based on the config. + + Returns an arg_scope to use for convolution ops containing weights + initializer, weights regularizer, activation function, batch norm function + and batch norm parameters based on the configuration. + + Note that if no normalization parameters are specified in the config, + (i.e. left to default) then both batch norm and group norm are excluded + from the arg_scope. + + The batch norm parameters are set for updates based on `is_training` argument + and conv_hyperparams_config.batch_norm.train parameter. During training, they + are updated only if batch_norm.train parameter is true. 
However, during eval, + no updates are made to the batch norm variables. In both cases, their current + values are used during forward pass. + + Args: + hyperparams_config: hyperparams.proto object containing + hyperparameters. + is_training: Whether the network is in training mode. + + Returns: + arg_scope_fn: A function to construct tf-slim arg_scope containing + hyperparameters for ops. + + Raises: + ValueError: if hyperparams_config is not of type hyperparams.Hyperparams. + """ + if not isinstance(hyperparams_config, + hyperparams_pb2.Hyperparams): + raise ValueError('hyperparams_config not of type ' + 'hyperparams_pb.Hyperparams.') + + normalizer_fn = None + batch_norm_params = None + if hyperparams_config.HasField('batch_norm'): + normalizer_fn = slim.batch_norm + batch_norm_params = _build_batch_norm_params( + hyperparams_config.batch_norm, is_training) + if hyperparams_config.HasField('group_norm'): + normalizer_fn = tf.contrib.layers.group_norm + affected_ops = [slim.conv2d, slim.separable_conv2d, slim.conv2d_transpose] + if hyperparams_config.HasField('op') and ( + hyperparams_config.op == hyperparams_pb2.Hyperparams.FC): + affected_ops = [slim.fully_connected] + def scope_fn(): + with (slim.arg_scope([slim.batch_norm], **batch_norm_params) + if batch_norm_params is not None else + context_manager.IdentityContextManager()): + with slim.arg_scope( + affected_ops, + weights_regularizer=_build_slim_regularizer( + hyperparams_config.regularizer), + weights_initializer=_build_initializer( + hyperparams_config.initializer), + activation_fn=_build_activation_fn(hyperparams_config.activation), + normalizer_fn=normalizer_fn) as sc: + return sc + + return scope_fn + + +def _build_activation_fn(activation_fn): + """Builds a callable activation from config. + + Args: + activation_fn: hyperparams_pb2.Hyperparams.activation + + Returns: + Callable activation function. + + Raises: + ValueError: On unknown activation function. + """ + if activation_fn == hyperparams_pb2.Hyperparams.NONE: + return None + if activation_fn == hyperparams_pb2.Hyperparams.RELU: + return tf.nn.relu + if activation_fn == hyperparams_pb2.Hyperparams.RELU_6: + return tf.nn.relu6 + raise ValueError('Unknown activation function: {}'.format(activation_fn)) + + +def _build_slim_regularizer(regularizer): + """Builds a tf-slim regularizer from config. + + Args: + regularizer: hyperparams_pb2.Hyperparams.regularizer proto. + + Returns: + tf-slim regularizer. + + Raises: + ValueError: On unknown regularizer. + """ + regularizer_oneof = regularizer.WhichOneof('regularizer_oneof') + if regularizer_oneof == 'l1_regularizer': + return slim.l1_regularizer(scale=float(regularizer.l1_regularizer.weight)) + if regularizer_oneof == 'l2_regularizer': + return slim.l2_regularizer(scale=float(regularizer.l2_regularizer.weight)) + if regularizer_oneof is None: + return None + raise ValueError('Unknown regularizer function: {}'.format(regularizer_oneof)) + + +def _build_keras_regularizer(regularizer): + """Builds a keras regularizer from config. + + Args: + regularizer: hyperparams_pb2.Hyperparams.regularizer proto. + + Returns: + Keras regularizer. + + Raises: + ValueError: On unknown regularizer. 
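+ + For example, a config of l2_regularizer { weight: 1.0 } is built below as + tf.keras.regularizers.l2(0.5): Keras multiplies the sum of squared weights + by its argument directly, while the Slim regularizer includes an extra + factor of 1/2.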
+ """ + regularizer_oneof = regularizer.WhichOneof('regularizer_oneof') + if regularizer_oneof == 'l1_regularizer': + return tf.keras.regularizers.l1(float(regularizer.l1_regularizer.weight)) + if regularizer_oneof == 'l2_regularizer': + # The Keras L2 regularizer weight differs from the Slim L2 regularizer + # weight by a factor of 2 + return tf.keras.regularizers.l2( + float(regularizer.l2_regularizer.weight * 0.5)) + raise ValueError('Unknown regularizer function: {}'.format(regularizer_oneof)) + + +def _build_initializer(initializer, build_for_keras=False): + """Build a tf initializer from config. + + Args: + initializer: hyperparams_pb2.Hyperparams.regularizer proto. + build_for_keras: Whether the initializers should be built for Keras + operators. If false builds for Slim. + + Returns: + tf initializer. + + Raises: + ValueError: On unknown initializer. + """ + initializer_oneof = initializer.WhichOneof('initializer_oneof') + if initializer_oneof == 'truncated_normal_initializer': + return tf.truncated_normal_initializer( + mean=initializer.truncated_normal_initializer.mean, + stddev=initializer.truncated_normal_initializer.stddev) + if initializer_oneof == 'random_normal_initializer': + return tf.random_normal_initializer( + mean=initializer.random_normal_initializer.mean, + stddev=initializer.random_normal_initializer.stddev) + if initializer_oneof == 'variance_scaling_initializer': + enum_descriptor = (hyperparams_pb2.VarianceScalingInitializer. + DESCRIPTOR.enum_types_by_name['Mode']) + mode = enum_descriptor.values_by_number[initializer. + variance_scaling_initializer. + mode].name + if build_for_keras: + if initializer.variance_scaling_initializer.uniform: + return tf.variance_scaling_initializer( + scale=initializer.variance_scaling_initializer.factor, + mode=mode.lower(), + distribution='uniform') + else: + # In TF 1.9 release and earlier, the truncated_normal distribution was + # not supported correctly. So, in these earlier versions of tensorflow, + # the ValueError will be raised, and we manually truncate the + # distribution scale. + # + # It is insufficient to just set distribution to `normal` from the + # start, because the `normal` distribution in newer Tensorflow versions + # creates a truncated distribution, whereas it created untruncated + # distributions in older versions. + try: + return tf.variance_scaling_initializer( + scale=initializer.variance_scaling_initializer.factor, + mode=mode.lower(), + distribution='truncated_normal') + except ValueError: + truncate_constant = 0.87962566103423978 + truncated_scale = initializer.variance_scaling_initializer.factor / ( + truncate_constant * truncate_constant + ) + return tf.variance_scaling_initializer( + scale=truncated_scale, + mode=mode.lower(), + distribution='normal') + + else: + return slim.variance_scaling_initializer( + factor=initializer.variance_scaling_initializer.factor, + mode=mode, + uniform=initializer.variance_scaling_initializer.uniform) + raise ValueError('Unknown initializer function: {}'.format( + initializer_oneof)) + + +def _build_batch_norm_params(batch_norm, is_training): + """Build a dictionary of batch_norm params from config. + + Args: + batch_norm: hyperparams_pb2.ConvHyperparams.batch_norm proto. + is_training: Whether the models is in training mode. + + Returns: + A dictionary containing batch_norm parameters. 
+ """ + batch_norm_params = { + 'decay': batch_norm.decay, + 'center': batch_norm.center, + 'scale': batch_norm.scale, + 'epsilon': batch_norm.epsilon, + # Remove is_training parameter from here and deprecate it in the proto + # once we refactor Faster RCNN models to set is_training through an outer + # arg_scope in the meta architecture. + 'is_training': is_training and batch_norm.train, + } + return batch_norm_params + + +def _build_keras_batch_norm_params(batch_norm): + """Build a dictionary of Keras BatchNormalization params from config. + + Args: + batch_norm: hyperparams_pb2.ConvHyperparams.batch_norm proto. + + Returns: + A dictionary containing Keras BatchNormalization parameters. + """ + # Note: Although decay is defined to be 1 - momentum in batch_norm, + # decay in the slim batch_norm layers was erroneously defined and is + # actually the same as momentum in the Keras batch_norm layers. + # For context, see: github.com/keras-team/keras/issues/6839 + batch_norm_params = { + 'momentum': batch_norm.decay, + 'center': batch_norm.center, + 'scale': batch_norm.scale, + 'epsilon': batch_norm.epsilon, + } + return batch_norm_params diff --git a/builders/hyperparams_builder_test.py b/builders/hyperparams_builder_test.py new file mode 100644 index 0000000..a83b9ee --- /dev/null +++ b/builders/hyperparams_builder_test.py @@ -0,0 +1,865 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+
+"""Tests object_detection.builders.hyperparams_builder."""
+
+import numpy as np
+import tensorflow as tf
+
+from google.protobuf import text_format
+
+from object_detection.builders import hyperparams_builder
+from object_detection.core import freezable_batch_norm
+from object_detection.protos import hyperparams_pb2
+
+slim = tf.contrib.slim
+
+
+def _get_scope_key(op):
+  return getattr(op, '_key_op', str(op))
+
+
+class HyperparamsBuilderTest(tf.test.TestCase):
+
+  def test_default_arg_scope_has_conv2d_op(self):
+    conv_hyperparams_text_proto = """
+      regularizer {
+        l1_regularizer {
+        }
+      }
+      initializer {
+        truncated_normal_initializer {
+        }
+      }
+    """
+    conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+    scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+                                         is_training=True)
+    scope = scope_fn()
+    self.assertTrue(_get_scope_key(slim.conv2d) in scope)
+
+  def test_default_arg_scope_has_separable_conv2d_op(self):
+    conv_hyperparams_text_proto = """
+      regularizer {
+        l1_regularizer {
+        }
+      }
+      initializer {
+        truncated_normal_initializer {
+        }
+      }
+    """
+    conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+    scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+                                         is_training=True)
+    scope = scope_fn()
+    self.assertTrue(_get_scope_key(slim.separable_conv2d) in scope)
+
+  def test_default_arg_scope_has_conv2d_transpose_op(self):
+    conv_hyperparams_text_proto = """
+      regularizer {
+        l1_regularizer {
+        }
+      }
+      initializer {
+        truncated_normal_initializer {
+        }
+      }
+    """
+    conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+    scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+                                         is_training=True)
+    scope = scope_fn()
+    self.assertTrue(_get_scope_key(slim.conv2d_transpose) in scope)
+
+  def test_explicit_fc_op_arg_scope_has_fully_connected_op(self):
+    conv_hyperparams_text_proto = """
+      op: FC
+      regularizer {
+        l1_regularizer {
+        }
+      }
+      initializer {
+        truncated_normal_initializer {
+        }
+      }
+    """
+    conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+    scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+                                         is_training=True)
+    scope = scope_fn()
+    self.assertTrue(_get_scope_key(slim.fully_connected) in scope)
+
+  def test_separable_conv2d_and_conv2d_and_transpose_have_same_parameters(self):
+    conv_hyperparams_text_proto = """
+      regularizer {
+        l1_regularizer {
+        }
+      }
+      initializer {
+        truncated_normal_initializer {
+        }
+      }
+    """
+    conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+    scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+                                         is_training=True)
+    scope = scope_fn()
+    kwargs_1, kwargs_2, kwargs_3 = scope.values()
+    self.assertDictEqual(kwargs_1, kwargs_2)
+    self.assertDictEqual(kwargs_1, kwargs_3)
+
+  def test_return_l1_regularized_weights(self):
+    conv_hyperparams_text_proto = """
+      regularizer {
+        l1_regularizer {
+          weight: 0.5
+        }
+      }
+      initializer {
+        truncated_normal_initializer {
+        }
+      }
+    """
+    conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+    scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+
is_training=True) + scope = scope_fn() + conv_scope_arguments = scope.values()[0] + regularizer = conv_scope_arguments['weights_regularizer'] + weights = np.array([1., -1, 4., 2.]) + with self.test_session() as sess: + result = sess.run(regularizer(tf.constant(weights))) + self.assertAllClose(np.abs(weights).sum() * 0.5, result) + + def test_return_l1_regularized_weights_keras(self): + conv_hyperparams_text_proto = """ + regularizer { + l1_regularizer { + weight: 0.5 + } + } + initializer { + truncated_normal_initializer { + } + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + keras_config = hyperparams_builder.KerasLayerHyperparams( + conv_hyperparams_proto) + + regularizer = keras_config.params()['kernel_regularizer'] + weights = np.array([1., -1, 4., 2.]) + with self.test_session() as sess: + result = sess.run(regularizer(tf.constant(weights))) + self.assertAllClose(np.abs(weights).sum() * 0.5, result) + + def test_return_l2_regularizer_weights(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + weight: 0.42 + } + } + initializer { + truncated_normal_initializer { + } + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + scope_fn = hyperparams_builder.build(conv_hyperparams_proto, + is_training=True) + scope = scope_fn() + conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] + + regularizer = conv_scope_arguments['weights_regularizer'] + weights = np.array([1., -1, 4., 2.]) + with self.test_session() as sess: + result = sess.run(regularizer(tf.constant(weights))) + self.assertAllClose(np.power(weights, 2).sum() / 2.0 * 0.42, result) + + def test_return_l2_regularizer_weights_keras(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + weight: 0.42 + } + } + initializer { + truncated_normal_initializer { + } + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + keras_config = hyperparams_builder.KerasLayerHyperparams( + conv_hyperparams_proto) + + regularizer = keras_config.params()['kernel_regularizer'] + weights = np.array([1., -1, 4., 2.]) + with self.test_session() as sess: + result = sess.run(regularizer(tf.constant(weights))) + self.assertAllClose(np.power(weights, 2).sum() / 2.0 * 0.42, result) + + def test_return_non_default_batch_norm_params_with_train_during_train(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + batch_norm { + decay: 0.7 + center: false + scale: true + epsilon: 0.03 + train: true + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + scope_fn = hyperparams_builder.build(conv_hyperparams_proto, + is_training=True) + scope = scope_fn() + conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] + self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm) + batch_norm_params = scope[_get_scope_key(slim.batch_norm)] + self.assertAlmostEqual(batch_norm_params['decay'], 0.7) + self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03) + self.assertFalse(batch_norm_params['center']) + self.assertTrue(batch_norm_params['scale']) + self.assertTrue(batch_norm_params['is_training']) + + def test_return_non_default_batch_norm_params_keras( + self): + 
conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + batch_norm { + decay: 0.7 + center: false + scale: true + epsilon: 0.03 + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + keras_config = hyperparams_builder.KerasLayerHyperparams( + conv_hyperparams_proto) + + self.assertTrue(keras_config.use_batch_norm()) + batch_norm_params = keras_config.batch_norm_params() + self.assertAlmostEqual(batch_norm_params['momentum'], 0.7) + self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03) + self.assertFalse(batch_norm_params['center']) + self.assertTrue(batch_norm_params['scale']) + + batch_norm_layer = keras_config.build_batch_norm() + self.assertTrue(isinstance(batch_norm_layer, + freezable_batch_norm.FreezableBatchNorm)) + + def test_return_non_default_batch_norm_params_keras_override( + self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + batch_norm { + decay: 0.7 + center: false + scale: true + epsilon: 0.03 + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + keras_config = hyperparams_builder.KerasLayerHyperparams( + conv_hyperparams_proto) + + self.assertTrue(keras_config.use_batch_norm()) + batch_norm_params = keras_config.batch_norm_params(momentum=0.4) + self.assertAlmostEqual(batch_norm_params['momentum'], 0.4) + self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03) + self.assertFalse(batch_norm_params['center']) + self.assertTrue(batch_norm_params['scale']) + + def test_return_batch_norm_params_with_notrain_during_eval(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + batch_norm { + decay: 0.7 + center: false + scale: true + epsilon: 0.03 + train: true + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + scope_fn = hyperparams_builder.build(conv_hyperparams_proto, + is_training=False) + scope = scope_fn() + conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] + self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm) + batch_norm_params = scope[_get_scope_key(slim.batch_norm)] + self.assertAlmostEqual(batch_norm_params['decay'], 0.7) + self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03) + self.assertFalse(batch_norm_params['center']) + self.assertTrue(batch_norm_params['scale']) + self.assertFalse(batch_norm_params['is_training']) + + def test_return_batch_norm_params_with_notrain_when_train_is_false(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + batch_norm { + decay: 0.7 + center: false + scale: true + epsilon: 0.03 + train: false + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + scope_fn = hyperparams_builder.build(conv_hyperparams_proto, + is_training=True) + scope = scope_fn() + conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] + self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm) + batch_norm_params = scope[_get_scope_key(slim.batch_norm)] + self.assertAlmostEqual(batch_norm_params['decay'], 0.7) + 
self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03) + self.assertFalse(batch_norm_params['center']) + self.assertTrue(batch_norm_params['scale']) + self.assertFalse(batch_norm_params['is_training']) + + def test_do_not_use_batch_norm_if_default(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + scope_fn = hyperparams_builder.build(conv_hyperparams_proto, + is_training=True) + scope = scope_fn() + conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] + self.assertEqual(conv_scope_arguments['normalizer_fn'], None) + + def test_do_not_use_batch_norm_if_default_keras(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + keras_config = hyperparams_builder.KerasLayerHyperparams( + conv_hyperparams_proto) + self.assertFalse(keras_config.use_batch_norm()) + self.assertEqual(keras_config.batch_norm_params(), {}) + + # The batch norm builder should build an identity Lambda layer + identity_layer = keras_config.build_batch_norm() + self.assertTrue(isinstance(identity_layer, + tf.keras.layers.Lambda)) + + def test_use_none_activation(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + activation: NONE + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + scope_fn = hyperparams_builder.build(conv_hyperparams_proto, + is_training=True) + scope = scope_fn() + conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] + self.assertEqual(conv_scope_arguments['activation_fn'], None) + + def test_use_none_activation_keras(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + activation: NONE + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + keras_config = hyperparams_builder.KerasLayerHyperparams( + conv_hyperparams_proto) + self.assertEqual(keras_config.params()['activation'], None) + self.assertEqual( + keras_config.params(include_activation=True)['activation'], None) + activation_layer = keras_config.build_activation_layer() + self.assertTrue(isinstance(activation_layer, tf.keras.layers.Lambda)) + self.assertEqual(activation_layer.function, tf.identity) + + def test_use_relu_activation(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + activation: RELU + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + scope_fn = hyperparams_builder.build(conv_hyperparams_proto, + is_training=True) + scope = scope_fn() + conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] + self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu) + + def test_use_relu_activation_keras(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + 
activation: RELU + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + keras_config = hyperparams_builder.KerasLayerHyperparams( + conv_hyperparams_proto) + self.assertEqual(keras_config.params()['activation'], None) + self.assertEqual( + keras_config.params(include_activation=True)['activation'], tf.nn.relu) + activation_layer = keras_config.build_activation_layer() + self.assertTrue(isinstance(activation_layer, tf.keras.layers.Lambda)) + self.assertEqual(activation_layer.function, tf.nn.relu) + + def test_use_relu_6_activation(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + activation: RELU_6 + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + scope_fn = hyperparams_builder.build(conv_hyperparams_proto, + is_training=True) + scope = scope_fn() + conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] + self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu6) + + def test_use_relu_6_activation_keras(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + activation: RELU_6 + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + keras_config = hyperparams_builder.KerasLayerHyperparams( + conv_hyperparams_proto) + self.assertEqual(keras_config.params()['activation'], None) + self.assertEqual( + keras_config.params(include_activation=True)['activation'], tf.nn.relu6) + activation_layer = keras_config.build_activation_layer() + self.assertTrue(isinstance(activation_layer, tf.keras.layers.Lambda)) + self.assertEqual(activation_layer.function, tf.nn.relu6) + + def test_override_activation_keras(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + activation: RELU_6 + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + keras_config = hyperparams_builder.KerasLayerHyperparams( + conv_hyperparams_proto) + new_params = keras_config.params(activation=tf.nn.relu) + self.assertEqual(new_params['activation'], tf.nn.relu) + + def _assert_variance_in_range(self, initializer, shape, variance, + tol=1e-2): + with tf.Graph().as_default() as g: + with self.test_session(graph=g) as sess: + var = tf.get_variable( + name='test', + shape=shape, + dtype=tf.float32, + initializer=initializer) + sess.run(tf.global_variables_initializer()) + values = sess.run(var) + self.assertAllClose(np.var(values), variance, tol, tol) + + def test_variance_in_range_with_variance_scaling_initializer_fan_in(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + variance_scaling_initializer { + factor: 2.0 + mode: FAN_IN + uniform: false + } + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + scope_fn = hyperparams_builder.build(conv_hyperparams_proto, + is_training=True) + scope = scope_fn() + conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] + initializer = conv_scope_arguments['weights_initializer'] + self._assert_variance_in_range(initializer, shape=[100, 40], + 
variance=2. / 100.) + + def test_variance_in_range_with_variance_scaling_initializer_fan_in_keras( + self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + variance_scaling_initializer { + factor: 2.0 + mode: FAN_IN + uniform: false + } + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + keras_config = hyperparams_builder.KerasLayerHyperparams( + conv_hyperparams_proto) + initializer = keras_config.params()['kernel_initializer'] + self._assert_variance_in_range(initializer, shape=[100, 40], + variance=2. / 100.) + + def test_variance_in_range_with_variance_scaling_initializer_fan_out(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + variance_scaling_initializer { + factor: 2.0 + mode: FAN_OUT + uniform: false + } + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + scope_fn = hyperparams_builder.build(conv_hyperparams_proto, + is_training=True) + scope = scope_fn() + conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] + initializer = conv_scope_arguments['weights_initializer'] + self._assert_variance_in_range(initializer, shape=[100, 40], + variance=2. / 40.) + + def test_variance_in_range_with_variance_scaling_initializer_fan_out_keras( + self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + variance_scaling_initializer { + factor: 2.0 + mode: FAN_OUT + uniform: false + } + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + keras_config = hyperparams_builder.KerasLayerHyperparams( + conv_hyperparams_proto) + initializer = keras_config.params()['kernel_initializer'] + self._assert_variance_in_range(initializer, shape=[100, 40], + variance=2. / 40.) + + def test_variance_in_range_with_variance_scaling_initializer_fan_avg(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + variance_scaling_initializer { + factor: 2.0 + mode: FAN_AVG + uniform: false + } + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + scope_fn = hyperparams_builder.build(conv_hyperparams_proto, + is_training=True) + scope = scope_fn() + conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] + initializer = conv_scope_arguments['weights_initializer'] + self._assert_variance_in_range(initializer, shape=[100, 40], + variance=4. / (100. + 40.)) + + def test_variance_in_range_with_variance_scaling_initializer_fan_avg_keras( + self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + variance_scaling_initializer { + factor: 2.0 + mode: FAN_AVG + uniform: false + } + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + keras_config = hyperparams_builder.KerasLayerHyperparams( + conv_hyperparams_proto) + initializer = keras_config.params()['kernel_initializer'] + self._assert_variance_in_range(initializer, shape=[100, 40], + variance=4. / (100. 
+ 40.)) + + def test_variance_in_range_with_variance_scaling_initializer_uniform(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + variance_scaling_initializer { + factor: 2.0 + mode: FAN_IN + uniform: true + } + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + scope_fn = hyperparams_builder.build(conv_hyperparams_proto, + is_training=True) + scope = scope_fn() + conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] + initializer = conv_scope_arguments['weights_initializer'] + self._assert_variance_in_range(initializer, shape=[100, 40], + variance=2. / 100.) + + def test_variance_in_range_with_variance_scaling_initializer_uniform_keras( + self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + variance_scaling_initializer { + factor: 2.0 + mode: FAN_IN + uniform: true + } + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + keras_config = hyperparams_builder.KerasLayerHyperparams( + conv_hyperparams_proto) + initializer = keras_config.params()['kernel_initializer'] + self._assert_variance_in_range(initializer, shape=[100, 40], + variance=2. / 100.) + + def test_variance_in_range_with_truncated_normal_initializer(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + mean: 0.0 + stddev: 0.8 + } + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + scope_fn = hyperparams_builder.build(conv_hyperparams_proto, + is_training=True) + scope = scope_fn() + conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] + initializer = conv_scope_arguments['weights_initializer'] + self._assert_variance_in_range(initializer, shape=[100, 40], + variance=0.49, tol=1e-1) + + def test_variance_in_range_with_truncated_normal_initializer_keras(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + mean: 0.0 + stddev: 0.8 + } + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + keras_config = hyperparams_builder.KerasLayerHyperparams( + conv_hyperparams_proto) + initializer = keras_config.params()['kernel_initializer'] + self._assert_variance_in_range(initializer, shape=[100, 40], + variance=0.49, tol=1e-1) + + def test_variance_in_range_with_random_normal_initializer(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + random_normal_initializer { + mean: 0.0 + stddev: 0.8 + } + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) + scope_fn = hyperparams_builder.build(conv_hyperparams_proto, + is_training=True) + scope = scope_fn() + conv_scope_arguments = scope[_get_scope_key(slim.conv2d)] + initializer = conv_scope_arguments['weights_initializer'] + self._assert_variance_in_range(initializer, shape=[100, 40], + variance=0.64, tol=1e-1) + + def test_variance_in_range_with_random_normal_initializer_keras(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + random_normal_initializer { + mean: 0.0 + stddev: 0.8 + } 
+      }
+    """
+    conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+    keras_config = hyperparams_builder.KerasLayerHyperparams(
+        conv_hyperparams_proto)
+    initializer = keras_config.params()['kernel_initializer']
+    self._assert_variance_in_range(initializer, shape=[100, 40],
+                                   variance=0.64, tol=1e-1)
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/builders/image_resizer_builder.py b/builders/image_resizer_builder.py
new file mode 100644
index 0000000..bb24ef8
--- /dev/null
+++ b/builders/image_resizer_builder.py
@@ -0,0 +1,174 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Builder function for image resizing operations."""
+import functools
+import tensorflow as tf
+
+from object_detection.core import preprocessor
+from object_detection.protos import image_resizer_pb2
+
+
+def _tf_resize_method(resize_method):
+  """Maps image resize method from enumeration type to TensorFlow.
+
+  Args:
+    resize_method: The resize_method attribute of keep_aspect_ratio_resizer or
+      fixed_shape_resizer.
+
+  Returns:
+    method: The corresponding TensorFlow ResizeMethod.
+
+  Raises:
+    ValueError: if `resize_method` is of unknown type.
+  """
+  dict_method = {
+      image_resizer_pb2.BILINEAR:
+          tf.image.ResizeMethod.BILINEAR,
+      image_resizer_pb2.NEAREST_NEIGHBOR:
+          tf.image.ResizeMethod.NEAREST_NEIGHBOR,
+      image_resizer_pb2.BICUBIC:
+          tf.image.ResizeMethod.BICUBIC,
+      image_resizer_pb2.AREA:
+          tf.image.ResizeMethod.AREA
+  }
+  if resize_method in dict_method:
+    return dict_method[resize_method]
+  else:
+    raise ValueError('Unknown resize_method')
+
+
+def build(image_resizer_config):
+  """Builds callable for image resizing operations.
+
+  Args:
+    image_resizer_config: image_resizer.proto object containing parameters for
+      an image resizing operation.
+
+  Returns:
+    image_resizer_fn: Callable for image resizing. This callable always takes
+      a rank-3 image tensor (corresponding to a single image) and returns a
+      rank-3 image tensor, possibly with new spatial dimensions.
+
+  Raises:
+    ValueError: if `image_resizer_config` is of incorrect type.
+    ValueError: if `image_resizer_config.image_resizer_oneof` is of an
+      unexpected type.
+    ValueError: if min_dimension > max_dimension when keep_aspect_ratio_resizer
+      is used.
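+
+  For illustration only (this example mirrors the unit tests and is not
+  part of the original docstring):
+
+    config = image_resizer_pb2.ImageResizer()
+    text_format.Merge('''
+        keep_aspect_ratio_resizer {
+          min_dimension: 10
+          max_dimension: 20
+        }''', config)
+    image_resizer_fn = build(config)
+    # A (50, 25, 3) image is resized to (20, 10, 3).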
+ """ + if not isinstance(image_resizer_config, image_resizer_pb2.ImageResizer): + raise ValueError('image_resizer_config not of type ' + 'image_resizer_pb2.ImageResizer.') + + image_resizer_oneof = image_resizer_config.WhichOneof('image_resizer_oneof') + if image_resizer_oneof == 'keep_aspect_ratio_resizer': + keep_aspect_ratio_config = image_resizer_config.keep_aspect_ratio_resizer + if not (keep_aspect_ratio_config.min_dimension <= + keep_aspect_ratio_config.max_dimension): + raise ValueError('min_dimension > max_dimension') + method = _tf_resize_method(keep_aspect_ratio_config.resize_method) + per_channel_pad_value = (0, 0, 0) + if keep_aspect_ratio_config.per_channel_pad_value: + per_channel_pad_value = tuple(keep_aspect_ratio_config. + per_channel_pad_value) + image_resizer_fn = functools.partial( + preprocessor.resize_to_range, + min_dimension=keep_aspect_ratio_config.min_dimension, + max_dimension=keep_aspect_ratio_config.max_dimension, + method=method, + pad_to_max_dimension=keep_aspect_ratio_config.pad_to_max_dimension, + per_channel_pad_value=per_channel_pad_value) + if not keep_aspect_ratio_config.convert_to_grayscale: + return image_resizer_fn + elif image_resizer_oneof == 'fixed_shape_resizer': + fixed_shape_resizer_config = image_resizer_config.fixed_shape_resizer + method = _tf_resize_method(fixed_shape_resizer_config.resize_method) + image_resizer_fn = functools.partial( + preprocessor.resize_image, + new_height=fixed_shape_resizer_config.height, + new_width=fixed_shape_resizer_config.width, + method=method) + if not fixed_shape_resizer_config.convert_to_grayscale: + return image_resizer_fn + elif image_resizer_oneof == 'identity_resizer': + def image_resizer_fn(image, masks=None, **kwargs): + del kwargs + if masks is None: + return [image, tf.shape(image)] + else: + return [image, masks, tf.shape(image)] + return image_resizer_fn + elif image_resizer_oneof == 'conditional_shape_resizer': + conditional_shape_resize_config = ( + image_resizer_config.conditional_shape_resizer) + method = _tf_resize_method(conditional_shape_resize_config.resize_method) + + if conditional_shape_resize_config.condition == ( + image_resizer_pb2.ConditionalShapeResizer.GREATER): + image_resizer_fn = functools.partial( + preprocessor.resize_to_max_dimension, + max_dimension=conditional_shape_resize_config.size_threshold, + method=method) + + elif conditional_shape_resize_config.condition == ( + image_resizer_pb2.ConditionalShapeResizer.SMALLER): + image_resizer_fn = functools.partial( + preprocessor.resize_to_min_dimension, + min_dimension=conditional_shape_resize_config.size_threshold, + method=method) + else: + raise ValueError( + 'Invalid image resizer condition option for ' + 'ConditionalShapeResizer: \'%s\'.' + % conditional_shape_resize_config.condition) + + if not conditional_shape_resize_config.convert_to_grayscale: + return image_resizer_fn + else: + raise ValueError( + 'Invalid image resizer option: \'%s\'.' % image_resizer_oneof) + + def grayscale_image_resizer(image, masks=None): + """Convert to grayscale before applying image_resizer_fn. + + Args: + image: A 3D tensor of shape [height, width, 3] + masks: (optional) rank 3 float32 tensor with shape [num_instances, height, + width] containing instance masks. + + Returns: + Note that the position of the resized_image_shape changes based on whether + masks are present. 
+      resized_image: A 3D tensor of shape [new_height, new_width, 1],
+        where the image has been resized (with the configured resize method)
+        so that min(new_height, new_width) == min_dimension or
+        max(new_height, new_width) == max_dimension.
+      resized_masks: If masks is not None, also outputs masks. A 3D tensor of
+        shape [num_instances, new_height, new_width].
+      resized_image_shape: A 1D tensor of shape [3] containing the shape of
+        the resized image.
+    """
+    # image_resizer_fn returns [resized_image, resized_image_shape] if
+    # masks is None, otherwise it returns
+    # [resized_image, resized_mask, resized_image_shape]. In either case, we
+    # only deal with the first and last elements of the returned list.
+    retval = image_resizer_fn(image, masks)
+    resized_image = retval[0]
+    resized_image_shape = retval[-1]
+    retval[0] = preprocessor.rgb_to_gray(resized_image)
+    retval[-1] = tf.concat([resized_image_shape[:-1], [1]], 0)
+    return retval
+
+  # functools.partial with no bound arguments is a no-op wrapper, so return
+  # the function directly.
+  return grayscale_image_resizer
diff --git a/builders/image_resizer_builder_test.py b/builders/image_resizer_builder_test.py
new file mode 100644
index 0000000..dcf7bf1
--- /dev/null
+++ b/builders/image_resizer_builder_test.py
@@ -0,0 +1,216 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================== +"""Tests for object_detection.builders.image_resizer_builder.""" +import numpy as np +import tensorflow as tf +from google.protobuf import text_format +from object_detection.builders import image_resizer_builder +from object_detection.protos import image_resizer_pb2 + + +class ImageResizerBuilderTest(tf.test.TestCase): + + def _shape_of_resized_random_image_given_text_proto(self, input_shape, + text_proto): + image_resizer_config = image_resizer_pb2.ImageResizer() + text_format.Merge(text_proto, image_resizer_config) + image_resizer_fn = image_resizer_builder.build(image_resizer_config) + images = tf.cast( + tf.random_uniform(input_shape, minval=0, maxval=255, dtype=tf.int32), + dtype=tf.float32) + resized_images, _ = image_resizer_fn(images) + with self.test_session() as sess: + return sess.run(resized_images).shape + + def test_build_keep_aspect_ratio_resizer_returns_expected_shape(self): + image_resizer_text_proto = """ + keep_aspect_ratio_resizer { + min_dimension: 10 + max_dimension: 20 + } + """ + input_shape = (50, 25, 3) + expected_output_shape = (20, 10, 3) + output_shape = self._shape_of_resized_random_image_given_text_proto( + input_shape, image_resizer_text_proto) + self.assertEqual(output_shape, expected_output_shape) + + def test_build_keep_aspect_ratio_resizer_grayscale(self): + image_resizer_text_proto = """ + keep_aspect_ratio_resizer { + min_dimension: 10 + max_dimension: 20 + convert_to_grayscale: true + } + """ + input_shape = (50, 25, 3) + expected_output_shape = (20, 10, 1) + output_shape = self._shape_of_resized_random_image_given_text_proto( + input_shape, image_resizer_text_proto) + self.assertEqual(output_shape, expected_output_shape) + + def test_build_keep_aspect_ratio_resizer_with_padding(self): + image_resizer_text_proto = """ + keep_aspect_ratio_resizer { + min_dimension: 10 + max_dimension: 20 + pad_to_max_dimension: true + per_channel_pad_value: 3 + per_channel_pad_value: 4 + per_channel_pad_value: 5 + } + """ + input_shape = (50, 25, 3) + expected_output_shape = (20, 20, 3) + output_shape = self._shape_of_resized_random_image_given_text_proto( + input_shape, image_resizer_text_proto) + self.assertEqual(output_shape, expected_output_shape) + + def test_built_fixed_shape_resizer_returns_expected_shape(self): + image_resizer_text_proto = """ + fixed_shape_resizer { + height: 10 + width: 20 + } + """ + input_shape = (50, 25, 3) + expected_output_shape = (10, 20, 3) + output_shape = self._shape_of_resized_random_image_given_text_proto( + input_shape, image_resizer_text_proto) + self.assertEqual(output_shape, expected_output_shape) + + def test_built_fixed_shape_resizer_grayscale(self): + image_resizer_text_proto = """ + fixed_shape_resizer { + height: 10 + width: 20 + convert_to_grayscale: true + } + """ + input_shape = (50, 25, 3) + expected_output_shape = (10, 20, 1) + output_shape = self._shape_of_resized_random_image_given_text_proto( + input_shape, image_resizer_text_proto) + self.assertEqual(output_shape, expected_output_shape) + + def test_identity_resizer_returns_expected_shape(self): + image_resizer_text_proto = """ + identity_resizer { + } + """ + input_shape = (10, 20, 3) + expected_output_shape = (10, 20, 3) + output_shape = self._shape_of_resized_random_image_given_text_proto( + input_shape, image_resizer_text_proto) + self.assertEqual(output_shape, expected_output_shape) + + def test_raises_error_on_invalid_input(self): + invalid_input = 'invalid_input' + with 
self.assertRaises(ValueError): + image_resizer_builder.build(invalid_input) + + def _resized_image_given_text_proto(self, image, text_proto): + image_resizer_config = image_resizer_pb2.ImageResizer() + text_format.Merge(text_proto, image_resizer_config) + image_resizer_fn = image_resizer_builder.build(image_resizer_config) + image_placeholder = tf.placeholder(tf.uint8, [1, None, None, 3]) + resized_image, _ = image_resizer_fn(image_placeholder) + with self.test_session() as sess: + return sess.run(resized_image, feed_dict={image_placeholder: image}) + + def test_fixed_shape_resizer_nearest_neighbor_method(self): + image_resizer_text_proto = """ + fixed_shape_resizer { + height: 1 + width: 1 + resize_method: NEAREST_NEIGHBOR + } + """ + image = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + image = np.expand_dims(image, axis=2) + image = np.tile(image, (1, 1, 3)) + image = np.expand_dims(image, axis=0) + resized_image = self._resized_image_given_text_proto( + image, image_resizer_text_proto) + vals = np.unique(resized_image).tolist() + self.assertEqual(len(vals), 1) + self.assertEqual(vals[0], 1) + + def test_build_conditional_shape_resizer_greater_returns_expected_shape(self): + image_resizer_text_proto = """ + conditional_shape_resizer { + condition: GREATER + size_threshold: 30 + } + """ + input_shape = (60, 30, 3) + expected_output_shape = (30, 15, 3) + output_shape = self._shape_of_resized_random_image_given_text_proto( + input_shape, image_resizer_text_proto) + self.assertEqual(output_shape, expected_output_shape) + + def test_build_conditional_shape_resizer_same_shape_with_no_resize(self): + image_resizer_text_proto = """ + conditional_shape_resizer { + condition: GREATER + size_threshold: 30 + } + """ + input_shape = (15, 15, 3) + expected_output_shape = (15, 15, 3) + output_shape = self._shape_of_resized_random_image_given_text_proto( + input_shape, image_resizer_text_proto) + self.assertEqual(output_shape, expected_output_shape) + + def test_build_conditional_shape_resizer_smaller_returns_expected_shape(self): + image_resizer_text_proto = """ + conditional_shape_resizer { + condition: SMALLER + size_threshold: 30 + } + """ + input_shape = (30, 15, 3) + expected_output_shape = (60, 30, 3) + output_shape = self._shape_of_resized_random_image_given_text_proto( + input_shape, image_resizer_text_proto) + self.assertEqual(output_shape, expected_output_shape) + + def test_build_conditional_shape_resizer_grayscale(self): + image_resizer_text_proto = """ + conditional_shape_resizer { + condition: GREATER + size_threshold: 30 + convert_to_grayscale: true + } + """ + input_shape = (60, 30, 3) + expected_output_shape = (30, 15, 1) + output_shape = self._shape_of_resized_random_image_given_text_proto( + input_shape, image_resizer_text_proto) + self.assertEqual(output_shape, expected_output_shape) + + def test_build_conditional_shape_resizer_error_on_invalid_condition(self): + invalid_image_resizer_text_proto = """ + conditional_shape_resizer { + condition: INVALID + size_threshold: 30 + } + """ + with self.assertRaises(ValueError): + image_resizer_builder.build(invalid_image_resizer_text_proto) + + +if __name__ == '__main__': + tf.test.main() diff --git a/builders/input_reader_builder.py b/builders/input_reader_builder.py new file mode 100644 index 0000000..8cb5e2f --- /dev/null +++ b/builders/input_reader_builder.py @@ -0,0 +1,76 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Input reader builder.
+
+Creates data sources for DetectionModels from an InputReader config. See
+input_reader.proto for options.
+
+Note: Users who wish to use their own InputReaders with the Object Detection
+configuration framework should define their own builder function that wraps
+the build function.
+"""
+
+import tensorflow as tf
+
+from object_detection.data_decoders import tf_example_decoder
+from object_detection.protos import input_reader_pb2
+
+parallel_reader = tf.contrib.slim.parallel_reader
+
+
+def build(input_reader_config):
+  """Builds a tensor dictionary based on the InputReader config.
+
+  Args:
+    input_reader_config: An input_reader_pb2.InputReader object.
+
+  Returns:
+    A tensor dict based on the input_reader_config.
+
+  Raises:
+    ValueError: On invalid input reader proto.
+    ValueError: If no input paths are specified.
+  """
+  if not isinstance(input_reader_config, input_reader_pb2.InputReader):
+    raise ValueError('input_reader_config not of type '
+                     'input_reader_pb2.InputReader.')
+
+  if input_reader_config.WhichOneof('input_reader') == 'tf_record_input_reader':
+    config = input_reader_config.tf_record_input_reader
+    if not config.input_path:
+      raise ValueError('At least one input path must be specified in '
+                       '`input_reader_config`.')
+    _, string_tensor = parallel_reader.parallel_read(
+        config.input_path[:],  # Convert `RepeatedScalarContainer` to list.
+        reader_class=tf.TFRecordReader,
+        num_epochs=(input_reader_config.num_epochs
+                    if input_reader_config.num_epochs else None),
+        num_readers=input_reader_config.num_readers,
+        shuffle=input_reader_config.shuffle,
+        dtypes=[tf.string, tf.string],
+        capacity=input_reader_config.queue_capacity,
+        min_after_dequeue=input_reader_config.min_after_dequeue)
+
+    label_map_proto_file = None
+    if input_reader_config.HasField('label_map_path'):
+      label_map_proto_file = input_reader_config.label_map_path
+    decoder = tf_example_decoder.TfExampleDecoder(
+        load_instance_masks=input_reader_config.load_instance_masks,
+        instance_mask_type=input_reader_config.mask_type,
+        label_map_proto_file=label_map_proto_file)
+    return decoder.decode(string_tensor)
+
+  raise ValueError('Unsupported input_reader_config.')
diff --git a/builders/input_reader_builder_test.py b/builders/input_reader_builder_test.py
new file mode 100644
index 0000000..c2c8ef4
--- /dev/null
+++ b/builders/input_reader_builder_test.py
@@ -0,0 +1,129 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for input_reader_builder.""" + +import os +import numpy as np +import tensorflow as tf + +from google.protobuf import text_format + +from object_detection.builders import input_reader_builder +from object_detection.core import standard_fields as fields +from object_detection.protos import input_reader_pb2 +from object_detection.utils import dataset_util + + +class InputReaderBuilderTest(tf.test.TestCase): + + def create_tf_record(self): + path = os.path.join(self.get_temp_dir(), 'tfrecord') + writer = tf.python_io.TFRecordWriter(path) + + image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) + flat_mask = (4 * 5) * [1.0] + with self.test_session(): + encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval() + example = tf.train.Example(features=tf.train.Features(feature={ + 'image/encoded': dataset_util.bytes_feature(encoded_jpeg), + 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), + 'image/height': dataset_util.int64_feature(4), + 'image/width': dataset_util.int64_feature(5), + 'image/object/bbox/xmin': dataset_util.float_list_feature([0.0]), + 'image/object/bbox/xmax': dataset_util.float_list_feature([1.0]), + 'image/object/bbox/ymin': dataset_util.float_list_feature([0.0]), + 'image/object/bbox/ymax': dataset_util.float_list_feature([1.0]), + 'image/object/class/label': dataset_util.int64_list_feature([2]), + 'image/object/mask': dataset_util.float_list_feature(flat_mask), + })) + writer.write(example.SerializeToString()) + writer.close() + + return path + + def test_build_tf_record_input_reader(self): + tf_record_path = self.create_tf_record() + + input_reader_text_proto = """ + shuffle: false + num_readers: 1 + tf_record_input_reader {{ + input_path: '{0}' + }} + """.format(tf_record_path) + input_reader_proto = input_reader_pb2.InputReader() + text_format.Merge(input_reader_text_proto, input_reader_proto) + tensor_dict = input_reader_builder.build(input_reader_proto) + + with tf.train.MonitoredSession() as sess: + output_dict = sess.run(tensor_dict) + + self.assertTrue(fields.InputDataFields.groundtruth_instance_masks + not in output_dict) + self.assertEquals( + (4, 5, 3), output_dict[fields.InputDataFields.image].shape) + self.assertEquals( + [2], output_dict[fields.InputDataFields.groundtruth_classes]) + self.assertEquals( + (1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape) + self.assertAllEqual( + [0.0, 0.0, 1.0, 1.0], + output_dict[fields.InputDataFields.groundtruth_boxes][0]) + + def test_build_tf_record_input_reader_and_load_instance_masks(self): + tf_record_path = self.create_tf_record() + + input_reader_text_proto = """ + shuffle: false + num_readers: 1 + load_instance_masks: true + tf_record_input_reader {{ + input_path: '{0}' + }} + """.format(tf_record_path) + input_reader_proto = input_reader_pb2.InputReader() + text_format.Merge(input_reader_text_proto, input_reader_proto) + tensor_dict = input_reader_builder.build(input_reader_proto) + + with tf.train.MonitoredSession() as sess: + output_dict = 
sess.run(tensor_dict)
+
+    self.assertEquals(
+        (4, 5, 3), output_dict[fields.InputDataFields.image].shape)
+    self.assertEquals(
+        [2], output_dict[fields.InputDataFields.groundtruth_classes])
+    self.assertEquals(
+        (1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape)
+    self.assertAllEqual(
+        [0.0, 0.0, 1.0, 1.0],
+        output_dict[fields.InputDataFields.groundtruth_boxes][0])
+    self.assertAllEqual(
+        (1, 4, 5),
+        output_dict[fields.InputDataFields.groundtruth_instance_masks].shape)
+
+  def test_raises_error_with_no_input_paths(self):
+    input_reader_text_proto = """
+      shuffle: false
+      num_readers: 1
+      load_instance_masks: true
+    """
+    input_reader_proto = input_reader_pb2.InputReader()
+    text_format.Merge(input_reader_text_proto, input_reader_proto)
+    with self.assertRaises(ValueError):
+      input_reader_builder.build(input_reader_proto)
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/builders/losses_builder.py b/builders/losses_builder.py
new file mode 100644
index 0000000..2b98d0a
--- /dev/null
+++ b/builders/losses_builder.py
@@ -0,0 +1,252 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""A function to build localization and classification losses from config."""
+
+import functools
+from object_detection.core import balanced_positive_negative_sampler as sampler
+from object_detection.core import losses
+from object_detection.protos import losses_pb2
+from object_detection.utils import ops
+
+
+def build(loss_config):
+  """Build losses based on the config.
+
+  Builds classification and localization losses, and optionally a hard
+  example miner, based on the config.
+
+  Args:
+    loss_config: A losses_pb2.Loss object.
+
+  Returns:
+    classification_loss: Classification loss object.
+    localization_loss: Localization loss object.
+    classification_weight: Classification loss weight.
+    localization_weight: Localization loss weight.
+    hard_example_miner: Hard example miner object.
+    random_example_sampler: BalancedPositiveNegativeSampler object.
+    expected_loss_weights_fn: Function for computing expected classification
+      loss weights, or None if expected_loss_weights is NONE.
+
+  Raises:
+    ValueError: If hard_example_miner is used with sigmoid_focal_loss.
+    ValueError: If random_example_sampler is configured with a non-positive
+      positive sample fraction.
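+
+  For illustration only (the config mirrors the unit tests and this example
+  is not part of the original docstring):
+
+    losses_proto = losses_pb2.Loss()
+    text_format.Merge('''
+        localization_loss { weighted_smooth_l1 { } }
+        classification_loss { weighted_softmax { } }''', losses_proto)
+    (classification_loss, localization_loss, _, _, _, _,
+     expected_loss_weights_fn) = build(losses_proto)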
+ """ + classification_loss = _build_classification_loss( + loss_config.classification_loss) + localization_loss = _build_localization_loss( + loss_config.localization_loss) + classification_weight = loss_config.classification_weight + localization_weight = loss_config.localization_weight + hard_example_miner = None + if loss_config.HasField('hard_example_miner'): + if (loss_config.classification_loss.WhichOneof('classification_loss') == + 'weighted_sigmoid_focal'): + raise ValueError('HardExampleMiner should not be used with sigmoid focal ' + 'loss') + hard_example_miner = build_hard_example_miner( + loss_config.hard_example_miner, + classification_weight, + localization_weight) + random_example_sampler = None + if loss_config.HasField('random_example_sampler'): + if loss_config.random_example_sampler.positive_sample_fraction <= 0: + raise ValueError('RandomExampleSampler should not use non-positive' + 'value as positive sample fraction.') + random_example_sampler = sampler.BalancedPositiveNegativeSampler( + positive_fraction=loss_config.random_example_sampler. + positive_sample_fraction) + + if loss_config.expected_loss_weights == loss_config.NONE: + expected_loss_weights_fn = None + elif loss_config.expected_loss_weights == loss_config.EXPECTED_SAMPLING: + expected_loss_weights_fn = functools.partial( + ops.expected_classification_loss_by_expected_sampling, + min_num_negative_samples=loss_config.min_num_negative_samples, + desired_negative_sampling_ratio=loss_config + .desired_negative_sampling_ratio) + elif (loss_config.expected_loss_weights == loss_config + .REWEIGHTING_UNMATCHED_ANCHORS): + expected_loss_weights_fn = functools.partial( + ops.expected_classification_loss_by_reweighting_unmatched_anchors, + min_num_negative_samples=loss_config.min_num_negative_samples, + desired_negative_sampling_ratio=loss_config + .desired_negative_sampling_ratio) + else: + raise ValueError('Not a valid value for expected_classification_loss.') + + return (classification_loss, localization_loss, classification_weight, + localization_weight, hard_example_miner, random_example_sampler, + expected_loss_weights_fn) + + +def build_hard_example_miner(config, + classification_weight, + localization_weight): + """Builds hard example miner based on the config. + + Args: + config: A losses_pb2.HardExampleMiner object. + classification_weight: Classification loss weight. + localization_weight: Localization loss weight. + + Returns: + Hard example miner. + + """ + loss_type = None + if config.loss_type == losses_pb2.HardExampleMiner.BOTH: + loss_type = 'both' + if config.loss_type == losses_pb2.HardExampleMiner.CLASSIFICATION: + loss_type = 'cls' + if config.loss_type == losses_pb2.HardExampleMiner.LOCALIZATION: + loss_type = 'loc' + + max_negatives_per_positive = None + num_hard_examples = None + if config.max_negatives_per_positive > 0: + max_negatives_per_positive = config.max_negatives_per_positive + if config.num_hard_examples > 0: + num_hard_examples = config.num_hard_examples + hard_example_miner = losses.HardExampleMiner( + num_hard_examples=num_hard_examples, + iou_threshold=config.iou_threshold, + loss_type=loss_type, + cls_loss_weight=classification_weight, + loc_loss_weight=localization_weight, + max_negatives_per_positive=max_negatives_per_positive, + min_negatives_per_image=config.min_negatives_per_image) + return hard_example_miner + + +def build_faster_rcnn_classification_loss(loss_config): + """Builds a classification loss for Faster RCNN based on the loss config. 
+ + Args: + loss_config: A losses_pb2.ClassificationLoss object. + + Returns: + Loss based on the config. + + Raises: + ValueError: On invalid loss_config. + """ + if not isinstance(loss_config, losses_pb2.ClassificationLoss): + raise ValueError('loss_config not of type losses_pb2.ClassificationLoss.') + + loss_type = loss_config.WhichOneof('classification_loss') + + if loss_type == 'weighted_sigmoid': + return losses.WeightedSigmoidClassificationLoss() + if loss_type == 'weighted_softmax': + config = loss_config.weighted_softmax + return losses.WeightedSoftmaxClassificationLoss( + logit_scale=config.logit_scale) + if loss_type == 'weighted_logits_softmax': + config = loss_config.weighted_logits_softmax + return losses.WeightedSoftmaxClassificationAgainstLogitsLoss( + logit_scale=config.logit_scale) + if loss_type == 'weighted_sigmoid_focal': + config = loss_config.weighted_sigmoid_focal + alpha = None + if config.HasField('alpha'): + alpha = config.alpha + return losses.SigmoidFocalClassificationLoss( + gamma=config.gamma, + alpha=alpha) + + # By default, Faster RCNN second stage classifier uses Softmax loss + # with anchor-wise outputs. + config = loss_config.weighted_softmax + return losses.WeightedSoftmaxClassificationLoss( + logit_scale=config.logit_scale) + + +def _build_localization_loss(loss_config): + """Builds a localization loss based on the loss config. + + Args: + loss_config: A losses_pb2.LocalizationLoss object. + + Returns: + Loss based on the config. + + Raises: + ValueError: On invalid loss_config. + """ + if not isinstance(loss_config, losses_pb2.LocalizationLoss): + raise ValueError('loss_config not of type losses_pb2.LocalizationLoss.') + + loss_type = loss_config.WhichOneof('localization_loss') + + if loss_type == 'weighted_l2': + return losses.WeightedL2LocalizationLoss() + + if loss_type == 'weighted_smooth_l1': + return losses.WeightedSmoothL1LocalizationLoss( + loss_config.weighted_smooth_l1.delta) + + if loss_type == 'weighted_iou': + return losses.WeightedIOULocalizationLoss() + + raise ValueError('Empty loss config.') + + +def _build_classification_loss(loss_config): + """Builds a classification loss based on the loss config. + + Args: + loss_config: A losses_pb2.ClassificationLoss object. + + Returns: + Loss based on the config. + + Raises: + ValueError: On invalid loss_config. 
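+
+  For illustration only (a sketch mirroring the unit tests, not part of the
+  original docstring):
+
+    loss_proto = losses_pb2.ClassificationLoss()
+    text_format.Merge('weighted_sigmoid { }', loss_proto)
+    loss = _build_classification_loss(loss_proto)
+    # loss is a losses.WeightedSigmoidClassificationLoss instance.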
+ """ + if not isinstance(loss_config, losses_pb2.ClassificationLoss): + raise ValueError('loss_config not of type losses_pb2.ClassificationLoss.') + + loss_type = loss_config.WhichOneof('classification_loss') + + if loss_type == 'weighted_sigmoid': + return losses.WeightedSigmoidClassificationLoss() + + if loss_type == 'weighted_sigmoid_focal': + config = loss_config.weighted_sigmoid_focal + alpha = None + if config.HasField('alpha'): + alpha = config.alpha + return losses.SigmoidFocalClassificationLoss( + gamma=config.gamma, + alpha=alpha) + + if loss_type == 'weighted_softmax': + config = loss_config.weighted_softmax + return losses.WeightedSoftmaxClassificationLoss( + logit_scale=config.logit_scale) + + if loss_type == 'weighted_logits_softmax': + config = loss_config.weighted_logits_softmax + return losses.WeightedSoftmaxClassificationAgainstLogitsLoss( + logit_scale=config.logit_scale) + + if loss_type == 'bootstrapped_sigmoid': + config = loss_config.bootstrapped_sigmoid + return losses.BootstrappedSigmoidClassificationLoss( + alpha=config.alpha, + bootstrap_type=('hard' if config.hard_bootstrap else 'soft')) + + raise ValueError('Empty loss config.') diff --git a/builders/losses_builder_test.py b/builders/losses_builder_test.py new file mode 100644 index 0000000..24b96b4 --- /dev/null +++ b/builders/losses_builder_test.py @@ -0,0 +1,561 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for losses_builder.""" + +import tensorflow as tf + +from google.protobuf import text_format +from object_detection.builders import losses_builder +from object_detection.core import losses +from object_detection.protos import losses_pb2 +from object_detection.utils import ops + + +class LocalizationLossBuilderTest(tf.test.TestCase): + + def test_build_weighted_l2_localization_loss(self): + losses_text_proto = """ + localization_loss { + weighted_l2 { + } + } + classification_loss { + weighted_softmax { + } + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + _, localization_loss, _, _, _, _, _ = losses_builder.build(losses_proto) + self.assertTrue(isinstance(localization_loss, + losses.WeightedL2LocalizationLoss)) + + def test_build_weighted_smooth_l1_localization_loss_default_delta(self): + losses_text_proto = """ + localization_loss { + weighted_smooth_l1 { + } + } + classification_loss { + weighted_softmax { + } + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + _, localization_loss, _, _, _, _, _ = losses_builder.build(losses_proto) + self.assertTrue(isinstance(localization_loss, + losses.WeightedSmoothL1LocalizationLoss)) + self.assertAlmostEqual(localization_loss._delta, 1.0) + + def test_build_weighted_smooth_l1_localization_loss_non_default_delta(self): + losses_text_proto = """ + localization_loss { + weighted_smooth_l1 { + delta: 0.1 + } + } + classification_loss { + weighted_softmax { + } + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + _, localization_loss, _, _, _, _, _ = losses_builder.build(losses_proto) + self.assertTrue(isinstance(localization_loss, + losses.WeightedSmoothL1LocalizationLoss)) + self.assertAlmostEqual(localization_loss._delta, 0.1) + + def test_build_weighted_iou_localization_loss(self): + losses_text_proto = """ + localization_loss { + weighted_iou { + } + } + classification_loss { + weighted_softmax { + } + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + _, localization_loss, _, _, _, _, _ = losses_builder.build(losses_proto) + self.assertTrue(isinstance(localization_loss, + losses.WeightedIOULocalizationLoss)) + + def test_anchorwise_output(self): + losses_text_proto = """ + localization_loss { + weighted_smooth_l1 { + } + } + classification_loss { + weighted_softmax { + } + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + _, localization_loss, _, _, _, _, _ = losses_builder.build(losses_proto) + self.assertTrue(isinstance(localization_loss, + losses.WeightedSmoothL1LocalizationLoss)) + predictions = tf.constant([[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]]) + targets = tf.constant([[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]]) + weights = tf.constant([[1.0, 1.0]]) + loss = localization_loss(predictions, targets, weights=weights) + self.assertEqual(loss.shape, [1, 2]) + + def test_raise_error_on_empty_localization_config(self): + losses_text_proto = """ + classification_loss { + weighted_softmax { + } + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + with self.assertRaises(ValueError): + losses_builder._build_localization_loss(losses_proto) + + +class ClassificationLossBuilderTest(tf.test.TestCase): + + def test_build_weighted_sigmoid_classification_loss(self): + 
losses_text_proto = """ + classification_loss { + weighted_sigmoid { + } + } + localization_loss { + weighted_l2 { + } + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + classification_loss, _, _, _, _, _, _ = losses_builder.build(losses_proto) + self.assertTrue(isinstance(classification_loss, + losses.WeightedSigmoidClassificationLoss)) + + def test_build_weighted_sigmoid_focal_classification_loss(self): + losses_text_proto = """ + classification_loss { + weighted_sigmoid_focal { + } + } + localization_loss { + weighted_l2 { + } + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + classification_loss, _, _, _, _, _, _ = losses_builder.build(losses_proto) + self.assertTrue(isinstance(classification_loss, + losses.SigmoidFocalClassificationLoss)) + self.assertAlmostEqual(classification_loss._alpha, None) + self.assertAlmostEqual(classification_loss._gamma, 2.0) + + def test_build_weighted_sigmoid_focal_loss_non_default(self): + losses_text_proto = """ + classification_loss { + weighted_sigmoid_focal { + alpha: 0.25 + gamma: 3.0 + } + } + localization_loss { + weighted_l2 { + } + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + classification_loss, _, _, _, _, _, _ = losses_builder.build(losses_proto) + self.assertTrue(isinstance(classification_loss, + losses.SigmoidFocalClassificationLoss)) + self.assertAlmostEqual(classification_loss._alpha, 0.25) + self.assertAlmostEqual(classification_loss._gamma, 3.0) + + def test_build_weighted_softmax_classification_loss(self): + losses_text_proto = """ + classification_loss { + weighted_softmax { + } + } + localization_loss { + weighted_l2 { + } + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + classification_loss, _, _, _, _, _, _ = losses_builder.build(losses_proto) + self.assertTrue(isinstance(classification_loss, + losses.WeightedSoftmaxClassificationLoss)) + + def test_build_weighted_logits_softmax_classification_loss(self): + losses_text_proto = """ + classification_loss { + weighted_logits_softmax { + } + } + localization_loss { + weighted_l2 { + } + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + classification_loss, _, _, _, _, _, _ = losses_builder.build(losses_proto) + self.assertTrue( + isinstance(classification_loss, + losses.WeightedSoftmaxClassificationAgainstLogitsLoss)) + + def test_build_weighted_softmax_classification_loss_with_logit_scale(self): + losses_text_proto = """ + classification_loss { + weighted_softmax { + logit_scale: 2.0 + } + } + localization_loss { + weighted_l2 { + } + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + classification_loss, _, _, _, _, _, _ = losses_builder.build(losses_proto) + self.assertTrue(isinstance(classification_loss, + losses.WeightedSoftmaxClassificationLoss)) + + def test_build_bootstrapped_sigmoid_classification_loss(self): + losses_text_proto = """ + classification_loss { + bootstrapped_sigmoid { + alpha: 0.5 + } + } + localization_loss { + weighted_l2 { + } + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + classification_loss, _, _, _, _, _, _ = losses_builder.build(losses_proto) + self.assertTrue(isinstance(classification_loss, + losses.BootstrappedSigmoidClassificationLoss)) + + def test_anchorwise_output(self): + losses_text_proto = """ + 
classification_loss { + weighted_sigmoid { + anchorwise_output: true + } + } + localization_loss { + weighted_l2 { + } + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + classification_loss, _, _, _, _, _, _ = losses_builder.build(losses_proto) + self.assertTrue(isinstance(classification_loss, + losses.WeightedSigmoidClassificationLoss)) + predictions = tf.constant([[[0.0, 1.0, 0.0], [0.0, 0.5, 0.5]]]) + targets = tf.constant([[[0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]]) + weights = tf.constant([[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]]) + loss = classification_loss(predictions, targets, weights=weights) + self.assertEqual(loss.shape, [1, 2, 3]) + + def test_raise_error_on_empty_config(self): + losses_text_proto = """ + localization_loss { + weighted_l2 { + } + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + with self.assertRaises(ValueError): + losses_builder.build(losses_proto) + + +class HardExampleMinerBuilderTest(tf.test.TestCase): + + def test_do_not_build_hard_example_miner_by_default(self): + losses_text_proto = """ + localization_loss { + weighted_l2 { + } + } + classification_loss { + weighted_softmax { + } + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + _, _, _, _, hard_example_miner, _, _ = losses_builder.build(losses_proto) + self.assertEqual(hard_example_miner, None) + + def test_build_hard_example_miner_for_classification_loss(self): + losses_text_proto = """ + localization_loss { + weighted_l2 { + } + } + classification_loss { + weighted_softmax { + } + } + hard_example_miner { + loss_type: CLASSIFICATION + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + _, _, _, _, hard_example_miner, _, _ = losses_builder.build(losses_proto) + self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner)) + self.assertEqual(hard_example_miner._loss_type, 'cls') + + def test_build_hard_example_miner_for_localization_loss(self): + losses_text_proto = """ + localization_loss { + weighted_l2 { + } + } + classification_loss { + weighted_softmax { + } + } + hard_example_miner { + loss_type: LOCALIZATION + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + _, _, _, _, hard_example_miner, _, _ = losses_builder.build(losses_proto) + self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner)) + self.assertEqual(hard_example_miner._loss_type, 'loc') + + def test_build_hard_example_miner_with_non_default_values(self): + losses_text_proto = """ + localization_loss { + weighted_l2 { + } + } + classification_loss { + weighted_softmax { + } + } + hard_example_miner { + num_hard_examples: 32 + iou_threshold: 0.5 + loss_type: LOCALIZATION + max_negatives_per_positive: 10 + min_negatives_per_image: 3 + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + _, _, _, _, hard_example_miner, _, _ = losses_builder.build(losses_proto) + self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner)) + self.assertEqual(hard_example_miner._num_hard_examples, 32) + self.assertAlmostEqual(hard_example_miner._iou_threshold, 0.5) + self.assertEqual(hard_example_miner._max_negatives_per_positive, 10) + self.assertEqual(hard_example_miner._min_negatives_per_image, 3) + + +class LossBuilderTest(tf.test.TestCase): + + def test_build_all_loss_parameters(self): + losses_text_proto = """ + localization_loss { + weighted_l2 
{
+        }
+      }
+      classification_loss {
+        weighted_softmax {
+        }
+      }
+      hard_example_miner {
+      }
+      classification_weight: 0.8
+      localization_weight: 0.2
+    """
+    losses_proto = losses_pb2.Loss()
+    text_format.Merge(losses_text_proto, losses_proto)
+    (classification_loss, localization_loss, classification_weight,
+     localization_weight, hard_example_miner, _,
+     _) = losses_builder.build(losses_proto)
+    self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
+    self.assertTrue(isinstance(classification_loss,
+                               losses.WeightedSoftmaxClassificationLoss))
+    self.assertTrue(isinstance(localization_loss,
+                               losses.WeightedL2LocalizationLoss))
+    self.assertAlmostEqual(classification_weight, 0.8)
+    self.assertAlmostEqual(localization_weight, 0.2)
+
+  def test_build_expected_sampling(self):
+    losses_text_proto = """
+      localization_loss {
+        weighted_l2 {
+        }
+      }
+      classification_loss {
+        weighted_softmax {
+        }
+      }
+      hard_example_miner {
+      }
+      classification_weight: 0.8
+      localization_weight: 0.2
+      expected_loss_weights: EXPECTED_SAMPLING
+    """
+    losses_proto = losses_pb2.Loss()
+    text_format.Merge(losses_text_proto, losses_proto)
+    (classification_loss, localization_loss, classification_weight,
+     localization_weight, hard_example_miner, _,
+     expected_loss_weights_fn) = losses_builder.build(losses_proto)
+    self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
+    self.assertTrue(
+        isinstance(classification_loss,
+                   losses.WeightedSoftmaxClassificationLoss))
+    self.assertTrue(
+        isinstance(localization_loss, losses.WeightedL2LocalizationLoss))
+    self.assertAlmostEqual(classification_weight, 0.8)
+    self.assertAlmostEqual(localization_weight, 0.2)
+    self.assertIsNotNone(expected_loss_weights_fn)
+
+  def test_build_reweighting_unmatched_anchors(self):
+    losses_text_proto = """
+      localization_loss {
+        weighted_l2 {
+        }
+      }
+      classification_loss {
+        weighted_softmax {
+        }
+      }
+      hard_example_miner {
+      }
+      classification_weight: 0.8
+      localization_weight: 0.2
+      expected_loss_weights: REWEIGHTING_UNMATCHED_ANCHORS
+    """
+    losses_proto = losses_pb2.Loss()
+    text_format.Merge(losses_text_proto, losses_proto)
+    (classification_loss, localization_loss, classification_weight,
+     localization_weight, hard_example_miner, _,
+     expected_loss_weights_fn) = losses_builder.build(losses_proto)
+    self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
+    self.assertTrue(
+        isinstance(classification_loss,
+                   losses.WeightedSoftmaxClassificationLoss))
+    self.assertTrue(
+        isinstance(localization_loss, losses.WeightedL2LocalizationLoss))
+    self.assertAlmostEqual(classification_weight, 0.8)
+    self.assertAlmostEqual(localization_weight, 0.2)
+    self.assertIsNotNone(expected_loss_weights_fn)
+
+  def test_raise_error_when_both_focal_loss_and_hard_example_miner(self):
+    losses_text_proto = """
+      localization_loss {
+        weighted_l2 {
+        }
+      }
+      classification_loss {
+        weighted_sigmoid_focal {
+        }
+      }
+      hard_example_miner {
+      }
+      classification_weight: 0.8
+      localization_weight: 0.2
+    """
+    losses_proto = losses_pb2.Loss()
+    text_format.Merge(losses_text_proto, losses_proto)
+    with self.assertRaises(ValueError):
+      losses_builder.build(losses_proto)
+
+
+class FasterRcnnClassificationLossBuilderTest(tf.test.TestCase):
+
+  def test_build_sigmoid_loss(self):
+    losses_text_proto = """
+      weighted_sigmoid {
+      }
+    """
+    losses_proto = losses_pb2.ClassificationLoss()
+    text_format.Merge(losses_text_proto, losses_proto)
+    classification_loss = losses_builder.build_faster_rcnn_classification_loss(
+        losses_proto)
+    self.assertTrue(isinstance(classification_loss,
+                               losses.WeightedSigmoidClassificationLoss))
+
+  def test_build_softmax_loss(self):
+    losses_text_proto = """
+      weighted_softmax {
+      }
+    """
+    losses_proto =
losses_pb2.ClassificationLoss() + text_format.Merge(losses_text_proto, losses_proto) + classification_loss = losses_builder.build_faster_rcnn_classification_loss( + losses_proto) + self.assertTrue(isinstance(classification_loss, + losses.WeightedSoftmaxClassificationLoss)) + + def test_build_logits_softmax_loss(self): + losses_text_proto = """ + weighted_logits_softmax { + } + """ + losses_proto = losses_pb2.ClassificationLoss() + text_format.Merge(losses_text_proto, losses_proto) + classification_loss = losses_builder.build_faster_rcnn_classification_loss( + losses_proto) + self.assertTrue( + isinstance(classification_loss, + losses.WeightedSoftmaxClassificationAgainstLogitsLoss)) + + def test_build_sigmoid_focal_loss(self): + losses_text_proto = """ + weighted_sigmoid_focal { + } + """ + losses_proto = losses_pb2.ClassificationLoss() + text_format.Merge(losses_text_proto, losses_proto) + classification_loss = losses_builder.build_faster_rcnn_classification_loss( + losses_proto) + self.assertTrue( + isinstance(classification_loss, + losses.SigmoidFocalClassificationLoss)) + + def test_build_softmax_loss_by_default(self): + losses_text_proto = """ + """ + losses_proto = losses_pb2.ClassificationLoss() + text_format.Merge(losses_text_proto, losses_proto) + classification_loss = losses_builder.build_faster_rcnn_classification_loss( + losses_proto) + self.assertTrue(isinstance(classification_loss, + losses.WeightedSoftmaxClassificationLoss)) + + +if __name__ == '__main__': + tf.test.main() diff --git a/builders/matcher_builder.py b/builders/matcher_builder.py new file mode 100644 index 0000000..d334f43 --- /dev/null +++ b/builders/matcher_builder.py @@ -0,0 +1,53 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""A function to build an object detection matcher from configuration.""" + +from object_detection.matchers import argmax_matcher +from object_detection.matchers import bipartite_matcher +from object_detection.protos import matcher_pb2 + + +def build(matcher_config): + """Builds a matcher object based on the matcher config. + + Args: + matcher_config: A matcher.proto object containing the config for the desired + Matcher. + + Returns: + Matcher based on the config. + + Raises: + ValueError: On empty matcher proto. 
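+
+  Example (illustrative sketch; `text_format` from google.protobuf, as in
+  matcher_builder_test.py below):
+
+    matcher_proto = matcher_pb2.Matcher()
+    text_format.Merge('argmax_matcher { matched_threshold: 0.7 }',
+                      matcher_proto)
+    matcher = build(matcher_proto)
+    # Returns an argmax_matcher.ArgMaxMatcher instance.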
+ """ + if not isinstance(matcher_config, matcher_pb2.Matcher): + raise ValueError('matcher_config not of type matcher_pb2.Matcher.') + if matcher_config.WhichOneof('matcher_oneof') == 'argmax_matcher': + matcher = matcher_config.argmax_matcher + matched_threshold = unmatched_threshold = None + if not matcher.ignore_thresholds: + matched_threshold = matcher.matched_threshold + unmatched_threshold = matcher.unmatched_threshold + return argmax_matcher.ArgMaxMatcher( + matched_threshold=matched_threshold, + unmatched_threshold=unmatched_threshold, + negatives_lower_than_unmatched=matcher.negatives_lower_than_unmatched, + force_match_for_each_row=matcher.force_match_for_each_row, + use_matmul_gather=matcher.use_matmul_gather) + if matcher_config.WhichOneof('matcher_oneof') == 'bipartite_matcher': + matcher = matcher_config.bipartite_matcher + return bipartite_matcher.GreedyBipartiteMatcher(matcher.use_matmul_gather) + raise ValueError('Empty matcher.') diff --git a/builders/matcher_builder_test.py b/builders/matcher_builder_test.py new file mode 100644 index 0000000..6685449 --- /dev/null +++ b/builders/matcher_builder_test.py @@ -0,0 +1,99 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for matcher_builder.""" + +import tensorflow as tf + +from google.protobuf import text_format +from object_detection.builders import matcher_builder +from object_detection.matchers import argmax_matcher +from object_detection.matchers import bipartite_matcher +from object_detection.protos import matcher_pb2 + + +class MatcherBuilderTest(tf.test.TestCase): + + def test_build_arg_max_matcher_with_defaults(self): + matcher_text_proto = """ + argmax_matcher { + } + """ + matcher_proto = matcher_pb2.Matcher() + text_format.Merge(matcher_text_proto, matcher_proto) + matcher_object = matcher_builder.build(matcher_proto) + self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher)) + self.assertAlmostEqual(matcher_object._matched_threshold, 0.5) + self.assertAlmostEqual(matcher_object._unmatched_threshold, 0.5) + self.assertTrue(matcher_object._negatives_lower_than_unmatched) + self.assertFalse(matcher_object._force_match_for_each_row) + + def test_build_arg_max_matcher_without_thresholds(self): + matcher_text_proto = """ + argmax_matcher { + ignore_thresholds: true + } + """ + matcher_proto = matcher_pb2.Matcher() + text_format.Merge(matcher_text_proto, matcher_proto) + matcher_object = matcher_builder.build(matcher_proto) + self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher)) + self.assertEqual(matcher_object._matched_threshold, None) + self.assertEqual(matcher_object._unmatched_threshold, None) + self.assertTrue(matcher_object._negatives_lower_than_unmatched) + self.assertFalse(matcher_object._force_match_for_each_row) + + def test_build_arg_max_matcher_with_non_default_parameters(self): + matcher_text_proto = """ + 
argmax_matcher { + matched_threshold: 0.7 + unmatched_threshold: 0.3 + negatives_lower_than_unmatched: false + force_match_for_each_row: true + use_matmul_gather: true + } + """ + matcher_proto = matcher_pb2.Matcher() + text_format.Merge(matcher_text_proto, matcher_proto) + matcher_object = matcher_builder.build(matcher_proto) + self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher)) + self.assertAlmostEqual(matcher_object._matched_threshold, 0.7) + self.assertAlmostEqual(matcher_object._unmatched_threshold, 0.3) + self.assertFalse(matcher_object._negatives_lower_than_unmatched) + self.assertTrue(matcher_object._force_match_for_each_row) + self.assertTrue(matcher_object._use_matmul_gather) + + def test_build_bipartite_matcher(self): + matcher_text_proto = """ + bipartite_matcher { + } + """ + matcher_proto = matcher_pb2.Matcher() + text_format.Merge(matcher_text_proto, matcher_proto) + matcher_object = matcher_builder.build(matcher_proto) + self.assertTrue( + isinstance(matcher_object, bipartite_matcher.GreedyBipartiteMatcher)) + + def test_raise_error_on_empty_matcher(self): + matcher_text_proto = """ + """ + matcher_proto = matcher_pb2.Matcher() + text_format.Merge(matcher_text_proto, matcher_proto) + with self.assertRaises(ValueError): + matcher_builder.build(matcher_proto) + + +if __name__ == '__main__': + tf.test.main() diff --git a/builders/model_builder.py b/builders/model_builder.py new file mode 100644 index 0000000..4eb0368 --- /dev/null +++ b/builders/model_builder.py @@ -0,0 +1,636 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""A function to build a DetectionModel from configuration.""" + +import functools + +from object_detection.builders import anchor_generator_builder +from object_detection.builders import box_coder_builder +from object_detection.builders import box_predictor_builder +from object_detection.builders import hyperparams_builder +from object_detection.builders import image_resizer_builder +from object_detection.builders import losses_builder +from object_detection.builders import matcher_builder +from object_detection.builders import post_processing_builder +from object_detection.builders import region_similarity_calculator_builder as sim_calc +from object_detection.core import balanced_positive_negative_sampler as sampler +from object_detection.core import post_processing +from object_detection.core import target_assigner +from object_detection.meta_architectures import faster_rcnn_meta_arch +from object_detection.meta_architectures import rfcn_meta_arch +from object_detection.meta_architectures import ssd_meta_arch +from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res +from object_detection.models import faster_rcnn_inception_resnet_v2_keras_feature_extractor as frcnn_inc_res_keras +from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2 +from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas +from object_detection.models import faster_rcnn_pnas_feature_extractor as frcnn_pnas +from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1 +from object_detection.models import ssd_resnet_v1_fpn_feature_extractor as ssd_resnet_v1_fpn +from object_detection.models import ssd_resnet_v1_fpn_keras_feature_extractor as ssd_resnet_v1_fpn_keras +from object_detection.models import ssd_resnet_v1_ppn_feature_extractor as ssd_resnet_v1_ppn +from object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor import EmbeddedSSDMobileNetV1FeatureExtractor +from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor +from object_detection.models.ssd_inception_v3_feature_extractor import SSDInceptionV3FeatureExtractor +from object_detection.models.ssd_mobilenet_edgetpu_feature_extractor import SSDMobileNetEdgeTPUFeatureExtractor +from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor +from object_detection.models.ssd_mobilenet_v1_fpn_feature_extractor import SSDMobileNetV1FpnFeatureExtractor +from object_detection.models.ssd_mobilenet_v1_fpn_keras_feature_extractor import SSDMobileNetV1FpnKerasFeatureExtractor +from object_detection.models.ssd_mobilenet_v1_keras_feature_extractor import SSDMobileNetV1KerasFeatureExtractor +from object_detection.models.ssd_mobilenet_v1_ppn_feature_extractor import SSDMobileNetV1PpnFeatureExtractor +from object_detection.models.ssd_mobilenet_v2_feature_extractor import SSDMobileNetV2FeatureExtractor +from object_detection.models.ssd_mobilenet_v2_fpn_feature_extractor import SSDMobileNetV2FpnFeatureExtractor +from object_detection.models.ssd_mobilenet_v2_fpn_keras_feature_extractor import SSDMobileNetV2FpnKerasFeatureExtractor +from object_detection.models.ssd_mobilenet_v2_keras_feature_extractor import SSDMobileNetV2KerasFeatureExtractor +from object_detection.models.ssd_mobilenet_v3_feature_extractor import SSDMobileNetV3LargeFeatureExtractor +from 
object_detection.models.ssd_mobilenet_v3_feature_extractor import SSDMobileNetV3SmallFeatureExtractor +from object_detection.models.ssd_pnasnet_feature_extractor import SSDPNASNetFeatureExtractor +from object_detection.predictors import rfcn_box_predictor +from object_detection.predictors import rfcn_keras_box_predictor +from object_detection.predictors.heads import mask_head +from object_detection.protos import model_pb2 +from object_detection.utils import ops + +# A map of names to SSD feature extractors. +SSD_FEATURE_EXTRACTOR_CLASS_MAP = { + 'ssd_inception_v2': SSDInceptionV2FeatureExtractor, + 'ssd_inception_v3': SSDInceptionV3FeatureExtractor, + 'ssd_mobilenet_v1': SSDMobileNetV1FeatureExtractor, + 'ssd_mobilenet_v1_fpn': SSDMobileNetV1FpnFeatureExtractor, + 'ssd_mobilenet_v1_ppn': SSDMobileNetV1PpnFeatureExtractor, + 'ssd_mobilenet_v2': SSDMobileNetV2FeatureExtractor, + 'ssd_mobilenet_v2_fpn': SSDMobileNetV2FpnFeatureExtractor, + 'ssd_mobilenet_v3_large': SSDMobileNetV3LargeFeatureExtractor, + 'ssd_mobilenet_v3_small': SSDMobileNetV3SmallFeatureExtractor, + 'ssd_mobilenet_edgetpu': SSDMobileNetEdgeTPUFeatureExtractor, + 'ssd_resnet50_v1_fpn': ssd_resnet_v1_fpn.SSDResnet50V1FpnFeatureExtractor, + 'ssd_resnet101_v1_fpn': ssd_resnet_v1_fpn.SSDResnet101V1FpnFeatureExtractor, + 'ssd_resnet152_v1_fpn': ssd_resnet_v1_fpn.SSDResnet152V1FpnFeatureExtractor, + 'ssd_resnet50_v1_ppn': ssd_resnet_v1_ppn.SSDResnet50V1PpnFeatureExtractor, + 'ssd_resnet101_v1_ppn': + ssd_resnet_v1_ppn.SSDResnet101V1PpnFeatureExtractor, + 'ssd_resnet152_v1_ppn': + ssd_resnet_v1_ppn.SSDResnet152V1PpnFeatureExtractor, + 'embedded_ssd_mobilenet_v1': EmbeddedSSDMobileNetV1FeatureExtractor, + 'ssd_pnasnet': SSDPNASNetFeatureExtractor, +} + +SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP = { + 'ssd_mobilenet_v1_keras': SSDMobileNetV1KerasFeatureExtractor, + 'ssd_mobilenet_v1_fpn_keras': SSDMobileNetV1FpnKerasFeatureExtractor, + 'ssd_mobilenet_v2_keras': SSDMobileNetV2KerasFeatureExtractor, + 'ssd_mobilenet_v2_fpn_keras': SSDMobileNetV2FpnKerasFeatureExtractor, + 'ssd_resnet50_v1_fpn_keras': + ssd_resnet_v1_fpn_keras.SSDResNet50V1FpnKerasFeatureExtractor, + 'ssd_resnet101_v1_fpn_keras': + ssd_resnet_v1_fpn_keras.SSDResNet101V1FpnKerasFeatureExtractor, + 'ssd_resnet152_v1_fpn_keras': + ssd_resnet_v1_fpn_keras.SSDResNet152V1FpnKerasFeatureExtractor, +} + +# A map of names to Faster R-CNN feature extractors. +FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = { + 'faster_rcnn_nas': + frcnn_nas.FasterRCNNNASFeatureExtractor, + 'faster_rcnn_pnas': + frcnn_pnas.FasterRCNNPNASFeatureExtractor, + 'faster_rcnn_inception_resnet_v2': + frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor, + 'faster_rcnn_inception_v2': + frcnn_inc_v2.FasterRCNNInceptionV2FeatureExtractor, + 'faster_rcnn_resnet50': + frcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor, + 'faster_rcnn_resnet101': + frcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor, + 'faster_rcnn_resnet152': + frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor, +} + +FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP = { + 'faster_rcnn_inception_resnet_v2_keras': + frcnn_inc_res_keras.FasterRCNNInceptionResnetV2KerasFeatureExtractor, +} + + +def _build_ssd_feature_extractor(feature_extractor_config, + is_training, + freeze_batchnorm, + reuse_weights=None): + """Builds a ssd_meta_arch.SSDFeatureExtractor based on config. + + Args: + feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto. + is_training: True if this feature extractor is being built for training. 
+ freeze_batchnorm: Whether to freeze batch norm parameters during + training or not. When training with a small batch size (e.g. 1), it is + desirable to freeze batch norm update and use pretrained batch norm + params. + reuse_weights: if the feature extractor should reuse weights. + + Returns: + ssd_meta_arch.SSDFeatureExtractor based on config. + + Raises: + ValueError: On invalid feature extractor type. + """ + feature_type = feature_extractor_config.type + is_keras_extractor = feature_type in SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP + depth_multiplier = feature_extractor_config.depth_multiplier + min_depth = feature_extractor_config.min_depth + pad_to_multiple = feature_extractor_config.pad_to_multiple + use_explicit_padding = feature_extractor_config.use_explicit_padding + use_depthwise = feature_extractor_config.use_depthwise + + if is_keras_extractor: + conv_hyperparams = hyperparams_builder.KerasLayerHyperparams( + feature_extractor_config.conv_hyperparams) + else: + conv_hyperparams = hyperparams_builder.build( + feature_extractor_config.conv_hyperparams, is_training) + override_base_feature_extractor_hyperparams = ( + feature_extractor_config.override_base_feature_extractor_hyperparams) + + if (feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP) and ( + not is_keras_extractor): + raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type)) + + if is_keras_extractor: + feature_extractor_class = SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP[ + feature_type] + else: + feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type] + kwargs = { + 'is_training': + is_training, + 'depth_multiplier': + depth_multiplier, + 'min_depth': + min_depth, + 'pad_to_multiple': + pad_to_multiple, + 'use_explicit_padding': + use_explicit_padding, + 'use_depthwise': + use_depthwise, + 'override_base_feature_extractor_hyperparams': + override_base_feature_extractor_hyperparams + } + + if feature_extractor_config.HasField('replace_preprocessor_with_placeholder'): + kwargs.update({ + 'replace_preprocessor_with_placeholder': + feature_extractor_config.replace_preprocessor_with_placeholder + }) + + if feature_extractor_config.HasField('num_layers'): + kwargs.update({'num_layers': feature_extractor_config.num_layers}) + + if is_keras_extractor: + kwargs.update({ + 'conv_hyperparams': conv_hyperparams, + 'inplace_batchnorm_update': False, + 'freeze_batchnorm': freeze_batchnorm + }) + else: + kwargs.update({ + 'conv_hyperparams_fn': conv_hyperparams, + 'reuse_weights': reuse_weights, + }) + + if feature_extractor_config.HasField('fpn'): + kwargs.update({ + 'fpn_min_level': + feature_extractor_config.fpn.min_level, + 'fpn_max_level': + feature_extractor_config.fpn.max_level, + 'additional_layer_depth': + feature_extractor_config.fpn.additional_layer_depth, + }) + + + return feature_extractor_class(**kwargs) + + +def _build_ssd_model(ssd_config, is_training, add_summaries): + """Builds an SSD detection model based on the model config. + + Args: + ssd_config: A ssd.proto object containing the config for the desired + SSDMetaArch. + is_training: True if this model is being built for training purposes. + add_summaries: Whether to add tf summaries in the model. + Returns: + SSDMetaArch based on the config. + + Raises: + ValueError: If ssd_config.type is not recognized (i.e. not registered in + model_class_map). 
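+
+  Example (illustrative; `model_proto` is assumed to be a populated
+  model_pb2.DetectionModel with its `ssd` field set, as built in
+  model_builder_test.py):
+
+    ssd_model = _build_ssd_model(model_proto.ssd, is_training=True,
+                                 add_summaries=True)
+    # Returns an ssd_meta_arch.SSDMetaArch instance.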
+ """ + num_classes = ssd_config.num_classes + + # Feature extractor + feature_extractor = _build_ssd_feature_extractor( + feature_extractor_config=ssd_config.feature_extractor, + freeze_batchnorm=ssd_config.freeze_batchnorm, + is_training=is_training) + + box_coder = box_coder_builder.build(ssd_config.box_coder) + matcher = matcher_builder.build(ssd_config.matcher) + region_similarity_calculator = sim_calc.build( + ssd_config.similarity_calculator) + encode_background_as_zeros = ssd_config.encode_background_as_zeros + negative_class_weight = ssd_config.negative_class_weight + anchor_generator = anchor_generator_builder.build( + ssd_config.anchor_generator) + if feature_extractor.is_keras_model: + ssd_box_predictor = box_predictor_builder.build_keras( + hyperparams_fn=hyperparams_builder.KerasLayerHyperparams, + freeze_batchnorm=ssd_config.freeze_batchnorm, + inplace_batchnorm_update=False, + num_predictions_per_location_list=anchor_generator + .num_anchors_per_location(), + box_predictor_config=ssd_config.box_predictor, + is_training=is_training, + num_classes=num_classes, + add_background_class=ssd_config.add_background_class) + else: + ssd_box_predictor = box_predictor_builder.build( + hyperparams_builder.build, ssd_config.box_predictor, is_training, + num_classes, ssd_config.add_background_class) + image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer) + non_max_suppression_fn, score_conversion_fn = post_processing_builder.build( + ssd_config.post_processing) + (classification_loss, localization_loss, classification_weight, + localization_weight, hard_example_miner, random_example_sampler, + expected_loss_weights_fn) = losses_builder.build(ssd_config.loss) + normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches + normalize_loc_loss_by_codesize = ssd_config.normalize_loc_loss_by_codesize + + equalization_loss_config = ops.EqualizationLossConfig( + weight=ssd_config.loss.equalization_loss.weight, + exclude_prefixes=ssd_config.loss.equalization_loss.exclude_prefixes) + + target_assigner_instance = target_assigner.TargetAssigner( + region_similarity_calculator, + matcher, + box_coder, + negative_class_weight=negative_class_weight) + + ssd_meta_arch_fn = ssd_meta_arch.SSDMetaArch + kwargs = {} + + return ssd_meta_arch_fn( + is_training=is_training, + anchor_generator=anchor_generator, + box_predictor=ssd_box_predictor, + box_coder=box_coder, + feature_extractor=feature_extractor, + encode_background_as_zeros=encode_background_as_zeros, + image_resizer_fn=image_resizer_fn, + non_max_suppression_fn=non_max_suppression_fn, + score_conversion_fn=score_conversion_fn, + classification_loss=classification_loss, + localization_loss=localization_loss, + classification_loss_weight=classification_weight, + localization_loss_weight=localization_weight, + normalize_loss_by_num_matches=normalize_loss_by_num_matches, + hard_example_miner=hard_example_miner, + target_assigner_instance=target_assigner_instance, + add_summaries=add_summaries, + normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize, + freeze_batchnorm=ssd_config.freeze_batchnorm, + inplace_batchnorm_update=ssd_config.inplace_batchnorm_update, + add_background_class=ssd_config.add_background_class, + explicit_background_class=ssd_config.explicit_background_class, + random_example_sampler=random_example_sampler, + expected_loss_weights_fn=expected_loss_weights_fn, + use_confidences_as_targets=ssd_config.use_confidences_as_targets, + implicit_example_weight=ssd_config.implicit_example_weight, + 
equalization_loss_config=equalization_loss_config, + return_raw_detections_during_predict=( + ssd_config.return_raw_detections_during_predict), + **kwargs) + + +def _build_faster_rcnn_feature_extractor( + feature_extractor_config, is_training, reuse_weights=None, + inplace_batchnorm_update=False): + """Builds a faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config. + + Args: + feature_extractor_config: A FasterRcnnFeatureExtractor proto config from + faster_rcnn.proto. + is_training: True if this feature extractor is being built for training. + reuse_weights: if the feature extractor should reuse weights. + inplace_batchnorm_update: Whether to update batch_norm inplace during + training. This is required for batch norm to work correctly on TPUs. When + this is false, user must add a control dependency on + tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch + norm moving average parameters. + + Returns: + faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config. + + Raises: + ValueError: On invalid feature extractor type. + """ + if inplace_batchnorm_update: + raise ValueError('inplace batchnorm updates not supported.') + feature_type = feature_extractor_config.type + first_stage_features_stride = ( + feature_extractor_config.first_stage_features_stride) + batch_norm_trainable = feature_extractor_config.batch_norm_trainable + + if feature_type not in FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP: + raise ValueError('Unknown Faster R-CNN feature_extractor: {}'.format( + feature_type)) + feature_extractor_class = FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP[ + feature_type] + return feature_extractor_class( + is_training, first_stage_features_stride, + batch_norm_trainable, reuse_weights=reuse_weights) + + +def _build_faster_rcnn_keras_feature_extractor( + feature_extractor_config, is_training, + inplace_batchnorm_update=False): + """Builds a faster_rcnn_meta_arch.FasterRCNNKerasFeatureExtractor from config. + + Args: + feature_extractor_config: A FasterRcnnFeatureExtractor proto config from + faster_rcnn.proto. + is_training: True if this feature extractor is being built for training. + inplace_batchnorm_update: Whether to update batch_norm inplace during + training. This is required for batch norm to work correctly on TPUs. When + this is false, user must add a control dependency on + tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch + norm moving average parameters. + + Returns: + faster_rcnn_meta_arch.FasterRCNNKerasFeatureExtractor based on config. + + Raises: + ValueError: On invalid feature extractor type. + """ + if inplace_batchnorm_update: + raise ValueError('inplace batchnorm updates not supported.') + feature_type = feature_extractor_config.type + first_stage_features_stride = ( + feature_extractor_config.first_stage_features_stride) + batch_norm_trainable = feature_extractor_config.batch_norm_trainable + + if feature_type not in FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP: + raise ValueError('Unknown Faster R-CNN feature_extractor: {}'.format( + feature_type)) + feature_extractor_class = FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP[ + feature_type] + return feature_extractor_class( + is_training, first_stage_features_stride, + batch_norm_trainable) + + +def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries): + """Builds a Faster R-CNN or R-FCN detection model based on the model config. 
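+
+  Illustrative usage (assuming a model_pb2.DetectionModel proto with the
+  `faster_rcnn` field populated, as in model_builder_test.py):
+
+    model = _build_faster_rcnn_model(model_proto.faster_rcnn,
+                                     is_training=True, add_summaries=True)
+    # Returns a FasterRCNNMetaArch, or an RFCNMetaArch as described below.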
+ + Builds R-FCN model if the second_stage_box_predictor in the config is of type + `rfcn_box_predictor` else builds a Faster R-CNN model. + + Args: + frcnn_config: A faster_rcnn.proto object containing the config for the + desired FasterRCNNMetaArch or RFCNMetaArch. + is_training: True if this model is being built for training purposes. + add_summaries: Whether to add tf summaries in the model. + + Returns: + FasterRCNNMetaArch based on the config. + + Raises: + ValueError: If frcnn_config.type is not recognized (i.e. not registered in + model_class_map). + """ + num_classes = frcnn_config.num_classes + image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer) + + is_keras = (frcnn_config.feature_extractor.type in + FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP) + + if is_keras: + feature_extractor = _build_faster_rcnn_keras_feature_extractor( + frcnn_config.feature_extractor, is_training, + inplace_batchnorm_update=frcnn_config.inplace_batchnorm_update) + else: + feature_extractor = _build_faster_rcnn_feature_extractor( + frcnn_config.feature_extractor, is_training, + inplace_batchnorm_update=frcnn_config.inplace_batchnorm_update) + + number_of_stages = frcnn_config.number_of_stages + first_stage_anchor_generator = anchor_generator_builder.build( + frcnn_config.first_stage_anchor_generator) + + first_stage_target_assigner = target_assigner.create_target_assigner( + 'FasterRCNN', + 'proposal', + use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher) + first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate + if is_keras: + first_stage_box_predictor_arg_scope_fn = ( + hyperparams_builder.KerasLayerHyperparams( + frcnn_config.first_stage_box_predictor_conv_hyperparams)) + else: + first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build( + frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training) + first_stage_box_predictor_kernel_size = ( + frcnn_config.first_stage_box_predictor_kernel_size) + first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth + first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size + use_static_shapes = frcnn_config.use_static_shapes and ( + frcnn_config.use_static_shapes_for_eval or is_training) + first_stage_sampler = sampler.BalancedPositiveNegativeSampler( + positive_fraction=frcnn_config.first_stage_positive_balance_fraction, + is_static=(frcnn_config.use_static_balanced_label_sampler and + use_static_shapes)) + first_stage_max_proposals = frcnn_config.first_stage_max_proposals + if (frcnn_config.first_stage_nms_iou_threshold < 0 or + frcnn_config.first_stage_nms_iou_threshold > 1.0): + raise ValueError('iou_threshold not in [0, 1.0].') + if (is_training and frcnn_config.second_stage_batch_size > + first_stage_max_proposals): + raise ValueError('second_stage_batch_size should be no greater than ' + 'first_stage_max_proposals.') + first_stage_non_max_suppression_fn = functools.partial( + post_processing.batch_multiclass_non_max_suppression, + score_thresh=frcnn_config.first_stage_nms_score_threshold, + iou_thresh=frcnn_config.first_stage_nms_iou_threshold, + max_size_per_class=frcnn_config.first_stage_max_proposals, + max_total_size=frcnn_config.first_stage_max_proposals, + use_static_shapes=use_static_shapes, + use_partitioned_nms=frcnn_config.use_partitioned_nms_in_first_stage, + use_combined_nms=frcnn_config.use_combined_nms_in_first_stage) + first_stage_loc_loss_weight = ( + frcnn_config.first_stage_localization_loss_weight) + first_stage_obj_loss_weight = 
frcnn_config.first_stage_objectness_loss_weight + + initial_crop_size = frcnn_config.initial_crop_size + maxpool_kernel_size = frcnn_config.maxpool_kernel_size + maxpool_stride = frcnn_config.maxpool_stride + + second_stage_target_assigner = target_assigner.create_target_assigner( + 'FasterRCNN', + 'detection', + use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher) + if is_keras: + second_stage_box_predictor = box_predictor_builder.build_keras( + hyperparams_builder.KerasLayerHyperparams, + freeze_batchnorm=False, + inplace_batchnorm_update=False, + num_predictions_per_location_list=[1], + box_predictor_config=frcnn_config.second_stage_box_predictor, + is_training=is_training, + num_classes=num_classes) + else: + second_stage_box_predictor = box_predictor_builder.build( + hyperparams_builder.build, + frcnn_config.second_stage_box_predictor, + is_training=is_training, + num_classes=num_classes) + second_stage_batch_size = frcnn_config.second_stage_batch_size + second_stage_sampler = sampler.BalancedPositiveNegativeSampler( + positive_fraction=frcnn_config.second_stage_balance_fraction, + is_static=(frcnn_config.use_static_balanced_label_sampler and + use_static_shapes)) + (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn + ) = post_processing_builder.build(frcnn_config.second_stage_post_processing) + second_stage_localization_loss_weight = ( + frcnn_config.second_stage_localization_loss_weight) + second_stage_classification_loss = ( + losses_builder.build_faster_rcnn_classification_loss( + frcnn_config.second_stage_classification_loss)) + second_stage_classification_loss_weight = ( + frcnn_config.second_stage_classification_loss_weight) + second_stage_mask_prediction_loss_weight = ( + frcnn_config.second_stage_mask_prediction_loss_weight) + + hard_example_miner = None + if frcnn_config.HasField('hard_example_miner'): + hard_example_miner = losses_builder.build_hard_example_miner( + frcnn_config.hard_example_miner, + second_stage_classification_loss_weight, + second_stage_localization_loss_weight) + + crop_and_resize_fn = ( + ops.matmul_crop_and_resize if frcnn_config.use_matmul_crop_and_resize + else ops.native_crop_and_resize) + clip_anchors_to_image = ( + frcnn_config.clip_anchors_to_image) + + common_kwargs = { + 'is_training': is_training, + 'num_classes': num_classes, + 'image_resizer_fn': image_resizer_fn, + 'feature_extractor': feature_extractor, + 'number_of_stages': number_of_stages, + 'first_stage_anchor_generator': first_stage_anchor_generator, + 'first_stage_target_assigner': first_stage_target_assigner, + 'first_stage_atrous_rate': first_stage_atrous_rate, + 'first_stage_box_predictor_arg_scope_fn': + first_stage_box_predictor_arg_scope_fn, + 'first_stage_box_predictor_kernel_size': + first_stage_box_predictor_kernel_size, + 'first_stage_box_predictor_depth': first_stage_box_predictor_depth, + 'first_stage_minibatch_size': first_stage_minibatch_size, + 'first_stage_sampler': first_stage_sampler, + 'first_stage_non_max_suppression_fn': first_stage_non_max_suppression_fn, + 'first_stage_max_proposals': first_stage_max_proposals, + 'first_stage_localization_loss_weight': first_stage_loc_loss_weight, + 'first_stage_objectness_loss_weight': first_stage_obj_loss_weight, + 'second_stage_target_assigner': second_stage_target_assigner, + 'second_stage_batch_size': second_stage_batch_size, + 'second_stage_sampler': second_stage_sampler, + 'second_stage_non_max_suppression_fn': + second_stage_non_max_suppression_fn, + 'second_stage_score_conversion_fn': 
+          second_stage_score_conversion_fn,
+      'second_stage_localization_loss_weight':
+          second_stage_localization_loss_weight,
+      'second_stage_classification_loss':
+          second_stage_classification_loss,
+      'second_stage_classification_loss_weight':
+          second_stage_classification_loss_weight,
+      'hard_example_miner': hard_example_miner,
+      'add_summaries': add_summaries,
+      'crop_and_resize_fn': crop_and_resize_fn,
+      'clip_anchors_to_image': clip_anchors_to_image,
+      'use_static_shapes': use_static_shapes,
+      'resize_masks': frcnn_config.resize_masks,
+      'return_raw_detections_during_predict': (
+          frcnn_config.return_raw_detections_during_predict)
+  }
+
+  if (isinstance(second_stage_box_predictor,
+                 rfcn_box_predictor.RfcnBoxPredictor) or
+      isinstance(second_stage_box_predictor,
+                 rfcn_keras_box_predictor.RfcnKerasBoxPredictor)):
+    return rfcn_meta_arch.RFCNMetaArch(
+        second_stage_rfcn_box_predictor=second_stage_box_predictor,
+        **common_kwargs)
+  else:
+    return faster_rcnn_meta_arch.FasterRCNNMetaArch(
+        initial_crop_size=initial_crop_size,
+        maxpool_kernel_size=maxpool_kernel_size,
+        maxpool_stride=maxpool_stride,
+        second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
+        second_stage_mask_prediction_loss_weight=(
+            second_stage_mask_prediction_loss_weight),
+        **common_kwargs)
+
+
+EXPERIMENTAL_META_ARCH_BUILDER_MAP = {
+}
+
+
+def _build_experimental_model(config, is_training, add_summaries=True):
+  return EXPERIMENTAL_META_ARCH_BUILDER_MAP[config.name](
+      is_training, add_summaries)
+
+
+META_ARCHITECTURE_BUILDER_MAP = {
+    'ssd': _build_ssd_model,
+    'faster_rcnn': _build_faster_rcnn_model,
+    'experimental_model': _build_experimental_model
+}
+
+
+def build(model_config, is_training, add_summaries=True):
+  """Builds a DetectionModel based on the model config.
+
+  Args:
+    model_config: A model.proto object containing the config for the desired
+      DetectionModel.
+    is_training: True if this model is being built for training purposes.
+    add_summaries: Whether to add tensorflow summaries in the model graph.
+
+  Returns:
+    DetectionModel based on the config.
+
+  Raises:
+    ValueError: On invalid meta architecture or model.
+  """
+  if not isinstance(model_config, model_pb2.DetectionModel):
+    raise ValueError('model_config not of type model_pb2.DetectionModel.')
+
+  meta_architecture = model_config.WhichOneof('model')
+
+  if meta_architecture not in META_ARCHITECTURE_BUILDER_MAP:
+    raise ValueError('Unknown meta architecture: {}'.format(meta_architecture))
+  build_func = META_ARCHITECTURE_BUILDER_MAP[meta_architecture]
+  return build_func(getattr(model_config, meta_architecture), is_training,
+                    add_summaries)
diff --git a/builders/model_builder_test.py b/builders/model_builder_test.py
new file mode 100644
index 0000000..ed3bf50
--- /dev/null
+++ b/builders/model_builder_test.py
@@ -0,0 +1,346 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================== + +"""Tests for object_detection.models.model_builder.""" + +from absl.testing import parameterized + +import tensorflow as tf + +from google.protobuf import text_format +from object_detection.builders import model_builder +from object_detection.meta_architectures import faster_rcnn_meta_arch +from object_detection.meta_architectures import rfcn_meta_arch +from object_detection.meta_architectures import ssd_meta_arch +from object_detection.models import ssd_resnet_v1_fpn_feature_extractor as ssd_resnet_v1_fpn +from object_detection.protos import hyperparams_pb2 +from object_detection.protos import losses_pb2 +from object_detection.protos import model_pb2 + + +class ModelBuilderTest(tf.test.TestCase, parameterized.TestCase): + + def create_model(self, model_config, is_training=True): + """Builds a DetectionModel based on the model config. + + Args: + model_config: A model.proto object containing the config for the desired + DetectionModel. + is_training: True if this model is being built for training purposes. + + Returns: + DetectionModel based on the config. + """ + return model_builder.build(model_config, is_training=is_training) + + def create_default_ssd_model_proto(self): + """Creates a DetectionModel proto with ssd model fields populated.""" + model_text_proto = """ + ssd { + feature_extractor { + type: 'ssd_inception_v2' + conv_hyperparams { + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + } + override_base_feature_extractor_hyperparams: true + } + box_coder { + faster_rcnn_box_coder { + } + } + matcher { + argmax_matcher { + } + } + similarity_calculator { + iou_similarity { + } + } + anchor_generator { + ssd_anchor_generator { + aspect_ratios: 1.0 + } + } + image_resizer { + fixed_shape_resizer { + height: 320 + width: 320 + } + } + box_predictor { + convolutional_box_predictor { + conv_hyperparams { + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + } + } + } + loss { + classification_loss { + weighted_softmax { + } + } + localization_loss { + weighted_smooth_l1 { + } + } + } + }""" + model_proto = model_pb2.DetectionModel() + text_format.Merge(model_text_proto, model_proto) + return model_proto + + def create_default_faster_rcnn_model_proto(self): + """Creates a DetectionModel proto with FasterRCNN model fields populated.""" + model_text_proto = """ + faster_rcnn { + inplace_batchnorm_update: false + num_classes: 3 + image_resizer { + keep_aspect_ratio_resizer { + min_dimension: 600 + max_dimension: 1024 + } + } + feature_extractor { + type: 'faster_rcnn_resnet101' + } + first_stage_anchor_generator { + grid_anchor_generator { + scales: [0.25, 0.5, 1.0, 2.0] + aspect_ratios: [0.5, 1.0, 2.0] + height_stride: 16 + width_stride: 16 + } + } + first_stage_box_predictor_conv_hyperparams { + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + } + initial_crop_size: 14 + maxpool_kernel_size: 2 + maxpool_stride: 2 + second_stage_box_predictor { + mask_rcnn_box_predictor { + conv_hyperparams { + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + } + fc_hyperparams { + op: FC + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + } + } + } + second_stage_post_processing { + batch_non_max_suppression { + score_threshold: 0.01 + iou_threshold: 0.6 + 
max_detections_per_class: 100 + max_total_detections: 300 + } + score_converter: SOFTMAX + } + }""" + model_proto = model_pb2.DetectionModel() + text_format.Merge(model_text_proto, model_proto) + return model_proto + + def test_create_ssd_models_from_config(self): + model_proto = self.create_default_ssd_model_proto() + ssd_feature_extractor_map = {} + ssd_feature_extractor_map.update( + model_builder.SSD_FEATURE_EXTRACTOR_CLASS_MAP) + ssd_feature_extractor_map.update( + model_builder.SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP) + + for extractor_type, extractor_class in ssd_feature_extractor_map.items(): + model_proto.ssd.feature_extractor.type = extractor_type + model = model_builder.build(model_proto, is_training=True) + self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch) + self.assertIsInstance(model._feature_extractor, extractor_class) + + def test_create_ssd_fpn_model_from_config(self): + model_proto = self.create_default_ssd_model_proto() + model_proto.ssd.feature_extractor.type = 'ssd_resnet101_v1_fpn' + model_proto.ssd.feature_extractor.fpn.min_level = 3 + model_proto.ssd.feature_extractor.fpn.max_level = 7 + model = model_builder.build(model_proto, is_training=True) + self.assertIsInstance(model._feature_extractor, + ssd_resnet_v1_fpn.SSDResnet101V1FpnFeatureExtractor) + self.assertEqual(model._feature_extractor._fpn_min_level, 3) + self.assertEqual(model._feature_extractor._fpn_max_level, 7) + + + @parameterized.named_parameters( + { + 'testcase_name': 'mask_rcnn_with_matmul', + 'use_matmul_crop_and_resize': False, + 'enable_mask_prediction': True + }, + { + 'testcase_name': 'mask_rcnn_without_matmul', + 'use_matmul_crop_and_resize': True, + 'enable_mask_prediction': True + }, + { + 'testcase_name': 'faster_rcnn_with_matmul', + 'use_matmul_crop_and_resize': False, + 'enable_mask_prediction': False + }, + { + 'testcase_name': 'faster_rcnn_without_matmul', + 'use_matmul_crop_and_resize': True, + 'enable_mask_prediction': False + }, + ) + def test_create_faster_rcnn_models_from_config( + self, use_matmul_crop_and_resize, enable_mask_prediction): + model_proto = self.create_default_faster_rcnn_model_proto() + faster_rcnn_config = model_proto.faster_rcnn + faster_rcnn_config.use_matmul_crop_and_resize = use_matmul_crop_and_resize + if enable_mask_prediction: + faster_rcnn_config.second_stage_mask_prediction_loss_weight = 3.0 + mask_predictor_config = ( + faster_rcnn_config.second_stage_box_predictor.mask_rcnn_box_predictor) + mask_predictor_config.predict_instance_masks = True + + for extractor_type, extractor_class in ( + model_builder.FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP.items()): + faster_rcnn_config.feature_extractor.type = extractor_type + model = model_builder.build(model_proto, is_training=True) + self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch) + self.assertIsInstance(model._feature_extractor, extractor_class) + if enable_mask_prediction: + self.assertAlmostEqual(model._second_stage_mask_loss_weight, 3.0) + + def test_create_faster_rcnn_model_from_config_with_example_miner(self): + model_proto = self.create_default_faster_rcnn_model_proto() + model_proto.faster_rcnn.hard_example_miner.num_hard_examples = 64 + model = model_builder.build(model_proto, is_training=True) + self.assertIsNotNone(model._hard_example_miner) + + def test_create_rfcn_model_from_config(self): + model_proto = self.create_default_faster_rcnn_model_proto() + rfcn_predictor_config = ( + model_proto.faster_rcnn.second_stage_box_predictor.rfcn_box_predictor) + 
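+    # Assigning into rfcn_box_predictor (next line) selects that branch of the
+    # second_stage_box_predictor oneof, so build() below should yield an
+    # RFCNMetaArch rather than a FasterRCNNMetaArch.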
rfcn_predictor_config.conv_hyperparams.op = hyperparams_pb2.Hyperparams.CONV + for extractor_type, extractor_class in ( + model_builder.FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP.items()): + model_proto.faster_rcnn.feature_extractor.type = extractor_type + model = model_builder.build(model_proto, is_training=True) + self.assertIsInstance(model, rfcn_meta_arch.RFCNMetaArch) + self.assertIsInstance(model._feature_extractor, extractor_class) + + def test_invalid_model_config_proto(self): + model_proto = '' + with self.assertRaisesRegexp( + ValueError, 'model_config not of type model_pb2.DetectionModel.'): + model_builder.build(model_proto, is_training=True) + + def test_unknown_meta_architecture(self): + model_proto = model_pb2.DetectionModel() + with self.assertRaisesRegexp(ValueError, 'Unknown meta architecture'): + model_builder.build(model_proto, is_training=True) + + def test_unknown_ssd_feature_extractor(self): + model_proto = self.create_default_ssd_model_proto() + model_proto.ssd.feature_extractor.type = 'unknown_feature_extractor' + with self.assertRaisesRegexp(ValueError, 'Unknown ssd feature_extractor'): + model_builder.build(model_proto, is_training=True) + + def test_unknown_faster_rcnn_feature_extractor(self): + model_proto = self.create_default_faster_rcnn_model_proto() + model_proto.faster_rcnn.feature_extractor.type = 'unknown_feature_extractor' + with self.assertRaisesRegexp(ValueError, + 'Unknown Faster R-CNN feature_extractor'): + model_builder.build(model_proto, is_training=True) + + def test_invalid_first_stage_nms_iou_threshold(self): + model_proto = self.create_default_faster_rcnn_model_proto() + model_proto.faster_rcnn.first_stage_nms_iou_threshold = 1.1 + with self.assertRaisesRegexp(ValueError, + r'iou_threshold not in \[0, 1\.0\]'): + model_builder.build(model_proto, is_training=True) + model_proto.faster_rcnn.first_stage_nms_iou_threshold = -0.1 + with self.assertRaisesRegexp(ValueError, + r'iou_threshold not in \[0, 1\.0\]'): + model_builder.build(model_proto, is_training=True) + + def test_invalid_second_stage_batch_size(self): + model_proto = self.create_default_faster_rcnn_model_proto() + model_proto.faster_rcnn.first_stage_max_proposals = 1 + model_proto.faster_rcnn.second_stage_batch_size = 2 + with self.assertRaisesRegexp( + ValueError, 'second_stage_batch_size should be no greater ' + 'than first_stage_max_proposals.'): + model_builder.build(model_proto, is_training=True) + + def test_invalid_faster_rcnn_batchnorm_update(self): + model_proto = self.create_default_faster_rcnn_model_proto() + model_proto.faster_rcnn.inplace_batchnorm_update = True + with self.assertRaisesRegexp(ValueError, + 'inplace batchnorm updates not supported'): + model_builder.build(model_proto, is_training=True) + + def test_create_experimental_model(self): + + model_text_proto = """ + experimental_model { + name: 'model42' + }""" + + build_func = lambda *args: 42 + model_builder.EXPERIMENTAL_META_ARCH_BUILDER_MAP['model42'] = build_func + model_proto = model_pb2.DetectionModel() + text_format.Merge(model_text_proto, model_proto) + + self.assertEqual(model_builder.build(model_proto, is_training=True), 42) + + +if __name__ == '__main__': + tf.test.main() diff --git a/builders/optimizer_builder.py b/builders/optimizer_builder.py new file mode 100644 index 0000000..1cd2a61 --- /dev/null +++ b/builders/optimizer_builder.py @@ -0,0 +1,201 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Functions to build DetectionModel training optimizers."""
+
+import tensorflow as tf
+
+from object_detection.utils import learning_schedules
+
+
+def build_optimizers_tf_v1(optimizer_config, global_step=None):
+  """Create a TF v1 compatible optimizer based on config.
+
+  Args:
+    optimizer_config: An Optimizer proto message.
+    global_step: A variable representing the current step.
+      If None, defaults to tf.train.get_or_create_global_step()
+
+  Returns:
+    An optimizer and a list of variables for summary.
+
+  Raises:
+    ValueError: when the optimizer type is not supported.
+  """
+  optimizer_type = optimizer_config.WhichOneof('optimizer')
+  optimizer = None
+
+  summary_vars = []
+  if optimizer_type == 'rms_prop_optimizer':
+    config = optimizer_config.rms_prop_optimizer
+    learning_rate = _create_learning_rate(config.learning_rate,
+                                          global_step=global_step)
+    summary_vars.append(learning_rate)
+    optimizer = tf.train.RMSPropOptimizer(
+        learning_rate,
+        decay=config.decay,
+        momentum=config.momentum_optimizer_value,
+        epsilon=config.epsilon)
+
+  if optimizer_type == 'momentum_optimizer':
+    config = optimizer_config.momentum_optimizer
+    learning_rate = _create_learning_rate(config.learning_rate,
+                                          global_step=global_step)
+    summary_vars.append(learning_rate)
+    optimizer = tf.train.MomentumOptimizer(
+        learning_rate,
+        momentum=config.momentum_optimizer_value)
+
+  if optimizer_type == 'adam_optimizer':
+    config = optimizer_config.adam_optimizer
+    learning_rate = _create_learning_rate(config.learning_rate,
+                                          global_step=global_step)
+    summary_vars.append(learning_rate)
+    optimizer = tf.train.AdamOptimizer(learning_rate)
+
+  if optimizer is None:
+    raise ValueError('Optimizer %s not supported.' % optimizer_type)
+
+  if optimizer_config.use_moving_average:
+    optimizer = tf.contrib.opt.MovingAverageOptimizer(
+        optimizer, average_decay=optimizer_config.moving_average_decay)
+
+  return optimizer, summary_vars
+
+
+def build_optimizers_tf_v2(optimizer_config, global_step=None):
+  """Create a TF v2 compatible optimizer based on config.
+
+  Args:
+    optimizer_config: An Optimizer proto message.
+    global_step: A variable representing the current step.
+      If None, defaults to tf.train.get_or_create_global_step()
+
+  Returns:
+    An optimizer and a list of variables for summary.
+
+  Raises:
+    ValueError: when the optimizer type is not supported.
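+
+  Example, assuming a config with its `momentum_optimizer` field set:
+    optimizer, summary_vars = build_optimizers_tf_v2(optimizer_config)
+    # optimizer -> a tf.keras.optimizers.SGD instance;
+    # summary_vars -> [learning_rate tensor created from the config].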
+ """ + optimizer_type = optimizer_config.WhichOneof('optimizer') + optimizer = None + + summary_vars = [] + if optimizer_type == 'rms_prop_optimizer': + config = optimizer_config.rms_prop_optimizer + learning_rate = _create_learning_rate(config.learning_rate, + global_step=global_step) + summary_vars.append(learning_rate) + optimizer = tf.keras.optimizers.RMSprop( + learning_rate, + decay=config.decay, + momentum=config.momentum_optimizer_value, + epsilon=config.epsilon) + + if optimizer_type == 'momentum_optimizer': + config = optimizer_config.momentum_optimizer + learning_rate = _create_learning_rate(config.learning_rate, + global_step=global_step) + summary_vars.append(learning_rate) + optimizer = tf.keras.optimizers.SGD( + learning_rate, + momentum=config.momentum_optimizer_value) + + if optimizer_type == 'adam_optimizer': + config = optimizer_config.adam_optimizer + learning_rate = _create_learning_rate(config.learning_rate, + global_step=global_step) + summary_vars.append(learning_rate) + optimizer = tf.keras.optimizers.Adam(learning_rate) + + if optimizer is None: + raise ValueError('Optimizer %s not supported.' % optimizer_type) + + if optimizer_config.use_moving_average: + raise ValueError('Moving average not supported in eager mode.') + + return optimizer, summary_vars + + +def build(config, global_step=None): + + if tf.executing_eagerly(): + return build_optimizers_tf_v2(config, global_step) + else: + return build_optimizers_tf_v1(config, global_step) + + +def _create_learning_rate(learning_rate_config, global_step=None): + """Create optimizer learning rate based on config. + + Args: + learning_rate_config: A LearningRate proto message. + global_step: A variable representing the current step. + If None, defaults to tf.train.get_or_create_global_step() + + Returns: + A learning rate. + + Raises: + ValueError: when using an unsupported input data type. 
+ """ + if global_step is None: + global_step = tf.train.get_or_create_global_step() + learning_rate = None + learning_rate_type = learning_rate_config.WhichOneof('learning_rate') + if learning_rate_type == 'constant_learning_rate': + config = learning_rate_config.constant_learning_rate + learning_rate = tf.constant(config.learning_rate, dtype=tf.float32, + name='learning_rate') + + if learning_rate_type == 'exponential_decay_learning_rate': + config = learning_rate_config.exponential_decay_learning_rate + learning_rate = learning_schedules.exponential_decay_with_burnin( + global_step, + config.initial_learning_rate, + config.decay_steps, + config.decay_factor, + burnin_learning_rate=config.burnin_learning_rate, + burnin_steps=config.burnin_steps, + min_learning_rate=config.min_learning_rate, + staircase=config.staircase) + + if learning_rate_type == 'manual_step_learning_rate': + config = learning_rate_config.manual_step_learning_rate + if not config.schedule: + raise ValueError('Empty learning rate schedule.') + learning_rate_step_boundaries = [x.step for x in config.schedule] + learning_rate_sequence = [config.initial_learning_rate] + learning_rate_sequence += [x.learning_rate for x in config.schedule] + learning_rate = learning_schedules.manual_stepping( + global_step, learning_rate_step_boundaries, + learning_rate_sequence, config.warmup) + + if learning_rate_type == 'cosine_decay_learning_rate': + config = learning_rate_config.cosine_decay_learning_rate + learning_rate = learning_schedules.cosine_decay_with_warmup( + global_step, + config.learning_rate_base, + config.total_steps, + config.warmup_learning_rate, + config.warmup_steps, + config.hold_base_rate_steps) + + if learning_rate is None: + raise ValueError('Learning_rate %s not supported.' % learning_rate_type) + + return learning_rate diff --git a/builders/optimizer_builder_test.py b/builders/optimizer_builder_test.py new file mode 100644 index 0000000..343a858 --- /dev/null +++ b/builders/optimizer_builder_test.py @@ -0,0 +1,208 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for optimizer_builder.""" + +import tensorflow as tf + +from google.protobuf import text_format + +from object_detection.builders import optimizer_builder +from object_detection.protos import optimizer_pb2 + + +class LearningRateBuilderTest(tf.test.TestCase): + + def testBuildConstantLearningRate(self): + learning_rate_text_proto = """ + constant_learning_rate { + learning_rate: 0.004 + } + """ + learning_rate_proto = optimizer_pb2.LearningRate() + text_format.Merge(learning_rate_text_proto, learning_rate_proto) + learning_rate = optimizer_builder._create_learning_rate( + learning_rate_proto) + self.assertTrue(learning_rate.op.name.endswith('learning_rate')) + with self.test_session(): + learning_rate_out = learning_rate.eval() + self.assertAlmostEqual(learning_rate_out, 0.004) + + def testBuildExponentialDecayLearningRate(self): + learning_rate_text_proto = """ + exponential_decay_learning_rate { + initial_learning_rate: 0.004 + decay_steps: 99999 + decay_factor: 0.85 + staircase: false + } + """ + learning_rate_proto = optimizer_pb2.LearningRate() + text_format.Merge(learning_rate_text_proto, learning_rate_proto) + learning_rate = optimizer_builder._create_learning_rate( + learning_rate_proto) + self.assertTrue(learning_rate.op.name.endswith('learning_rate')) + self.assertTrue(isinstance(learning_rate, tf.Tensor)) + + def testBuildManualStepLearningRate(self): + learning_rate_text_proto = """ + manual_step_learning_rate { + initial_learning_rate: 0.002 + schedule { + step: 100 + learning_rate: 0.006 + } + schedule { + step: 90000 + learning_rate: 0.00006 + } + warmup: true + } + """ + learning_rate_proto = optimizer_pb2.LearningRate() + text_format.Merge(learning_rate_text_proto, learning_rate_proto) + learning_rate = optimizer_builder._create_learning_rate( + learning_rate_proto) + self.assertTrue(isinstance(learning_rate, tf.Tensor)) + + def testBuildCosineDecayLearningRate(self): + learning_rate_text_proto = """ + cosine_decay_learning_rate { + learning_rate_base: 0.002 + total_steps: 20000 + warmup_learning_rate: 0.0001 + warmup_steps: 1000 + hold_base_rate_steps: 20000 + } + """ + learning_rate_proto = optimizer_pb2.LearningRate() + text_format.Merge(learning_rate_text_proto, learning_rate_proto) + learning_rate = optimizer_builder._create_learning_rate( + learning_rate_proto) + self.assertTrue(isinstance(learning_rate, tf.Tensor)) + + def testRaiseErrorOnEmptyLearningRate(self): + learning_rate_text_proto = """ + """ + learning_rate_proto = optimizer_pb2.LearningRate() + text_format.Merge(learning_rate_text_proto, learning_rate_proto) + with self.assertRaises(ValueError): + optimizer_builder._create_learning_rate(learning_rate_proto) + + +class OptimizerBuilderTest(tf.test.TestCase): + + def testBuildRMSPropOptimizer(self): + optimizer_text_proto = """ + rms_prop_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.004 + decay_steps: 800720 + decay_factor: 0.95 + } + } + momentum_optimizer_value: 0.9 + decay: 0.9 + epsilon: 1.0 + } + use_moving_average: false + """ + optimizer_proto = optimizer_pb2.Optimizer() + text_format.Merge(optimizer_text_proto, optimizer_proto) + optimizer, _ = optimizer_builder.build(optimizer_proto) + self.assertTrue(isinstance(optimizer, tf.train.RMSPropOptimizer)) + + def testBuildMomentumOptimizer(self): + optimizer_text_proto = """ + momentum_optimizer: { + learning_rate: { + constant_learning_rate { + learning_rate: 
0.001 + } + } + momentum_optimizer_value: 0.99 + } + use_moving_average: false + """ + optimizer_proto = optimizer_pb2.Optimizer() + text_format.Merge(optimizer_text_proto, optimizer_proto) + optimizer, _ = optimizer_builder.build(optimizer_proto) + self.assertTrue(isinstance(optimizer, tf.train.MomentumOptimizer)) + + def testBuildAdamOptimizer(self): + optimizer_text_proto = """ + adam_optimizer: { + learning_rate: { + constant_learning_rate { + learning_rate: 0.002 + } + } + } + use_moving_average: false + """ + optimizer_proto = optimizer_pb2.Optimizer() + text_format.Merge(optimizer_text_proto, optimizer_proto) + optimizer, _ = optimizer_builder.build(optimizer_proto) + self.assertTrue(isinstance(optimizer, tf.train.AdamOptimizer)) + + def testBuildMovingAverageOptimizer(self): + optimizer_text_proto = """ + adam_optimizer: { + learning_rate: { + constant_learning_rate { + learning_rate: 0.002 + } + } + } + use_moving_average: True + """ + optimizer_proto = optimizer_pb2.Optimizer() + text_format.Merge(optimizer_text_proto, optimizer_proto) + optimizer, _ = optimizer_builder.build(optimizer_proto) + self.assertTrue( + isinstance(optimizer, tf.contrib.opt.MovingAverageOptimizer)) + + def testBuildMovingAverageOptimizerWithNonDefaultDecay(self): + optimizer_text_proto = """ + adam_optimizer: { + learning_rate: { + constant_learning_rate { + learning_rate: 0.002 + } + } + } + use_moving_average: True + moving_average_decay: 0.2 + """ + optimizer_proto = optimizer_pb2.Optimizer() + text_format.Merge(optimizer_text_proto, optimizer_proto) + optimizer, _ = optimizer_builder.build(optimizer_proto) + self.assertTrue( + isinstance(optimizer, tf.contrib.opt.MovingAverageOptimizer)) + # TODO(rathodv): Find a way to not depend on the private members. + self.assertAlmostEqual(optimizer._ema._decay, 0.2) + + def testBuildEmptyOptimizer(self): + optimizer_text_proto = """ + """ + optimizer_proto = optimizer_pb2.Optimizer() + text_format.Merge(optimizer_text_proto, optimizer_proto) + with self.assertRaises(ValueError): + optimizer_builder.build(optimizer_proto) + + +if __name__ == '__main__': + tf.test.main() diff --git a/builders/post_processing_builder.py b/builders/post_processing_builder.py new file mode 100644 index 0000000..35126ab --- /dev/null +++ b/builders/post_processing_builder.py @@ -0,0 +1,181 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Builder function for post processing operations.""" +import functools + +import tensorflow as tf +from object_detection.builders import calibration_builder +from object_detection.core import post_processing +from object_detection.protos import post_processing_pb2 + + +def build(post_processing_config): + """Builds callables for post-processing operations. + + Builds callables for non-max suppression, score conversion, and (optionally) + calibration based on the configuration. 
+
+  Non-max suppression callable takes `boxes`, `scores`, and optionally
+  `clip_window`, `parallel_iterations`, `masks`, and `scope` as inputs. It
+  returns `nms_boxes`, `nms_scores`, `nms_classes`, `nms_masks`, and
+  `num_detections`. See post_processing.batch_multiclass_non_max_suppression
+  for the type and shape of these tensors.
+
+  Score converter callable should be called with `input` tensor. The callable
+  returns the output from one of 3 tf operations based on the configuration -
+  tf.identity, tf.sigmoid or tf.nn.softmax. If a calibration config is
+  provided, score_converter also applies calibration transformations, as
+  defined in calibration_builder.py. See TensorFlow documentation for argument
+  and return value descriptions.
+
+  Args:
+    post_processing_config: post_processing.proto object containing the
+      parameters for the post-processing operations.
+
+  Returns:
+    non_max_suppressor_fn: Callable for non-max suppression.
+    score_converter_fn: Callable for score conversion.
+
+  Raises:
+    ValueError: if the post_processing_config is of incorrect type.
+  """
+  if not isinstance(post_processing_config, post_processing_pb2.PostProcessing):
+    raise ValueError('post_processing_config not of type '
+                     'post_processing_pb2.PostProcessing.')
+  non_max_suppressor_fn = _build_non_max_suppressor(
+      post_processing_config.batch_non_max_suppression)
+  score_converter_fn = _build_score_converter(
+      post_processing_config.score_converter,
+      post_processing_config.logit_scale)
+  if post_processing_config.HasField('calibration_config'):
+    score_converter_fn = _build_calibrated_score_converter(
+        score_converter_fn,
+        post_processing_config.calibration_config)
+  return non_max_suppressor_fn, score_converter_fn
+
+
+def _build_non_max_suppressor(nms_config):
+  """Builds non-max suppression based on the nms config.
+
+  Args:
+    nms_config: post_processing_pb2.PostProcessing.BatchNonMaxSuppression
+      proto.
+
+  Returns:
+    non_max_suppressor_fn: Callable non-max suppressor.
+
+  Raises:
+    ValueError: On incorrect iou_threshold or on incompatible values of
+      max_total_detections and max_detections_per_class or on negative
+      soft_nms_sigma.
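+
+  Example, assuming an nms_config with `score_threshold: 0.7` and
+  `iou_threshold: 0.6`:
+    nms_fn = _build_non_max_suppressor(nms_config)
+    # nms_fn -> functools.partial over batch_multiclass_non_max_suppression
+    # with keywords score_thresh=0.7 and iou_thresh=0.6.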
+ """ + if nms_config.iou_threshold < 0 or nms_config.iou_threshold > 1.0: + raise ValueError('iou_threshold not in [0, 1.0].') + if nms_config.max_detections_per_class > nms_config.max_total_detections: + raise ValueError('max_detections_per_class should be no greater than ' + 'max_total_detections.') + if nms_config.soft_nms_sigma < 0.0: + raise ValueError('soft_nms_sigma should be non-negative.') + if nms_config.use_combined_nms and nms_config.use_class_agnostic_nms: + raise ValueError('combined_nms does not support class_agnostic_nms.') + non_max_suppressor_fn = functools.partial( + post_processing.batch_multiclass_non_max_suppression, + score_thresh=nms_config.score_threshold, + iou_thresh=nms_config.iou_threshold, + max_size_per_class=nms_config.max_detections_per_class, + max_total_size=nms_config.max_total_detections, + use_static_shapes=nms_config.use_static_shapes, + use_class_agnostic_nms=nms_config.use_class_agnostic_nms, + max_classes_per_detection=nms_config.max_classes_per_detection, + soft_nms_sigma=nms_config.soft_nms_sigma, + use_partitioned_nms=nms_config.use_partitioned_nms, + use_combined_nms=nms_config.use_combined_nms, + change_coordinate_frame=True) + + return non_max_suppressor_fn + + +def _score_converter_fn_with_logit_scale(tf_score_converter_fn, logit_scale): + """Create a function to scale logits then apply a Tensorflow function.""" + def score_converter_fn(logits): + scaled_logits = tf.divide(logits, logit_scale, name='scale_logits') + return tf_score_converter_fn(scaled_logits, name='convert_scores') + score_converter_fn.__name__ = '%s_with_logit_scale' % ( + tf_score_converter_fn.__name__) + return score_converter_fn + + +def _build_score_converter(score_converter_config, logit_scale): + """Builds score converter based on the config. + + Builds one of [tf.identity, tf.sigmoid, tf.softmax] score converters based on + the config. + + Args: + score_converter_config: post_processing_pb2.PostProcessing.score_converter. + logit_scale: temperature to use for SOFTMAX score_converter. + + Returns: + Callable score converter op. + + Raises: + ValueError: On unknown score converter. + """ + if score_converter_config == post_processing_pb2.PostProcessing.IDENTITY: + return _score_converter_fn_with_logit_scale(tf.identity, logit_scale) + if score_converter_config == post_processing_pb2.PostProcessing.SIGMOID: + return _score_converter_fn_with_logit_scale(tf.sigmoid, logit_scale) + if score_converter_config == post_processing_pb2.PostProcessing.SOFTMAX: + return _score_converter_fn_with_logit_scale(tf.nn.softmax, logit_scale) + raise ValueError('Unknown score converter.') + + +def _build_calibrated_score_converter(score_converter_fn, calibration_config): + """Wraps a score_converter_fn, adding a calibration step. + + Builds a score converter function with a calibration transformation according + to calibration_builder.py. The score conversion function may be applied before + or after the calibration transformation, depending on the calibration method. + If the method is temperature scaling, the score conversion is + after the calibration transformation. Otherwise, the score conversion is + before the calibration transformation. Calibration applies positive monotonic + transformations to inputs (i.e. score ordering is strictly preserved or + adjacent scores are mapped to the same score). 
When calibration is + class-agnostic, the highest-scoring class remains unchanged, unless two + adjacent scores are mapped to the same value and one class arbitrarily + selected to break the tie. In per-class calibration, it's possible (though + rare in practice) that the highest-scoring class will change, since positive + monotonicity is only required to hold within each class. + + Args: + score_converter_fn: callable that takes logit scores as input. + calibration_config: post_processing_pb2.PostProcessing.calibration_config. + + Returns: + Callable calibrated score coverter op. + """ + calibration_fn = calibration_builder.build(calibration_config) + def calibrated_score_converter_fn(logits): + if (calibration_config.WhichOneof('calibrator') == + 'temperature_scaling_calibration'): + calibrated_logits = calibration_fn(logits) + return score_converter_fn(calibrated_logits) + else: + converted_logits = score_converter_fn(logits) + return calibration_fn(converted_logits) + + calibrated_score_converter_fn.__name__ = ( + 'calibrate_with_%s' % calibration_config.WhichOneof('calibrator')) + return calibrated_score_converter_fn diff --git a/builders/post_processing_builder_test.py b/builders/post_processing_builder_test.py new file mode 100644 index 0000000..5514a51 --- /dev/null +++ b/builders/post_processing_builder_test.py @@ -0,0 +1,185 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for post_processing_builder.""" + +import tensorflow as tf +from google.protobuf import text_format +from object_detection.builders import post_processing_builder +from object_detection.protos import post_processing_pb2 + + +class PostProcessingBuilderTest(tf.test.TestCase): + + def test_build_non_max_suppressor_with_correct_parameters(self): + post_processing_text_proto = """ + batch_non_max_suppression { + score_threshold: 0.7 + iou_threshold: 0.6 + max_detections_per_class: 100 + max_total_detections: 300 + soft_nms_sigma: 0.4 + } + """ + post_processing_config = post_processing_pb2.PostProcessing() + text_format.Merge(post_processing_text_proto, post_processing_config) + non_max_suppressor, _ = post_processing_builder.build( + post_processing_config) + self.assertEqual(non_max_suppressor.keywords['max_size_per_class'], 100) + self.assertEqual(non_max_suppressor.keywords['max_total_size'], 300) + self.assertAlmostEqual(non_max_suppressor.keywords['score_thresh'], 0.7) + self.assertAlmostEqual(non_max_suppressor.keywords['iou_thresh'], 0.6) + self.assertAlmostEqual(non_max_suppressor.keywords['soft_nms_sigma'], 0.4) + + def test_build_non_max_suppressor_with_correct_parameters_classagnostic_nms( + self): + post_processing_text_proto = """ + batch_non_max_suppression { + score_threshold: 0.7 + iou_threshold: 0.6 + max_detections_per_class: 10 + max_total_detections: 300 + use_class_agnostic_nms: True + max_classes_per_detection: 1 + } + """ + post_processing_config = post_processing_pb2.PostProcessing() + text_format.Merge(post_processing_text_proto, post_processing_config) + non_max_suppressor, _ = post_processing_builder.build( + post_processing_config) + self.assertEqual(non_max_suppressor.keywords['max_size_per_class'], 10) + self.assertEqual(non_max_suppressor.keywords['max_total_size'], 300) + self.assertEqual(non_max_suppressor.keywords['max_classes_per_detection'], + 1) + self.assertEqual(non_max_suppressor.keywords['use_class_agnostic_nms'], + True) + self.assertAlmostEqual(non_max_suppressor.keywords['score_thresh'], 0.7) + self.assertAlmostEqual(non_max_suppressor.keywords['iou_thresh'], 0.6) + + def test_build_identity_score_converter(self): + post_processing_text_proto = """ + score_converter: IDENTITY + """ + post_processing_config = post_processing_pb2.PostProcessing() + text_format.Merge(post_processing_text_proto, post_processing_config) + _, score_converter = post_processing_builder.build( + post_processing_config) + self.assertEqual(score_converter.__name__, 'identity_with_logit_scale') + + inputs = tf.constant([1, 1], tf.float32) + outputs = score_converter(inputs) + with self.test_session() as sess: + converted_scores = sess.run(outputs) + expected_converted_scores = sess.run(inputs) + self.assertAllClose(converted_scores, expected_converted_scores) + + def test_build_identity_score_converter_with_logit_scale(self): + post_processing_text_proto = """ + score_converter: IDENTITY + logit_scale: 2.0 + """ + post_processing_config = post_processing_pb2.PostProcessing() + text_format.Merge(post_processing_text_proto, post_processing_config) + _, score_converter = post_processing_builder.build(post_processing_config) + self.assertEqual(score_converter.__name__, 'identity_with_logit_scale') + + inputs = tf.constant([1, 1], tf.float32) + outputs = score_converter(inputs) + with self.test_session() as sess: + converted_scores = sess.run(outputs) + expected_converted_scores = 
sess.run(tf.constant([.5, .5], tf.float32))
+      self.assertAllClose(converted_scores, expected_converted_scores)
+
+  def test_build_sigmoid_score_converter(self):
+    post_processing_text_proto = """
+      score_converter: SIGMOID
+    """
+    post_processing_config = post_processing_pb2.PostProcessing()
+    text_format.Merge(post_processing_text_proto, post_processing_config)
+    _, score_converter = post_processing_builder.build(post_processing_config)
+    self.assertEqual(score_converter.__name__, 'sigmoid_with_logit_scale')
+
+  def test_build_softmax_score_converter(self):
+    post_processing_text_proto = """
+      score_converter: SOFTMAX
+    """
+    post_processing_config = post_processing_pb2.PostProcessing()
+    text_format.Merge(post_processing_text_proto, post_processing_config)
+    _, score_converter = post_processing_builder.build(post_processing_config)
+    self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale')
+
+  def test_build_softmax_score_converter_with_temperature(self):
+    post_processing_text_proto = """
+      score_converter: SOFTMAX
+      logit_scale: 2.0
+    """
+    post_processing_config = post_processing_pb2.PostProcessing()
+    text_format.Merge(post_processing_text_proto, post_processing_config)
+    _, score_converter = post_processing_builder.build(post_processing_config)
+    self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale')
+
+  def test_build_calibrator_with_nonempty_config(self):
+    """Test that a calibrated score converter is built when config is set."""
+    # Calibration config maps all scores to 0.5.
+    post_processing_text_proto = """
+      score_converter: SOFTMAX
+      calibration_config {
+        function_approximation {
+          x_y_pairs {
+            x_y_pair {
+              x: 0.0
+              y: 0.5
+            }
+            x_y_pair {
+              x: 1.0
+              y: 0.5
+            }}}}"""
+    post_processing_config = post_processing_pb2.PostProcessing()
+    text_format.Merge(post_processing_text_proto, post_processing_config)
+    _, calibrated_score_conversion_fn = post_processing_builder.build(
+        post_processing_config)
+    self.assertEqual(calibrated_score_conversion_fn.__name__,
+                     'calibrate_with_function_approximation')
+
+    input_scores = tf.constant([1, 1], tf.float32)
+    outputs = calibrated_score_conversion_fn(input_scores)
+    with self.test_session() as sess:
+      calibrated_scores = sess.run(outputs)
+      expected_calibrated_scores = sess.run(tf.constant([0.5, 0.5], tf.float32))
+      self.assertAllClose(calibrated_scores, expected_calibrated_scores)
+
+  def test_build_temperature_scaling_calibrator(self):
+    post_processing_text_proto = """
+      score_converter: SOFTMAX
+      calibration_config {
+        temperature_scaling_calibration {
+          scaler: 2.0
+        }}"""
+    post_processing_config = post_processing_pb2.PostProcessing()
+    text_format.Merge(post_processing_text_proto, post_processing_config)
+    _, calibrated_score_conversion_fn = post_processing_builder.build(
+        post_processing_config)
+    self.assertEqual(calibrated_score_conversion_fn.__name__,
+                     'calibrate_with_temperature_scaling_calibration')
+
+    input_scores = tf.constant([1, 1], tf.float32)
+    outputs = calibrated_score_conversion_fn(input_scores)
+    with self.test_session() as sess:
+      calibrated_scores = sess.run(outputs)
+      expected_calibrated_scores = sess.run(tf.constant([0.5, 0.5], tf.float32))
+      self.assertAllClose(calibrated_scores, expected_calibrated_scores)
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/builders/preprocessor_builder.py b/builders/preprocessor_builder.py
new file mode 100644
index 0000000..de71905
--- /dev/null
+++ b/builders/preprocessor_builder.py
@@ -0,0 +1,403 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Builder for preprocessing steps."""
+
+import tensorflow as tf
+
+from object_detection.core import preprocessor
+from object_detection.protos import preprocessor_pb2
+
+
+def _get_step_config_from_proto(preprocessor_step_config, step_name):
+  """Returns the value of a field named step_name from proto.
+
+  Args:
+    preprocessor_step_config: A preprocessor_pb2.PreprocessingStep object.
+    step_name: Name of the field to get value from.
+
+  Returns:
+    result_dict: a sub proto message from preprocessor_step_config which will
+      be later converted to a dictionary.
+
+  Raises:
+    ValueError: If field does not exist in proto.
+  """
+  for field, value in preprocessor_step_config.ListFields():
+    if field.name == step_name:
+      return value
+
+  raise ValueError('Could not get field %s from proto!' % step_name)
+
+
+def _get_dict_from_proto(config):
+  """Helper function to put all proto fields into a dictionary.
+
+  For many preprocessing steps, there's a trivial 1-1 mapping from proto fields
+  to function arguments. This function automatically populates a dictionary
+  with the arguments from the proto.
+
+  Protos that CANNOT be trivially populated include:
+  * nested messages.
+  * steps that check if an optional field is set (i.e. where None != 0).
+  * protos that don't map 1-1 to arguments (i.e. a list that should be
+    reshaped).
+  * fields requiring additional validation (i.e. a repeated field that must
+    have n elements).
+
+  Args:
+    config: A protobuf object that does not violate the conditions above.
+
+  Returns:
+    result_dict: |config| converted into a python dictionary.
+  """
+  result_dict = {}
+  for field, value in config.ListFields():
+    result_dict[field.name] = value
+  return result_dict
+
+
+# A map from a PreprocessingStep proto config field name to the preprocessing
+# function that should be used. The PreprocessingStep proto should be parsable
+# with _get_dict_from_proto.
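+#
+# For example, a step configured as
+#   random_adjust_brightness { max_delta: 0.2 }
+# is looked up here by its field name and, via _get_dict_from_proto, becomes
+# the pair (preprocessor.random_adjust_brightness, {'max_delta': 0.2}).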
+PREPROCESSING_FUNCTION_MAP = { + 'normalize_image': + preprocessor.normalize_image, + 'random_pixel_value_scale': + preprocessor.random_pixel_value_scale, + 'random_image_scale': + preprocessor.random_image_scale, + 'random_rgb_to_gray': + preprocessor.random_rgb_to_gray, + 'random_adjust_brightness': + preprocessor.random_adjust_brightness, + 'random_adjust_contrast': + preprocessor.random_adjust_contrast, + 'random_adjust_hue': + preprocessor.random_adjust_hue, + 'random_adjust_saturation': + preprocessor.random_adjust_saturation, + 'random_distort_color': + preprocessor.random_distort_color, + 'random_jitter_boxes': + preprocessor.random_jitter_boxes, + 'random_crop_to_aspect_ratio': + preprocessor.random_crop_to_aspect_ratio, + 'random_black_patches': + preprocessor.random_black_patches, + 'random_jpeg_quality': + preprocessor.random_jpeg_quality, + 'random_downscale_to_target_pixels': + preprocessor.random_downscale_to_target_pixels, + 'random_patch_gaussian': + preprocessor.random_patch_gaussian, + 'rgb_to_gray': + preprocessor.rgb_to_gray, + 'scale_boxes_to_pixel_coordinates': ( + preprocessor.scale_boxes_to_pixel_coordinates), + 'subtract_channel_mean': + preprocessor.subtract_channel_mean, + 'convert_class_logits_to_softmax': + preprocessor.convert_class_logits_to_softmax, +} + + +# A map to convert from preprocessor_pb2.ResizeImage.Method enum to +# tf.image.ResizeMethod. +RESIZE_METHOD_MAP = { + preprocessor_pb2.ResizeImage.AREA: tf.image.ResizeMethod.AREA, + preprocessor_pb2.ResizeImage.BICUBIC: tf.image.ResizeMethod.BICUBIC, + preprocessor_pb2.ResizeImage.BILINEAR: tf.image.ResizeMethod.BILINEAR, + preprocessor_pb2.ResizeImage.NEAREST_NEIGHBOR: ( + tf.image.ResizeMethod.NEAREST_NEIGHBOR), +} + + +def build(preprocessor_step_config): + """Builds preprocessing step based on the configuration. + + Args: + preprocessor_step_config: PreprocessingStep configuration proto. + + Returns: + function, argmap: A callable function and an argument map to call function + with. + + Raises: + ValueError: On invalid configuration. 
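+
+  Example, assuming a step proto with `random_rotation90 {}` set:
+    function, args = build(preprocessor_step_config)
+    # function -> preprocessor.random_rotation90; args -> {}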
+ """ + step_type = preprocessor_step_config.WhichOneof('preprocessing_step') + + if step_type in PREPROCESSING_FUNCTION_MAP: + preprocessing_function = PREPROCESSING_FUNCTION_MAP[step_type] + step_config = _get_step_config_from_proto(preprocessor_step_config, + step_type) + function_args = _get_dict_from_proto(step_config) + return (preprocessing_function, function_args) + + if step_type == 'random_horizontal_flip': + config = preprocessor_step_config.random_horizontal_flip + return (preprocessor.random_horizontal_flip, + { + 'keypoint_flip_permutation': tuple( + config.keypoint_flip_permutation), + }) + + if step_type == 'random_vertical_flip': + config = preprocessor_step_config.random_vertical_flip + return (preprocessor.random_vertical_flip, + { + 'keypoint_flip_permutation': tuple( + config.keypoint_flip_permutation), + }) + + if step_type == 'random_rotation90': + return (preprocessor.random_rotation90, {}) + + if step_type == 'random_crop_image': + config = preprocessor_step_config.random_crop_image + return (preprocessor.random_crop_image, + { + 'min_object_covered': config.min_object_covered, + 'aspect_ratio_range': (config.min_aspect_ratio, + config.max_aspect_ratio), + 'area_range': (config.min_area, config.max_area), + 'overlap_thresh': config.overlap_thresh, + 'clip_boxes': config.clip_boxes, + 'random_coef': config.random_coef, + }) + + if step_type == 'random_pad_image': + config = preprocessor_step_config.random_pad_image + min_image_size = None + if (config.HasField('min_image_height') != + config.HasField('min_image_width')): + raise ValueError('min_image_height and min_image_width should be either ' + 'both set or both unset.') + if config.HasField('min_image_height'): + min_image_size = (config.min_image_height, config.min_image_width) + + max_image_size = None + if (config.HasField('max_image_height') != + config.HasField('max_image_width')): + raise ValueError('max_image_height and max_image_width should be either ' + 'both set or both unset.') + if config.HasField('max_image_height'): + max_image_size = (config.max_image_height, config.max_image_width) + + pad_color = config.pad_color or None + if pad_color: + if len(pad_color) != 3: + tf.logging.warn('pad_color should have 3 elements (RGB) if set!') + + pad_color = tf.cast([x for x in config.pad_color], dtype=tf.float32) + return (preprocessor.random_pad_image, + { + 'min_image_size': min_image_size, + 'max_image_size': max_image_size, + 'pad_color': pad_color, + }) + + if step_type == 'random_absolute_pad_image': + config = preprocessor_step_config.random_absolute_pad_image + + max_height_padding = config.max_height_padding or 1 + max_width_padding = config.max_width_padding or 1 + + pad_color = config.pad_color or None + if pad_color: + if len(pad_color) != 3: + tf.logging.warn('pad_color should have 3 elements (RGB) if set!') + + pad_color = tf.cast([x for x in config.pad_color], dtype=tf.float32) + + return (preprocessor.random_absolute_pad_image, + { + 'max_height_padding': max_height_padding, + 'max_width_padding': max_width_padding, + 'pad_color': pad_color, + }) + if step_type == 'random_crop_pad_image': + config = preprocessor_step_config.random_crop_pad_image + min_padded_size_ratio = config.min_padded_size_ratio + if min_padded_size_ratio and len(min_padded_size_ratio) != 2: + raise ValueError('min_padded_size_ratio should have 2 elements if set!') + max_padded_size_ratio = config.max_padded_size_ratio + if max_padded_size_ratio and len(max_padded_size_ratio) != 2: + raise 
ValueError('max_padded_size_ratio should have 2 elements if set!') + pad_color = config.pad_color or None + if pad_color: + if len(pad_color) != 3: + tf.logging.warn('pad_color should have 3 elements (RGB) if set!') + + pad_color = tf.cast([x for x in config.pad_color], dtype=tf.float32) + + kwargs = { + 'min_object_covered': config.min_object_covered, + 'aspect_ratio_range': (config.min_aspect_ratio, + config.max_aspect_ratio), + 'area_range': (config.min_area, config.max_area), + 'overlap_thresh': config.overlap_thresh, + 'clip_boxes': config.clip_boxes, + 'random_coef': config.random_coef, + 'pad_color': pad_color, + } + if min_padded_size_ratio: + kwargs['min_padded_size_ratio'] = tuple(min_padded_size_ratio) + if max_padded_size_ratio: + kwargs['max_padded_size_ratio'] = tuple(max_padded_size_ratio) + return (preprocessor.random_crop_pad_image, kwargs) + + if step_type == 'random_resize_method': + config = preprocessor_step_config.random_resize_method + return (preprocessor.random_resize_method, + { + 'target_size': [config.target_height, config.target_width], + }) + + if step_type == 'resize_image': + config = preprocessor_step_config.resize_image + method = RESIZE_METHOD_MAP[config.method] + return (preprocessor.resize_image, + { + 'new_height': config.new_height, + 'new_width': config.new_width, + 'method': method + }) + + if step_type == 'random_self_concat_image': + config = preprocessor_step_config.random_self_concat_image + return (preprocessor.random_self_concat_image, { + 'concat_vertical_probability': config.concat_vertical_probability, + 'concat_horizontal_probability': config.concat_horizontal_probability + }) + + if step_type == 'ssd_random_crop': + config = preprocessor_step_config.ssd_random_crop + if config.operations: + min_object_covered = [op.min_object_covered for op in config.operations] + aspect_ratio_range = [(op.min_aspect_ratio, op.max_aspect_ratio) + for op in config.operations] + area_range = [(op.min_area, op.max_area) for op in config.operations] + overlap_thresh = [op.overlap_thresh for op in config.operations] + clip_boxes = [op.clip_boxes for op in config.operations] + random_coef = [op.random_coef for op in config.operations] + return (preprocessor.ssd_random_crop, + { + 'min_object_covered': min_object_covered, + 'aspect_ratio_range': aspect_ratio_range, + 'area_range': area_range, + 'overlap_thresh': overlap_thresh, + 'clip_boxes': clip_boxes, + 'random_coef': random_coef, + }) + return (preprocessor.ssd_random_crop, {}) + + if step_type == 'autoaugment_image': + config = preprocessor_step_config.autoaugment_image + return (preprocessor.autoaugment_image, { + 'policy_name': config.policy_name, + }) + + if step_type == 'drop_label_probabilistically': + config = preprocessor_step_config.drop_label_probabilistically + return (preprocessor.drop_label_probabilistically, { + 'dropped_label': config.label, + 'drop_probability': config.drop_probability, + }) + + if step_type == 'remap_labels': + config = preprocessor_step_config.remap_labels + return (preprocessor.remap_labels, { + 'original_labels': config.original_labels, + 'new_label': config.new_label + }) + + if step_type == 'ssd_random_crop_pad': + config = preprocessor_step_config.ssd_random_crop_pad + if config.operations: + min_object_covered = [op.min_object_covered for op in config.operations] + aspect_ratio_range = [(op.min_aspect_ratio, op.max_aspect_ratio) + for op in config.operations] + area_range = [(op.min_area, op.max_area) for op in config.operations] + overlap_thresh = [op.overlap_thresh 
for op in config.operations] + clip_boxes = [op.clip_boxes for op in config.operations] + random_coef = [op.random_coef for op in config.operations] + min_padded_size_ratio = [tuple(op.min_padded_size_ratio) + for op in config.operations] + max_padded_size_ratio = [tuple(op.max_padded_size_ratio) + for op in config.operations] + pad_color = [(op.pad_color_r, op.pad_color_g, op.pad_color_b) + for op in config.operations] + return (preprocessor.ssd_random_crop_pad, + { + 'min_object_covered': min_object_covered, + 'aspect_ratio_range': aspect_ratio_range, + 'area_range': area_range, + 'overlap_thresh': overlap_thresh, + 'clip_boxes': clip_boxes, + 'random_coef': random_coef, + 'min_padded_size_ratio': min_padded_size_ratio, + 'max_padded_size_ratio': max_padded_size_ratio, + 'pad_color': pad_color, + }) + return (preprocessor.ssd_random_crop_pad, {}) + + if step_type == 'ssd_random_crop_fixed_aspect_ratio': + config = preprocessor_step_config.ssd_random_crop_fixed_aspect_ratio + if config.operations: + min_object_covered = [op.min_object_covered for op in config.operations] + area_range = [(op.min_area, op.max_area) for op in config.operations] + overlap_thresh = [op.overlap_thresh for op in config.operations] + clip_boxes = [op.clip_boxes for op in config.operations] + random_coef = [op.random_coef for op in config.operations] + return (preprocessor.ssd_random_crop_fixed_aspect_ratio, + { + 'min_object_covered': min_object_covered, + 'aspect_ratio': config.aspect_ratio, + 'area_range': area_range, + 'overlap_thresh': overlap_thresh, + 'clip_boxes': clip_boxes, + 'random_coef': random_coef, + }) + return (preprocessor.ssd_random_crop_fixed_aspect_ratio, {}) + + if step_type == 'ssd_random_crop_pad_fixed_aspect_ratio': + config = preprocessor_step_config.ssd_random_crop_pad_fixed_aspect_ratio + kwargs = {} + aspect_ratio = config.aspect_ratio + if aspect_ratio: + kwargs['aspect_ratio'] = aspect_ratio + min_padded_size_ratio = config.min_padded_size_ratio + if min_padded_size_ratio: + if len(min_padded_size_ratio) != 2: + raise ValueError('min_padded_size_ratio should have 2 elements if set!') + kwargs['min_padded_size_ratio'] = tuple(min_padded_size_ratio) + max_padded_size_ratio = config.max_padded_size_ratio + if max_padded_size_ratio: + if len(max_padded_size_ratio) != 2: + raise ValueError('max_padded_size_ratio should have 2 elements if set!') + kwargs['max_padded_size_ratio'] = tuple(max_padded_size_ratio) + if config.operations: + kwargs['min_object_covered'] = [op.min_object_covered + for op in config.operations] + kwargs['aspect_ratio_range'] = [(op.min_aspect_ratio, op.max_aspect_ratio) + for op in config.operations] + kwargs['area_range'] = [(op.min_area, op.max_area) + for op in config.operations] + kwargs['overlap_thresh'] = [op.overlap_thresh for op in config.operations] + kwargs['clip_boxes'] = [op.clip_boxes for op in config.operations] + kwargs['random_coef'] = [op.random_coef for op in config.operations] + return (preprocessor.ssd_random_crop_pad_fixed_aspect_ratio, kwargs) + + raise ValueError('Unknown preprocessing step.') diff --git a/builders/preprocessor_builder_test.py b/builders/preprocessor_builder_test.py new file mode 100644 index 0000000..8e3343a --- /dev/null +++ b/builders/preprocessor_builder_test.py @@ -0,0 +1,728 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for preprocessor_builder."""
+
+import tensorflow as tf
+
+from google.protobuf import text_format
+
+from object_detection.builders import preprocessor_builder
+from object_detection.core import preprocessor
+from object_detection.protos import preprocessor_pb2
+
+
+class PreprocessorBuilderTest(tf.test.TestCase):
+
+  def assert_dictionary_close(self, dict1, dict2):
+    """Helper to check if two dicts with floats or integers are close."""
+    self.assertEqual(sorted(dict1.keys()), sorted(dict2.keys()))
+    for key in dict1:
+      value = dict1[key]
+      if isinstance(value, float):
+        self.assertAlmostEqual(value, dict2[key])
+      else:
+        self.assertEqual(value, dict2[key])
+
+  def test_build_normalize_image(self):
+    preprocessor_text_proto = """
+      normalize_image {
+        original_minval: 0.0
+        original_maxval: 255.0
+        target_minval: -1.0
+        target_maxval: 1.0
+      }
+    """
+    preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+    text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+    function, args = preprocessor_builder.build(preprocessor_proto)
+    self.assertEqual(function, preprocessor.normalize_image)
+    self.assertEqual(args, {
+        'original_minval': 0.0,
+        'original_maxval': 255.0,
+        'target_minval': -1.0,
+        'target_maxval': 1.0,
+    })
+
+  def test_build_random_horizontal_flip(self):
+    preprocessor_text_proto = """
+      random_horizontal_flip {
+        keypoint_flip_permutation: 1
+        keypoint_flip_permutation: 0
+        keypoint_flip_permutation: 2
+        keypoint_flip_permutation: 3
+        keypoint_flip_permutation: 5
+        keypoint_flip_permutation: 4
+      }
+    """
+    preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+    text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+    function, args = preprocessor_builder.build(preprocessor_proto)
+    self.assertEqual(function, preprocessor.random_horizontal_flip)
+    self.assertEqual(args, {'keypoint_flip_permutation': (1, 0, 2, 3, 5, 4)})
+
+  def test_build_random_vertical_flip(self):
+    preprocessor_text_proto = """
+      random_vertical_flip {
+        keypoint_flip_permutation: 1
+        keypoint_flip_permutation: 0
+        keypoint_flip_permutation: 2
+        keypoint_flip_permutation: 3
+        keypoint_flip_permutation: 5
+        keypoint_flip_permutation: 4
+      }
+    """
+    preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+    text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+    function, args = preprocessor_builder.build(preprocessor_proto)
+    self.assertEqual(function, preprocessor.random_vertical_flip)
+    self.assertEqual(args, {'keypoint_flip_permutation': (1, 0, 2, 3, 5, 4)})
+
+  def test_build_random_rotation90(self):
+    preprocessor_text_proto = """
+      random_rotation90 {}
+    """
+    preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+    text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+    function, args = preprocessor_builder.build(preprocessor_proto)
+    self.assertEqual(function, preprocessor.random_rotation90)
+    self.assertEqual(args, {})
+
+  def test_build_random_pixel_value_scale(self):
+    preprocessor_text_proto = """
+      random_pixel_value_scale {
+        minval: 0.8
+        maxval: 1.2
+
} + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_pixel_value_scale) + self.assert_dictionary_close(args, {'minval': 0.8, 'maxval': 1.2}) + + def test_build_random_image_scale(self): + preprocessor_text_proto = """ + random_image_scale { + min_scale_ratio: 0.8 + max_scale_ratio: 2.2 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_image_scale) + self.assert_dictionary_close(args, {'min_scale_ratio': 0.8, + 'max_scale_ratio': 2.2}) + + def test_build_random_rgb_to_gray(self): + preprocessor_text_proto = """ + random_rgb_to_gray { + probability: 0.8 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_rgb_to_gray) + self.assert_dictionary_close(args, {'probability': 0.8}) + + def test_build_random_adjust_brightness(self): + preprocessor_text_proto = """ + random_adjust_brightness { + max_delta: 0.2 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_adjust_brightness) + self.assert_dictionary_close(args, {'max_delta': 0.2}) + + def test_build_random_adjust_contrast(self): + preprocessor_text_proto = """ + random_adjust_contrast { + min_delta: 0.7 + max_delta: 1.1 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_adjust_contrast) + self.assert_dictionary_close(args, {'min_delta': 0.7, 'max_delta': 1.1}) + + def test_build_random_adjust_hue(self): + preprocessor_text_proto = """ + random_adjust_hue { + max_delta: 0.01 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_adjust_hue) + self.assert_dictionary_close(args, {'max_delta': 0.01}) + + def test_build_random_adjust_saturation(self): + preprocessor_text_proto = """ + random_adjust_saturation { + min_delta: 0.75 + max_delta: 1.15 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_adjust_saturation) + self.assert_dictionary_close(args, {'min_delta': 0.75, 'max_delta': 1.15}) + + def test_build_random_distort_color(self): + preprocessor_text_proto = """ + random_distort_color { + color_ordering: 1 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_distort_color) + 
self.assertEqual(args, {'color_ordering': 1}) + + def test_build_random_jitter_boxes(self): + preprocessor_text_proto = """ + random_jitter_boxes { + ratio: 0.1 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_jitter_boxes) + self.assert_dictionary_close(args, {'ratio': 0.1}) + + def test_build_random_crop_image(self): + preprocessor_text_proto = """ + random_crop_image { + min_object_covered: 0.75 + min_aspect_ratio: 0.75 + max_aspect_ratio: 1.5 + min_area: 0.25 + max_area: 0.875 + overlap_thresh: 0.5 + clip_boxes: False + random_coef: 0.125 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_crop_image) + self.assertEqual(args, { + 'min_object_covered': 0.75, + 'aspect_ratio_range': (0.75, 1.5), + 'area_range': (0.25, 0.875), + 'overlap_thresh': 0.5, + 'clip_boxes': False, + 'random_coef': 0.125, + }) + + def test_build_random_pad_image(self): + preprocessor_text_proto = """ + random_pad_image { + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_pad_image) + self.assertEqual(args, { + 'min_image_size': None, + 'max_image_size': None, + 'pad_color': None, + }) + + def test_build_random_absolute_pad_image(self): + preprocessor_text_proto = """ + random_absolute_pad_image { + max_height_padding: 50 + max_width_padding: 100 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_absolute_pad_image) + self.assertEqual(args, { + 'max_height_padding': 50, + 'max_width_padding': 100, + 'pad_color': None, + }) + + def test_build_random_crop_pad_image(self): + preprocessor_text_proto = """ + random_crop_pad_image { + min_object_covered: 0.75 + min_aspect_ratio: 0.75 + max_aspect_ratio: 1.5 + min_area: 0.25 + max_area: 0.875 + overlap_thresh: 0.5 + clip_boxes: False + random_coef: 0.125 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_crop_pad_image) + self.assertEqual(args, { + 'min_object_covered': 0.75, + 'aspect_ratio_range': (0.75, 1.5), + 'area_range': (0.25, 0.875), + 'overlap_thresh': 0.5, + 'clip_boxes': False, + 'random_coef': 0.125, + 'pad_color': None, + }) + + def test_build_random_crop_pad_image_with_optional_parameters(self): + preprocessor_text_proto = """ + random_crop_pad_image { + min_object_covered: 0.75 + min_aspect_ratio: 0.75 + max_aspect_ratio: 1.5 + min_area: 0.25 + max_area: 0.875 + overlap_thresh: 0.5 + clip_boxes: False + random_coef: 0.125 + min_padded_size_ratio: 0.5 + min_padded_size_ratio: 0.75 + max_padded_size_ratio: 0.5 + max_padded_size_ratio: 0.75 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + 
function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_crop_pad_image) + self.assertEqual(args, { + 'min_object_covered': 0.75, + 'aspect_ratio_range': (0.75, 1.5), + 'area_range': (0.25, 0.875), + 'overlap_thresh': 0.5, + 'clip_boxes': False, + 'random_coef': 0.125, + 'min_padded_size_ratio': (0.5, 0.75), + 'max_padded_size_ratio': (0.5, 0.75), + 'pad_color': None, + }) + + def test_build_random_crop_to_aspect_ratio(self): + preprocessor_text_proto = """ + random_crop_to_aspect_ratio { + aspect_ratio: 0.85 + overlap_thresh: 0.35 + clip_boxes: False + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_crop_to_aspect_ratio) + self.assert_dictionary_close(args, {'aspect_ratio': 0.85, + 'overlap_thresh': 0.35, + 'clip_boxes': False}) + + def test_build_random_black_patches(self): + preprocessor_text_proto = """ + random_black_patches { + max_black_patches: 20 + probability: 0.95 + size_to_image_ratio: 0.12 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_black_patches) + self.assert_dictionary_close(args, {'max_black_patches': 20, + 'probability': 0.95, + 'size_to_image_ratio': 0.12}) + + def test_build_random_jpeg_quality(self): + preprocessor_text_proto = """ + random_jpeg_quality { + random_coef: 0.5 + min_jpeg_quality: 40 + max_jpeg_quality: 90 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Parse(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_jpeg_quality) + self.assert_dictionary_close(args, {'random_coef': 0.5, + 'min_jpeg_quality': 40, + 'max_jpeg_quality': 90}) + + def test_build_random_downscale_to_target_pixels(self): + preprocessor_text_proto = """ + random_downscale_to_target_pixels { + random_coef: 0.5 + min_target_pixels: 200 + max_target_pixels: 900 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Parse(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_downscale_to_target_pixels) + self.assert_dictionary_close(args, { + 'random_coef': 0.5, + 'min_target_pixels': 200, + 'max_target_pixels': 900 + }) + + def test_build_random_patch_gaussian(self): + preprocessor_text_proto = """ + random_patch_gaussian { + random_coef: 0.5 + min_patch_size: 10 + max_patch_size: 300 + min_gaussian_stddev: 0.2 + max_gaussian_stddev: 1.5 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Parse(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_patch_gaussian) + self.assert_dictionary_close(args, { + 'random_coef': 0.5, + 'min_patch_size': 10, + 'max_patch_size': 300, + 'min_gaussian_stddev': 0.2, + 'max_gaussian_stddev': 1.5 + }) + + def test_auto_augment_image(self): + preprocessor_text_proto = """ + autoaugment_image { + policy_name: 'v0' + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + 
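    # The builder is expected to pass the policy name through to the
+    # autoaugment op unchanged.
+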
text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.autoaugment_image) + self.assert_dictionary_close(args, {'policy_name': 'v0'}) + + def test_drop_label_probabilistically(self): + preprocessor_text_proto = """ + drop_label_probabilistically{ + label: 2 + drop_probability: 0.5 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.drop_label_probabilistically) + self.assert_dictionary_close(args, { + 'dropped_label': 2, + 'drop_probability': 0.5 + }) + + def test_remap_labels(self): + preprocessor_text_proto = """ + remap_labels{ + original_labels: 1 + original_labels: 2 + new_label: 3 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.remap_labels) + self.assert_dictionary_close(args, { + 'original_labels': [1, 2], + 'new_label': 3 + }) + + def test_build_random_resize_method(self): + preprocessor_text_proto = """ + random_resize_method { + target_height: 75 + target_width: 100 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_resize_method) + self.assert_dictionary_close(args, {'target_size': [75, 100]}) + + def test_build_scale_boxes_to_pixel_coordinates(self): + preprocessor_text_proto = """ + scale_boxes_to_pixel_coordinates {} + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.scale_boxes_to_pixel_coordinates) + self.assertEqual(args, {}) + + def test_build_resize_image(self): + preprocessor_text_proto = """ + resize_image { + new_height: 75 + new_width: 100 + method: BICUBIC + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.resize_image) + self.assertEqual(args, {'new_height': 75, + 'new_width': 100, + 'method': tf.image.ResizeMethod.BICUBIC}) + + def test_build_rgb_to_gray(self): + preprocessor_text_proto = """ + rgb_to_gray {} + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.rgb_to_gray) + self.assertEqual(args, {}) + + def test_build_subtract_channel_mean(self): + preprocessor_text_proto = """ + subtract_channel_mean { + means: [1.0, 2.0, 3.0] + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.subtract_channel_mean) + self.assertEqual(args, {'means': [1.0, 2.0, 3.0]}) + + def test_random_self_concat_image(self): + preprocessor_text_proto = """ + 
random_self_concat_image { + concat_vertical_probability: 0.5 + concat_horizontal_probability: 0.25 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.random_self_concat_image) + self.assertEqual(args, {'concat_vertical_probability': 0.5, + 'concat_horizontal_probability': 0.25}) + + def test_build_ssd_random_crop(self): + preprocessor_text_proto = """ + ssd_random_crop { + operations { + min_object_covered: 0.0 + min_aspect_ratio: 0.875 + max_aspect_ratio: 1.125 + min_area: 0.5 + max_area: 1.0 + overlap_thresh: 0.0 + clip_boxes: False + random_coef: 0.375 + } + operations { + min_object_covered: 0.25 + min_aspect_ratio: 0.75 + max_aspect_ratio: 1.5 + min_area: 0.5 + max_area: 1.0 + overlap_thresh: 0.25 + clip_boxes: True + random_coef: 0.375 + } + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.ssd_random_crop) + self.assertEqual(args, {'min_object_covered': [0.0, 0.25], + 'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)], + 'area_range': [(0.5, 1.0), (0.5, 1.0)], + 'overlap_thresh': [0.0, 0.25], + 'clip_boxes': [False, True], + 'random_coef': [0.375, 0.375]}) + + def test_build_ssd_random_crop_empty_operations(self): + preprocessor_text_proto = """ + ssd_random_crop { + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.ssd_random_crop) + self.assertEqual(args, {}) + + def test_build_ssd_random_crop_pad(self): + preprocessor_text_proto = """ + ssd_random_crop_pad { + operations { + min_object_covered: 0.0 + min_aspect_ratio: 0.875 + max_aspect_ratio: 1.125 + min_area: 0.5 + max_area: 1.0 + overlap_thresh: 0.0 + clip_boxes: False + random_coef: 0.375 + min_padded_size_ratio: [1.0, 1.0] + max_padded_size_ratio: [2.0, 2.0] + pad_color_r: 0.5 + pad_color_g: 0.5 + pad_color_b: 0.5 + } + operations { + min_object_covered: 0.25 + min_aspect_ratio: 0.75 + max_aspect_ratio: 1.5 + min_area: 0.5 + max_area: 1.0 + overlap_thresh: 0.25 + clip_boxes: True + random_coef: 0.375 + min_padded_size_ratio: [1.0, 1.0] + max_padded_size_ratio: [2.0, 2.0] + pad_color_r: 0.5 + pad_color_g: 0.5 + pad_color_b: 0.5 + } + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.ssd_random_crop_pad) + self.assertEqual(args, {'min_object_covered': [0.0, 0.25], + 'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)], + 'area_range': [(0.5, 1.0), (0.5, 1.0)], + 'overlap_thresh': [0.0, 0.25], + 'clip_boxes': [False, True], + 'random_coef': [0.375, 0.375], + 'min_padded_size_ratio': [(1.0, 1.0), (1.0, 1.0)], + 'max_padded_size_ratio': [(2.0, 2.0), (2.0, 2.0)], + 'pad_color': [(0.5, 0.5, 0.5), (0.5, 0.5, 0.5)]}) + + def test_build_ssd_random_crop_fixed_aspect_ratio(self): + preprocessor_text_proto = """ + ssd_random_crop_fixed_aspect_ratio { + operations { + min_object_covered: 0.0 + min_area: 0.5 + max_area: 1.0 + overlap_thresh: 0.0 + clip_boxes: False + random_coef: 0.375 + 
} + operations { + min_object_covered: 0.25 + min_area: 0.5 + max_area: 1.0 + overlap_thresh: 0.25 + clip_boxes: True + random_coef: 0.375 + } + aspect_ratio: 0.875 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.ssd_random_crop_fixed_aspect_ratio) + self.assertEqual(args, {'min_object_covered': [0.0, 0.25], + 'aspect_ratio': 0.875, + 'area_range': [(0.5, 1.0), (0.5, 1.0)], + 'overlap_thresh': [0.0, 0.25], + 'clip_boxes': [False, True], + 'random_coef': [0.375, 0.375]}) + + def test_build_ssd_random_crop_pad_fixed_aspect_ratio(self): + preprocessor_text_proto = """ + ssd_random_crop_pad_fixed_aspect_ratio { + operations { + min_object_covered: 0.0 + min_aspect_ratio: 0.875 + max_aspect_ratio: 1.125 + min_area: 0.5 + max_area: 1.0 + overlap_thresh: 0.0 + clip_boxes: False + random_coef: 0.375 + } + operations { + min_object_covered: 0.25 + min_aspect_ratio: 0.75 + max_aspect_ratio: 1.5 + min_area: 0.5 + max_area: 1.0 + overlap_thresh: 0.25 + clip_boxes: True + random_coef: 0.375 + } + aspect_ratio: 0.875 + min_padded_size_ratio: [1.0, 1.0] + max_padded_size_ratio: [2.0, 2.0] + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, + preprocessor.ssd_random_crop_pad_fixed_aspect_ratio) + self.assertEqual(args, {'min_object_covered': [0.0, 0.25], + 'aspect_ratio': 0.875, + 'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)], + 'area_range': [(0.5, 1.0), (0.5, 1.0)], + 'overlap_thresh': [0.0, 0.25], + 'clip_boxes': [False, True], + 'random_coef': [0.375, 0.375], + 'min_padded_size_ratio': (1.0, 1.0), + 'max_padded_size_ratio': (2.0, 2.0)}) + + def test_build_normalize_image_convert_class_logits_to_softmax(self): + preprocessor_text_proto = """ + convert_class_logits_to_softmax { + temperature: 2 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Merge(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.convert_class_logits_to_softmax) + self.assertEqual(args, {'temperature': 2}) + + +if __name__ == '__main__': + tf.test.main() diff --git a/builders/region_similarity_calculator_builder.py b/builders/region_similarity_calculator_builder.py new file mode 100644 index 0000000..8f35087 --- /dev/null +++ b/builders/region_similarity_calculator_builder.py @@ -0,0 +1,59 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+
+"""Builder for region similarity calculators."""
+
+from object_detection.core import region_similarity_calculator
+from object_detection.protos import region_similarity_calculator_pb2
+
+
+def build(region_similarity_calculator_config):
+  """Builds region similarity calculator based on the configuration.
+
+  Builds one of [IouSimilarity, IoaSimilarity, NegSqDistSimilarity,
+  ThresholdedIouSimilarity] objects. See protos/region_similarity_calculator.proto for details.
+
+  Args:
+    region_similarity_calculator_config: RegionSimilarityCalculator
+      configuration proto.
+
+  Returns:
+    region_similarity_calculator: RegionSimilarityCalculator object.
+
+  Raises:
+    ValueError: On unknown region similarity calculator.
+  """
+
+  if not isinstance(
+      region_similarity_calculator_config,
+      region_similarity_calculator_pb2.RegionSimilarityCalculator):
+    raise ValueError(
+        'region_similarity_calculator_config not of type '
+        'region_similarity_calculator_pb2.RegionSimilarityCalculator')
+
+  similarity_calculator = region_similarity_calculator_config.WhichOneof(
+      'region_similarity')
+  if similarity_calculator == 'iou_similarity':
+    return region_similarity_calculator.IouSimilarity()
+  if similarity_calculator == 'ioa_similarity':
+    return region_similarity_calculator.IoaSimilarity()
+  if similarity_calculator == 'neg_sq_dist_similarity':
+    return region_similarity_calculator.NegSqDistSimilarity()
+  if similarity_calculator == 'thresholded_iou_similarity':
+    return region_similarity_calculator.ThresholdedIouSimilarity(
+        region_similarity_calculator_config.thresholded_iou_similarity
+        .iou_threshold)
+
+  raise ValueError('Unknown region similarity calculator.')
diff --git a/builders/region_similarity_calculator_builder_test.py b/builders/region_similarity_calculator_builder_test.py
new file mode 100644
index 0000000..ca3a551
--- /dev/null
+++ b/builders/region_similarity_calculator_builder_test.py
@@ -0,0 +1,67 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for region_similarity_calculator_builder."""
+
+import tensorflow as tf
+
+from google.protobuf import text_format
+from object_detection.builders import region_similarity_calculator_builder
+from object_detection.core import region_similarity_calculator
+from object_detection.protos import region_similarity_calculator_pb2 as sim_calc_pb2
+
+
+class RegionSimilarityCalculatorBuilderTest(tf.test.TestCase):
+
+  def testBuildIoaSimilarityCalculator(self):
+    similarity_calc_text_proto = """
+      ioa_similarity {
+      }
+    """
+    similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator()
+    text_format.Merge(similarity_calc_text_proto, similarity_calc_proto)
+    similarity_calc = region_similarity_calculator_builder.build(
+        similarity_calc_proto)
+    self.assertTrue(isinstance(similarity_calc,
+                               region_similarity_calculator.IoaSimilarity))
+
+  def testBuildIouSimilarityCalculator(self):
+    similarity_calc_text_proto = """
+      iou_similarity {
+      }
+    """
+    similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator()
+    text_format.Merge(similarity_calc_text_proto, similarity_calc_proto)
+    similarity_calc = region_similarity_calculator_builder.build(
+        similarity_calc_proto)
+    self.assertTrue(isinstance(similarity_calc,
+                               region_similarity_calculator.IouSimilarity))
+
+  def testBuildNegSqDistSimilarityCalculator(self):
+    similarity_calc_text_proto = """
+      neg_sq_dist_similarity {
+      }
+    """
+    similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator()
+    text_format.Merge(similarity_calc_text_proto, similarity_calc_proto)
+    similarity_calc = region_similarity_calculator_builder.build(
+        similarity_calc_proto)
+    self.assertTrue(isinstance(similarity_calc,
+                               region_similarity_calculator.
+                               NegSqDistSimilarity))
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/builders/target_assigner_builder.py b/builders/target_assigner_builder.py
new file mode 100644
index 0000000..f6434f6
--- /dev/null
+++ b/builders/target_assigner_builder.py
@@ -0,0 +1,40 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""A function to build an object detection target assigner from configuration."""
+from object_detection.builders import box_coder_builder
+from object_detection.builders import matcher_builder
+from object_detection.builders import region_similarity_calculator_builder
+from object_detection.core import target_assigner
+
+
+def build(target_assigner_config):
+  """Builds a TargetAssigner object based on the config.
+
+  Args:
+    target_assigner_config: A target_assigner proto message containing config
+      for the desired target assigner.
+
+  Returns:
+    TargetAssigner object based on the config.
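+
+  A minimal usage sketch (config_text is a hypothetical text-format
+  TargetAssigner message such as the one in the unit test for this builder):
+
+    proto = target_assigner_pb2.TargetAssigner()
+    text_format.Merge(config_text, proto)
+    assigner = build(proto)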
+ """ + matcher_instance = matcher_builder.build(target_assigner_config.matcher) + similarity_calc_instance = region_similarity_calculator_builder.build( + target_assigner_config.similarity_calculator) + box_coder = box_coder_builder.build(target_assigner_config.box_coder) + return target_assigner.TargetAssigner( + matcher=matcher_instance, + similarity_calc=similarity_calc_instance, + box_coder_instance=box_coder) diff --git a/builders/target_assigner_builder_test.py b/builders/target_assigner_builder_test.py new file mode 100644 index 0000000..9ca71b1 --- /dev/null +++ b/builders/target_assigner_builder_test.py @@ -0,0 +1,50 @@ +"""Tests for google3.third_party.tensorflow_models.object_detection.builders.target_assigner_builder.""" +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import tensorflow as tf + +from google.protobuf import text_format + + +from object_detection.builders import target_assigner_builder +from object_detection.core import target_assigner +from object_detection.protos import target_assigner_pb2 + + +class TargetAssignerBuilderTest(tf.test.TestCase): + + def test_build_a_target_assigner(self): + target_assigner_text_proto = """ + matcher { + argmax_matcher {matched_threshold: 0.5} + } + similarity_calculator { + iou_similarity {} + } + box_coder { + faster_rcnn_box_coder {} + } + """ + target_assigner_proto = target_assigner_pb2.TargetAssigner() + text_format.Merge(target_assigner_text_proto, target_assigner_proto) + target_assigner_instance = target_assigner_builder.build( + target_assigner_proto) + self.assertIsInstance(target_assigner_instance, + target_assigner.TargetAssigner) + + +if __name__ == '__main__': + tf.test.main() diff --git a/core/__init__.py b/core/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/core/__init__.py @@ -0,0 +1 @@ + diff --git a/core/anchor_generator.py b/core/anchor_generator.py new file mode 100644 index 0000000..070b1d6 --- /dev/null +++ b/core/anchor_generator.py @@ -0,0 +1,171 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Base anchor generator. + +The job of the anchor generator is to create (or load) a collection +of bounding boxes to be used as anchors. 
+
+Generated anchors are assumed to match some convolutional grid or list of grid
+shapes. For example, we might want to generate anchors matching an 8x8
+feature map and a 4x4 feature map. If we place 3 anchors per grid location
+on the first feature map and 6 anchors per grid location on the second feature
+map, then 3*8*8 + 6*4*4 = 288 anchors are generated in total.
+
+To support fully convolutional settings, feature map shapes are passed
+dynamically at generation time. The number of anchors to place at each location
+is static --- implementations of AnchorGenerator must always be able to return
+the number of anchors that they use per location for each feature map.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from abc import ABCMeta
+from abc import abstractmethod
+
+import six
+from six.moves import zip
+import tensorflow as tf
+
+
+class AnchorGenerator(six.with_metaclass(ABCMeta, object)):
+  """Abstract base class for anchor generators."""
+
+  @abstractmethod
+  def name_scope(self):
+    """Name scope.
+
+    Must be defined by implementations.
+
+    Returns:
+      a string representing the name scope of the anchor generation operation.
+    """
+    pass
+
+  @property
+  def check_num_anchors(self):
+    """Whether to dynamically check the number of anchors generated.
+
+    Can be overridden by implementations that would like to disable this
+    behavior.
+
+    Returns:
+      a boolean controlling whether the Generate function should dynamically
+      check the number of anchors generated against the mathematically
+      expected number of anchors.
+    """
+    return True
+
+  @abstractmethod
+  def num_anchors_per_location(self):
+    """Returns the number of anchors per spatial location.
+
+    Returns:
+      a list of integers, one for each expected feature map to be passed to
+      the `generate` function.
+    """
+    pass
+
+  def generate(self, feature_map_shape_list, **params):
+    """Generates a collection of bounding boxes to be used as anchors.
+
+    TODO(rathodv): remove **params from argument list and make stride and
+    offsets (for multiple_grid_anchor_generator) constructor arguments.
+
+    Args:
+      feature_map_shape_list: list of (height, width) pairs in the format
+        [(height_0, width_0), (height_1, width_1), ...] that the generated
+        anchors must align with. Pairs can be provided as 1-dimensional
+        integer tensors of length 2 or simply as tuples of integers.
+      **params: parameters for anchor generation op
+
+    Returns:
+      boxes_list: a list of BoxLists each holding anchor boxes corresponding to
+        the input feature map shapes.
+
+    Raises:
+      ValueError: if the number of feature map shapes does not match the length
+        of num_anchors_per_location.
+    """
+    if self.check_num_anchors and (
+        len(feature_map_shape_list) != len(self.num_anchors_per_location())):
+      raise ValueError('Number of feature maps is expected to equal the length '
+                       'of `num_anchors_per_location`.')
+    with tf.name_scope(self.name_scope()):
+      anchors_list = self._generate(feature_map_shape_list, **params)
+      if self.check_num_anchors:
+        with tf.control_dependencies([
+            self._assert_correct_number_of_anchors(
+                anchors_list, feature_map_shape_list)]):
+          for item in anchors_list:
+            item.set(tf.identity(item.get()))
+      return anchors_list
+
+  @abstractmethod
+  def _generate(self, feature_map_shape_list, **params):
+    """To be overridden by implementations.
+
+    Args:
+      feature_map_shape_list: list of (height, width) pairs in the format
+        [(height_0, width_0), (height_1, width_1), ...]
that the generated
+        anchors must align with.
+      **params: parameters for anchor generation op
+
+    Returns:
+      boxes_list: a list of BoxList, each holding a collection of N anchor
+        boxes.
+    """
+    pass
+
+  def anchor_index_to_feature_map_index(self, boxlist_list):
+    """Returns a 1-D array of feature map indices for each anchor.
+
+    Args:
+      boxlist_list: a list of BoxList, each holding a collection of N anchor
+        boxes. This list is produced in self.generate().
+
+    Returns:
+      A [num_anchors] integer array, where each element indicates which feature
+      map the anchor belongs to.
+    """
+    feature_map_indices_list = []
+    for i, boxes in enumerate(boxlist_list):
+      feature_map_indices_list.append(
+          i * tf.ones([boxes.num_boxes()], dtype=tf.int32))
+    return tf.concat(feature_map_indices_list, axis=0)
+
+  def _assert_correct_number_of_anchors(self, anchors_list,
+                                        feature_map_shape_list):
+    """Asserts that the correct number of anchors was generated.
+
+    Args:
+      anchors_list: A list of box_list.BoxList objects holding the generated
+        anchors.
+      feature_map_shape_list: list of (height, width) pairs in the format
+        [(height_0, width_0), (height_1, width_1), ...] that the generated
+        anchors must align with.
+    Returns:
+      Op that raises InvalidArgumentError if the number of anchors does not
+        match the number of expected anchors.
+    """
+    expected_num_anchors = 0
+    actual_num_anchors = 0
+    for num_anchors_per_location, feature_map_shape, anchors in zip(
+        self.num_anchors_per_location(), feature_map_shape_list, anchors_list):
+      expected_num_anchors += (num_anchors_per_location
+                               * feature_map_shape[0]
+                               * feature_map_shape[1])
+      actual_num_anchors += anchors.num_boxes()
+    return tf.assert_equal(expected_num_anchors, actual_num_anchors)
diff --git a/core/balanced_positive_negative_sampler.py b/core/balanced_positive_negative_sampler.py
new file mode 100644
index 0000000..89c1fc7
--- /dev/null
+++ b/core/balanced_positive_negative_sampler.py
@@ -0,0 +1,266 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Class to subsample minibatches by balancing positives and negatives.
+
+Subsamples minibatches based on a pre-specified positive fraction in range
+[0,1]. The class presumes there are many more negatives than positive examples:
+if the desired batch_size cannot be achieved with the pre-specified positive
+fraction, it fills the rest with negative examples. If this is not sufficient
+for obtaining the desired batch_size, it returns fewer examples.
+
+The main function to call is Subsample(self, indicator, labels). For
+convenience, one can also call SubsampleWeights(self, weights, labels), which
+is defined in the minibatch_sampler base class.
+
+When is_static is True, it implements a method that guarantees static shapes.
+It also ensures that the length of the output of the subsample is always
+batch_size, even when the number of examples set to True in indicator is less
+than batch_size.
+"""
+
+import tensorflow as tf
+
+from object_detection.core import minibatch_sampler
+from object_detection.utils import ops
+
+
+class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
+  """Subsamples minibatches to a desired balance of positives and negatives."""
+
+  def __init__(self, positive_fraction=0.5, is_static=False):
+    """Constructs a minibatch sampler.
+
+    Args:
+      positive_fraction: desired fraction of positive examples (scalar in
+        [0,1]) in the batch.
+      is_static: If True, uses an implementation with static shape guarantees.
+
+    Raises:
+      ValueError: if positive_fraction < 0, or positive_fraction > 1
+    """
+    if positive_fraction < 0 or positive_fraction > 1:
+      raise ValueError('positive_fraction should be in range [0,1]. '
+                       'Received: %s.' % positive_fraction)
+    self._positive_fraction = positive_fraction
+    self._is_static = is_static
+
+  def _get_num_pos_neg_samples(self, sorted_indices_tensor, sample_size):
+    """Counts the numbers of positive and negative examples to be sampled.
+
+    Args:
+      sorted_indices_tensor: A sorted int32 tensor of shape [N] which contains
+        the signed indices of the examples where the sign is based on the label
+        value. The examples that cannot be sampled are set to 0. At most
+        sample_size*positive_fraction positive examples are sampled; the
+        remainder is taken from the negative examples.
+      sample_size: Size of subsamples.
+
+    Returns:
+      A tuple containing the number of positive and negative labels in the
+      subsample.
+    """
+    input_length = tf.shape(sorted_indices_tensor)[0]
+    valid_positive_index = tf.greater(sorted_indices_tensor,
+                                      tf.zeros(input_length, tf.int32))
+    num_sampled_pos = tf.reduce_sum(tf.cast(valid_positive_index, tf.int32))
+    max_num_positive_samples = tf.constant(
+        int(sample_size * self._positive_fraction), tf.int32)
+    num_positive_samples = tf.minimum(max_num_positive_samples, num_sampled_pos)
+    num_negative_samples = tf.constant(sample_size,
+                                       tf.int32) - num_positive_samples
+
+    return num_positive_samples, num_negative_samples
+
+  def _get_values_from_start_and_end(self, input_tensor, num_start_samples,
+                                     num_end_samples, total_num_samples):
+    """Slices the first num_start_samples and last num_end_samples values.
+
+    Args:
+      input_tensor: An int32 tensor of shape [N] to be sliced.
+      num_start_samples: Number of examples to be sliced from the beginning
+        of the input tensor.
+      num_end_samples: Number of examples to be sliced from the end of the
+        input tensor.
+      total_num_samples: Sum of num_start_samples and num_end_samples. This
+        should be a scalar.
+
+    Returns:
+      A tensor containing the first num_start_samples and last num_end_samples
+      from input_tensor.
+
+    """
+    input_length = tf.shape(input_tensor)[0]
+    start_positions = tf.less(tf.range(input_length), num_start_samples)
+    end_positions = tf.greater_equal(
+        tf.range(input_length), input_length - num_end_samples)
+    selected_positions = tf.logical_or(start_positions, end_positions)
+    selected_positions = tf.cast(selected_positions, tf.float32)
+    indexed_positions = tf.multiply(tf.cumsum(selected_positions),
+                                    selected_positions)
+    one_hot_selector = tf.one_hot(tf.cast(indexed_positions, tf.int32) - 1,
+                                  total_num_samples,
+                                  dtype=tf.float32)
+    return tf.cast(tf.tensordot(tf.cast(input_tensor, tf.float32),
+                                one_hot_selector, axes=[0, 0]), tf.int32)
+
+  def _static_subsample(self, indicator, batch_size, labels):
+    """Returns subsampled minibatch.
+
+    Args:
+      indicator: boolean tensor of shape [N] whose True entries can be sampled.
+        N should be a compile-time constant.
+      batch_size: desired batch size. This scalar cannot be None.
+      labels: boolean tensor of shape [N] denoting positive(=True) and negative
+        (=False) examples. N should be a compile-time constant.
+
+    Returns:
+      sampled_idx_indicator: boolean tensor of shape [N], True for entries which
+        are sampled. It ensures the length of the output of the subsample is
+        always batch_size, even when the number of examples set to True in
+        indicator is less than batch_size.
+
+    Raises:
+      ValueError: if labels and indicator are not 1D boolean tensors.
+    """
+    # Check if indicator and labels have a static size.
+    if not indicator.shape.is_fully_defined():
+      raise ValueError('indicator must be static in shape when is_static is '
+                       'True')
+    if not labels.shape.is_fully_defined():
+      raise ValueError('labels must be static in shape when is_static is '
+                       'True')
+    if not isinstance(batch_size, int):
+      raise ValueError('batch_size has to be an integer when is_static is '
+                       'True.')
+
+    input_length = tf.shape(indicator)[0]
+
+    # Set the number of examples set True in indicator to be at least
+    # batch_size.
+    num_true_sampled = tf.reduce_sum(tf.cast(indicator, tf.float32))
+    additional_false_sample = tf.less_equal(
+        tf.cumsum(tf.cast(tf.logical_not(indicator), tf.float32)),
+        batch_size - num_true_sampled)
+    indicator = tf.logical_or(indicator, additional_false_sample)
+
+    # Shuffle indicator and label. Need to store the permutation to restore
+    # the order post sampling.
+    permutation = tf.random_shuffle(tf.range(input_length))
+    indicator = ops.matmul_gather_on_zeroth_axis(
+        tf.cast(indicator, tf.float32), permutation)
+    labels = ops.matmul_gather_on_zeroth_axis(
+        tf.cast(labels, tf.float32), permutation)
+
+    # index (starting from 1) when indicator is True, 0 when False
+    indicator_idx = tf.where(
+        tf.cast(indicator, tf.bool), tf.range(1, input_length + 1),
+        tf.zeros(input_length, tf.int32))
+
+    # Replace -1 for negative, +1 for positive labels
+    signed_label = tf.where(
+        tf.cast(labels, tf.bool), tf.ones(input_length, tf.int32),
+        tf.scalar_mul(-1, tf.ones(input_length, tf.int32)))
+    # negative of index for negative label, positive index for positive label,
+    # 0 when indicator is False.
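+    # Sorting the signed indices in descending order (tf.nn.top_k below)
+    # groups sampleable positives at the front and sampleable negatives at the
+    # back, so the two groups can be sliced from opposite ends of the tensor.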
+    signed_indicator_idx = tf.multiply(indicator_idx, signed_label)
+    sorted_signed_indicator_idx = tf.nn.top_k(
+        signed_indicator_idx, input_length, sorted=True).values
+
+    [num_positive_samples,
+     num_negative_samples] = self._get_num_pos_neg_samples(
+         sorted_signed_indicator_idx, batch_size)
+
+    sampled_idx = self._get_values_from_start_and_end(
+        sorted_signed_indicator_idx, num_positive_samples,
+        num_negative_samples, batch_size)
+
+    # Shift the indices to start from 0 and remove any samples that are set as
+    # False.
+    sampled_idx = tf.abs(sampled_idx) - tf.ones(batch_size, tf.int32)
+    sampled_idx = tf.multiply(
+        tf.cast(tf.greater_equal(sampled_idx, tf.constant(0)), tf.int32),
+        sampled_idx)
+
+    sampled_idx_indicator = tf.cast(tf.reduce_sum(
+        tf.one_hot(sampled_idx, depth=input_length),
+        axis=0), tf.bool)
+
+    # project back the order based on stored permutations
+    reprojections = tf.one_hot(permutation, depth=input_length,
+                               dtype=tf.float32)
+    return tf.cast(tf.tensordot(
+        tf.cast(sampled_idx_indicator, tf.float32),
+        reprojections, axes=[0, 0]), tf.bool)
+
+  def subsample(self, indicator, batch_size, labels, scope=None):
+    """Returns subsampled minibatch.
+
+    Args:
+      indicator: boolean tensor of shape [N] whose True entries can be sampled.
+      batch_size: desired batch size. If None, keeps all positive samples and
+        randomly selects negative samples so that the positive sample fraction
+        matches self._positive_fraction. It cannot be None if is_static is
+        True.
+      labels: boolean tensor of shape [N] denoting positive(=True) and negative
+        (=False) examples.
+      scope: name scope.
+
+    Returns:
+      sampled_idx_indicator: boolean tensor of shape [N], True for entries which
+        are sampled.
+
+    Raises:
+      ValueError: if labels and indicator are not 1D boolean tensors.
+    """
+    if len(indicator.get_shape().as_list()) != 1:
+      raise ValueError('indicator must be 1 dimensional, got a tensor of '
+                       'shape %s' % indicator.get_shape())
+    if len(labels.get_shape().as_list()) != 1:
+      raise ValueError('labels must be 1 dimensional, got a tensor of '
+                       'shape %s' % labels.get_shape())
+    if labels.dtype != tf.bool:
+      raise ValueError('labels should be of type bool. Received: %s' %
+                       labels.dtype)
+    if indicator.dtype != tf.bool:
+      raise ValueError('indicator should be of type bool.
Received: %s' % + indicator.dtype) + with tf.name_scope(scope, 'BalancedPositiveNegativeSampler'): + if self._is_static: + return self._static_subsample(indicator, batch_size, labels) + + else: + # Only sample from indicated samples + negative_idx = tf.logical_not(labels) + positive_idx = tf.logical_and(labels, indicator) + negative_idx = tf.logical_and(negative_idx, indicator) + + # Sample positive and negative samples separately + if batch_size is None: + max_num_pos = tf.reduce_sum(tf.cast(positive_idx, dtype=tf.int32)) + else: + max_num_pos = int(self._positive_fraction * batch_size) + sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos) + num_sampled_pos = tf.reduce_sum(tf.cast(sampled_pos_idx, tf.int32)) + if batch_size is None: + negative_positive_ratio = ( + 1 - self._positive_fraction) / self._positive_fraction + max_num_neg = tf.cast( + negative_positive_ratio * + tf.cast(num_sampled_pos, dtype=tf.float32), + dtype=tf.int32) + else: + max_num_neg = batch_size - num_sampled_pos + sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg) + + return tf.logical_or(sampled_pos_idx, sampled_neg_idx) diff --git a/core/balanced_positive_negative_sampler_test.py b/core/balanced_positive_negative_sampler_test.py new file mode 100644 index 0000000..1df28e4 --- /dev/null +++ b/core/balanced_positive_negative_sampler_test.py @@ -0,0 +1,204 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for object_detection.core.balanced_positive_negative_sampler.""" + +import numpy as np +import tensorflow as tf + +from object_detection.core import balanced_positive_negative_sampler +from object_detection.utils import test_case + + +class BalancedPositiveNegativeSamplerTest(test_case.TestCase): + + def test_subsample_all_examples_dynamic(self): + numpy_labels = np.random.permutation(300) + indicator = tf.constant(np.ones(300) == 1) + numpy_labels = (numpy_labels - 200) > 0 + + labels = tf.constant(numpy_labels) + + sampler = ( + balanced_positive_negative_sampler.BalancedPositiveNegativeSampler()) + is_sampled = sampler.subsample(indicator, 64, labels) + with self.test_session() as sess: + is_sampled = sess.run(is_sampled) + self.assertTrue(sum(is_sampled) == 64) + self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 32) + self.assertTrue(sum(np.logical_and( + np.logical_not(numpy_labels), is_sampled)) == 32) + + def test_subsample_all_examples_static(self): + numpy_labels = np.random.permutation(300) + indicator = np.array(np.ones(300) == 1, np.bool) + numpy_labels = (numpy_labels - 200) > 0 + + labels = np.array(numpy_labels, np.bool) + + def graph_fn(indicator, labels): + sampler = ( + balanced_positive_negative_sampler.BalancedPositiveNegativeSampler( + is_static=True)) + return sampler.subsample(indicator, 64, labels) + + is_sampled = self.execute(graph_fn, [indicator, labels]) + self.assertTrue(sum(is_sampled) == 64) + self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 32) + self.assertTrue(sum(np.logical_and( + np.logical_not(numpy_labels), is_sampled)) == 32) + + def test_subsample_selection_dynamic(self): + # Test random sampling when only some examples can be sampled: + # 100 samples, 20 positives, 10 positives cannot be sampled + numpy_labels = np.arange(100) + numpy_indicator = numpy_labels < 90 + indicator = tf.constant(numpy_indicator) + numpy_labels = (numpy_labels - 80) >= 0 + + labels = tf.constant(numpy_labels) + + sampler = ( + balanced_positive_negative_sampler.BalancedPositiveNegativeSampler()) + is_sampled = sampler.subsample(indicator, 64, labels) + with self.test_session() as sess: + is_sampled = sess.run(is_sampled) + self.assertTrue(sum(is_sampled) == 64) + self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 10) + self.assertTrue(sum(np.logical_and( + np.logical_not(numpy_labels), is_sampled)) == 54) + self.assertAllEqual(is_sampled, np.logical_and(is_sampled, + numpy_indicator)) + + def test_subsample_selection_static(self): + # Test random sampling when only some examples can be sampled: + # 100 samples, 20 positives, 10 positives cannot be sampled. 
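+    # With positive_fraction=0.5 and batch_size=64 the sampler may draw at
+    # most 32 positives; only 10 are available here, so the remaining 54
+    # slots are expected to be filled with negatives.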
+    numpy_labels = np.arange(100)
+    numpy_indicator = numpy_labels < 90
+    indicator = np.array(numpy_indicator, np.bool)
+    numpy_labels = (numpy_labels - 80) >= 0
+
+    labels = np.array(numpy_labels, np.bool)
+
+    def graph_fn(indicator, labels):
+      sampler = (
+          balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
+              is_static=True))
+      return sampler.subsample(indicator, 64, labels)
+
+    is_sampled = self.execute(graph_fn, [indicator, labels])
+    self.assertTrue(sum(is_sampled) == 64)
+    self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 10)
+    self.assertTrue(sum(np.logical_and(
+        np.logical_not(numpy_labels), is_sampled)) == 54)
+    self.assertAllEqual(is_sampled, np.logical_and(is_sampled, numpy_indicator))
+
+  def test_subsample_selection_larger_batch_size_dynamic(self):
+    # Test random sampling when the total number of examples that can be
+    # sampled is less than the batch size:
+    # 100 samples, 50 positives, 40 positives cannot be sampled, batch size 64.
+    numpy_labels = np.arange(100)
+    numpy_indicator = numpy_labels < 60
+    indicator = tf.constant(numpy_indicator)
+    numpy_labels = (numpy_labels - 50) >= 0
+
+    labels = tf.constant(numpy_labels)
+
+    sampler = (
+        balanced_positive_negative_sampler.BalancedPositiveNegativeSampler())
+    is_sampled = sampler.subsample(indicator, 64, labels)
+    with self.test_session() as sess:
+      is_sampled = sess.run(is_sampled)
+      self.assertTrue(sum(is_sampled) == 60)
+      self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 10)
+      self.assertTrue(
+          sum(np.logical_and(np.logical_not(numpy_labels), is_sampled)) == 50)
+      self.assertAllEqual(is_sampled, np.logical_and(is_sampled,
+                                                     numpy_indicator))
+
+  def test_subsample_selection_larger_batch_size_static(self):
+    # Test random sampling when the total number of examples that can be
+    # sampled is less than the batch size:
+    # 100 samples, 50 positives, 40 positives cannot be sampled, batch size 64.
+    # It should still return 64 samples, 4 of which could not otherwise have
+    # been sampled.
+    numpy_labels = np.arange(100)
+    numpy_indicator = numpy_labels < 60
+    indicator = np.array(numpy_indicator, np.bool)
+    numpy_labels = (numpy_labels - 50) >= 0
+
+    labels = np.array(numpy_labels, np.bool)
+
+    def graph_fn(indicator, labels):
+      sampler = (
+          balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
+              is_static=True))
+      return sampler.subsample(indicator, 64, labels)
+
+    is_sampled = self.execute(graph_fn, [indicator, labels])
+    self.assertTrue(sum(is_sampled) == 64)
+    self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) >= 10)
+    self.assertTrue(
+        sum(np.logical_and(np.logical_not(numpy_labels), is_sampled)) >= 50)
+    self.assertTrue(sum(np.logical_and(is_sampled, numpy_indicator)) == 60)
+
+  def test_subsample_selection_no_batch_size(self):
+    # Test random sampling when only some examples can be sampled:
+    # 1000 samples, 6 positives (5 can be sampled).
+    numpy_labels = np.arange(1000)
+    numpy_indicator = numpy_labels < 999
+    indicator = tf.constant(numpy_indicator)
+    numpy_labels = (numpy_labels - 994) >= 0
+
+    labels = tf.constant(numpy_labels)
+
+    sampler = (balanced_positive_negative_sampler.
+ BalancedPositiveNegativeSampler(0.01)) + is_sampled = sampler.subsample(indicator, None, labels) + with self.test_session() as sess: + is_sampled = sess.run(is_sampled) + self.assertTrue(sum(is_sampled) == 500) + self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 5) + self.assertTrue(sum(np.logical_and( + np.logical_not(numpy_labels), is_sampled)) == 495) + self.assertAllEqual(is_sampled, np.logical_and(is_sampled, + numpy_indicator)) + + def test_subsample_selection_no_batch_size_static(self): + labels = tf.constant([[True, False, False]]) + indicator = tf.constant([True, False, True]) + sampler = ( + balanced_positive_negative_sampler.BalancedPositiveNegativeSampler()) + with self.assertRaises(ValueError): + sampler.subsample(indicator, None, labels) + + def test_raises_error_with_incorrect_label_shape(self): + labels = tf.constant([[True, False, False]]) + indicator = tf.constant([True, False, True]) + sampler = (balanced_positive_negative_sampler. + BalancedPositiveNegativeSampler()) + with self.assertRaises(ValueError): + sampler.subsample(indicator, 64, labels) + + def test_raises_error_with_incorrect_indicator_shape(self): + labels = tf.constant([True, False, False]) + indicator = tf.constant([[True, False, True]]) + sampler = (balanced_positive_negative_sampler. + BalancedPositiveNegativeSampler()) + with self.assertRaises(ValueError): + sampler.subsample(indicator, 64, labels) + +if __name__ == '__main__': + tf.test.main() diff --git a/core/batch_multiclass_nms_test.py b/core/batch_multiclass_nms_test.py new file mode 100644 index 0000000..cd0c56b --- /dev/null +++ b/core/batch_multiclass_nms_test.py @@ -0,0 +1,721 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for google3.third_party.tensorflow_models.object_detection.core.batch_multiclass_nms.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from absl.testing import parameterized +import numpy as np +from six.moves import range +import tensorflow as tf +from object_detection.core import post_processing +from object_detection.utils import test_case + + +class BatchMulticlassNonMaxSuppressionTest(test_case.TestCase, + parameterized.TestCase): + + @parameterized.named_parameters(('', False), ('_use_static_shapes', True)) + def test_batch_multiclass_nms_with_batch_size_1(self, use_static_shapes): + boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], + [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], + [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], + [[0, 10, 1, 11], [0, 10, 1, 11]], + [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], + [[0, 100, 1, 101], [0, 100, 1, 101]], + [[0, 1000, 1, 1002], [0, 999, 2, 1004]], + [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], + tf.float32) + scores = tf.constant([[[.9, 0.01], [.75, 0.05], + [.6, 0.01], [.95, 0], + [.5, 0.01], [.3, 0.01], + [.01, .85], [.01, .5]]]) + score_thresh = 0.1 + iou_thresh = .5 + max_output_size = 4 + + exp_nms_corners = [[[0, 10, 1, 11], + [0, 0, 1, 1], + [0, 999, 2, 1004], + [0, 100, 1, 101]]] + exp_nms_scores = [[.95, .9, .85, .3]] + exp_nms_classes = [[0, 0, 1, 0]] + + (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, + nmsed_additional_fields, + num_detections) = post_processing.batch_multiclass_non_max_suppression( + boxes, + scores, + score_thresh, + iou_thresh, + max_size_per_class=max_output_size, + max_total_size=max_output_size, + use_static_shapes=use_static_shapes) + + self.assertIsNone(nmsed_masks) + self.assertIsNone(nmsed_additional_fields) + + with self.test_session() as sess: + (nmsed_boxes, nmsed_scores, nmsed_classes, + num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, + num_detections]) + self.assertAllClose(nmsed_boxes, exp_nms_corners) + self.assertAllClose(nmsed_scores, exp_nms_scores) + self.assertAllClose(nmsed_classes, exp_nms_classes) + self.assertEqual(num_detections, [4]) + + def test_batch_iou_with_negative_data(self): + boxes = tf.constant([[[0, -0.01, 0.1, 1.1], [0, 0.2, 0.2, 5.0], + [0, -0.01, 0.1, 1.], [-1, -1, -1, -1]]], tf.float32) + iou = post_processing.batch_iou(boxes, boxes) + expected_iou = [[[0.99999994, 0.0917431, 0.9099099, -1.], + [0.0917431, 1., 0.08154944, -1.], + [0.9099099, 0.08154944, 1., -1.], [-1., -1., -1., -1.]]] + with self.test_session() as sess: + iou = sess.run(iou) + self.assertAllClose(iou, expected_iou) + + @parameterized.parameters(False, True) + def test_batch_multiclass_nms_with_batch_size_2(self, use_dynamic_map_fn): + boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], + [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], + [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], + [[0, 10, 1, 11], [0, 10, 1, 11]]], + [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], + [[0, 100, 1, 101], [0, 100, 1, 101]], + [[0, 1000, 1, 1002], [0, 999, 2, 1004]], + [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], + tf.float32) + scores = tf.constant([[[.9, 0.01], [.75, 0.05], + [.6, 0.01], [.95, 0]], + [[.5, 0.01], [.3, 0.01], + [.01, .85], [.01, .5]]]) + score_thresh = 0.1 + iou_thresh = .5 + max_output_size = 4 + + exp_nms_corners = np.array([[[0, 10, 1, 11], + [0, 0, 1, 1], + [0, 0, 0, 0], + [0, 0, 0, 0]], + [[0, 999, 2, 1004], + [0, 10.1, 1, 11.1], + [0, 100, 1, 
101], + [0, 0, 0, 0]]]) + exp_nms_scores = np.array([[.95, .9, 0, 0], + [.85, .5, .3, 0]]) + exp_nms_classes = np.array([[0, 0, 0, 0], + [1, 0, 0, 0]]) + + (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, + nmsed_additional_fields, num_detections + ) = post_processing.batch_multiclass_non_max_suppression( + boxes, scores, score_thresh, iou_thresh, + max_size_per_class=max_output_size, max_total_size=max_output_size, + use_dynamic_map_fn=use_dynamic_map_fn) + + self.assertIsNone(nmsed_masks) + self.assertIsNone(nmsed_additional_fields) + # Check static shapes + self.assertAllEqual(nmsed_boxes.shape.as_list(), + exp_nms_corners.shape) + self.assertAllEqual(nmsed_scores.shape.as_list(), + exp_nms_scores.shape) + self.assertAllEqual(nmsed_classes.shape.as_list(), + exp_nms_classes.shape) + self.assertEqual(num_detections.shape.as_list(), [2]) + + with self.test_session() as sess: + (nmsed_boxes, nmsed_scores, nmsed_classes, + num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, + num_detections]) + self.assertAllClose(nmsed_boxes, exp_nms_corners) + self.assertAllClose(nmsed_scores, exp_nms_scores) + self.assertAllClose(nmsed_classes, exp_nms_classes) + self.assertAllClose(num_detections, [2, 3]) + + def test_batch_multiclass_nms_with_per_batch_clip_window(self): + boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], + [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], + [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], + [[0, 10, 1, 11], [0, 10, 1, 11]]], + [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], + [[0, 100, 1, 101], [0, 100, 1, 101]], + [[0, 1000, 1, 1002], [0, 999, 2, 1004]], + [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], + tf.float32) + scores = tf.constant([[[.9, 0.01], [.75, 0.05], + [.6, 0.01], [.95, 0]], + [[.5, 0.01], [.3, 0.01], + [.01, .85], [.01, .5]]]) + clip_window = tf.constant([0., 0., 200., 200.]) + score_thresh = 0.1 + iou_thresh = .5 + max_output_size = 4 + + exp_nms_corners = np.array([[[0, 10, 1, 11], + [0, 0, 1, 1], + [0, 0, 0, 0], + [0, 0, 0, 0]], + [[0, 10.1, 1, 11.1], + [0, 100, 1, 101], + [0, 0, 0, 0], + [0, 0, 0, 0]]]) + exp_nms_scores = np.array([[.95, .9, 0, 0], + [.5, .3, 0, 0]]) + exp_nms_classes = np.array([[0, 0, 0, 0], + [0, 0, 0, 0]]) + + (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, + nmsed_additional_fields, num_detections + ) = post_processing.batch_multiclass_non_max_suppression( + boxes, scores, score_thresh, iou_thresh, + max_size_per_class=max_output_size, max_total_size=max_output_size, + clip_window=clip_window) + + self.assertIsNone(nmsed_masks) + self.assertIsNone(nmsed_additional_fields) + # Check static shapes + self.assertAllEqual(nmsed_boxes.shape.as_list(), + exp_nms_corners.shape) + self.assertAllEqual(nmsed_scores.shape.as_list(), + exp_nms_scores.shape) + self.assertAllEqual(nmsed_classes.shape.as_list(), + exp_nms_classes.shape) + self.assertEqual(num_detections.shape.as_list(), [2]) + + with self.test_session() as sess: + (nmsed_boxes, nmsed_scores, nmsed_classes, + num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, + num_detections]) + self.assertAllClose(nmsed_boxes, exp_nms_corners) + self.assertAllClose(nmsed_scores, exp_nms_scores) + self.assertAllClose(nmsed_classes, exp_nms_classes) + self.assertAllClose(num_detections, [2, 2]) + + def test_batch_multiclass_nms_with_per_image_clip_window(self): + boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], + [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], + [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], + [[0, 10, 1, 11], [0, 10, 1, 11]]], + [[[0, 10.1, 1, 11.1], [0, 10.1, 
1, 11.1]], + [[0, 100, 1, 101], [0, 100, 1, 101]], + [[0, 1000, 1, 1002], [0, 999, 2, 1004]], + [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], + tf.float32) + scores = tf.constant([[[.9, 0.01], [.75, 0.05], + [.6, 0.01], [.95, 0]], + [[.5, 0.01], [.3, 0.01], + [.01, .85], [.01, .5]]]) + clip_window = tf.constant([[0., 0., 5., 5.], + [0., 0., 200., 200.]]) + score_thresh = 0.1 + iou_thresh = .5 + max_output_size = 4 + + exp_nms_corners = np.array([[[0, 0, 1, 1], + [0, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0]], + [[0, 10.1, 1, 11.1], + [0, 100, 1, 101], + [0, 0, 0, 0], + [0, 0, 0, 0]]]) + exp_nms_scores = np.array([[.9, 0., 0., 0.], + [.5, .3, 0, 0]]) + exp_nms_classes = np.array([[0, 0, 0, 0], + [0, 0, 0, 0]]) + + (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, + nmsed_additional_fields, num_detections + ) = post_processing.batch_multiclass_non_max_suppression( + boxes, scores, score_thresh, iou_thresh, + max_size_per_class=max_output_size, max_total_size=max_output_size, + clip_window=clip_window) + + self.assertIsNone(nmsed_masks) + self.assertIsNone(nmsed_additional_fields) + # Check static shapes + self.assertAllEqual(nmsed_boxes.shape.as_list(), + exp_nms_corners.shape) + self.assertAllEqual(nmsed_scores.shape.as_list(), + exp_nms_scores.shape) + self.assertAllEqual(nmsed_classes.shape.as_list(), + exp_nms_classes.shape) + self.assertEqual(num_detections.shape.as_list(), [2]) + + with self.test_session() as sess: + (nmsed_boxes, nmsed_scores, nmsed_classes, + num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, + num_detections]) + self.assertAllClose(nmsed_boxes, exp_nms_corners) + self.assertAllClose(nmsed_scores, exp_nms_scores) + self.assertAllClose(nmsed_classes, exp_nms_classes) + self.assertAllClose(num_detections, [1, 2]) + + def test_batch_multiclass_nms_with_masks(self): + boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], + [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], + [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], + [[0, 10, 1, 11], [0, 10, 1, 11]]], + [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], + [[0, 100, 1, 101], [0, 100, 1, 101]], + [[0, 1000, 1, 1002], [0, 999, 2, 1004]], + [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], + tf.float32) + scores = tf.constant([[[.9, 0.01], [.75, 0.05], + [.6, 0.01], [.95, 0]], + [[.5, 0.01], [.3, 0.01], + [.01, .85], [.01, .5]]]) + masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]], + [[[2, 3], [4, 5]], [[3, 4], [5, 6]]], + [[[4, 5], [6, 7]], [[5, 6], [7, 8]]], + [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]], + [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]], + [[[10, 11], [12, 13]], [[11, 12], [13, 14]]], + [[[12, 13], [14, 15]], [[13, 14], [15, 16]]], + [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]], + tf.float32) + score_thresh = 0.1 + iou_thresh = .5 + max_output_size = 4 + + exp_nms_corners = np.array([[[0, 10, 1, 11], + [0, 0, 1, 1], + [0, 0, 0, 0], + [0, 0, 0, 0]], + [[0, 999, 2, 1004], + [0, 10.1, 1, 11.1], + [0, 100, 1, 101], + [0, 0, 0, 0]]]) + exp_nms_scores = np.array([[.95, .9, 0, 0], + [.85, .5, .3, 0]]) + exp_nms_classes = np.array([[0, 0, 0, 0], + [1, 0, 0, 0]]) + exp_nms_masks = np.array([[[[6, 7], [8, 9]], + [[0, 1], [2, 3]], + [[0, 0], [0, 0]], + [[0, 0], [0, 0]]], + [[[13, 14], [15, 16]], + [[8, 9], [10, 11]], + [[10, 11], [12, 13]], + [[0, 0], [0, 0]]]]) + + (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, + nmsed_additional_fields, num_detections + ) = post_processing.batch_multiclass_non_max_suppression( + boxes, scores, score_thresh, iou_thresh, + max_size_per_class=max_output_size, 
max_total_size=max_output_size, + masks=masks) + + self.assertIsNone(nmsed_additional_fields) + # Check static shapes + self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape) + self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape) + self.assertAllEqual(nmsed_classes.shape.as_list(), exp_nms_classes.shape) + self.assertAllEqual(nmsed_masks.shape.as_list(), exp_nms_masks.shape) + self.assertEqual(num_detections.shape.as_list(), [2]) + + with self.test_session() as sess: + (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, + num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, + nmsed_masks, num_detections]) + + self.assertAllClose(nmsed_boxes, exp_nms_corners) + self.assertAllClose(nmsed_scores, exp_nms_scores) + self.assertAllClose(nmsed_classes, exp_nms_classes) + self.assertAllClose(num_detections, [2, 3]) + self.assertAllClose(nmsed_masks, exp_nms_masks) + + def test_batch_multiclass_nms_with_additional_fields(self): + boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], + [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], + [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], + [[0, 10, 1, 11], [0, 10, 1, 11]]], + [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], + [[0, 100, 1, 101], [0, 100, 1, 101]], + [[0, 1000, 1, 1002], [0, 999, 2, 1004]], + [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], + tf.float32) + scores = tf.constant([[[.9, 0.01], [.75, 0.05], + [.6, 0.01], [.95, 0]], + [[.5, 0.01], [.3, 0.01], + [.01, .85], [.01, .5]]]) + additional_fields = { + 'keypoints': tf.constant( + [[[[6, 7], [8, 9]], + [[0, 1], [2, 3]], + [[0, 0], [0, 0]], + [[0, 0], [0, 0]]], + [[[13, 14], [15, 16]], + [[8, 9], [10, 11]], + [[10, 11], [12, 13]], + [[0, 0], [0, 0]]]], + tf.float32) + } + additional_fields['size'] = tf.constant( + [[[[6], [8]], [[0], [2]], [[0], [0]], [[0], [0]]], + [[[13], [15]], [[8], [10]], [[10], [12]], [[0], [0]]]], tf.float32) + score_thresh = 0.1 + iou_thresh = .5 + max_output_size = 4 + + exp_nms_corners = np.array([[[0, 10, 1, 11], + [0, 0, 1, 1], + [0, 0, 0, 0], + [0, 0, 0, 0]], + [[0, 999, 2, 1004], + [0, 10.1, 1, 11.1], + [0, 100, 1, 101], + [0, 0, 0, 0]]]) + exp_nms_scores = np.array([[.95, .9, 0, 0], + [.85, .5, .3, 0]]) + exp_nms_classes = np.array([[0, 0, 0, 0], + [1, 0, 0, 0]]) + exp_nms_additional_fields = { + 'keypoints': np.array([[[[0, 0], [0, 0]], + [[6, 7], [8, 9]], + [[0, 0], [0, 0]], + [[0, 0], [0, 0]]], + [[[10, 11], [12, 13]], + [[13, 14], [15, 16]], + [[8, 9], [10, 11]], + [[0, 0], [0, 0]]]]) + } + exp_nms_additional_fields['size'] = np.array([[[[0], [0]], [[6], [8]], + [[0], [0]], [[0], [0]]], + [[[10], [12]], [[13], [15]], + [[8], [10]], [[0], [0]]]]) + + (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, + nmsed_additional_fields, num_detections + ) = post_processing.batch_multiclass_non_max_suppression( + boxes, scores, score_thresh, iou_thresh, + max_size_per_class=max_output_size, max_total_size=max_output_size, + additional_fields=additional_fields) + + self.assertIsNone(nmsed_masks) + # Check static shapes + self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape) + self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape) + self.assertAllEqual(nmsed_classes.shape.as_list(), exp_nms_classes.shape) + self.assertEqual(len(nmsed_additional_fields), + len(exp_nms_additional_fields)) + for key in exp_nms_additional_fields: + self.assertAllEqual(nmsed_additional_fields[key].shape.as_list(), + exp_nms_additional_fields[key].shape) + self.assertEqual(num_detections.shape.as_list(), [2]) + + with 
self.test_session() as sess: + (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_additional_fields, + num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, + nmsed_additional_fields, num_detections]) + + self.assertAllClose(nmsed_boxes, exp_nms_corners) + self.assertAllClose(nmsed_scores, exp_nms_scores) + self.assertAllClose(nmsed_classes, exp_nms_classes) + for key in exp_nms_additional_fields: + self.assertAllClose(nmsed_additional_fields[key], + exp_nms_additional_fields[key]) + self.assertAllClose(num_detections, [2, 3]) + + def test_batch_multiclass_nms_with_dynamic_batch_size(self): + boxes_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2, 4)) + scores_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2)) + masks_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2, 2, 2)) + + boxes = np.array([[[[0, 0, 1, 1], [0, 0, 4, 5]], + [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], + [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], + [[0, 10, 1, 11], [0, 10, 1, 11]]], + [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], + [[0, 100, 1, 101], [0, 100, 1, 101]], + [[0, 1000, 1, 1002], [0, 999, 2, 1004]], + [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]]) + scores = np.array([[[.9, 0.01], [.75, 0.05], + [.6, 0.01], [.95, 0]], + [[.5, 0.01], [.3, 0.01], + [.01, .85], [.01, .5]]]) + masks = np.array([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]], + [[[2, 3], [4, 5]], [[3, 4], [5, 6]]], + [[[4, 5], [6, 7]], [[5, 6], [7, 8]]], + [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]], + [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]], + [[[10, 11], [12, 13]], [[11, 12], [13, 14]]], + [[[12, 13], [14, 15]], [[13, 14], [15, 16]]], + [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]]) + score_thresh = 0.1 + iou_thresh = .5 + max_output_size = 4 + + exp_nms_corners = np.array([[[0, 10, 1, 11], + [0, 0, 1, 1], + [0, 0, 0, 0], + [0, 0, 0, 0]], + [[0, 999, 2, 1004], + [0, 10.1, 1, 11.1], + [0, 100, 1, 101], + [0, 0, 0, 0]]]) + exp_nms_scores = np.array([[.95, .9, 0, 0], + [.85, .5, .3, 0]]) + exp_nms_classes = np.array([[0, 0, 0, 0], + [1, 0, 0, 0]]) + exp_nms_masks = np.array([[[[6, 7], [8, 9]], + [[0, 1], [2, 3]], + [[0, 0], [0, 0]], + [[0, 0], [0, 0]]], + [[[13, 14], [15, 16]], + [[8, 9], [10, 11]], + [[10, 11], [12, 13]], + [[0, 0], [0, 0]]]]) + + (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, + nmsed_additional_fields, num_detections + ) = post_processing.batch_multiclass_non_max_suppression( + boxes_placeholder, scores_placeholder, score_thresh, iou_thresh, + max_size_per_class=max_output_size, max_total_size=max_output_size, + masks=masks_placeholder) + + self.assertIsNone(nmsed_additional_fields) + # Check static shapes + self.assertAllEqual(nmsed_boxes.shape.as_list(), [None, 4, 4]) + self.assertAllEqual(nmsed_scores.shape.as_list(), [None, 4]) + self.assertAllEqual(nmsed_classes.shape.as_list(), [None, 4]) + self.assertAllEqual(nmsed_masks.shape.as_list(), [None, 4, 2, 2]) + self.assertEqual(num_detections.shape.as_list(), [None]) + + with self.test_session() as sess: + (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, + num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, + nmsed_masks, num_detections], + feed_dict={boxes_placeholder: boxes, + scores_placeholder: scores, + masks_placeholder: masks}) + self.assertAllClose(nmsed_boxes, exp_nms_corners) + self.assertAllClose(nmsed_scores, exp_nms_scores) + self.assertAllClose(nmsed_classes, exp_nms_classes) + self.assertAllClose(num_detections, [2, 3]) + self.assertAllClose(nmsed_masks, exp_nms_masks) + + def 
test_batch_multiclass_nms_with_masks_and_num_valid_boxes(self): + boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], + [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], + [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], + [[0, 10, 1, 11], [0, 10, 1, 11]]], + [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], + [[0, 100, 1, 101], [0, 100, 1, 101]], + [[0, 1000, 1, 1002], [0, 999, 2, 1004]], + [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], + tf.float32) + scores = tf.constant([[[.9, 0.01], [.75, 0.05], + [.6, 0.01], [.95, 0]], + [[.5, 0.01], [.3, 0.01], + [.01, .85], [.01, .5]]]) + masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]], + [[[2, 3], [4, 5]], [[3, 4], [5, 6]]], + [[[4, 5], [6, 7]], [[5, 6], [7, 8]]], + [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]], + [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]], + [[[10, 11], [12, 13]], [[11, 12], [13, 14]]], + [[[12, 13], [14, 15]], [[13, 14], [15, 16]]], + [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]], + tf.float32) + num_valid_boxes = tf.constant([1, 1], tf.int32) + score_thresh = 0.1 + iou_thresh = .5 + max_output_size = 4 + + exp_nms_corners = [[[0, 0, 1, 1], + [0, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0]], + [[0, 10.1, 1, 11.1], + [0, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0]]] + exp_nms_scores = [[.9, 0, 0, 0], + [.5, 0, 0, 0]] + exp_nms_classes = [[0, 0, 0, 0], + [0, 0, 0, 0]] + exp_nms_masks = [[[[0, 1], [2, 3]], + [[0, 0], [0, 0]], + [[0, 0], [0, 0]], + [[0, 0], [0, 0]]], + [[[8, 9], [10, 11]], + [[0, 0], [0, 0]], + [[0, 0], [0, 0]], + [[0, 0], [0, 0]]]] + + (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, + nmsed_additional_fields, num_detections + ) = post_processing.batch_multiclass_non_max_suppression( + boxes, scores, score_thresh, iou_thresh, + max_size_per_class=max_output_size, max_total_size=max_output_size, + num_valid_boxes=num_valid_boxes, masks=masks) + + self.assertIsNone(nmsed_additional_fields) + + with self.test_session() as sess: + (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, + num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, + nmsed_masks, num_detections]) + self.assertAllClose(nmsed_boxes, exp_nms_corners) + self.assertAllClose(nmsed_scores, exp_nms_scores) + self.assertAllClose(nmsed_classes, exp_nms_classes) + self.assertAllClose(num_detections, [1, 1]) + self.assertAllClose(nmsed_masks, exp_nms_masks) + + def test_batch_multiclass_nms_with_additional_fields_and_num_valid_boxes( + self): + boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], + [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], + [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], + [[0, 10, 1, 11], [0, 10, 1, 11]]], + [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], + [[0, 100, 1, 101], [0, 100, 1, 101]], + [[0, 1000, 1, 1002], [0, 999, 2, 1004]], + [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], + tf.float32) + scores = tf.constant([[[.9, 0.01], [.75, 0.05], + [.6, 0.01], [.95, 0]], + [[.5, 0.01], [.3, 0.01], + [.01, .85], [.01, .5]]]) + additional_fields = { + 'keypoints': tf.constant( + [[[[6, 7], [8, 9]], + [[0, 1], [2, 3]], + [[0, 0], [0, 0]], + [[0, 0], [0, 0]]], + [[[13, 14], [15, 16]], + [[8, 9], [10, 11]], + [[10, 11], [12, 13]], + [[0, 0], [0, 0]]]], + tf.float32) + } + + additional_fields['size'] = tf.constant( + [[[[7], [9]], [[1], [3]], [[0], [0]], [[0], [0]]], + [[[14], [16]], [[9], [11]], [[11], [13]], [[0], [0]]]], tf.float32) + + num_valid_boxes = tf.constant([1, 1], tf.int32) + score_thresh = 0.1 + iou_thresh = .5 + max_output_size = 4 + + exp_nms_corners = [[[0, 0, 1, 1], + [0, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0]], + [[0, 10.1, 1, 11.1], + [0, 0, 
0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0]]] + exp_nms_scores = [[.9, 0, 0, 0], + [.5, 0, 0, 0]] + exp_nms_classes = [[0, 0, 0, 0], + [0, 0, 0, 0]] + exp_nms_additional_fields = { + 'keypoints': np.array([[[[6, 7], [8, 9]], + [[0, 0], [0, 0]], + [[0, 0], [0, 0]], + [[0, 0], [0, 0]]], + [[[13, 14], [15, 16]], + [[0, 0], [0, 0]], + [[0, 0], [0, 0]], + [[0, 0], [0, 0]]]]) + } + + exp_nms_additional_fields['size'] = np.array([[[[7], [9]], [[0], [0]], + [[0], [0]], [[0], [0]]], + [[[14], [16]], [[0], [0]], + [[0], [0]], [[0], [0]]]]) + + (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, + nmsed_additional_fields, num_detections + ) = post_processing.batch_multiclass_non_max_suppression( + boxes, scores, score_thresh, iou_thresh, + max_size_per_class=max_output_size, max_total_size=max_output_size, + num_valid_boxes=num_valid_boxes, + additional_fields=additional_fields) + + self.assertIsNone(nmsed_masks) + + with self.test_session() as sess: + (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_additional_fields, + num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, + nmsed_additional_fields, num_detections]) + + self.assertAllClose(nmsed_boxes, exp_nms_corners) + self.assertAllClose(nmsed_scores, exp_nms_scores) + self.assertAllClose(nmsed_classes, exp_nms_classes) + for key in exp_nms_additional_fields: + self.assertAllClose(nmsed_additional_fields[key], + exp_nms_additional_fields[key]) + self.assertAllClose(num_detections, [1, 1]) + + def test_combined_nms_with_batch_size_2(self): + """Test use_combined_nms.""" + boxes = tf.constant([[[[0, 0, 0.1, 0.1], [0, 0, 0.1, 0.1]], + [[0, 0.01, 1, 0.11], [0, 0.6, 0.1, 0.7]], + [[0, -0.01, 0.1, 0.09], [0, -0.1, 0.1, 0.09]], + [[0, 0.11, 0.1, 0.2], [0, 0.11, 0.1, 0.2]]], + [[[0, 0, 0.2, 0.2], [0, 0, 0.2, 0.2]], + [[0, 0.02, 0.2, 0.22], [0, 0.02, 0.2, 0.22]], + [[0, -0.02, 0.2, 0.19], [0, -0.02, 0.2, 0.19]], + [[0, 0.21, 0.2, 0.3], [0, 0.21, 0.2, 0.3]]]], + tf.float32) + scores = tf.constant([[[.1, 0.9], [.75, 0.8], + [.6, 0.3], [0.95, 0.1]], + [[.1, 0.9], [.75, 0.8], + [.6, .3], [.95, .1]]]) + score_thresh = 0.1 + iou_thresh = .5 + max_output_size = 3 + + exp_nms_corners = np.array([[[0, 0.11, 0.1, 0.2], + [0, 0, 0.1, 0.1], + [0, 0.6, 0.1, 0.7]], + [[0, 0.21, 0.2, 0.3], + [0, 0, 0.2, 0.2], + [0, 0.02, 0.2, 0.22]]]) + exp_nms_scores = np.array([[.95, .9, 0.8], + [.95, .9, .75]]) + exp_nms_classes = np.array([[0, 1, 1], + [0, 1, 0]]) + + (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, + nmsed_additional_fields, num_detections + ) = post_processing.batch_multiclass_non_max_suppression( + boxes, scores, score_thresh, iou_thresh, + max_size_per_class=max_output_size, max_total_size=max_output_size, + use_static_shapes=True, + use_combined_nms=True) + + self.assertIsNone(nmsed_masks) + self.assertIsNone(nmsed_additional_fields) + + with self.test_session() as sess: + (nmsed_boxes, nmsed_scores, nmsed_classes, + num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, + num_detections]) + self.assertAllClose(nmsed_boxes, exp_nms_corners) + self.assertAllClose(nmsed_scores, exp_nms_scores) + self.assertAllClose(nmsed_classes, exp_nms_classes) + self.assertListEqual(num_detections.tolist(), [3, 3]) + + # TODO(bhattad): Remove conditional after CMLE moves to TF 1.9 + +if __name__ == '__main__': + tf.test.main() diff --git a/core/batcher.py b/core/batcher.py new file mode 100644 index 0000000..825b90d --- /dev/null +++ b/core/batcher.py @@ -0,0 +1,141 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Provides functions to batch a dictionary of input tensors.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from six.moves import range +import tensorflow as tf + +from object_detection.core import prefetcher + +rt_shape_str = '_runtime_shapes' + + +class BatchQueue(object): + """BatchQueue class. + + This class creates a batch queue to asynchronously enqueue tensors_dict. + It also adds a FIFO prefetcher so that the batches are readily available + for the consumers. Dequeue ops for a BatchQueue object can be created via + the Dequeue method which evaluates to a batch of tensor_dict. + + Example input pipeline with batching: + ------------------------------------ + key, string_tensor = slim.parallel_reader.parallel_read(...) + tensor_dict = decoder.decode(string_tensor) + tensor_dict = preprocessor.preprocess(tensor_dict, ...) + batch_queue = batcher.BatchQueue(tensor_dict, + batch_size=32, + batch_queue_capacity=2000, + num_batch_queue_threads=8, + prefetch_queue_capacity=20) + tensor_dict = batch_queue.dequeue() + outputs = Model(tensor_dict) + ... + ----------------------------------- + + Notes: + ----- + This class batches tensors of unequal sizes by zero padding and unpadding + them after generating a batch. This can be computationally expensive when + batching tensors (such as images) that are of vastly different sizes. So it is + recommended that the shapes of such tensors be fully defined in tensor_dict + while other lightweight tensors such as bounding box corners and class labels + can be of varying sizes. Use either crop or resize operations to fully define + the shape of an image in tensor_dict. + + It is also recommended to perform any preprocessing operations on tensors + before passing to BatchQueue and subsequently calling the Dequeue method. + + Another caveat is that this class does not read the last batch if it is not + full. The current implementation makes it hard to support that use case. So, + for evaluation, when it is critical to run all the examples through your + network use the input pipeline example mentioned in core/prefetcher.py. + """ + + def __init__(self, tensor_dict, batch_size, batch_queue_capacity, + num_batch_queue_threads, prefetch_queue_capacity): + """Constructs a batch queue holding tensor_dict. + + Args: + tensor_dict: dictionary of tensors to batch. + batch_size: batch size. + batch_queue_capacity: max capacity of the queue from which the tensors are + batched. + num_batch_queue_threads: number of threads to use for batching. + prefetch_queue_capacity: max capacity of the queue used to prefetch + assembled batches. + """ + # Remember static shapes to set shapes of batched tensors. 
+ static_shapes = collections.OrderedDict( + {key: tensor.get_shape() for key, tensor in tensor_dict.items()}) + # Remember runtime shapes to unpad tensors after batching. + runtime_shapes = collections.OrderedDict( + {(key + rt_shape_str): tf.shape(tensor) + for key, tensor in tensor_dict.items()}) + + all_tensors = tensor_dict + all_tensors.update(runtime_shapes) + batched_tensors = tf.train.batch( + all_tensors, + capacity=batch_queue_capacity, + batch_size=batch_size, + dynamic_pad=True, + num_threads=num_batch_queue_threads) + + self._queue = prefetcher.prefetch(batched_tensors, + prefetch_queue_capacity) + self._static_shapes = static_shapes + self._batch_size = batch_size + + def dequeue(self): + """Dequeues a batch of tensor_dict from the BatchQueue. + + TODO: use allow_smaller_final_batch to allow running over the whole eval set + + Returns: + A list of tensor_dicts of the requested batch_size. + """ + batched_tensors = self._queue.dequeue() + # Separate input tensors from tensors containing their runtime shapes. + tensors = {} + shapes = {} + for key, batched_tensor in batched_tensors.items(): + unbatched_tensor_list = tf.unstack(batched_tensor) + for i, unbatched_tensor in enumerate(unbatched_tensor_list): + if rt_shape_str in key: + shapes[(key[:-len(rt_shape_str)], i)] = unbatched_tensor + else: + tensors[(key, i)] = unbatched_tensor + + # Undo that padding using shapes and create a list of size `batch_size` that + # contains tensor dictionaries. + tensor_dict_list = [] + batch_size = self._batch_size + for batch_id in range(batch_size): + tensor_dict = {} + for key in self._static_shapes: + tensor_dict[key] = tf.slice(tensors[(key, batch_id)], + tf.zeros_like(shapes[(key, batch_id)]), + shapes[(key, batch_id)]) + tensor_dict[key].set_shape(self._static_shapes[key]) + tensor_dict_list.append(tensor_dict) + + return tensor_dict_list diff --git a/core/batcher_test.py b/core/batcher_test.py new file mode 100644 index 0000000..a6c9faf --- /dev/null +++ b/core/batcher_test.py @@ -0,0 +1,163 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
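# --- Editor's note: an illustrative sketch, not part of this patch, isolating
# the unpad step that BatchQueue.dequeue() above performs. tf.train.batch with
# dynamic_pad=True zero-pads each tensor, and the pre-padding shape is batched
# alongside it under the '_runtime_shapes' suffix, so a slice from the origin
# recovers the original tensor.
import tensorflow as tf

padded = tf.constant([1, 2, 0])     # one unstacked, zero-padded tensor
true_shape = tf.constant([2])       # its recorded runtime shape
unpadded = tf.slice(padded, tf.zeros_like(true_shape), true_shape)
# unpadded evaluates to [1, 2]; set_shape() then restores the static shape that
# was remembered before batching.
# --- end editor's note ---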
+# ============================================================================== + +"""Tests for object_detection.core.batcher.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +from six.moves import range +import tensorflow as tf + +from object_detection.core import batcher + +slim = tf.contrib.slim + + +class BatcherTest(tf.test.TestCase): + + def test_batch_and_unpad_2d_tensors_of_different_sizes_in_1st_dimension(self): + with self.test_session() as sess: + batch_size = 3 + num_batches = 2 + examples = tf.Variable(tf.constant(2, dtype=tf.int32)) + counter = examples.count_up_to(num_batches * batch_size + 2) + boxes = tf.tile( + tf.reshape(tf.range(4), [1, 4]), tf.stack([counter, tf.constant(1)])) + batch_queue = batcher.BatchQueue( + tensor_dict={'boxes': boxes}, + batch_size=batch_size, + batch_queue_capacity=100, + num_batch_queue_threads=1, + prefetch_queue_capacity=100) + batch = batch_queue.dequeue() + + for tensor_dict in batch: + for tensor in tensor_dict.values(): + self.assertAllEqual([None, 4], tensor.get_shape().as_list()) + + tf.initialize_all_variables().run() + with slim.queues.QueueRunners(sess): + i = 2 + for _ in range(num_batches): + batch_np = sess.run(batch) + for tensor_dict in batch_np: + for tensor in tensor_dict.values(): + self.assertAllEqual(tensor, np.tile(np.arange(4), (i, 1))) + i += 1 + with self.assertRaises(tf.errors.OutOfRangeError): + sess.run(batch) + + def test_batch_and_unpad_2d_tensors_of_different_sizes_in_all_dimensions( + self): + with self.test_session() as sess: + batch_size = 3 + num_batches = 2 + examples = tf.Variable(tf.constant(2, dtype=tf.int32)) + counter = examples.count_up_to(num_batches * batch_size + 2) + image = tf.reshape( + tf.range(counter * counter), tf.stack([counter, counter])) + batch_queue = batcher.BatchQueue( + tensor_dict={'image': image}, + batch_size=batch_size, + batch_queue_capacity=100, + num_batch_queue_threads=1, + prefetch_queue_capacity=100) + batch = batch_queue.dequeue() + + for tensor_dict in batch: + for tensor in tensor_dict.values(): + self.assertAllEqual([None, None], tensor.get_shape().as_list()) + + tf.initialize_all_variables().run() + with slim.queues.QueueRunners(sess): + i = 2 + for _ in range(num_batches): + batch_np = sess.run(batch) + for tensor_dict in batch_np: + for tensor in tensor_dict.values(): + self.assertAllEqual(tensor, np.arange(i * i).reshape((i, i))) + i += 1 + with self.assertRaises(tf.errors.OutOfRangeError): + sess.run(batch) + + def test_batch_and_unpad_2d_tensors_of_same_size_in_all_dimensions(self): + with self.test_session() as sess: + batch_size = 3 + num_batches = 2 + examples = tf.Variable(tf.constant(1, dtype=tf.int32)) + counter = examples.count_up_to(num_batches * batch_size + 1) + image = tf.reshape(tf.range(1, 13), [4, 3]) * counter + batch_queue = batcher.BatchQueue( + tensor_dict={'image': image}, + batch_size=batch_size, + batch_queue_capacity=100, + num_batch_queue_threads=1, + prefetch_queue_capacity=100) + batch = batch_queue.dequeue() + + for tensor_dict in batch: + for tensor in tensor_dict.values(): + self.assertAllEqual([4, 3], tensor.get_shape().as_list()) + + tf.initialize_all_variables().run() + with slim.queues.QueueRunners(sess): + i = 1 + for _ in range(num_batches): + batch_np = sess.run(batch) + for tensor_dict in batch_np: + for tensor in tensor_dict.values(): + self.assertAllEqual(tensor, np.arange(1, 13).reshape((4, 3)) * i) + i += 1 + with 
self.assertRaises(tf.errors.OutOfRangeError): + sess.run(batch) + + def test_batcher_when_batch_size_is_one(self): + with self.test_session() as sess: + batch_size = 1 + num_batches = 2 + examples = tf.Variable(tf.constant(2, dtype=tf.int32)) + counter = examples.count_up_to(num_batches * batch_size + 2) + image = tf.reshape( + tf.range(counter * counter), tf.stack([counter, counter])) + batch_queue = batcher.BatchQueue( + tensor_dict={'image': image}, + batch_size=batch_size, + batch_queue_capacity=100, + num_batch_queue_threads=1, + prefetch_queue_capacity=100) + batch = batch_queue.dequeue() + + for tensor_dict in batch: + for tensor in tensor_dict.values(): + self.assertAllEqual([None, None], tensor.get_shape().as_list()) + + tf.initialize_all_variables().run() + with slim.queues.QueueRunners(sess): + i = 2 + for _ in range(num_batches): + batch_np = sess.run(batch) + for tensor_dict in batch_np: + for tensor in tensor_dict.values(): + self.assertAllEqual(tensor, np.arange(i * i).reshape((i, i))) + i += 1 + with self.assertRaises(tf.errors.OutOfRangeError): + sess.run(batch) + + +if __name__ == '__main__': + tf.test.main() diff --git a/core/box_coder.py b/core/box_coder.py new file mode 100644 index 0000000..82f084d --- /dev/null +++ b/core/box_coder.py @@ -0,0 +1,158 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Base box coder. + +Box coders convert between coordinate frames, namely image-centric +(with (0,0) on the top left of image) and anchor-centric (with (0,0) being +defined by a specific anchor). + +Users of a BoxCoder can call two methods: + encode: which encodes a box with respect to a given anchor + (or rather, a tensor of boxes wrt a corresponding tensor of anchors) and + decode: which inverts this encoding with a decode operation. +In both cases, the arguments are assumed to be in 1-1 correspondence already; +it is not the job of a BoxCoder to perform matching. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from abc import ABCMeta +from abc import abstractmethod +from abc import abstractproperty + +import six +import tensorflow as tf + +from object_detection.utils import shape_utils + + +# Box coder types. +FASTER_RCNN = 'faster_rcnn' +KEYPOINT = 'keypoint' +MEAN_STDDEV = 'mean_stddev' +SQUARE = 'square' + + +class BoxCoder(six.with_metaclass(ABCMeta, object)): + """Abstract base class for box coder.""" + + @abstractproperty + def code_size(self): + """Return the size of each code. + + This number is a constant and should agree with the output of the `encode` + op (e.g. if rel_codes is the output of self.encode(...), then it should have + shape [N, code_size()]). This abstractproperty should be overridden by + implementations. 
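+ For example (illustrative note): a coder emitting the usual 4-number + [ty, tx, th, tw] representation, such as the Faster R-CNN coder shipped in + this patch (box_coders/faster_rcnn_box_coder.py), has a code_size of 4.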
+ + Returns: + an integer constant + """ + pass + + def encode(self, boxes, anchors): + """Encode a box list relative to an anchor collection. + + Args: + boxes: BoxList holding N boxes to be encoded + anchors: BoxList of N anchors + + Returns: + a tensor representing N relative-encoded boxes + """ + with tf.name_scope('Encode'): + return self._encode(boxes, anchors) + + def decode(self, rel_codes, anchors): + """Decode boxes that are encoded relative to an anchor collection. + + Args: + rel_codes: a tensor representing N relative-encoded boxes + anchors: BoxList of anchors + + Returns: + boxlist: BoxList holding N boxes encoded in the ordinary way (i.e., + with corners y_min, x_min, y_max, x_max) + """ + with tf.name_scope('Decode'): + return self._decode(rel_codes, anchors) + + @abstractmethod + def _encode(self, boxes, anchors): + """Method to be overridden by implementations. + + Args: + boxes: BoxList holding N boxes to be encoded + anchors: BoxList of N anchors + + Returns: + a tensor representing N relative-encoded boxes + """ + pass + + @abstractmethod + def _decode(self, rel_codes, anchors): + """Method to be overridden by implementations. + + Args: + rel_codes: a tensor representing N relative-encoded boxes + anchors: BoxList of anchors + + Returns: + boxlist: BoxList holding N boxes encoded in the ordinary way (i.e., + with corners y_min, x_min, y_max, x_max) + """ + pass + + +def batch_decode(encoded_boxes, box_coder, anchors): + """Decode a batch of encoded boxes. + + This op takes a batch of encoded bounding boxes and transforms + them to a batch of bounding boxes specified by their corners in + the order of [y_min, x_min, y_max, x_max]. + + Args: + encoded_boxes: a float32 tensor of shape [batch_size, num_anchors, + code_size] representing the location of the objects. + box_coder: a BoxCoder object. + anchors: a BoxList of anchors used to encode `encoded_boxes`. + + Returns: + decoded_boxes: a float32 tensor of shape [batch_size, num_anchors, + code_size] representing the corners of the objects in the order + of [y_min, x_min, y_max, x_max]. + + Raises: + ValueError: if batch sizes of the inputs are inconsistent, or if + the number of anchors inferred from encoded_boxes and anchors are + inconsistent. + """ + encoded_boxes.get_shape().assert_has_rank(3) + if (shape_utils.get_dim_as_int(encoded_boxes.get_shape()[1]) + != anchors.num_boxes_static()): + raise ValueError('The number of anchors inferred from encoded_boxes' + ' and anchors are inconsistent: shape[1] of encoded_boxes' + ' %s should be equal to the number of anchors: %s.' % + (shape_utils.get_dim_as_int(encoded_boxes.get_shape()[1]), + anchors.num_boxes_static())) + + decoded_boxes = tf.stack([ + box_coder.decode(boxes, anchors).get() + for boxes in tf.unstack(encoded_boxes) + ]) + return decoded_boxes diff --git a/core/box_coder_test.py b/core/box_coder_test.py new file mode 100644 index 0000000..c087a32 --- /dev/null +++ b/core/box_coder_test.py @@ -0,0 +1,61 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
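# --- Editor's note: an illustrative sketch, not part of this patch, spelling
# out the round-trip contract a concrete BoxCoder satisfies. The formulas are
# the standard Faster R-CNN parameterization; the shipped implementation lives
# in box_coders/faster_rcnn_box_coder.py (and additionally supports optional
# scale factors). All helper names below are invented for this sketch.
import math

def _center_size(box):
  # [ymin, xmin, ymax, xmax] -> (ycenter, xcenter, height, width)
  ymin, xmin, ymax, xmax = box
  height, width = ymax - ymin, xmax - xmin
  return ymin + height / 2., xmin + width / 2., height, width

def encode_box(box, anchor):
  y, x, h, w = _center_size(box)
  ya, xa, ha, wa = _center_size(anchor)
  return [(y - ya) / ha, (x - xa) / wa, math.log(h / ha), math.log(w / wa)]

def decode_box(rel_code, anchor):
  ty, tx, th, tw = rel_code
  ya, xa, ha, wa = _center_size(anchor)
  h, w = math.exp(th) * ha, math.exp(tw) * wa
  y, x = ty * ha + ya, tx * wa + xa
  return [y - h / 2., x - w / 2., y + h / 2., x + w / 2.]

# decode_box(encode_box(b, a), a) recovers b up to float error, the same
# round trip the MockBoxCoder below verifies with a simpler doubling scheme.
# --- end editor's note ---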
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for object_detection.core.box_coder.""" + +import tensorflow as tf + +from object_detection.core import box_coder +from object_detection.core import box_list + + +class MockBoxCoder(box_coder.BoxCoder): + """Test BoxCoder that encodes/decodes using the multiply-by-two function.""" + + def code_size(self): + return 4 + + def _encode(self, boxes, anchors): + return 2.0 * boxes.get() + + def _decode(self, rel_codes, anchors): + return box_list.BoxList(rel_codes / 2.0) + + +class BoxCoderTest(tf.test.TestCase): + + def test_batch_decode(self): + mock_anchor_corners = tf.constant( + [[0, 0.1, 0.2, 0.3], [0.2, 0.4, 0.4, 0.6]], tf.float32) + mock_anchors = box_list.BoxList(mock_anchor_corners) + mock_box_coder = MockBoxCoder() + + expected_boxes = [[[0.0, 0.1, 0.5, 0.6], [0.5, 0.6, 0.7, 0.8]], + [[0.1, 0.2, 0.3, 0.4], [0.7, 0.8, 0.9, 1.0]]] + + encoded_boxes_list = [mock_box_coder.encode( + box_list.BoxList(tf.constant(boxes)), mock_anchors) + for boxes in expected_boxes] + encoded_boxes = tf.stack(encoded_boxes_list) + decoded_boxes = box_coder.batch_decode( + encoded_boxes, mock_box_coder, mock_anchors) + + with self.test_session() as sess: + decoded_boxes_result = sess.run(decoded_boxes) + self.assertAllClose(expected_boxes, decoded_boxes_result) + + +if __name__ == '__main__': + tf.test.main() diff --git a/core/box_list.py b/core/box_list.py new file mode 100644 index 0000000..cda7575 --- /dev/null +++ b/core/box_list.py @@ -0,0 +1,210 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Bounding Box List definition. + +BoxList represents a list of bounding boxes as tensorflow +tensors, where each bounding box is represented as a row of 4 numbers, +[y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes +within a given list correspond to a single image. See also +box_list_ops.py for common box related operations (such as area, iou, etc). + +Optionally, users can add additional related fields (such as weights). +We assume the following things to be true about fields: +* they correspond to boxes in the box_list along the 0th dimension +* they have inferrable rank at graph construction time +* all dimensions except for possibly the 0th can be inferred + (i.e., not None) at graph construction time. + +Some other notes: + * Following tensorflow conventions, we use height, width ordering, + and correspondingly, y,x (or ymin, xmin, ymax, xmax) ordering + * Tensors are always provided as (flat) [N, 4] tensors. +""" + +import tensorflow as tf + +from object_detection.utils import shape_utils + + +class BoxList(object): + """Box collection.""" + + def __init__(self, boxes): + """Constructs box collection. 
+ + Args: + boxes: a tensor of shape [N, 4] representing box corners + + Raises: + ValueError: if invalid dimensions for bbox data or if bbox data is not in + float32 format. + """ + if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4: + raise ValueError('Invalid dimensions for box data: {}'.format( + boxes.shape)) + if boxes.dtype != tf.float32: + raise ValueError('Invalid tensor type: should be tf.float32') + self.data = {'boxes': boxes} + + def num_boxes(self): + """Returns number of boxes held in collection. + + Returns: + a tensor representing the number of boxes held in the collection. + """ + return tf.shape(self.data['boxes'])[0] + + def num_boxes_static(self): + """Returns number of boxes held in collection. + + This number is inferred at graph construction time rather than run-time. + + Returns: + Number of boxes held in collection (integer) or None if this is not + inferrable at graph construction time. + """ + return shape_utils.get_dim_as_int(self.data['boxes'].get_shape()[0]) + + def get_all_fields(self): + """Returns all fields.""" + return self.data.keys() + + def get_extra_fields(self): + """Returns all non-box fields (i.e., everything not named 'boxes').""" + return [k for k in self.data.keys() if k != 'boxes'] + + def add_field(self, field, field_data): + """Add field to box list. + + This method can be used to add related box data such as + weights/labels, etc. + + Args: + field: a string key to access the data via `get` + field_data: a tensor containing the data to store in the BoxList + """ + self.data[field] = field_data + + def has_field(self, field): + """Returns True if the given field exists in the box list.""" + return field in self.data + + def get(self): + """Convenience function for accessing box coordinates. + + Returns: + a tensor with shape [N, 4] representing box coordinates. + """ + return self.get_field('boxes') + + def set(self, boxes): + """Convenience function for setting box coordinates. + + Args: + boxes: a tensor of shape [N, 4] representing box corners + + Raises: + ValueError: if invalid dimensions for bbox data + """ + if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4: + raise ValueError('Invalid dimensions for box data.') + self.data['boxes'] = boxes + + def get_field(self, field): + """Accesses data associated with the specified field in the box collection. + + Args: + field: a string specifying the field to access; the box coordinates + themselves are stored under the field 'boxes'. + + Returns: + a tensor representing the requested field. + + Raises: + ValueError: if the field does not exist + """ + if not self.has_field(field): + raise ValueError('field ' + str(field) + ' does not exist') + return self.data[field] + + def set_field(self, field, value): + """Sets the value of a field. + + Updates the field of a box_list with a given value. + + Args: + field: (string) name of the field to set value. + value: the value to assign to the field. + + Raises: + ValueError: if the box_list does not have specified field. + """ + if not self.has_field(field): + raise ValueError('field %s does not exist' % field) + self.data[field] = value + + def get_center_coordinates_and_sizes(self, scope=None): + """Computes the center coordinates, height and width of the boxes. + + Args: + scope: name scope of the function. + + Returns: + a list of 4 1-D tensors [ycenter, xcenter, height, width].
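+ Example (illustrative): a box [0.0, 0.0, 2.0, 4.0] yields ycenter=1.0, + xcenter=2.0, height=2.0, width=4.0.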
+ """ + with tf.name_scope(scope, 'get_center_coordinates_and_sizes'): + box_corners = self.get() + ymin, xmin, ymax, xmax = tf.unstack(tf.transpose(box_corners)) + width = xmax - xmin + height = ymax - ymin + ycenter = ymin + height / 2. + xcenter = xmin + width / 2. + return [ycenter, xcenter, height, width] + + def transpose_coordinates(self, scope=None): + """Transpose the coordinate representation in a boxlist. + + Args: + scope: name scope of the function. + """ + with tf.name_scope(scope, 'transpose_coordinates'): + y_min, x_min, y_max, x_max = tf.split( + value=self.get(), num_or_size_splits=4, axis=1) + self.set(tf.concat([x_min, y_min, x_max, y_max], 1)) + + def as_tensor_dict(self, fields=None): + """Retrieves specified fields as a dictionary of tensors. + + Args: + fields: (optional) list of fields to return in the dictionary. + If None (default), all fields are returned. + + Returns: + tensor_dict: A dictionary of tensors specified by fields. + + Raises: + ValueError: if specified field is not contained in boxlist. + """ + tensor_dict = {} + if fields is None: + fields = self.get_all_fields() + for field in fields: + if not self.has_field(field): + raise ValueError('boxlist must contain all specified fields') + tensor_dict[field] = self.get_field(field) + return tensor_dict diff --git a/core/box_list_ops.py b/core/box_list_ops.py new file mode 100644 index 0000000..0bd3788 --- /dev/null +++ b/core/box_list_ops.py @@ -0,0 +1,1141 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Bounding Box List operations. + +Example box operations that are supported: + * areas: compute bounding box areas + * iou: pairwise intersection-over-union scores + * sq_dist: pairwise distances between bounding boxes + +Whenever box_list_ops functions output a BoxList, the fields of the incoming +BoxList are retained unless documented otherwise. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from six.moves import range +import tensorflow as tf + +from object_detection.core import box_list +from object_detection.utils import ops +from object_detection.utils import shape_utils + + +class SortOrder(object): + """Enum class for sort order. + + Attributes: + ascend: ascend order. + descend: descend order. + """ + ascend = 1 + descend = 2 + + +def area(boxlist, scope=None): + """Computes area of boxes. + + Args: + boxlist: BoxList holding N boxes + scope: name scope. + + Returns: + a tensor with shape [N] representing box areas. + """ + with tf.name_scope(scope, 'Area'): + y_min, x_min, y_max, x_max = tf.split( + value=boxlist.get(), num_or_size_splits=4, axis=1) + return tf.squeeze((y_max - y_min) * (x_max - x_min), [1]) + + +def height_width(boxlist, scope=None): + """Computes height and width of boxes in boxlist. + + Args: + boxlist: BoxList holding N boxes + scope: name scope. 
+ + Returns: + Height: A tensor with shape [N] representing box heights. + Width: A tensor with shape [N] representing box widths. + """ + with tf.name_scope(scope, 'HeightWidth'): + y_min, x_min, y_max, x_max = tf.split( + value=boxlist.get(), num_or_size_splits=4, axis=1) + return tf.squeeze(y_max - y_min, [1]), tf.squeeze(x_max - x_min, [1]) + + +def scale(boxlist, y_scale, x_scale, scope=None): + """scale box coordinates in x and y dimensions. + + Args: + boxlist: BoxList holding N boxes + y_scale: (float) scalar tensor + x_scale: (float) scalar tensor + scope: name scope. + + Returns: + boxlist: BoxList holding N boxes + """ + with tf.name_scope(scope, 'Scale'): + y_scale = tf.cast(y_scale, tf.float32) + x_scale = tf.cast(x_scale, tf.float32) + y_min, x_min, y_max, x_max = tf.split( + value=boxlist.get(), num_or_size_splits=4, axis=1) + y_min = y_scale * y_min + y_max = y_scale * y_max + x_min = x_scale * x_min + x_max = x_scale * x_max + scaled_boxlist = box_list.BoxList( + tf.concat([y_min, x_min, y_max, x_max], 1)) + return _copy_extra_fields(scaled_boxlist, boxlist) + + +def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None): + """Clip bounding boxes to a window. + + This op clips any input bounding boxes (represented by bounding box + corners) to a window, optionally filtering out boxes that do not + overlap at all with the window. + + Args: + boxlist: BoxList holding M_in boxes + window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max] + window to which the op should clip boxes. + filter_nonoverlapping: whether to filter out boxes that do not overlap at + all with the window. + scope: name scope. + + Returns: + a BoxList holding M_out boxes where M_out <= M_in + """ + with tf.name_scope(scope, 'ClipToWindow'): + y_min, x_min, y_max, x_max = tf.split( + value=boxlist.get(), num_or_size_splits=4, axis=1) + win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) + y_min_clipped = tf.maximum(tf.minimum(y_min, win_y_max), win_y_min) + y_max_clipped = tf.maximum(tf.minimum(y_max, win_y_max), win_y_min) + x_min_clipped = tf.maximum(tf.minimum(x_min, win_x_max), win_x_min) + x_max_clipped = tf.maximum(tf.minimum(x_max, win_x_max), win_x_min) + clipped = box_list.BoxList( + tf.concat([y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped], + 1)) + clipped = _copy_extra_fields(clipped, boxlist) + if filter_nonoverlapping: + areas = area(clipped) + nonzero_area_indices = tf.cast( + tf.reshape(tf.where(tf.greater(areas, 0.0)), [-1]), tf.int32) + clipped = gather(clipped, nonzero_area_indices) + return clipped + + +def prune_outside_window(boxlist, window, scope=None): + """Prunes bounding boxes that fall outside a given window. + + This function prunes bounding boxes that even partially fall outside the given + window. See also clip_to_window which only prunes bounding boxes that fall + completely outside the window, and clips any bounding boxes that partially + overflow. + + Args: + boxlist: a BoxList holding M_in boxes. + window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax] + of the window + scope: name scope. + + Returns: + pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in + valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes + in the input tensor. 
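+ Example (illustrative): with window [0., 0., 1., 1.], the box + [-0.1, 0.2, 0.8, 0.9] is pruned because its y_min lies outside the window, + whereas clip_to_window would keep it, clipped to [0., 0.2, 0.8, 0.9].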
+ """ + with tf.name_scope(scope, 'PruneOutsideWindow'): + y_min, x_min, y_max, x_max = tf.split( + value=boxlist.get(), num_or_size_splits=4, axis=1) + win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) + coordinate_violations = tf.concat([ + tf.less(y_min, win_y_min), tf.less(x_min, win_x_min), + tf.greater(y_max, win_y_max), tf.greater(x_max, win_x_max) + ], 1) + valid_indices = tf.reshape( + tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1]) + return gather(boxlist, valid_indices), valid_indices + + +def prune_completely_outside_window(boxlist, window, scope=None): + """Prunes bounding boxes that fall completely outside of the given window. + + The function clip_to_window prunes bounding boxes that fall + completely outside the window, but also clips any bounding boxes that + partially overflow. This function does not clip partially overflowing boxes. + + Args: + boxlist: a BoxList holding M_in boxes. + window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax] + of the window + scope: name scope. + + Returns: + pruned_boxlist: a new BoxList with all bounding boxes partially or fully in + the window. + valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes + in the input tensor. + """ + with tf.name_scope(scope, 'PruneCompleteleyOutsideWindow'): + y_min, x_min, y_max, x_max = tf.split( + value=boxlist.get(), num_or_size_splits=4, axis=1) + win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) + coordinate_violations = tf.concat([ + tf.greater_equal(y_min, win_y_max), tf.greater_equal(x_min, win_x_max), + tf.less_equal(y_max, win_y_min), tf.less_equal(x_max, win_x_min) + ], 1) + valid_indices = tf.reshape( + tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1]) + return gather(boxlist, valid_indices), valid_indices + + +def intersection(boxlist1, boxlist2, scope=None): + """Compute pairwise intersection areas between boxes. + + Args: + boxlist1: BoxList holding N boxes + boxlist2: BoxList holding M boxes + scope: name scope. + + Returns: + a tensor with shape [N, M] representing pairwise intersections + """ + with tf.name_scope(scope, 'Intersection'): + y_min1, x_min1, y_max1, x_max1 = tf.split( + value=boxlist1.get(), num_or_size_splits=4, axis=1) + y_min2, x_min2, y_max2, x_max2 = tf.split( + value=boxlist2.get(), num_or_size_splits=4, axis=1) + all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(y_max2)) + all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(y_min2)) + intersect_heights = tf.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin) + all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(x_max2)) + all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(x_min2)) + intersect_widths = tf.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin) + return intersect_heights * intersect_widths + + +def matched_intersection(boxlist1, boxlist2, scope=None): + """Compute intersection areas between corresponding boxes in two boxlists. + + Args: + boxlist1: BoxList holding N boxes + boxlist2: BoxList holding N boxes + scope: name scope. 
+ + Returns: + a tensor with shape [N] representing intersections between corresponding boxes + """ + with tf.name_scope(scope, 'MatchedIntersection'): + y_min1, x_min1, y_max1, x_max1 = tf.split( + value=boxlist1.get(), num_or_size_splits=4, axis=1) + y_min2, x_min2, y_max2, x_max2 = tf.split( + value=boxlist2.get(), num_or_size_splits=4, axis=1) + min_ymax = tf.minimum(y_max1, y_max2) + max_ymin = tf.maximum(y_min1, y_min2) + intersect_heights = tf.maximum(0.0, min_ymax - max_ymin) + min_xmax = tf.minimum(x_max1, x_max2) + max_xmin = tf.maximum(x_min1, x_min2) + intersect_widths = tf.maximum(0.0, min_xmax - max_xmin) + return tf.reshape(intersect_heights * intersect_widths, [-1]) + + +def iou(boxlist1, boxlist2, scope=None): + """Computes pairwise intersection-over-union between box collections. + + Args: + boxlist1: BoxList holding N boxes + boxlist2: BoxList holding M boxes + scope: name scope. + + Returns: + a tensor with shape [N, M] representing pairwise iou scores. + """ + with tf.name_scope(scope, 'IOU'): + intersections = intersection(boxlist1, boxlist2) + areas1 = area(boxlist1) + areas2 = area(boxlist2) + unions = ( + tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections) + return tf.where( + tf.equal(intersections, 0.0), + tf.zeros_like(intersections), tf.truediv(intersections, unions)) + + +def matched_iou(boxlist1, boxlist2, scope=None): + """Compute intersection-over-union between corresponding boxes in boxlists. + + Args: + boxlist1: BoxList holding N boxes + boxlist2: BoxList holding N boxes + scope: name scope. + + Returns: + a tensor with shape [N] representing iou scores between corresponding boxes. + """ + with tf.name_scope(scope, 'MatchedIOU'): + intersections = matched_intersection(boxlist1, boxlist2) + areas1 = area(boxlist1) + areas2 = area(boxlist2) + unions = areas1 + areas2 - intersections + return tf.where( + tf.equal(intersections, 0.0), + tf.zeros_like(intersections), tf.truediv(intersections, unions)) + + +def ioa(boxlist1, boxlist2, scope=None): + """Computes pairwise intersection-over-area between box collections. + + intersection-over-area (IOA) between two boxes box1 and box2 is defined as + their intersection area over box2's area. Note that ioa is not symmetric, + that is, ioa(box1, box2) != ioa(box2, box1). + + Args: + boxlist1: BoxList holding N boxes + boxlist2: BoxList holding M boxes + scope: name scope. + + Returns: + a tensor with shape [N, M] representing pairwise ioa scores. + """ + with tf.name_scope(scope, 'IOA'): + intersections = intersection(boxlist1, boxlist2) + areas = tf.expand_dims(area(boxlist2), 0) + return tf.truediv(intersections, areas) + + +def prune_non_overlapping_boxes( + boxlist1, boxlist2, min_overlap=0.0, scope=None): + """Prunes the boxes in boxlist1 that overlap less than min_overlap with boxlist2. + + For each box in boxlist1, we want its IOA to be at least min_overlap with + at least one of the boxes in boxlist2. If it is not, we remove it. + + Args: + boxlist1: BoxList holding N boxes. + boxlist2: BoxList holding M boxes. + min_overlap: Minimum required overlap between boxes, to count them as + overlapping. + scope: name scope. + + Returns: + new_boxlist1: A pruned boxlist with size [N', 4]. + keep_inds: A tensor with shape [N'] indexing kept bounding boxes in the + first input BoxList `boxlist1`.
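+ Example (illustrative): with min_overlap=0.5, a boxlist1 box [0., 0., 1., 1.] + survives if boxlist2 contains [0., 0., 0.5, 1.], since their intersection 0.5 + over the boxlist1 box's area 1.0 gives an IOA of exactly 0.5.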
+ """ + with tf.name_scope(scope, 'PruneNonOverlappingBoxes'): + ioa_ = ioa(boxlist2, boxlist1) # [M, N] tensor + ioa_ = tf.reduce_max(ioa_, reduction_indices=[0]) # [N] tensor + keep_bool = tf.greater_equal(ioa_, tf.constant(min_overlap)) + keep_inds = tf.squeeze(tf.where(keep_bool), axis=[1]) + new_boxlist1 = gather(boxlist1, keep_inds) + return new_boxlist1, keep_inds + + +def prune_small_boxes(boxlist, min_side, scope=None): + """Prunes small boxes in the boxlist which have a side smaller than min_side. + + Args: + boxlist: BoxList holding N boxes. + min_side: Minimum width AND height of box to survive pruning. + scope: name scope. + + Returns: + A pruned boxlist. + """ + with tf.name_scope(scope, 'PruneSmallBoxes'): + height, width = height_width(boxlist) + is_valid = tf.logical_and(tf.greater_equal(width, min_side), + tf.greater_equal(height, min_side)) + return gather(boxlist, tf.reshape(tf.where(is_valid), [-1])) + + +def change_coordinate_frame(boxlist, window, scope=None): + """Change coordinate frame of the boxlist to be relative to window's frame. + + Given a window of the form [ymin, xmin, ymax, xmax], + changes bounding box coordinates from boxlist to be relative to this window + (e.g., the min corner maps to (0,0) and the max corner maps to (1,1)). + + An example use case is data augmentation: where we are given groundtruth + boxes (boxlist) and would like to randomly crop the image to some + window (window). In this case we need to change the coordinate frame of + each groundtruth box to be relative to this new window. + + Args: + boxlist: A BoxList object holding N boxes. + window: A rank 1 tensor [4]. + scope: name scope. + + Returns: + Returns a BoxList object with N boxes. + """ + with tf.name_scope(scope, 'ChangeCoordinateFrame'): + win_height = window[2] - window[0] + win_width = window[3] - window[1] + boxlist_new = scale(box_list.BoxList( + boxlist.get() - [window[0], window[1], window[0], window[1]]), + 1.0 / win_height, 1.0 / win_width) + boxlist_new = _copy_extra_fields(boxlist_new, boxlist) + return boxlist_new + + +def sq_dist(boxlist1, boxlist2, scope=None): + """Computes the pairwise squared distances between box corners. + + This op treats each box as if it were a point in a 4d Euclidean space and + computes pairwise squared distances. + + Mathematically, we are given two matrices of box coordinates X and Y, + where X(i,:) is the i'th row of X, containing the 4 numbers defining the + corners of the i'th box in boxlist1. Similarly Y(j,:) corresponds to + boxlist2. We compute + Z(i,j) = ||X(i,:) - Y(j,:)||^2 + = ||X(i,:)||^2 + ||Y(j,:)||^2 - 2 X(i,:)' * Y(j,:), + + Args: + boxlist1: BoxList holding N boxes + boxlist2: BoxList holding M boxes + scope: name scope. + + Returns: + a tensor with shape [N, M] representing pairwise distances + """ + with tf.name_scope(scope, 'SqDist'): + sqnorm1 = tf.reduce_sum(tf.square(boxlist1.get()), 1, keep_dims=True) + sqnorm2 = tf.reduce_sum(tf.square(boxlist2.get()), 1, keep_dims=True) + innerprod = tf.matmul(boxlist1.get(), boxlist2.get(), + transpose_a=False, transpose_b=True) + return sqnorm1 + tf.transpose(sqnorm2) - 2.0 * innerprod + + +def boolean_mask(boxlist, indicator, fields=None, scope=None, + use_static_shapes=False, indicator_sum=None): + """Select boxes from BoxList according to indicator and return new BoxList. + + `boolean_mask` returns the subset of boxes that are marked as "True" by the + indicator tensor. 
By default, `boolean_mask` returns boxes corresponding to + the input index list, as well as all additional fields stored in the boxlist + (indexing into the first dimension). However one can optionally only draw + from a subset of fields. + + Args: + boxlist: BoxList holding N boxes + indicator: a rank-1 boolean tensor + fields: (optional) list of fields to also gather from. If None (default), + all fields are gathered from. Pass an empty fields list to only gather + the box coordinates. + scope: name scope. + use_static_shapes: Whether to use an implementation with static shape + guarantees. + indicator_sum: An integer containing the sum of `indicator` vector. Only + required if `use_static_shapes` is True. + + Returns: + subboxlist: a BoxList corresponding to the subset of the input BoxList + specified by indicator + Raises: + ValueError: if `indicator` is not a rank-1 boolean tensor. + """ + with tf.name_scope(scope, 'BooleanMask'): + if indicator.shape.ndims != 1: + raise ValueError('indicator should have rank 1') + if indicator.dtype != tf.bool: + raise ValueError('indicator should be a boolean tensor') + if use_static_shapes: + if not (indicator_sum and isinstance(indicator_sum, int)): + raise ValueError('`indicator_sum` must be of type int') + selected_positions = tf.cast(indicator, dtype=tf.float32) + indexed_positions = tf.cast( + tf.multiply( + tf.cumsum(selected_positions), selected_positions), + dtype=tf.int32) + one_hot_selector = tf.one_hot( + indexed_positions - 1, indicator_sum, dtype=tf.float32) + sampled_indices = tf.cast( + tf.tensordot( + tf.cast(tf.range(tf.shape(indicator)[0]), dtype=tf.float32), + one_hot_selector, + axes=[0, 0]), + dtype=tf.int32) + return gather(boxlist, sampled_indices, use_static_shapes=True) + else: + subboxlist = box_list.BoxList(tf.boolean_mask(boxlist.get(), indicator)) + if fields is None: + fields = boxlist.get_extra_fields() + for field in fields: + if not boxlist.has_field(field): + raise ValueError('boxlist must contain all specified fields') + subfieldlist = tf.boolean_mask(boxlist.get_field(field), indicator) + subboxlist.add_field(field, subfieldlist) + return subboxlist + + +def gather(boxlist, indices, fields=None, scope=None, use_static_shapes=False): + """Gather boxes from BoxList according to indices and return new BoxList. + + By default, `gather` returns boxes corresponding to the input index list, as + well as all additional fields stored in the boxlist (indexing into the + first dimension). However one can optionally only gather from a + subset of fields. + + Args: + boxlist: BoxList holding N boxes + indices: a rank-1 tensor of type int32 / int64 + fields: (optional) list of fields to also gather from. If None (default), + all fields are gathered from. Pass an empty fields list to only gather + the box coordinates. + scope: name scope. + use_static_shapes: Whether to use an implementation with static shape + guarantees.
+
+  Returns:
+    subboxlist: a BoxList corresponding to the subset of the input BoxList
+      specified by indices
+  Raises:
+    ValueError: if specified field is not contained in boxlist or if the
+      indices are not of type int32 / int64
+  """
+  with tf.name_scope(scope, 'Gather'):
+    if len(indices.shape.as_list()) != 1:
+      raise ValueError('indices should have rank 1')
+    if indices.dtype != tf.int32 and indices.dtype != tf.int64:
+      raise ValueError('indices should be an int32 / int64 tensor')
+    gather_op = tf.gather
+    if use_static_shapes:
+      gather_op = ops.matmul_gather_on_zeroth_axis
+    subboxlist = box_list.BoxList(gather_op(boxlist.get(), indices))
+    if fields is None:
+      fields = boxlist.get_extra_fields()
+    # Concatenate rather than append in place so the caller's list is not
+    # mutated as a side effect.
+    fields = fields + ['boxes']
+    for field in fields:
+      if not boxlist.has_field(field):
+        raise ValueError('boxlist must contain all specified fields')
+      subfieldlist = gather_op(boxlist.get_field(field), indices)
+      subboxlist.add_field(field, subfieldlist)
+    return subboxlist
+
+
+def concatenate(boxlists, fields=None, scope=None):
+  """Concatenate list of BoxLists.
+
+  This op concatenates a list of input BoxLists into a larger BoxList. It also
+  handles concatenation of BoxList fields as long as the field tensor shapes
+  are equal except for the first dimension.
+
+  Args:
+    boxlists: list of BoxList objects
+    fields: optional list of fields to also concatenate. By default, all
+      fields from the first BoxList in the list are included in the
+      concatenation.
+    scope: name scope.
+
+  Returns:
+    a BoxList with number of boxes equal to
+      sum([boxlist.num_boxes() for boxlist in boxlists])
+  Raises:
+    ValueError: if boxlists is invalid (i.e., is not a list, is empty, or
+      contains non BoxList objects), or if requested fields are not contained in
+      all boxlists
+  """
+  with tf.name_scope(scope, 'Concatenate'):
+    if not isinstance(boxlists, list):
+      raise ValueError('boxlists should be a list')
+    if not boxlists:
+      raise ValueError('boxlists should have nonzero length')
+    for boxlist in boxlists:
+      if not isinstance(boxlist, box_list.BoxList):
+        raise ValueError('all elements of boxlists should be BoxList objects')
+    concatenated = box_list.BoxList(
+        tf.concat([boxlist.get() for boxlist in boxlists], 0))
+    if fields is None:
+      fields = boxlists[0].get_extra_fields()
+    for field in fields:
+      first_field_shape = boxlists[0].get_field(field).get_shape().as_list()
+      first_field_shape[0] = -1
+      if None in first_field_shape:
+        raise ValueError('field %s must have fully defined shape except for the'
+                         ' 0th dimension.' % field)
+      for boxlist in boxlists:
+        if not boxlist.has_field(field):
+          raise ValueError('boxlist must contain all requested fields')
+        field_shape = boxlist.get_field(field).get_shape().as_list()
+        field_shape[0] = -1
+        if field_shape != first_field_shape:
+          raise ValueError('field %s must have same shape for all boxlists '
+                           'except for the 0th dimension.' % field)
+      concatenated_field = tf.concat(
+          [boxlist.get_field(field) for boxlist in boxlists], 0)
+      concatenated.add_field(field, concatenated_field)
+    return concatenated
+
+
+def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None):
+  """Sort boxes and associated fields according to a scalar field.
+
+  A common use case is reordering the boxes according to descending scores.
+
+  Args:
+    boxlist: BoxList holding N boxes.
+    field: A BoxList field for sorting and reordering the BoxList.
+    order: (Optional) descend or ascend. Default is descend.
+    scope: name scope.
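+
+  Example (an illustrative sketch; the scores below are made up):
+
+    boxes = box_list.BoxList(tf.constant([[0., 0., 1., 1.],
+                                          [0., 0., .5, .5]]))
+    boxes.add_field('scores', tf.constant([.3, .9]))
+    reordered = sort_by_field(boxes, 'scores')
+    # reordered.get_field('scores') evaluates to [.9, .3].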
+ + Returns: + sorted_boxlist: A sorted BoxList with the field in the specified order. + + Raises: + ValueError: if specified field does not exist + ValueError: if the order is not either descend or ascend + """ + with tf.name_scope(scope, 'SortByField'): + if order != SortOrder.descend and order != SortOrder.ascend: + raise ValueError('Invalid sort order') + + field_to_sort = boxlist.get_field(field) + if len(field_to_sort.shape.as_list()) != 1: + raise ValueError('Field should have rank 1') + + num_boxes = boxlist.num_boxes() + num_entries = tf.size(field_to_sort) + length_assert = tf.Assert( + tf.equal(num_boxes, num_entries), + ['Incorrect field size: actual vs expected.', num_entries, num_boxes]) + + with tf.control_dependencies([length_assert]): + _, sorted_indices = tf.nn.top_k(field_to_sort, num_boxes, sorted=True) + + if order == SortOrder.ascend: + sorted_indices = tf.reverse_v2(sorted_indices, [0]) + + return gather(boxlist, sorted_indices) + + +def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None): + """Overlay bounding box list on image. + + Currently this visualization plots a 1 pixel thick red bounding box on top + of the image. Note that tf.image.draw_bounding_boxes essentially is + 1 indexed. + + Args: + image: an image tensor with shape [height, width, 3] + boxlist: a BoxList + normalized: (boolean) specify whether corners are to be interpreted + as absolute coordinates in image space or normalized with respect to the + image size. + scope: name scope. + + Returns: + image_and_boxes: an image tensor with shape [height, width, 3] + """ + with tf.name_scope(scope, 'VisualizeBoxesInImage'): + if not normalized: + height, width, _ = tf.unstack(tf.shape(image)) + boxlist = scale(boxlist, + 1.0 / tf.cast(height, tf.float32), + 1.0 / tf.cast(width, tf.float32)) + corners = tf.expand_dims(boxlist.get(), 0) + image = tf.expand_dims(image, 0) + return tf.squeeze(tf.image.draw_bounding_boxes(image, corners), [0]) + + +def filter_field_value_equals(boxlist, field, value, scope=None): + """Filter to keep only boxes with field entries equal to the given value. + + Args: + boxlist: BoxList holding N boxes. + field: field name for filtering. + value: scalar value. + scope: name scope. + + Returns: + a BoxList holding M boxes where M <= N + + Raises: + ValueError: if boxlist not a BoxList object or if it does not have + the specified field. + """ + with tf.name_scope(scope, 'FilterFieldValueEquals'): + if not isinstance(boxlist, box_list.BoxList): + raise ValueError('boxlist must be a BoxList') + if not boxlist.has_field(field): + raise ValueError('boxlist must contain the specified field') + filter_field = boxlist.get_field(field) + gather_index = tf.reshape(tf.where(tf.equal(filter_field, value)), [-1]) + return gather(boxlist, gather_index) + + +def filter_greater_than(boxlist, thresh, scope=None): + """Filter to keep only boxes with score exceeding a given threshold. + + This op keeps the collection of boxes whose corresponding scores are + greater than the input threshold. + + TODO(jonathanhuang): Change function name to filter_scores_greater_than + + Args: + boxlist: BoxList holding N boxes. Must contain a 'scores' field + representing detection scores. + thresh: scalar threshold + scope: name scope. 
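+
+  Example (an illustrative sketch; the scores below are made up):
+
+    boxes = box_list.BoxList(tf.constant([[0., 0., 1., 1.],
+                                          [0., 0., .5, .5]]))
+    boxes.add_field('scores', tf.constant([.9, .3]))
+    filtered = filter_greater_than(boxes, thresh=0.5)
+    # filtered holds only the first box, since .9 > 0.5 but .3 is not.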
+
+  Returns:
+    a BoxList holding M boxes where M <= N
+
+  Raises:
+    ValueError: if boxlist is not a BoxList object or if it does not
+      have a scores field
+  """
+  with tf.name_scope(scope, 'FilterGreaterThan'):
+    if not isinstance(boxlist, box_list.BoxList):
+      raise ValueError('boxlist must be a BoxList')
+    if not boxlist.has_field('scores'):
+      raise ValueError('input boxlist must have \'scores\' field')
+    scores = boxlist.get_field('scores')
+    if len(scores.shape.as_list()) > 2:
+      raise ValueError('Scores should have rank 1 or 2')
+    if len(scores.shape.as_list()) == 2 and scores.shape.as_list()[1] != 1:
+      raise ValueError('Scores should have rank 1 or have shape '
+                       'consistent with [None, 1]')
+    high_score_indices = tf.cast(tf.reshape(
+        tf.where(tf.greater(scores, thresh)),
+        [-1]), tf.int32)
+    return gather(boxlist, high_score_indices)
+
+
+def non_max_suppression(boxlist, thresh, max_output_size, scope=None):
+  """Non maximum suppression.
+
+  This op greedily selects a subset of detection bounding boxes, pruning
+  away boxes that have high IOU (intersection over union) overlap (> thresh)
+  with already selected boxes. Note that this only works for a single class ---
+  to apply NMS to multi-class predictions, use MultiClassNonMaxSuppression.
+
+  Args:
+    boxlist: BoxList holding N boxes. Must contain a 'scores' field
+      representing detection scores.
+    thresh: scalar threshold
+    max_output_size: maximum number of retained boxes
+    scope: name scope.
+
+  Returns:
+    a BoxList holding M boxes where M <= max_output_size
+  Raises:
+    ValueError: if thresh is not in [0, 1]
+  """
+  with tf.name_scope(scope, 'NonMaxSuppression'):
+    if not 0 <= thresh <= 1.0:
+      raise ValueError('thresh must be between 0 and 1')
+    if not isinstance(boxlist, box_list.BoxList):
+      raise ValueError('boxlist must be a BoxList')
+    if not boxlist.has_field('scores'):
+      raise ValueError('input boxlist must have \'scores\' field')
+    selected_indices = tf.image.non_max_suppression(
+        boxlist.get(), boxlist.get_field('scores'),
+        max_output_size, iou_threshold=thresh)
+    return gather(boxlist, selected_indices)
+
+
+def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
+  """Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to.
+
+  Args:
+    boxlist_to_copy_to: BoxList to which extra fields are copied.
+    boxlist_to_copy_from: BoxList from which fields are copied.
+
+  Returns:
+    boxlist_to_copy_to with extra fields.
+  """
+  for field in boxlist_to_copy_from.get_extra_fields():
+    boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field))
+  return boxlist_to_copy_to
+
+
+def to_normalized_coordinates(boxlist, height, width,
+                              check_range=True, scope=None):
+  """Converts absolute box coordinates to normalized coordinates in [0, 1].
+
+  Usually one uses the dynamic shape of the image or conv-layer tensor:
+    boxlist = box_list_ops.to_normalized_coordinates(boxlist,
+                                                     tf.shape(images)[1],
+                                                     tf.shape(images)[2])
+
+  This function raises an assertion failed error at graph execution time when
+  the maximum coordinate is smaller than 1.01 (which means that coordinates are
+  already normalized). The value 1.01 is to deal with small rounding errors.
+
+  Args:
+    boxlist: BoxList with coordinates in terms of pixel-locations.
+    height: Maximum value for height of absolute box coordinates.
+    width: Maximum value for width of absolute box coordinates.
+    check_range: If True, checks if the coordinates are normalized or not.
+    scope: name scope.
+
+  Returns:
+    boxlist with normalized coordinates in [0, 1].
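+
+  Example (an illustrative sketch, assuming a 100x200 pixel image):
+
+    boxlist = box_list.BoxList(tf.constant([[10., 20., 90., 180.]]))
+    normalized = to_normalized_coordinates(boxlist, 100, 200)
+    # normalized.get() evaluates to [[0.1, 0.1, 0.9, 0.9]].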
+ """ + with tf.name_scope(scope, 'ToNormalizedCoordinates'): + height = tf.cast(height, tf.float32) + width = tf.cast(width, tf.float32) + + if check_range: + max_val = tf.reduce_max(boxlist.get()) + max_assert = tf.Assert(tf.greater(max_val, 1.01), + ['max value is lower than 1.01: ', max_val]) + with tf.control_dependencies([max_assert]): + width = tf.identity(width) + + return scale(boxlist, 1 / height, 1 / width) + + +def to_absolute_coordinates(boxlist, + height, + width, + check_range=True, + maximum_normalized_coordinate=1.1, + scope=None): + """Converts normalized box coordinates to absolute pixel coordinates. + + This function raises an assertion failed error when the maximum box coordinate + value is larger than maximum_normalized_coordinate (in which case coordinates + are already absolute). + + Args: + boxlist: BoxList with coordinates in range [0, 1]. + height: Maximum value for height of absolute box coordinates. + width: Maximum value for width of absolute box coordinates. + check_range: If True, checks if the coordinates are normalized or not. + maximum_normalized_coordinate: Maximum coordinate value to be considered + as normalized, default to 1.1. + scope: name scope. + + Returns: + boxlist with absolute coordinates in terms of the image size. + + """ + with tf.name_scope(scope, 'ToAbsoluteCoordinates'): + height = tf.cast(height, tf.float32) + width = tf.cast(width, tf.float32) + + # Ensure range of input boxes is correct. + if check_range: + box_maximum = tf.reduce_max(boxlist.get()) + max_assert = tf.Assert( + tf.greater_equal(maximum_normalized_coordinate, box_maximum), + ['maximum box coordinate value is larger ' + 'than %f: ' % maximum_normalized_coordinate, box_maximum]) + with tf.control_dependencies([max_assert]): + width = tf.identity(width) + + return scale(boxlist, height, width) + + +def refine_boxes_multi_class(pool_boxes, + num_classes, + nms_iou_thresh, + nms_max_detections, + voting_iou_thresh=0.5): + """Refines a pool of boxes using non max suppression and box voting. + + Box refinement is done independently for each class. + + Args: + pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must + have a rank 1 'scores' field and a rank 1 'classes' field. + num_classes: (int scalar) Number of classes. + nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS). + nms_max_detections: (int scalar) maximum output size for NMS. + voting_iou_thresh: (float scalar) iou threshold for box voting. + + Returns: + BoxList of refined boxes. + + Raises: + ValueError: if + a) nms_iou_thresh or voting_iou_thresh is not in [0, 1]. + b) pool_boxes is not a BoxList. + c) pool_boxes does not have a scores and classes field. 
+ """ + if not 0.0 <= nms_iou_thresh <= 1.0: + raise ValueError('nms_iou_thresh must be between 0 and 1') + if not 0.0 <= voting_iou_thresh <= 1.0: + raise ValueError('voting_iou_thresh must be between 0 and 1') + if not isinstance(pool_boxes, box_list.BoxList): + raise ValueError('pool_boxes must be a BoxList') + if not pool_boxes.has_field('scores'): + raise ValueError('pool_boxes must have a \'scores\' field') + if not pool_boxes.has_field('classes'): + raise ValueError('pool_boxes must have a \'classes\' field') + + refined_boxes = [] + for i in range(num_classes): + boxes_class = filter_field_value_equals(pool_boxes, 'classes', i) + refined_boxes_class = refine_boxes(boxes_class, nms_iou_thresh, + nms_max_detections, voting_iou_thresh) + refined_boxes.append(refined_boxes_class) + return sort_by_field(concatenate(refined_boxes), 'scores') + + +def refine_boxes(pool_boxes, + nms_iou_thresh, + nms_max_detections, + voting_iou_thresh=0.5): + """Refines a pool of boxes using non max suppression and box voting. + + Args: + pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must + have a rank 1 'scores' field. + nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS). + nms_max_detections: (int scalar) maximum output size for NMS. + voting_iou_thresh: (float scalar) iou threshold for box voting. + + Returns: + BoxList of refined boxes. + + Raises: + ValueError: if + a) nms_iou_thresh or voting_iou_thresh is not in [0, 1]. + b) pool_boxes is not a BoxList. + c) pool_boxes does not have a scores field. + """ + if not 0.0 <= nms_iou_thresh <= 1.0: + raise ValueError('nms_iou_thresh must be between 0 and 1') + if not 0.0 <= voting_iou_thresh <= 1.0: + raise ValueError('voting_iou_thresh must be between 0 and 1') + if not isinstance(pool_boxes, box_list.BoxList): + raise ValueError('pool_boxes must be a BoxList') + if not pool_boxes.has_field('scores'): + raise ValueError('pool_boxes must have a \'scores\' field') + + nms_boxes = non_max_suppression( + pool_boxes, nms_iou_thresh, nms_max_detections) + return box_voting(nms_boxes, pool_boxes, voting_iou_thresh) + + +def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5): + """Performs box voting as described in S. Gidaris and N. Komodakis, ICCV 2015. + + Performs box voting as described in 'Object detection via a multi-region & + semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For + each box 'B' in selected_boxes, we find the set 'S' of boxes in pool_boxes + with iou overlap >= iou_thresh. The location of B is set to the weighted + average location of boxes in S (scores are used for weighting). And the score + of B is set to the average score of boxes in S. + + Args: + selected_boxes: BoxList containing a subset of boxes in pool_boxes. These + boxes are usually selected from pool_boxes using non max suppression. + pool_boxes: BoxList containing a set of (possibly redundant) boxes. + iou_thresh: (float scalar) iou threshold for matching boxes in + selected_boxes and pool_boxes. + + Returns: + BoxList containing averaged locations and scores for each box in + selected_boxes. + + Raises: + ValueError: if + a) selected_boxes or pool_boxes is not a BoxList. + b) if iou_thresh is not in [0, 1]. + c) pool_boxes does not have a scores field. 
+ """ + if not 0.0 <= iou_thresh <= 1.0: + raise ValueError('iou_thresh must be between 0 and 1') + if not isinstance(selected_boxes, box_list.BoxList): + raise ValueError('selected_boxes must be a BoxList') + if not isinstance(pool_boxes, box_list.BoxList): + raise ValueError('pool_boxes must be a BoxList') + if not pool_boxes.has_field('scores'): + raise ValueError('pool_boxes must have a \'scores\' field') + + iou_ = iou(selected_boxes, pool_boxes) + match_indicator = tf.cast(tf.greater(iou_, iou_thresh), dtype=tf.float32) + num_matches = tf.reduce_sum(match_indicator, 1) + # TODO(kbanoop): Handle the case where some boxes in selected_boxes do not + # match to any boxes in pool_boxes. For such boxes without any matches, we + # should return the original boxes without voting. + match_assert = tf.Assert( + tf.reduce_all(tf.greater(num_matches, 0)), + ['Each box in selected_boxes must match with at least one box ' + 'in pool_boxes.']) + + scores = tf.expand_dims(pool_boxes.get_field('scores'), 1) + scores_assert = tf.Assert( + tf.reduce_all(tf.greater_equal(scores, 0)), + ['Scores must be non negative.']) + + with tf.control_dependencies([scores_assert, match_assert]): + sum_scores = tf.matmul(match_indicator, scores) + averaged_scores = tf.reshape(sum_scores, [-1]) / num_matches + + box_locations = tf.matmul(match_indicator, + pool_boxes.get() * scores) / sum_scores + averaged_boxes = box_list.BoxList(box_locations) + _copy_extra_fields(averaged_boxes, selected_boxes) + averaged_boxes.add_field('scores', averaged_scores) + return averaged_boxes + + +def pad_or_clip_box_list(boxlist, num_boxes, scope=None): + """Pads or clips all fields of a BoxList. + + Args: + boxlist: A BoxList with arbitrary of number of boxes. + num_boxes: First num_boxes in boxlist are kept. + The fields are zero-padded if num_boxes is bigger than the + actual number of boxes. + scope: name scope. + + Returns: + BoxList with all fields padded or clipped. + """ + with tf.name_scope(scope, 'PadOrClipBoxList'): + subboxlist = box_list.BoxList(shape_utils.pad_or_clip_tensor( + boxlist.get(), num_boxes)) + for field in boxlist.get_extra_fields(): + subfield = shape_utils.pad_or_clip_tensor( + boxlist.get_field(field), num_boxes) + subboxlist.add_field(field, subfield) + return subboxlist + + +def select_random_box(boxlist, + default_box=None, + seed=None, + scope=None): + """Selects a random bounding box from a `BoxList`. + + Args: + boxlist: A BoxList. + default_box: A [1, 4] float32 tensor. If no boxes are present in `boxlist`, + this default box will be returned. If None, will use a default box of + [[-1., -1., -1., -1.]]. + seed: Random seed. + scope: Name scope. + + Returns: + bbox: A [1, 4] tensor with a random bounding box. + valid: A bool tensor indicating whether a valid bounding box is returned + (True) or whether the default box is returned (False). 
+ """ + with tf.name_scope(scope, 'SelectRandomBox'): + bboxes = boxlist.get() + combined_shape = shape_utils.combined_static_and_dynamic_shape(bboxes) + number_of_boxes = combined_shape[0] + default_box = default_box or tf.constant([[-1., -1., -1., -1.]]) + + def select_box(): + random_index = tf.random_uniform([], + maxval=number_of_boxes, + dtype=tf.int32, + seed=seed) + return tf.expand_dims(bboxes[random_index], axis=0), tf.constant(True) + + return tf.cond( + tf.greater_equal(number_of_boxes, 1), + true_fn=select_box, + false_fn=lambda: (default_box, tf.constant(False))) + + +def get_minimal_coverage_box(boxlist, + default_box=None, + scope=None): + """Creates a single bounding box which covers all boxes in the boxlist. + + Args: + boxlist: A Boxlist. + default_box: A [1, 4] float32 tensor. If no boxes are present in `boxlist`, + this default box will be returned. If None, will use a default box of + [[0., 0., 1., 1.]]. + scope: Name scope. + + Returns: + A [1, 4] float32 tensor with a bounding box that tightly covers all the + boxes in the box list. If the boxlist does not contain any boxes, the + default box is returned. + """ + with tf.name_scope(scope, 'CreateCoverageBox'): + num_boxes = boxlist.num_boxes() + + def coverage_box(bboxes): + y_min, x_min, y_max, x_max = tf.split( + value=bboxes, num_or_size_splits=4, axis=1) + y_min_coverage = tf.reduce_min(y_min, axis=0) + x_min_coverage = tf.reduce_min(x_min, axis=0) + y_max_coverage = tf.reduce_max(y_max, axis=0) + x_max_coverage = tf.reduce_max(x_max, axis=0) + return tf.stack( + [y_min_coverage, x_min_coverage, y_max_coverage, x_max_coverage], + axis=1) + + default_box = default_box or tf.constant([[0., 0., 1., 1.]]) + return tf.cond( + tf.greater_equal(num_boxes, 1), + true_fn=lambda: coverage_box(boxlist.get()), + false_fn=lambda: default_box) + + +def sample_boxes_by_jittering(boxlist, + num_boxes_to_sample, + stddev=0.1, + scope=None): + """Samples num_boxes_to_sample boxes by jittering around boxlist boxes. + + It is possible that this function might generate boxes with size 0. The larger + the stddev, this is more probable. For a small stddev of 0.1 this probability + is very small. + + Args: + boxlist: A boxlist containing N boxes in normalized coordinates. + num_boxes_to_sample: A positive integer containing the number of boxes to + sample. + stddev: Standard deviation. This is used to draw random offsets for the + box corners from a normal distribution. The offset is multiplied by the + box size so will be larger in terms of pixels for larger boxes. + scope: Name scope. + + Returns: + sampled_boxlist: A boxlist containing num_boxes_to_sample boxes in + normalized coordinates. 
+ """ + with tf.name_scope(scope, 'SampleBoxesByJittering'): + num_boxes = boxlist.num_boxes() + box_indices = tf.random_uniform( + [num_boxes_to_sample], + minval=0, + maxval=num_boxes, + dtype=tf.int32) + sampled_boxes = tf.gather(boxlist.get(), box_indices) + sampled_boxes_height = sampled_boxes[:, 2] - sampled_boxes[:, 0] + sampled_boxes_width = sampled_boxes[:, 3] - sampled_boxes[:, 1] + rand_miny_gaussian = tf.random_normal([num_boxes_to_sample], stddev=stddev) + rand_minx_gaussian = tf.random_normal([num_boxes_to_sample], stddev=stddev) + rand_maxy_gaussian = tf.random_normal([num_boxes_to_sample], stddev=stddev) + rand_maxx_gaussian = tf.random_normal([num_boxes_to_sample], stddev=stddev) + miny = rand_miny_gaussian * sampled_boxes_height + sampled_boxes[:, 0] + minx = rand_minx_gaussian * sampled_boxes_width + sampled_boxes[:, 1] + maxy = rand_maxy_gaussian * sampled_boxes_height + sampled_boxes[:, 2] + maxx = rand_maxx_gaussian * sampled_boxes_width + sampled_boxes[:, 3] + maxy = tf.maximum(miny, maxy) + maxx = tf.maximum(minx, maxx) + sampled_boxes = tf.stack([miny, minx, maxy, maxx], axis=1) + sampled_boxes = tf.maximum(tf.minimum(sampled_boxes, 1.0), 0.0) + return box_list.BoxList(sampled_boxes) diff --git a/core/box_list_ops_test.py b/core/box_list_ops_test.py new file mode 100644 index 0000000..efe2991 --- /dev/null +++ b/core/box_list_ops_test.py @@ -0,0 +1,1108 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for object_detection.core.box_list_ops.""" +import numpy as np +import tensorflow as tf + +from object_detection.core import box_list +from object_detection.core import box_list_ops +from object_detection.utils import test_case + + +class BoxListOpsTest(test_case.TestCase): + """Tests for common bounding box operations.""" + + def test_area(self): + corners = tf.constant([[0.0, 0.0, 10.0, 20.0], [1.0, 2.0, 3.0, 4.0]]) + exp_output = [200.0, 4.0] + boxes = box_list.BoxList(corners) + areas = box_list_ops.area(boxes) + with self.test_session() as sess: + areas_output = sess.run(areas) + self.assertAllClose(areas_output, exp_output) + + def test_height_width(self): + corners = tf.constant([[0.0, 0.0, 10.0, 20.0], [1.0, 2.0, 3.0, 4.0]]) + exp_output_heights = [10., 2.] + exp_output_widths = [20., 2.] 
+ boxes = box_list.BoxList(corners) + heights, widths = box_list_ops.height_width(boxes) + with self.test_session() as sess: + output_heights, output_widths = sess.run([heights, widths]) + self.assertAllClose(output_heights, exp_output_heights) + self.assertAllClose(output_widths, exp_output_widths) + + def test_scale(self): + corners = tf.constant([[0, 0, 100, 200], [50, 120, 100, 140]], + dtype=tf.float32) + boxes = box_list.BoxList(corners) + boxes.add_field('extra_data', tf.constant([[1], [2]])) + + y_scale = tf.constant(1.0/100) + x_scale = tf.constant(1.0/200) + scaled_boxes = box_list_ops.scale(boxes, y_scale, x_scale) + exp_output = [[0, 0, 1, 1], [0.5, 0.6, 1.0, 0.7]] + with self.test_session() as sess: + scaled_corners_out = sess.run(scaled_boxes.get()) + self.assertAllClose(scaled_corners_out, exp_output) + extra_data_out = sess.run(scaled_boxes.get_field('extra_data')) + self.assertAllEqual(extra_data_out, [[1], [2]]) + + def test_clip_to_window_filter_boxes_which_fall_outside_the_window( + self): + window = tf.constant([0, 0, 9, 14], tf.float32) + corners = tf.constant([[5.0, 5.0, 6.0, 6.0], + [-1.0, -2.0, 4.0, 5.0], + [2.0, 3.0, 5.0, 9.0], + [0.0, 0.0, 9.0, 14.0], + [-100.0, -100.0, 300.0, 600.0], + [-10.0, -10.0, -9.0, -9.0]]) + boxes = box_list.BoxList(corners) + boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]])) + exp_output = [[5.0, 5.0, 6.0, 6.0], [0.0, 0.0, 4.0, 5.0], + [2.0, 3.0, 5.0, 9.0], [0.0, 0.0, 9.0, 14.0], + [0.0, 0.0, 9.0, 14.0]] + pruned = box_list_ops.clip_to_window( + boxes, window, filter_nonoverlapping=True) + with self.test_session() as sess: + pruned_output = sess.run(pruned.get()) + self.assertAllClose(pruned_output, exp_output) + extra_data_out = sess.run(pruned.get_field('extra_data')) + self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [5]]) + + def test_clip_to_window_without_filtering_boxes_which_fall_outside_the_window( + self): + window = tf.constant([0, 0, 9, 14], tf.float32) + corners = tf.constant([[5.0, 5.0, 6.0, 6.0], + [-1.0, -2.0, 4.0, 5.0], + [2.0, 3.0, 5.0, 9.0], + [0.0, 0.0, 9.0, 14.0], + [-100.0, -100.0, 300.0, 600.0], + [-10.0, -10.0, -9.0, -9.0]]) + boxes = box_list.BoxList(corners) + boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]])) + exp_output = [[5.0, 5.0, 6.0, 6.0], [0.0, 0.0, 4.0, 5.0], + [2.0, 3.0, 5.0, 9.0], [0.0, 0.0, 9.0, 14.0], + [0.0, 0.0, 9.0, 14.0], [0.0, 0.0, 0.0, 0.0]] + pruned = box_list_ops.clip_to_window( + boxes, window, filter_nonoverlapping=False) + with self.test_session() as sess: + pruned_output = sess.run(pruned.get()) + self.assertAllClose(pruned_output, exp_output) + extra_data_out = sess.run(pruned.get_field('extra_data')) + self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [5], [6]]) + + def test_prune_outside_window_filters_boxes_which_fall_outside_the_window( + self): + window = tf.constant([0, 0, 9, 14], tf.float32) + corners = tf.constant([[5.0, 5.0, 6.0, 6.0], + [-1.0, -2.0, 4.0, 5.0], + [2.0, 3.0, 5.0, 9.0], + [0.0, 0.0, 9.0, 14.0], + [-10.0, -10.0, -9.0, -9.0], + [-100.0, -100.0, 300.0, 600.0]]) + boxes = box_list.BoxList(corners) + boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]])) + exp_output = [[5.0, 5.0, 6.0, 6.0], + [2.0, 3.0, 5.0, 9.0], + [0.0, 0.0, 9.0, 14.0]] + pruned, keep_indices = box_list_ops.prune_outside_window(boxes, window) + with self.test_session() as sess: + pruned_output = sess.run(pruned.get()) + self.assertAllClose(pruned_output, exp_output) + keep_indices_out = sess.run(keep_indices) + 
self.assertAllEqual(keep_indices_out, [0, 2, 3]) + extra_data_out = sess.run(pruned.get_field('extra_data')) + self.assertAllEqual(extra_data_out, [[1], [3], [4]]) + + def test_prune_completely_outside_window(self): + window = tf.constant([0, 0, 9, 14], tf.float32) + corners = tf.constant([[5.0, 5.0, 6.0, 6.0], + [-1.0, -2.0, 4.0, 5.0], + [2.0, 3.0, 5.0, 9.0], + [0.0, 0.0, 9.0, 14.0], + [-10.0, -10.0, -9.0, -9.0], + [-100.0, -100.0, 300.0, 600.0]]) + boxes = box_list.BoxList(corners) + boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]])) + exp_output = [[5.0, 5.0, 6.0, 6.0], + [-1.0, -2.0, 4.0, 5.0], + [2.0, 3.0, 5.0, 9.0], + [0.0, 0.0, 9.0, 14.0], + [-100.0, -100.0, 300.0, 600.0]] + pruned, keep_indices = box_list_ops.prune_completely_outside_window(boxes, + window) + with self.test_session() as sess: + pruned_output = sess.run(pruned.get()) + self.assertAllClose(pruned_output, exp_output) + keep_indices_out = sess.run(keep_indices) + self.assertAllEqual(keep_indices_out, [0, 1, 2, 3, 5]) + extra_data_out = sess.run(pruned.get_field('extra_data')) + self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [6]]) + + def test_prune_completely_outside_window_with_empty_boxlist(self): + window = tf.constant([0, 0, 9, 14], tf.float32) + corners = tf.zeros(shape=[0, 4], dtype=tf.float32) + boxes = box_list.BoxList(corners) + boxes.add_field('extra_data', tf.zeros(shape=[0], dtype=tf.int32)) + pruned, keep_indices = box_list_ops.prune_completely_outside_window(boxes, + window) + pruned_boxes = pruned.get() + extra = pruned.get_field('extra_data') + + exp_pruned_boxes = np.zeros(shape=[0, 4], dtype=np.float32) + exp_extra = np.zeros(shape=[0], dtype=np.int32) + with self.test_session() as sess: + pruned_boxes_out, keep_indices_out, extra_out = sess.run( + [pruned_boxes, keep_indices, extra]) + self.assertAllClose(exp_pruned_boxes, pruned_boxes_out) + self.assertAllEqual([], keep_indices_out) + self.assertAllEqual(exp_extra, extra_out) + + def test_intersection(self): + corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) + corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], + [0.0, 0.0, 20.0, 20.0]]) + exp_output = [[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]] + boxes1 = box_list.BoxList(corners1) + boxes2 = box_list.BoxList(corners2) + intersect = box_list_ops.intersection(boxes1, boxes2) + with self.test_session() as sess: + intersect_output = sess.run(intersect) + self.assertAllClose(intersect_output, exp_output) + + def test_matched_intersection(self): + corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) + corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]]) + exp_output = [2.0, 0.0] + boxes1 = box_list.BoxList(corners1) + boxes2 = box_list.BoxList(corners2) + intersect = box_list_ops.matched_intersection(boxes1, boxes2) + with self.test_session() as sess: + intersect_output = sess.run(intersect) + self.assertAllClose(intersect_output, exp_output) + + def test_iou(self): + corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) + corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], + [0.0, 0.0, 20.0, 20.0]]) + exp_output = [[2.0 / 16.0, 0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]] + boxes1 = box_list.BoxList(corners1) + boxes2 = box_list.BoxList(corners2) + iou = box_list_ops.iou(boxes1, boxes2) + with self.test_session() as sess: + iou_output = sess.run(iou) + self.assertAllClose(iou_output, exp_output) + + def test_matched_iou(self): + corners1 = tf.constant([[4.0, 
3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) + corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]]) + exp_output = [2.0 / 16.0, 0] + boxes1 = box_list.BoxList(corners1) + boxes2 = box_list.BoxList(corners2) + iou = box_list_ops.matched_iou(boxes1, boxes2) + with self.test_session() as sess: + iou_output = sess.run(iou) + self.assertAllClose(iou_output, exp_output) + + def test_iouworks_on_empty_inputs(self): + corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) + corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], + [0.0, 0.0, 20.0, 20.0]]) + boxes1 = box_list.BoxList(corners1) + boxes2 = box_list.BoxList(corners2) + boxes_empty = box_list.BoxList(tf.zeros((0, 4))) + iou_empty_1 = box_list_ops.iou(boxes1, boxes_empty) + iou_empty_2 = box_list_ops.iou(boxes_empty, boxes2) + iou_empty_3 = box_list_ops.iou(boxes_empty, boxes_empty) + with self.test_session() as sess: + iou_output_1, iou_output_2, iou_output_3 = sess.run( + [iou_empty_1, iou_empty_2, iou_empty_3]) + self.assertAllEqual(iou_output_1.shape, (2, 0)) + self.assertAllEqual(iou_output_2.shape, (0, 3)) + self.assertAllEqual(iou_output_3.shape, (0, 0)) + + def test_ioa(self): + corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) + corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], + [0.0, 0.0, 20.0, 20.0]]) + exp_output_1 = [[2.0 / 12.0, 0, 6.0 / 400.0], + [1.0 / 12.0, 0.0, 5.0 / 400.0]] + exp_output_2 = [[2.0 / 6.0, 1.0 / 5.0], + [0, 0], + [6.0 / 6.0, 5.0 / 5.0]] + boxes1 = box_list.BoxList(corners1) + boxes2 = box_list.BoxList(corners2) + ioa_1 = box_list_ops.ioa(boxes1, boxes2) + ioa_2 = box_list_ops.ioa(boxes2, boxes1) + with self.test_session() as sess: + ioa_output_1, ioa_output_2 = sess.run([ioa_1, ioa_2]) + self.assertAllClose(ioa_output_1, exp_output_1) + self.assertAllClose(ioa_output_2, exp_output_2) + + def test_prune_non_overlapping_boxes(self): + corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) + corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], + [0.0, 0.0, 20.0, 20.0]]) + boxes1 = box_list.BoxList(corners1) + boxes2 = box_list.BoxList(corners2) + minoverlap = 0.5 + + exp_output_1 = boxes1 + exp_output_2 = box_list.BoxList(tf.constant(0.0, shape=[0, 4])) + output_1, keep_indices_1 = box_list_ops.prune_non_overlapping_boxes( + boxes1, boxes2, min_overlap=minoverlap) + output_2, keep_indices_2 = box_list_ops.prune_non_overlapping_boxes( + boxes2, boxes1, min_overlap=minoverlap) + with self.test_session() as sess: + (output_1_, keep_indices_1_, output_2_, keep_indices_2_, exp_output_1_, + exp_output_2_) = sess.run( + [output_1.get(), keep_indices_1, + output_2.get(), keep_indices_2, + exp_output_1.get(), exp_output_2.get()]) + self.assertAllClose(output_1_, exp_output_1_) + self.assertAllClose(output_2_, exp_output_2_) + self.assertAllEqual(keep_indices_1_, [0, 1]) + self.assertAllEqual(keep_indices_2_, []) + + def test_prune_small_boxes(self): + boxes = tf.constant([[4.0, 3.0, 7.0, 5.0], + [5.0, 6.0, 10.0, 7.0], + [3.0, 4.0, 6.0, 8.0], + [14.0, 14.0, 15.0, 15.0], + [0.0, 0.0, 20.0, 20.0]]) + exp_boxes = [[3.0, 4.0, 6.0, 8.0], + [0.0, 0.0, 20.0, 20.0]] + boxes = box_list.BoxList(boxes) + pruned_boxes = box_list_ops.prune_small_boxes(boxes, 3) + with self.test_session() as sess: + pruned_boxes = sess.run(pruned_boxes.get()) + self.assertAllEqual(pruned_boxes, exp_boxes) + + def test_prune_small_boxes_prunes_boxes_with_negative_side(self): + boxes = tf.constant([[4.0, 3.0, 7.0, 5.0], + [5.0, 
6.0, 10.0, 7.0], + [3.0, 4.0, 6.0, 8.0], + [14.0, 14.0, 15.0, 15.0], + [0.0, 0.0, 20.0, 20.0], + [2.0, 3.0, 1.5, 7.0], # negative height + [2.0, 3.0, 5.0, 1.7]]) # negative width + exp_boxes = [[3.0, 4.0, 6.0, 8.0], + [0.0, 0.0, 20.0, 20.0]] + boxes = box_list.BoxList(boxes) + pruned_boxes = box_list_ops.prune_small_boxes(boxes, 3) + with self.test_session() as sess: + pruned_boxes = sess.run(pruned_boxes.get()) + self.assertAllEqual(pruned_boxes, exp_boxes) + + def test_change_coordinate_frame(self): + corners = tf.constant([[0.25, 0.5, 0.75, 0.75], [0.5, 0.0, 1.0, 1.0]]) + window = tf.constant([0.25, 0.25, 0.75, 0.75]) + boxes = box_list.BoxList(corners) + + expected_corners = tf.constant([[0, 0.5, 1.0, 1.0], [0.5, -0.5, 1.5, 1.5]]) + expected_boxes = box_list.BoxList(expected_corners) + output = box_list_ops.change_coordinate_frame(boxes, window) + + with self.test_session() as sess: + output_, expected_boxes_ = sess.run([output.get(), expected_boxes.get()]) + self.assertAllClose(output_, expected_boxes_) + + def test_ioaworks_on_empty_inputs(self): + corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) + corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], + [0.0, 0.0, 20.0, 20.0]]) + boxes1 = box_list.BoxList(corners1) + boxes2 = box_list.BoxList(corners2) + boxes_empty = box_list.BoxList(tf.zeros((0, 4))) + ioa_empty_1 = box_list_ops.ioa(boxes1, boxes_empty) + ioa_empty_2 = box_list_ops.ioa(boxes_empty, boxes2) + ioa_empty_3 = box_list_ops.ioa(boxes_empty, boxes_empty) + with self.test_session() as sess: + ioa_output_1, ioa_output_2, ioa_output_3 = sess.run( + [ioa_empty_1, ioa_empty_2, ioa_empty_3]) + self.assertAllEqual(ioa_output_1.shape, (2, 0)) + self.assertAllEqual(ioa_output_2.shape, (0, 3)) + self.assertAllEqual(ioa_output_3.shape, (0, 0)) + + def test_pairwise_distances(self): + corners1 = tf.constant([[0.0, 0.0, 0.0, 0.0], + [1.0, 1.0, 0.0, 2.0]]) + corners2 = tf.constant([[3.0, 4.0, 1.0, 0.0], + [-4.0, 0.0, 0.0, 3.0], + [0.0, 0.0, 0.0, 0.0]]) + exp_output = [[26, 25, 0], [18, 27, 6]] + boxes1 = box_list.BoxList(corners1) + boxes2 = box_list.BoxList(corners2) + dist_matrix = box_list_ops.sq_dist(boxes1, boxes2) + with self.test_session() as sess: + dist_output = sess.run(dist_matrix) + self.assertAllClose(dist_output, exp_output) + + def test_boolean_mask(self): + corners = tf.constant( + [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]]) + indicator = tf.constant([True, False, True, False, True], tf.bool) + expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]] + boxes = box_list.BoxList(corners) + subset = box_list_ops.boolean_mask(boxes, indicator) + with self.test_session() as sess: + subset_output = sess.run(subset.get()) + self.assertAllClose(subset_output, expected_subset) + + def test_static_boolean_mask_with_field(self): + + def graph_fn(corners, weights, indicator): + boxes = box_list.BoxList(corners) + boxes.add_field('weights', weights) + subset = box_list_ops.boolean_mask( + boxes, + indicator, ['weights'], + use_static_shapes=True, + indicator_sum=3) + return (subset.get_field('boxes'), subset.get_field('weights')) + + corners = np.array( + [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]], + dtype=np.float32) + indicator = np.array([True, False, True, False, True], dtype=np.bool) + weights = np.array([[.1], [.3], [.5], [.7], [.9]], dtype=np.float32) + result_boxes, result_weights = self.execute(graph_fn, + [corners, weights, indicator]) + expected_boxes = [4 * [0.0], 4 * [2.0], 4 * [4.0]] + expected_weights = 
[[.1], [.5], [.9]] + + self.assertAllClose(result_boxes, expected_boxes) + self.assertAllClose(result_weights, expected_weights) + + def test_dynamic_boolean_mask_with_field(self): + corners = tf.placeholder(tf.float32, [None, 4]) + indicator = tf.placeholder(tf.bool, [None]) + weights = tf.placeholder(tf.float32, [None, 1]) + expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]] + expected_weights = [[.1], [.5], [.9]] + + boxes = box_list.BoxList(corners) + boxes.add_field('weights', weights) + subset = box_list_ops.boolean_mask(boxes, indicator, ['weights']) + with self.test_session() as sess: + subset_output, weights_output = sess.run( + [subset.get(), subset.get_field('weights')], + feed_dict={ + corners: + np.array( + [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]]), + indicator: + np.array([True, False, True, False, True]).astype(np.bool), + weights: + np.array([[.1], [.3], [.5], [.7], [.9]]) + }) + self.assertAllClose(subset_output, expected_subset) + self.assertAllClose(weights_output, expected_weights) + + def test_gather(self): + corners = tf.constant( + [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]]) + indices = tf.constant([0, 2, 4], tf.int32) + expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]] + boxes = box_list.BoxList(corners) + subset = box_list_ops.gather(boxes, indices) + with self.test_session() as sess: + subset_output = sess.run(subset.get()) + self.assertAllClose(subset_output, expected_subset) + + def test_static_gather_with_field(self): + + def graph_fn(corners, weights, indices): + boxes = box_list.BoxList(corners) + boxes.add_field('weights', weights) + subset = box_list_ops.gather( + boxes, indices, ['weights'], use_static_shapes=True) + return (subset.get_field('boxes'), subset.get_field('weights')) + + corners = np.array([4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], + 4 * [4.0]], dtype=np.float32) + weights = np.array([[.1], [.3], [.5], [.7], [.9]], dtype=np.float32) + indices = np.array([0, 2, 4], dtype=np.int32) + + result_boxes, result_weights = self.execute(graph_fn, + [corners, weights, indices]) + expected_boxes = [4 * [0.0], 4 * [2.0], 4 * [4.0]] + expected_weights = [[.1], [.5], [.9]] + self.assertAllClose(result_boxes, expected_boxes) + self.assertAllClose(result_weights, expected_weights) + + def test_dynamic_gather_with_field(self): + corners = tf.placeholder(tf.float32, [None, 4]) + indices = tf.placeholder(tf.int32, [None]) + weights = tf.placeholder(tf.float32, [None, 1]) + expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]] + expected_weights = [[.1], [.5], [.9]] + + boxes = box_list.BoxList(corners) + boxes.add_field('weights', weights) + subset = box_list_ops.gather(boxes, indices, ['weights'], + use_static_shapes=True) + with self.test_session() as sess: + subset_output, weights_output = sess.run( + [subset.get(), subset.get_field('weights')], + feed_dict={ + corners: + np.array( + [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]]), + indices: + np.array([0, 2, 4]).astype(np.int32), + weights: + np.array([[.1], [.3], [.5], [.7], [.9]]) + }) + self.assertAllClose(subset_output, expected_subset) + self.assertAllClose(weights_output, expected_weights) + + def test_gather_with_invalid_field(self): + corners = tf.constant([4 * [0.0], 4 * [1.0]]) + indices = tf.constant([0, 1], tf.int32) + weights = tf.constant([[.1], [.3]], tf.float32) + + boxes = box_list.BoxList(corners) + boxes.add_field('weights', weights) + with self.assertRaises(ValueError): + box_list_ops.gather(boxes, indices, ['foo', 'bar']) + + def 
test_gather_with_invalid_inputs(self): + corners = tf.constant( + [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]]) + indices_float32 = tf.constant([0, 2, 4], tf.float32) + boxes = box_list.BoxList(corners) + with self.assertRaises(ValueError): + _ = box_list_ops.gather(boxes, indices_float32) + indices_2d = tf.constant([[0, 2, 4]], tf.int32) + boxes = box_list.BoxList(corners) + with self.assertRaises(ValueError): + _ = box_list_ops.gather(boxes, indices_2d) + + def test_gather_with_dynamic_indexing(self): + corners = tf.constant([4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0] + ]) + weights = tf.constant([.5, .3, .7, .1, .9], tf.float32) + indices = tf.reshape(tf.where(tf.greater(weights, 0.4)), [-1]) + expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]] + expected_weights = [.5, .7, .9] + + boxes = box_list.BoxList(corners) + boxes.add_field('weights', weights) + subset = box_list_ops.gather(boxes, indices, ['weights']) + with self.test_session() as sess: + subset_output, weights_output = sess.run([subset.get(), subset.get_field( + 'weights')]) + self.assertAllClose(subset_output, expected_subset) + self.assertAllClose(weights_output, expected_weights) + + def test_sort_by_field_ascending_order(self): + exp_corners = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], + [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]] + exp_scores = [.95, .9, .75, .6, .5, .3] + exp_weights = [.2, .45, .6, .75, .8, .92] + shuffle = [2, 4, 0, 5, 1, 3] + corners = tf.constant([exp_corners[i] for i in shuffle], tf.float32) + boxes = box_list.BoxList(corners) + boxes.add_field('scores', tf.constant( + [exp_scores[i] for i in shuffle], tf.float32)) + boxes.add_field('weights', tf.constant( + [exp_weights[i] for i in shuffle], tf.float32)) + sort_by_weight = box_list_ops.sort_by_field( + boxes, + 'weights', + order=box_list_ops.SortOrder.ascend) + with self.test_session() as sess: + corners_out, scores_out, weights_out = sess.run([ + sort_by_weight.get(), + sort_by_weight.get_field('scores'), + sort_by_weight.get_field('weights')]) + self.assertAllClose(corners_out, exp_corners) + self.assertAllClose(scores_out, exp_scores) + self.assertAllClose(weights_out, exp_weights) + + def test_sort_by_field_descending_order(self): + exp_corners = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], + [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]] + exp_scores = [.95, .9, .75, .6, .5, .3] + exp_weights = [.2, .45, .6, .75, .8, .92] + shuffle = [2, 4, 0, 5, 1, 3] + + corners = tf.constant([exp_corners[i] for i in shuffle], tf.float32) + boxes = box_list.BoxList(corners) + boxes.add_field('scores', tf.constant( + [exp_scores[i] for i in shuffle], tf.float32)) + boxes.add_field('weights', tf.constant( + [exp_weights[i] for i in shuffle], tf.float32)) + + sort_by_score = box_list_ops.sort_by_field(boxes, 'scores') + with self.test_session() as sess: + corners_out, scores_out, weights_out = sess.run([sort_by_score.get( + ), sort_by_score.get_field('scores'), sort_by_score.get_field('weights')]) + self.assertAllClose(corners_out, exp_corners) + self.assertAllClose(scores_out, exp_scores) + self.assertAllClose(weights_out, exp_weights) + + def test_sort_by_field_invalid_inputs(self): + corners = tf.constant([4 * [0.0], 4 * [0.5], 4 * [1.0], 4 * [2.0], 4 * + [3.0], 4 * [4.0]]) + misc = tf.constant([[.95, .9], [.5, .3]], tf.float32) + weights = tf.constant([.1, .2], tf.float32) + boxes = box_list.BoxList(corners) + boxes.add_field('misc', misc) + boxes.add_field('weights', weights) + + with 
self.assertRaises(ValueError): + box_list_ops.sort_by_field(boxes, 'area') + + with self.assertRaises(ValueError): + box_list_ops.sort_by_field(boxes, 'misc') + + with self.assertRaises(ValueError): + box_list_ops.sort_by_field(boxes, 'weights') + + def test_visualize_boxes_in_image(self): + image = tf.zeros((6, 4, 3)) + corners = tf.constant([[0, 0, 5, 3], + [0, 0, 3, 2]], tf.float32) + boxes = box_list.BoxList(corners) + image_and_boxes = box_list_ops.visualize_boxes_in_image(image, boxes) + image_and_boxes_bw = tf.cast( + tf.greater(tf.reduce_sum(image_and_boxes, 2), 0.0), dtype=tf.float32) + exp_result = [[1, 1, 1, 0], + [1, 1, 1, 0], + [1, 1, 1, 0], + [1, 0, 1, 0], + [1, 1, 1, 0], + [0, 0, 0, 0]] + with self.test_session() as sess: + output = sess.run(image_and_boxes_bw) + self.assertAllEqual(output.astype(int), exp_result) + + def test_filter_field_value_equals(self): + corners = tf.constant([[0, 0, 1, 1], + [0, 0.1, 1, 1.1], + [0, -0.1, 1, 0.9], + [0, 10, 1, 11], + [0, 10.1, 1, 11.1], + [0, 100, 1, 101]], tf.float32) + boxes = box_list.BoxList(corners) + boxes.add_field('classes', tf.constant([1, 2, 1, 2, 2, 1])) + exp_output1 = [[0, 0, 1, 1], [0, -0.1, 1, 0.9], [0, 100, 1, 101]] + exp_output2 = [[0, 0.1, 1, 1.1], [0, 10, 1, 11], [0, 10.1, 1, 11.1]] + + filtered_boxes1 = box_list_ops.filter_field_value_equals( + boxes, 'classes', 1) + filtered_boxes2 = box_list_ops.filter_field_value_equals( + boxes, 'classes', 2) + with self.test_session() as sess: + filtered_output1, filtered_output2 = sess.run([filtered_boxes1.get(), + filtered_boxes2.get()]) + self.assertAllClose(filtered_output1, exp_output1) + self.assertAllClose(filtered_output2, exp_output2) + + def test_filter_greater_than(self): + corners = tf.constant([[0, 0, 1, 1], + [0, 0.1, 1, 1.1], + [0, -0.1, 1, 0.9], + [0, 10, 1, 11], + [0, 10.1, 1, 11.1], + [0, 100, 1, 101]], tf.float32) + boxes = box_list.BoxList(corners) + boxes.add_field('scores', tf.constant([.1, .75, .9, .5, .5, .8])) + thresh = .6 + exp_output = [[0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], [0, 100, 1, 101]] + + filtered_boxes = box_list_ops.filter_greater_than(boxes, thresh) + with self.test_session() as sess: + filtered_output = sess.run(filtered_boxes.get()) + self.assertAllClose(filtered_output, exp_output) + + def test_clip_box_list(self): + boxlist = box_list.BoxList( + tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5], + [0.6, 0.6, 0.8, 0.8], [0.2, 0.2, 0.3, 0.3]], tf.float32)) + boxlist.add_field('classes', tf.constant([0, 0, 1, 1])) + boxlist.add_field('scores', tf.constant([0.75, 0.65, 0.3, 0.2])) + num_boxes = 2 + clipped_boxlist = box_list_ops.pad_or_clip_box_list(boxlist, num_boxes) + + expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]] + expected_classes = [0, 0] + expected_scores = [0.75, 0.65] + with self.test_session() as sess: + boxes_out, classes_out, scores_out = sess.run( + [clipped_boxlist.get(), clipped_boxlist.get_field('classes'), + clipped_boxlist.get_field('scores')]) + + self.assertAllClose(expected_boxes, boxes_out) + self.assertAllEqual(expected_classes, classes_out) + self.assertAllClose(expected_scores, scores_out) + + def test_pad_box_list(self): + boxlist = box_list.BoxList( + tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32)) + boxlist.add_field('classes', tf.constant([0, 1])) + boxlist.add_field('scores', tf.constant([0.75, 0.2])) + num_boxes = 4 + padded_boxlist = box_list_ops.pad_or_clip_box_list(boxlist, num_boxes) + + expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5], + [0, 0, 0, 
0], [0, 0, 0, 0]] + expected_classes = [0, 1, 0, 0] + expected_scores = [0.75, 0.2, 0, 0] + with self.test_session() as sess: + boxes_out, classes_out, scores_out = sess.run( + [padded_boxlist.get(), padded_boxlist.get_field('classes'), + padded_boxlist.get_field('scores')]) + + self.assertAllClose(expected_boxes, boxes_out) + self.assertAllEqual(expected_classes, classes_out) + self.assertAllClose(expected_scores, scores_out) + + def test_select_random_box(self): + boxes = [[0., 0., 1., 1.], + [0., 1., 2., 3.], + [0., 2., 3., 4.]] + + corners = tf.constant(boxes, dtype=tf.float32) + boxlist = box_list.BoxList(corners) + random_bbox, valid = box_list_ops.select_random_box(boxlist) + with self.test_session() as sess: + random_bbox_out, valid_out = sess.run([random_bbox, valid]) + + norm_small = any( + [np.linalg.norm(random_bbox_out - box) < 1e-6 for box in boxes]) + + self.assertTrue(norm_small) + self.assertTrue(valid_out) + + def test_select_random_box_with_empty_boxlist(self): + corners = tf.constant([], shape=[0, 4], dtype=tf.float32) + boxlist = box_list.BoxList(corners) + random_bbox, valid = box_list_ops.select_random_box(boxlist) + with self.test_session() as sess: + random_bbox_out, valid_out = sess.run([random_bbox, valid]) + + expected_bbox_out = np.array([[-1., -1., -1., -1.]], dtype=np.float32) + self.assertAllEqual(expected_bbox_out, random_bbox_out) + self.assertFalse(valid_out) + + def test_get_minimal_coverage_box(self): + boxes = [[0., 0., 1., 1.], + [-1., 1., 2., 3.], + [0., 2., 3., 4.]] + + expected_coverage_box = [[-1., 0., 3., 4.]] + + corners = tf.constant(boxes, dtype=tf.float32) + boxlist = box_list.BoxList(corners) + coverage_box = box_list_ops.get_minimal_coverage_box(boxlist) + with self.test_session() as sess: + coverage_box_out = sess.run(coverage_box) + + self.assertAllClose(expected_coverage_box, coverage_box_out) + + def test_get_minimal_coverage_box_with_empty_boxlist(self): + corners = tf.constant([], shape=[0, 4], dtype=tf.float32) + boxlist = box_list.BoxList(corners) + coverage_box = box_list_ops.get_minimal_coverage_box(boxlist) + with self.test_session() as sess: + coverage_box_out = sess.run(coverage_box) + + self.assertAllClose([[0.0, 0.0, 1.0, 1.0]], coverage_box_out) + + +class ConcatenateTest(tf.test.TestCase): + + def test_invalid_input_box_list_list(self): + with self.assertRaises(ValueError): + box_list_ops.concatenate(None) + with self.assertRaises(ValueError): + box_list_ops.concatenate([]) + with self.assertRaises(ValueError): + corners = tf.constant([[0, 0, 0, 0]], tf.float32) + boxlist = box_list.BoxList(corners) + box_list_ops.concatenate([boxlist, 2]) + + def test_concatenate_with_missing_fields(self): + corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32) + scores1 = tf.constant([1.0, 2.1]) + corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32) + boxlist1 = box_list.BoxList(corners1) + boxlist1.add_field('scores', scores1) + boxlist2 = box_list.BoxList(corners2) + with self.assertRaises(ValueError): + box_list_ops.concatenate([boxlist1, boxlist2]) + + def test_concatenate_with_incompatible_field_shapes(self): + corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32) + scores1 = tf.constant([1.0, 2.1]) + corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32) + scores2 = tf.constant([[1.0, 1.0], [2.1, 3.2]]) + boxlist1 = box_list.BoxList(corners1) + boxlist1.add_field('scores', scores1) + boxlist2 = box_list.BoxList(corners2) + boxlist2.add_field('scores', scores2) + with 
self.assertRaises(ValueError): + box_list_ops.concatenate([boxlist1, boxlist2]) + + def test_concatenate_is_correct(self): + corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32) + scores1 = tf.constant([1.0, 2.1]) + corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8], [1, 0, 5, 10]], + tf.float32) + scores2 = tf.constant([1.0, 2.1, 5.6]) + + exp_corners = [[0, 0, 0, 0], + [1, 2, 3, 4], + [0, 3, 1, 6], + [2, 4, 3, 8], + [1, 0, 5, 10]] + exp_scores = [1.0, 2.1, 1.0, 2.1, 5.6] + + boxlist1 = box_list.BoxList(corners1) + boxlist1.add_field('scores', scores1) + boxlist2 = box_list.BoxList(corners2) + boxlist2.add_field('scores', scores2) + result = box_list_ops.concatenate([boxlist1, boxlist2]) + with self.test_session() as sess: + corners_output, scores_output = sess.run( + [result.get(), result.get_field('scores')]) + self.assertAllClose(corners_output, exp_corners) + self.assertAllClose(scores_output, exp_scores) + + +class NonMaxSuppressionTest(tf.test.TestCase): + + def test_select_from_three_clusters(self): + corners = tf.constant([[0, 0, 1, 1], + [0, 0.1, 1, 1.1], + [0, -0.1, 1, 0.9], + [0, 10, 1, 11], + [0, 10.1, 1, 11.1], + [0, 100, 1, 101]], tf.float32) + boxes = box_list.BoxList(corners) + boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3])) + iou_thresh = .5 + max_output_size = 3 + + exp_nms = [[0, 10, 1, 11], + [0, 0, 1, 1], + [0, 100, 1, 101]] + nms = box_list_ops.non_max_suppression( + boxes, iou_thresh, max_output_size) + with self.test_session() as sess: + nms_output = sess.run(nms.get()) + self.assertAllClose(nms_output, exp_nms) + + def test_select_at_most_two_boxes_from_three_clusters(self): + corners = tf.constant([[0, 0, 1, 1], + [0, 0.1, 1, 1.1], + [0, -0.1, 1, 0.9], + [0, 10, 1, 11], + [0, 10.1, 1, 11.1], + [0, 100, 1, 101]], tf.float32) + boxes = box_list.BoxList(corners) + boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3])) + iou_thresh = .5 + max_output_size = 2 + + exp_nms = [[0, 10, 1, 11], + [0, 0, 1, 1]] + nms = box_list_ops.non_max_suppression( + boxes, iou_thresh, max_output_size) + with self.test_session() as sess: + nms_output = sess.run(nms.get()) + self.assertAllClose(nms_output, exp_nms) + + def test_select_at_most_thirty_boxes_from_three_clusters(self): + corners = tf.constant([[0, 0, 1, 1], + [0, 0.1, 1, 1.1], + [0, -0.1, 1, 0.9], + [0, 10, 1, 11], + [0, 10.1, 1, 11.1], + [0, 100, 1, 101]], tf.float32) + boxes = box_list.BoxList(corners) + boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3])) + iou_thresh = .5 + max_output_size = 30 + + exp_nms = [[0, 10, 1, 11], + [0, 0, 1, 1], + [0, 100, 1, 101]] + nms = box_list_ops.non_max_suppression( + boxes, iou_thresh, max_output_size) + with self.test_session() as sess: + nms_output = sess.run(nms.get()) + self.assertAllClose(nms_output, exp_nms) + + def test_select_single_box(self): + corners = tf.constant([[0, 0, 1, 1]], tf.float32) + boxes = box_list.BoxList(corners) + boxes.add_field('scores', tf.constant([.9])) + iou_thresh = .5 + max_output_size = 3 + + exp_nms = [[0, 0, 1, 1]] + nms = box_list_ops.non_max_suppression( + boxes, iou_thresh, max_output_size) + with self.test_session() as sess: + nms_output = sess.run(nms.get()) + self.assertAllClose(nms_output, exp_nms) + + def test_select_from_ten_identical_boxes(self): + corners = tf.constant(10 * [[0, 0, 1, 1]], tf.float32) + boxes = box_list.BoxList(corners) + boxes.add_field('scores', tf.constant(10 * [.9])) + iou_thresh = .5 + max_output_size = 3 + + exp_nms = [[0, 0, 1, 1]] + nms = 
box_list_ops.non_max_suppression(
+        boxes, iou_thresh, max_output_size)
+    with self.test_session() as sess:
+      nms_output = sess.run(nms.get())
+      self.assertAllClose(nms_output, exp_nms)
+
+  def test_copy_extra_fields(self):
+    corners = tf.constant([[0, 0, 1, 1],
+                           [0, 0.1, 1, 1.1]], tf.float32)
+    boxes = box_list.BoxList(corners)
+    tensor1 = np.array([[1], [4]])
+    tensor2 = np.array([[1, 1], [2, 2]])
+    boxes.add_field('tensor1', tf.constant(tensor1))
+    boxes.add_field('tensor2', tf.constant(tensor2))
+    new_boxes = box_list.BoxList(tf.constant([[0, 0, 10, 10],
+                                              [1, 3, 5, 5]], tf.float32))
+    new_boxes = box_list_ops._copy_extra_fields(new_boxes, boxes)
+    with self.test_session() as sess:
+      self.assertAllClose(tensor1, sess.run(new_boxes.get_field('tensor1')))
+      self.assertAllClose(tensor2, sess.run(new_boxes.get_field('tensor2')))
+
+
+class CoordinatesConversionTest(tf.test.TestCase):
+
+  def test_to_normalized_coordinates(self):
+    coordinates = tf.constant([[0, 0, 100, 100],
+                               [25, 25, 75, 75]], tf.float32)
+    img = tf.ones((128, 100, 100, 3))
+    boxlist = box_list.BoxList(coordinates)
+    normalized_boxlist = box_list_ops.to_normalized_coordinates(
+        boxlist, tf.shape(img)[1], tf.shape(img)[2])
+    expected_boxes = [[0, 0, 1, 1],
+                      [0.25, 0.25, 0.75, 0.75]]
+
+    with self.test_session() as sess:
+      normalized_boxes = sess.run(normalized_boxlist.get())
+      self.assertAllClose(normalized_boxes, expected_boxes)
+
+  def test_to_normalized_coordinates_already_normalized(self):
+    coordinates = tf.constant([[0, 0, 1, 1],
+                               [0.25, 0.25, 0.75, 0.75]], tf.float32)
+    img = tf.ones((128, 100, 100, 3))
+    boxlist = box_list.BoxList(coordinates)
+    normalized_boxlist = box_list_ops.to_normalized_coordinates(
+        boxlist, tf.shape(img)[1], tf.shape(img)[2])
+
+    with self.test_session() as sess:
+      with self.assertRaisesOpError('assertion failed'):
+        sess.run(normalized_boxlist.get())
+
+  def test_to_absolute_coordinates(self):
+    coordinates = tf.constant([[0, 0, 1, 1],
+                               [0.25, 0.25, 0.75, 0.75]], tf.float32)
+    img = tf.ones((128, 100, 100, 3))
+    boxlist = box_list.BoxList(coordinates)
+    absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist,
+                                                            tf.shape(img)[1],
+                                                            tf.shape(img)[2])
+    expected_boxes = [[0, 0, 100, 100],
+                      [25, 25, 75, 75]]
+
+    with self.test_session() as sess:
+      absolute_boxes = sess.run(absolute_boxlist.get())
+      self.assertAllClose(absolute_boxes, expected_boxes)
+
+  def test_to_absolute_coordinates_already_absolute(self):
+    coordinates = tf.constant([[0, 0, 100, 100],
+                               [25, 25, 75, 75]], tf.float32)
+    img = tf.ones((128, 100, 100, 3))
+    boxlist = box_list.BoxList(coordinates)
+    absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist,
+                                                            tf.shape(img)[1],
+                                                            tf.shape(img)[2])
+
+    with self.test_session() as sess:
+      with self.assertRaisesOpError('assertion failed'):
+        sess.run(absolute_boxlist.get())
+
+  def test_convert_to_normalized_and_back(self):
+    coordinates = np.random.uniform(size=(100, 4))
+    coordinates = np.round(np.sort(coordinates) * 200)
+    coordinates[:, 2:4] += 1
+    coordinates[99, :] = [0, 0, 201, 201]
+    img = tf.ones((128, 202, 202, 3))
+
+    boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32))
+    boxlist = box_list_ops.to_normalized_coordinates(boxlist,
+                                                     tf.shape(img)[1],
+                                                     tf.shape(img)[2])
+    boxlist = box_list_ops.to_absolute_coordinates(boxlist,
+                                                   tf.shape(img)[1],
+                                                   tf.shape(img)[2])
+
+    with self.test_session() as sess:
+      out = sess.run(boxlist.get())
+      self.assertAllClose(out, coordinates)
+
+  def test_convert_to_absolute_and_back(self):
+    coordinates =
np.random.uniform(size=(100, 4)) + coordinates = np.sort(coordinates) + coordinates[99, :] = [0, 0, 1, 1] + img = tf.ones((128, 202, 202, 3)) + + boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32)) + boxlist = box_list_ops.to_absolute_coordinates(boxlist, + tf.shape(img)[1], + tf.shape(img)[2]) + boxlist = box_list_ops.to_normalized_coordinates(boxlist, + tf.shape(img)[1], + tf.shape(img)[2]) + + with self.test_session() as sess: + out = sess.run(boxlist.get()) + self.assertAllClose(out, coordinates) + + def test_to_absolute_coordinates_maximum_coordinate_check(self): + coordinates = tf.constant([[0, 0, 1.2, 1.2], + [0.25, 0.25, 0.75, 0.75]], tf.float32) + img = tf.ones((128, 100, 100, 3)) + boxlist = box_list.BoxList(coordinates) + absolute_boxlist = box_list_ops.to_absolute_coordinates( + boxlist, + tf.shape(img)[1], + tf.shape(img)[2], + maximum_normalized_coordinate=1.1) + + with self.test_session() as sess: + with self.assertRaisesOpError('assertion failed'): + sess.run(absolute_boxlist.get()) + + +class BoxRefinementTest(tf.test.TestCase): + + def test_box_voting(self): + candidates = box_list.BoxList( + tf.constant([[0.1, 0.1, 0.4, 0.4], [0.6, 0.6, 0.8, 0.8]], tf.float32)) + candidates.add_field('ExtraField', tf.constant([1, 2])) + pool = box_list.BoxList( + tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5], + [0.6, 0.6, 0.8, 0.8]], tf.float32)) + pool.add_field('scores', tf.constant([0.75, 0.25, 0.3])) + averaged_boxes = box_list_ops.box_voting(candidates, pool) + expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8]] + expected_scores = [0.5, 0.3] + with self.test_session() as sess: + boxes_out, scores_out, extra_field_out = sess.run( + [averaged_boxes.get(), averaged_boxes.get_field('scores'), + averaged_boxes.get_field('ExtraField')]) + + self.assertAllClose(expected_boxes, boxes_out) + self.assertAllClose(expected_scores, scores_out) + self.assertAllEqual(extra_field_out, [1, 2]) + + def test_box_voting_fails_with_negative_scores(self): + candidates = box_list.BoxList( + tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32)) + pool = box_list.BoxList(tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32)) + pool.add_field('scores', tf.constant([-0.2])) + averaged_boxes = box_list_ops.box_voting(candidates, pool) + + with self.test_session() as sess: + with self.assertRaisesOpError('Scores must be non negative'): + sess.run([averaged_boxes.get()]) + + def test_box_voting_fails_when_unmatched(self): + candidates = box_list.BoxList( + tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32)) + pool = box_list.BoxList(tf.constant([[0.6, 0.6, 0.8, 0.8]], tf.float32)) + pool.add_field('scores', tf.constant([0.2])) + averaged_boxes = box_list_ops.box_voting(candidates, pool) + + with self.test_session() as sess: + with self.assertRaisesOpError('Each box in selected_boxes must match ' + 'with at least one box in pool_boxes.'): + sess.run([averaged_boxes.get()]) + + def test_refine_boxes(self): + pool = box_list.BoxList( + tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5], + [0.6, 0.6, 0.8, 0.8]], tf.float32)) + pool.add_field('ExtraField', tf.constant([1, 2, 3])) + pool.add_field('scores', tf.constant([0.75, 0.25, 0.3])) + refined_boxes = box_list_ops.refine_boxes(pool, 0.5, 10) + + expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8]] + expected_scores = [0.5, 0.3] + with self.test_session() as sess: + boxes_out, scores_out, extra_field_out = sess.run( + [refined_boxes.get(), refined_boxes.get_field('scores'), + refined_boxes.get_field('ExtraField')]) + + 
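+ # NMS keeps pool boxes 0 and 2 (boxes 0 and 1 overlap with IOU >= 0.5); + # box voting then replaces box 0 by the score-weighted average of the two + # overlapping pool boxes, e.g. y_max = (0.75*0.4 + 0.25*0.5) / (0.75 + 0.25) + # = 0.425, and assigns it the mean of their scores, (0.75 + 0.25) / 2 = 0.5.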
self.assertAllClose(expected_boxes, boxes_out) + self.assertAllClose(expected_scores, scores_out) + self.assertAllEqual(extra_field_out, [1, 3]) + + def test_refine_boxes_multi_class(self): + pool = box_list.BoxList( + tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5], + [0.6, 0.6, 0.8, 0.8], [0.2, 0.2, 0.3, 0.3]], tf.float32)) + pool.add_field('classes', tf.constant([0, 0, 1, 1])) + pool.add_field('scores', tf.constant([0.75, 0.25, 0.3, 0.2])) + refined_boxes = box_list_ops.refine_boxes_multi_class(pool, 3, 0.5, 10) + + expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8], + [0.2, 0.2, 0.3, 0.3]] + expected_scores = [0.5, 0.3, 0.2] + with self.test_session() as sess: + boxes_out, scores_out, extra_field_out = sess.run( + [refined_boxes.get(), refined_boxes.get_field('scores'), + refined_boxes.get_field('classes')]) + + self.assertAllClose(expected_boxes, boxes_out) + self.assertAllClose(expected_scores, scores_out) + self.assertAllEqual(extra_field_out, [0, 1, 1]) + + def test_sample_boxes_by_jittering(self): + boxes = box_list.BoxList( + tf.constant([[0.1, 0.1, 0.4, 0.4], + [0.1, 0.1, 0.5, 0.5], + [0.6, 0.6, 0.8, 0.8], + [0.2, 0.2, 0.3, 0.3]], tf.float32)) + sampled_boxes = box_list_ops.sample_boxes_by_jittering( + boxlist=boxes, num_boxes_to_sample=10) + iou = box_list_ops.iou(boxes, sampled_boxes) + iou_max = tf.reduce_max(iou, axis=0) + with self.test_session() as sess: + (np_sampled_boxes, np_iou_max) = sess.run([sampled_boxes.get(), iou_max]) + self.assertAllEqual(np_sampled_boxes.shape, [10, 4]) + self.assertAllGreater(np_iou_max, 0.5) + + +if __name__ == '__main__': + tf.test.main() diff --git a/core/box_list_test.py b/core/box_list_test.py new file mode 100644 index 0000000..edc00eb --- /dev/null +++ b/core/box_list_test.py @@ -0,0 +1,134 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for object_detection.core.box_list.""" + +import tensorflow as tf + +from object_detection.core import box_list + + +class BoxListTest(tf.test.TestCase): + """Tests for BoxList class.""" + + def test_num_boxes(self): + data = tf.constant([[0, 0, 1, 1], [1, 1, 2, 3], [3, 4, 5, 5]], tf.float32) + expected_num_boxes = 3 + + boxes = box_list.BoxList(data) + with self.test_session() as sess: + num_boxes_output = sess.run(boxes.num_boxes()) + self.assertEquals(num_boxes_output, expected_num_boxes) + + def test_get_correct_center_coordinates_and_sizes(self): + boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] + boxes = box_list.BoxList(tf.constant(boxes)) + centers_sizes = boxes.get_center_coordinates_and_sizes() + expected_centers_sizes = [[15, 0.35], [12.5, 0.25], [10, 0.3], [5, 0.3]] + with self.test_session() as sess: + centers_sizes_out = sess.run(centers_sizes) + self.assertAllClose(centers_sizes_out, expected_centers_sizes) + + def test_create_box_list_with_dynamic_shape(self): + data = tf.constant([[0, 0, 1, 1], [1, 1, 2, 3], [3, 4, 5, 5]], tf.float32) + indices = tf.reshape(tf.where(tf.greater([1, 0, 1], 0)), [-1]) + data = tf.gather(data, indices) + assert data.get_shape().as_list() == [None, 4] + expected_num_boxes = 2 + + boxes = box_list.BoxList(data) + with self.test_session() as sess: + num_boxes_output = sess.run(boxes.num_boxes()) + self.assertEquals(num_boxes_output, expected_num_boxes) + + def test_transpose_coordinates(self): + boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] + boxes = box_list.BoxList(tf.constant(boxes)) + boxes.transpose_coordinates() + expected_corners = [[10.0, 10.0, 15.0, 20.0], [0.1, 0.2, 0.4, 0.5]] + with self.test_session() as sess: + corners_out = sess.run(boxes.get()) + self.assertAllClose(corners_out, expected_corners) + + def test_box_list_invalid_inputs(self): + data0 = tf.constant([[[0, 0, 1, 1], [3, 4, 5, 5]]], tf.float32) + data1 = tf.constant([[0, 0, 1], [1, 1, 2], [3, 4, 5]], tf.float32) + data2 = tf.constant([[0, 0, 1], [1, 1, 2], [3, 4, 5]], tf.int32) + + with self.assertRaises(ValueError): + _ = box_list.BoxList(data0) + with self.assertRaises(ValueError): + _ = box_list.BoxList(data1) + with self.assertRaises(ValueError): + _ = box_list.BoxList(data2) + + def test_num_boxes_static(self): + box_corners = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] + boxes = box_list.BoxList(tf.constant(box_corners)) + self.assertEquals(boxes.num_boxes_static(), 2) + self.assertEquals(type(boxes.num_boxes_static()), int) + + def test_num_boxes_static_for_uninferrable_shape(self): + placeholder = tf.placeholder(tf.float32, shape=[None, 4]) + boxes = box_list.BoxList(placeholder) + self.assertEquals(boxes.num_boxes_static(), None) + + def test_as_tensor_dict(self): + boxlist = box_list.BoxList( + tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32)) + boxlist.add_field('classes', tf.constant([0, 1])) + boxlist.add_field('scores', tf.constant([0.75, 0.2])) + tensor_dict = boxlist.as_tensor_dict() + + expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]] + expected_classes = [0, 1] + expected_scores = [0.75, 0.2] + + with self.test_session() as sess: + tensor_dict_out = sess.run(tensor_dict) + self.assertAllEqual(3, len(tensor_dict_out)) + self.assertAllClose(expected_boxes, tensor_dict_out['boxes']) + self.assertAllEqual(expected_classes, tensor_dict_out['classes']) + self.assertAllClose(expected_scores, 
tensor_dict_out['scores']) + + def test_as_tensor_dict_with_features(self): + boxlist = box_list.BoxList( + tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32)) + boxlist.add_field('classes', tf.constant([0, 1])) + boxlist.add_field('scores', tf.constant([0.75, 0.2])) + tensor_dict = boxlist.as_tensor_dict(['boxes', 'classes', 'scores']) + + expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]] + expected_classes = [0, 1] + expected_scores = [0.75, 0.2] + + with self.test_session() as sess: + tensor_dict_out = sess.run(tensor_dict) + self.assertAllEqual(3, len(tensor_dict_out)) + self.assertAllClose(expected_boxes, tensor_dict_out['boxes']) + self.assertAllEqual(expected_classes, tensor_dict_out['classes']) + self.assertAllClose(expected_scores, tensor_dict_out['scores']) + + def test_as_tensor_dict_missing_field(self): + boxlist = box_list.BoxList( + tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32)) + boxlist.add_field('classes', tf.constant([0, 1])) + boxlist.add_field('scores', tf.constant([0.75, 0.2])) + with self.assertRaises(ValueError): + boxlist.as_tensor_dict(['foo', 'bar']) + + +if __name__ == '__main__': + tf.test.main() diff --git a/core/box_predictor.py b/core/box_predictor.py new file mode 100644 index 0000000..91a6647 --- /dev/null +++ b/core/box_predictor.py @@ -0,0 +1,227 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Box predictor for object detectors. + +Box predictors are classes that take a high level +image feature map as input and produce two predictions, +(1) a tensor encoding box locations, and +(2) a tensor encoding classes for each box. + +These components are passed directly to loss functions +in our detection models. + +These modules are separated from the main model since the same +few box predictor architectures are shared across many models. +""" +from abc import abstractmethod +import tensorflow as tf + +BOX_ENCODINGS = 'box_encodings' +CLASS_PREDICTIONS_WITH_BACKGROUND = 'class_predictions_with_background' +MASK_PREDICTIONS = 'mask_predictions' + + +class BoxPredictor(object): + """BoxPredictor.""" + + def __init__(self, is_training, num_classes): + """Constructor. + + Args: + is_training: Indicates whether the BoxPredictor is in training mode. + num_classes: number of classes. Note that num_classes *does not* + include the background category, so if groundtruth labels take values + in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the + assigned classification targets can range from {0,... K}). + """ + self._is_training = is_training + self._num_classes = num_classes + + @property + def is_keras_model(self): + return False + + @property + def num_classes(self): + return self._num_classes + + def predict(self, image_features, num_predictions_per_location, + scope=None, **params): + """Computes encoded object locations and corresponding confidences. 
+ + Takes a list of high level image feature maps as input and produces a list + of box encodings and a list of class scores where each element in the output + lists corresponds to the feature maps in the input list. + + Args: + image_features: A list of float tensors of shape [batch_size, height_i, + width_i, channels_i] containing features for a batch of images. + num_predictions_per_location: A list of integers representing the number + of box predictions to be made per spatial location for each feature map. + scope: Variable and Op scope name. + **params: Additional keyword arguments for specific implementations of + BoxPredictor. + + Returns: + A dictionary containing at least the following tensors. + box_encodings: A list of float tensors. Each entry in the list + corresponds to a feature map in the input `image_features` list. All + tensors in the list have one of the two following shapes: + a. [batch_size, num_anchors_i, q, code_size] representing the location + of the objects, where q is 1 or the number of classes. + b. [batch_size, num_anchors_i, code_size]. + class_predictions_with_background: A list of float tensors of shape + [batch_size, num_anchors_i, num_classes + 1] representing the class + predictions for the proposals. Each entry in the list corresponds to a + feature map in the input `image_features` list. + + Raises: + ValueError: If length of `image_features` is not equal to length of + `num_predictions_per_location`. + """ + if len(image_features) != len(num_predictions_per_location): + raise ValueError('image_features and num_predictions_per_location must ' + 'be of same length, found: {} vs {}'. + format(len(image_features), + len(num_predictions_per_location))) + if scope is not None: + with tf.variable_scope(scope): + return self._predict(image_features, num_predictions_per_location, + **params) + return self._predict(image_features, num_predictions_per_location, + **params) + + # TODO(rathodv): num_predictions_per_location could be moved to constructor. + # This is currently only used by ConvolutionalBoxPredictor. + @abstractmethod + def _predict(self, image_features, num_predictions_per_location, **params): + """Implementations must override this method. + + Args: + image_features: A list of float tensors of shape [batch_size, height_i, + width_i, channels_i] containing features for a batch of images. + num_predictions_per_location: A list of integers representing the number + of box predictions to be made per spatial location for each feature map. + **params: Additional keyword arguments for specific implementations of + BoxPredictor. + + Returns: + A dictionary containing at least the following tensors. + box_encodings: A list of float tensors. Each entry in the list + corresponds to a feature map in the input `image_features` list. All + tensors in the list have one of the two following shapes: + a. [batch_size, num_anchors_i, q, code_size] representing the location + of the objects, where q is 1 or the number of classes. + b. [batch_size, num_anchors_i, code_size]. + class_predictions_with_background: A list of float tensors of shape + [batch_size, num_anchors_i, num_classes + 1] representing the class + predictions for the proposals. Each entry in the list corresponds to a + feature map in the input `image_features` list. + """ + pass + + +class KerasBoxPredictor(tf.keras.Model): + """Keras-based BoxPredictor.""" + + def __init__(self, is_training, num_classes, freeze_batchnorm, + inplace_batchnorm_update, name=None): + """Constructor.
+ + Args: + is_training: Indicates whether the BoxPredictor is in training mode. + num_classes: number of classes. Note that num_classes *does not* + include the background category, so if groundtruth labels take values + in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the + assigned classification targets can range from {0,... K}). + freeze_batchnorm: Whether to freeze batch norm parameters during + training or not. When training with a small batch size (e.g. 1), it is + desirable to freeze batch norm update and use pretrained batch norm + params. + inplace_batchnorm_update: Whether to update batch norm moving average + values inplace. When this is false, the train op must add a control + dependency on the tf.GraphKeys.UPDATE_OPS collection in order to update + batch norm statistics. + name: A string name scope to assign to the model. If `None`, Keras + will auto-generate one from the class name. + """ + super(KerasBoxPredictor, self).__init__(name=name) + + self._is_training = is_training + self._num_classes = num_classes + self._freeze_batchnorm = freeze_batchnorm + self._inplace_batchnorm_update = inplace_batchnorm_update + + @property + def is_keras_model(self): + return True + + @property + def num_classes(self): + return self._num_classes + + def call(self, image_features, **kwargs): + """Computes encoded object locations and corresponding confidences. + + Takes a list of high level image feature maps as input and produces a list + of box encodings and a list of class scores where each element in the output + lists corresponds to the feature maps in the input list. + + Args: + image_features: A list of float tensors of shape [batch_size, height_i, + width_i, channels_i] containing features for a batch of images. + **kwargs: Additional keyword arguments for specific implementations of + BoxPredictor. + + Returns: + A dictionary containing at least the following tensors. + box_encodings: A list of float tensors. Each entry in the list + corresponds to a feature map in the input `image_features` list. All + tensors in the list have one of the two following shapes: + a. [batch_size, num_anchors_i, q, code_size] representing the location + of the objects, where q is 1 or the number of classes. + b. [batch_size, num_anchors_i, code_size]. + class_predictions_with_background: A list of float tensors of shape + [batch_size, num_anchors_i, num_classes + 1] representing the class + predictions for the proposals. Each entry in the list corresponds to a + feature map in the input `image_features` list. + """ + return self._predict(image_features, **kwargs) + + @abstractmethod + def _predict(self, image_features, **kwargs): + """Implementations must override this method. + + Args: + image_features: A list of float tensors of shape [batch_size, height_i, + width_i, channels_i] containing features for a batch of images. + **kwargs: Additional keyword arguments for specific implementations of + BoxPredictor. + + Returns: + A dictionary containing at least the following tensors. + box_encodings: A list of float tensors. Each entry in the list + corresponds to a feature map in the input `image_features` list. All + tensors in the list have one of the two following shapes: + a. [batch_size, num_anchors_i, q, code_size] representing the location + of the objects, where q is 1 or the number of classes. + b. [batch_size, num_anchors_i, code_size].
+ class_predictions_with_background: A list of float tensors of shape + [batch_size, num_anchors_i, num_classes + 1] representing the class + predictions for the proposals. Each entry in the list corresponds to a + feature map in the input `image_features` list. + """ + raise NotImplementedError diff --git a/core/class_agnostic_nms_test.py b/core/class_agnostic_nms_test.py new file mode 100644 index 0000000..8a418b7 --- /dev/null +++ b/core/class_agnostic_nms_test.py @@ -0,0 +1,155 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for google3.third_party.tensorflow_models.object_detection.core.class_agnostic_nms.""" +from absl.testing import parameterized +import tensorflow as tf +from object_detection.core import post_processing +from object_detection.core import standard_fields as fields +from object_detection.utils import test_case + + +class ClassAgnosticNonMaxSuppressionTest(test_case.TestCase, + parameterized.TestCase): + + def test_class_agnostic_nms_select_with_shared_boxes(self): + boxes = tf.constant( + [[[0, 0, 1, 1]], [[0, 0.1, 1, 1.1]], [[0, -0.1, 1, 0.9]], + [[0, 10, 1, 11]], [[0, 10.1, 1, 11.1]], [[0, 100, 1, 101]], + [[0, 1000, 1, 1002]], [[0, 1000, 1, 1002.1]]], tf.float32) + scores = tf.constant([[.9, 0.01], [.75, 0.05], [.6, 0.01], [.95, 0], + [.5, 0.01], [.3, 0.01], [.01, .85], [.01, .5]]) + score_thresh = 0.1 + iou_thresh = .5 + max_classes_per_detection = 1 + max_output_size = 4 + + exp_nms_corners = [[0, 10, 1, 11], [0, 0, 1, 1], [0, 1000, 1, 1002], + [0, 100, 1, 101]] + exp_nms_scores = [.95, .9, .85, .3] + exp_nms_classes = [0, 0, 1, 0] + + nms, _ = post_processing.class_agnostic_non_max_suppression( + boxes, scores, score_thresh, iou_thresh, max_classes_per_detection, + max_output_size) + + with self.test_session() as sess: + nms_corners_output, nms_scores_output, nms_classes_output = sess.run([ + nms.get(), + nms.get_field(fields.BoxListFields.scores), + nms.get_field(fields.BoxListFields.classes) + ]) + + self.assertAllClose(nms_corners_output, exp_nms_corners) + self.assertAllClose(nms_scores_output, exp_nms_scores) + self.assertAllClose(nms_classes_output, exp_nms_classes) + + + def test_class_agnostic_nms_select_with_per_class_boxes(self): + boxes = tf.constant( + [[[4, 5, 9, 10], [0, 0, 1, 1]], + [[0, 0.1, 1, 1.1], [4, 5, 9, 10]], + [[0, -0.1, 1, 0.9], [4, 5, 9, 10]], + [[0, 10, 1, 11], [4, 5, 9, 10]], + [[0, 10.1, 1, 11.1], [4, 5, 9, 10]], + [[0, 100, 1, 101], [4, 5, 9, 10]], + [[4, 5, 9, 10], [0, 1000, 1, 1002]], + [[4, 5, 9, 10], [0, 1000, 1, 1002.1]]], tf.float32) + scores = tf.constant([[.01, 0.9], + [.75, 0.05], + [.6, 0.01], + [.95, 0], + [.5, 0.01], + [.3, 0.01], + [.01, .85], + [.01, .5]]) + score_thresh = 0.1 + iou_thresh = .5 + max_classes_per_detection = 1 + max_output_size = 4 + + exp_nms_corners = [[0, 10, 1, 11], + [0, 0, 1, 1], + [0, 1000, 1, 1002], + [0, 100, 1, 101]] + exp_nms_scores = [.95, 
.9, .85, .3] + exp_nms_classes = [0, 1, 1, 0] + + nms, _ = post_processing.class_agnostic_non_max_suppression( + boxes, scores, score_thresh, iou_thresh, max_classes_per_detection, + max_output_size) + + with self.test_session() as sess: + nms_corners_output, nms_scores_output, nms_classes_output = sess.run([ + nms.get(), + nms.get_field(fields.BoxListFields.scores), + nms.get_field(fields.BoxListFields.classes) + ]) + + self.assertAllClose(nms_corners_output, exp_nms_corners) + self.assertAllClose(nms_scores_output, exp_nms_scores) + self.assertAllClose(nms_classes_output, exp_nms_classes) + + # Two cases will be tested here: using / not using static shapes. + # Named the two test cases for easier control during testing, with a flag of + # '--test_filter=ClassAgnosticNonMaxSuppressionTest.test_batch_classagnostic_nms_with_batch_size_1' + # or + # '--test_filter=ClassAgnosticNonMaxSuppressionTest.test_batch_classagnostic_nms_with_batch_size_1_use_static_shapes'. + @parameterized.named_parameters(('', False), ('_use_static_shapes', True)) + def test_batch_classagnostic_nms_with_batch_size_1(self, + use_static_shapes=False): + boxes = tf.constant( + [[[[0, 0, 1, 1]], [[0, 0.1, 1, 1.1]], [[0, -0.1, 1, 0.9]], + [[0, 10, 1, 11]], [[0, 10.1, 1, 11.1]], [[0, 100, 1, 101]], + [[0, 1000, 1, 1002]], [[0, 1000, 1, 1002.1]]]], tf.float32) + scores = tf.constant([[[.9, 0.01], [.75, 0.05], [.6, 0.01], [.95, 0], + [.5, 0.01], [.3, 0.01], [.01, .85], [.01, .5]]]) + score_thresh = 0.1 + iou_thresh = .5 + max_output_size = 4 + max_classes_per_detection = 1 + use_class_agnostic_nms = True + + exp_nms_corners = [[[0, 10, 1, 11], [0, 0, 1, 1], [0, 1000, 1, 1002], + [0, 100, 1, 101]]] + exp_nms_scores = [[.95, .9, .85, .3]] + exp_nms_classes = [[0, 0, 1, 0]] + + (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, + nmsed_additional_fields, + num_detections) = post_processing.batch_multiclass_non_max_suppression( + boxes, + scores, + score_thresh, + iou_thresh, + max_size_per_class=max_output_size, + max_total_size=max_output_size, + use_class_agnostic_nms=use_class_agnostic_nms, + use_static_shapes=use_static_shapes, + max_classes_per_detection=max_classes_per_detection) + + self.assertIsNone(nmsed_masks) + self.assertIsNone(nmsed_additional_fields) + + with self.test_session() as sess: + (nmsed_boxes, nmsed_scores, nmsed_classes, num_detections) = sess.run( + [nmsed_boxes, nmsed_scores, nmsed_classes, num_detections]) + self.assertAllClose(nmsed_boxes, exp_nms_corners) + self.assertAllClose(nmsed_scores, exp_nms_scores) + self.assertAllClose(nmsed_classes, exp_nms_classes) + self.assertEqual(num_detections, [4]) + + +if __name__ == '__main__': + tf.test.main() diff --git a/core/data_decoder.py b/core/data_decoder.py new file mode 100644 index 0000000..87ddf72 --- /dev/null +++ b/core/data_decoder.py @@ -0,0 +1,44 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Interface for data decoders. + +Data decoders decode the input data and return a dictionary of tensors keyed by +the entries in core.reader.Fields. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from abc import ABCMeta +from abc import abstractmethod +import six + + +class DataDecoder(six.with_metaclass(ABCMeta, object)): + """Interface for data decoders.""" + + @abstractmethod + def decode(self, data): + """Return a single image and associated labels. + + Args: + data: a string tensor holding a serialized protocol buffer corresponding + to data for a single image. + + Returns: + tensor_dict: a dictionary containing tensors. Possible keys are defined in + reader.Fields. + """ + pass diff --git a/core/data_parser.py b/core/data_parser.py new file mode 100644 index 0000000..889545d --- /dev/null +++ b/core/data_parser.py @@ -0,0 +1,45 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Interface for data parsers. + +Data parsers parse input data and return a dictionary of numpy arrays +keyed by the entries in standard_fields.py. Since the parser parses records +to numpy arrays (materialized tensors) directly, it is used to read data for +evaluation/visualization; to parse the data during training, DataDecoder should +be used. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from abc import ABCMeta +from abc import abstractmethod +import six + + +class DataToNumpyParser(six.with_metaclass(ABCMeta, object)): + """Abstract interface for a data parser that produces numpy arrays.""" + + @abstractmethod + def parse(self, input_data): + """Parses input and returns a numpy array or a dictionary of numpy arrays. + + Args: + input_data: an input data record to parse. + + Returns: + A numpy array or a dictionary of numpy arrays or None, if input + cannot be parsed. + """ + pass diff --git a/core/freezable_batch_norm.py b/core/freezable_batch_norm.py new file mode 100644 index 0000000..be82fcd --- /dev/null +++ b/core/freezable_batch_norm.py @@ -0,0 +1,68 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# ============================================================================== + +"""A freezable batch norm layer that uses Keras batch normalization.""" +import tensorflow as tf + + +class FreezableBatchNorm(tf.keras.layers.BatchNormalization): + """Batch normalization layer (Ioffe and Szegedy, 2014). + + This is a `freezable` batch norm layer that supports setting the `training` + parameter in the __init__ method rather than having to set it either via + the Keras learning phase or via the `call` method parameter. This layer will + forward all other parameters to the default Keras `BatchNormalization` + layer. + + This class is necessary because Object Detection model training sometimes + requires batch normalization layers to be `frozen` and used as if it were + evaluation time, despite still training (and potentially using dropout layers). + + Like the default Keras BatchNormalization layer, this will normalize the + activations of the previous layer at each batch, + i.e. applies a transformation that maintains the mean activation + close to 0 and the activation standard deviation close to 1. + + Arguments: + training: If False, the layer will normalize using the moving average and + std. dev, without updating the learned avg and std. dev. + If None or True, the layer will follow the keras BatchNormalization layer + strategy of checking the Keras learning phase at `call` time to decide + what to do. + **kwargs: The keyword arguments to forward to the keras BatchNormalization + layer constructor. + + Input shape: + Arbitrary. Use the keyword argument `input_shape` + (tuple of integers, does not include the samples axis) + when using this layer as the first layer in a model. + + Output shape: + Same shape as input. + + References: + - [Batch Normalization: Accelerating Deep Network Training by Reducing + Internal Covariate Shift](https://arxiv.org/abs/1502.03167) + """ + + def __init__(self, training=None, **kwargs): + super(FreezableBatchNorm, self).__init__(**kwargs) + self._training = training + + def call(self, inputs, training=None): + # Override the call arg only if the batchnorm is frozen. (Ignore None) + if self._training is False:  # pylint: disable=g-bool-id-comparison + training = self._training + return super(FreezableBatchNorm, self).call(inputs, training=training) diff --git a/core/freezable_batch_norm_test.py b/core/freezable_batch_norm_test.py new file mode 100644 index 0000000..3e06152 --- /dev/null +++ b/core/freezable_batch_norm_test.py @@ -0,0 +1,189 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# ============================================================================== + +"""Tests for object_detection.core.freezable_batch_norm.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +from six.moves import zip +import tensorflow as tf + +from object_detection.core import freezable_batch_norm + + +class FreezableBatchNormTest(tf.test.TestCase): + """Tests for FreezableBatchNorm operations.""" + + def _build_model(self, training=None): + model = tf.keras.models.Sequential() + norm = freezable_batch_norm.FreezableBatchNorm(training=training, + input_shape=(10,), + momentum=0.8) + model.add(norm) + return model, norm + + def _train_freezable_batch_norm(self, training_mean, training_var): + model, _ = self._build_model() + model.compile(loss='mse', optimizer='sgd') + + # centered on training_mean, variance training_var + train_data = np.random.normal( + loc=training_mean, + scale=training_var, + size=(1000, 10)) + model.fit(train_data, train_data, epochs=4, verbose=0) + return model.weights + + def _test_batchnorm_layer( + self, norm, should_be_training, test_data, + testing_mean, testing_var, training_arg, training_mean, training_var): + out_tensor = norm(tf.convert_to_tensor(test_data, dtype=tf.float32), + training=training_arg) + out = tf.keras.backend.eval(out_tensor) + out -= tf.keras.backend.eval(norm.beta) + out /= tf.keras.backend.eval(norm.gamma) + + if not should_be_training: + out *= training_var + out += (training_mean - testing_mean) + out /= testing_var + + np.testing.assert_allclose(out.mean(), 0.0, atol=1.5e-1) + np.testing.assert_allclose(out.std(), 1.0, atol=1.5e-1) + + def test_batchnorm_freezing_training_none(self): + with self.test_session(): + training_mean = 5.0 + training_var = 10.0 + + testing_mean = -10.0 + testing_var = 5.0 + + # Initially train the batch norm, and save the weights + trained_weights = self._train_freezable_batch_norm(training_mean, + training_var) + + # Load the batch norm weights, freezing training to True. + # Apply the batch norm layer to testing data and ensure it is normalized + # according to the batch statistics. 
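+ # model.weights covers gamma, beta and the moving mean/variance, so the + # assignments below transfer everything the trained layer learned.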
+ model, norm = self._build_model(training=True) + for trained_weight, blank_weight in zip(trained_weights, model.weights): + weight_copy = blank_weight.assign(tf.keras.backend.eval(trained_weight)) + tf.keras.backend.eval(weight_copy) + + # centered on testing_mean, variance testing_var + test_data = np.random.normal( + loc=testing_mean, + scale=testing_var, + size=(1000, 10)) + + # Test with training=True passed to the call method: + training_arg = True + should_be_training = True + self._test_batchnorm_layer(norm, should_be_training, test_data, + testing_mean, testing_var, training_arg, + training_mean, training_var) + + # Test with training=False passed to the call method: + training_arg = False + should_be_training = False + self._test_batchnorm_layer(norm, should_be_training, test_data, + testing_mean, testing_var, training_arg, + training_mean, training_var) + + # Test the layer in various Keras learning phase scopes: + training_arg = None + should_be_training = False + self._test_batchnorm_layer(norm, should_be_training, test_data, + testing_mean, testing_var, training_arg, + training_mean, training_var) + + tf.keras.backend.set_learning_phase(True) + should_be_training = True + self._test_batchnorm_layer(norm, should_be_training, test_data, + testing_mean, testing_var, training_arg, + training_mean, training_var) + + tf.keras.backend.set_learning_phase(False) + should_be_training = False + self._test_batchnorm_layer(norm, should_be_training, test_data, + testing_mean, testing_var, training_arg, + training_mean, training_var) + + def test_batchnorm_freezing_training_false(self): + with self.test_session(): + training_mean = 5.0 + training_var = 10.0 + + testing_mean = -10.0 + testing_var = 5.0 + + # Initially train the batch norm, and save the weights + trained_weights = self._train_freezable_batch_norm(training_mean, + training_var) + + # Load the batch norm back up, freezing training to False. + # Apply the batch norm layer to testing data and ensure it is normalized + # according to the training data's statistics. 
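+ # Because training=False is frozen at construction time, + # FreezableBatchNorm.call overrides any `training` argument passed below, + # so every case must behave like inference on the stored moving statistics.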
model, norm = self._build_model(training=False) + for trained_weight, blank_weight in zip(trained_weights, model.weights): + weight_copy = blank_weight.assign(tf.keras.backend.eval(trained_weight)) + tf.keras.backend.eval(weight_copy) + + # centered on testing_mean, variance testing_var + test_data = np.random.normal( + loc=testing_mean, + scale=testing_var, + size=(1000, 10)) + + # Make sure that the layer is never training + # Test with training=True passed to the call method: + training_arg = True + should_be_training = False + self._test_batchnorm_layer(norm, should_be_training, test_data, + testing_mean, testing_var, training_arg, + training_mean, training_var) + + # Test with training=False passed to the call method: + training_arg = False + should_be_training = False + self._test_batchnorm_layer(norm, should_be_training, test_data, + testing_mean, testing_var, training_arg, + training_mean, training_var) + + # Test the layer in various Keras learning phase scopes: + training_arg = None + should_be_training = False + self._test_batchnorm_layer(norm, should_be_training, test_data, + testing_mean, testing_var, training_arg, + training_mean, training_var) + + tf.keras.backend.set_learning_phase(True) + should_be_training = False + self._test_batchnorm_layer(norm, should_be_training, test_data, + testing_mean, testing_var, training_arg, + training_mean, training_var) + + tf.keras.backend.set_learning_phase(False) + should_be_training = False + self._test_batchnorm_layer(norm, should_be_training, test_data, + testing_mean, testing_var, training_arg, + training_mean, training_var) + + +if __name__ == '__main__': + tf.test.main() diff --git a/core/keypoint_ops.py b/core/keypoint_ops.py new file mode 100644 index 0000000..e520845 --- /dev/null +++ b/core/keypoint_ops.py @@ -0,0 +1,282 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Keypoint operations. + +Keypoints are represented as tensors of shape [num_instances, num_keypoints, 2], +where the last dimension holds the [y, x] coordinates of each keypoint. +""" +import numpy as np +import tensorflow as tf + + +def scale(keypoints, y_scale, x_scale, scope=None): + """Scales keypoint coordinates in x and y dimensions. + + Args: + keypoints: a tensor of shape [num_instances, num_keypoints, 2] + y_scale: (float) scalar tensor + x_scale: (float) scalar tensor + scope: name scope. + + Returns: + new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] + """ + with tf.name_scope(scope, 'Scale'): + y_scale = tf.cast(y_scale, tf.float32) + x_scale = tf.cast(x_scale, tf.float32) + new_keypoints = keypoints * [[[y_scale, x_scale]]] + return new_keypoints + + +def clip_to_window(keypoints, window, scope=None): + """Clips keypoints to a window. + + This op clips any input keypoints to a window.
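+ + For example, clipping the keypoint [0.9, 0.1] to the window + [0.25, 0.25, 0.75, 0.75] yields [0.75, 0.25].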
+ + Args: + keypoints: a tensor of shape [num_instances, num_keypoints, 2] + window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max] + window to which the op should clip the keypoints. + scope: name scope. + + Returns: + new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] + """ + with tf.name_scope(scope, 'ClipToWindow'): + y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2) + win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) + y = tf.maximum(tf.minimum(y, win_y_max), win_y_min) + x = tf.maximum(tf.minimum(x, win_x_max), win_x_min) + new_keypoints = tf.concat([y, x], 2) + return new_keypoints + + +def prune_outside_window(keypoints, window, scope=None): + """Prunes keypoints that fall outside a given window. + + This function replaces keypoints that fall outside the given window with nan. + See also clip_to_window which clips any keypoints that fall outside the given + window. + + Args: + keypoints: a tensor of shape [num_instances, num_keypoints, 2] + window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max] + window outside of which the op should prune the keypoints. + scope: name scope. + + Returns: + new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] + """ + with tf.name_scope(scope, 'PruneOutsideWindow'): + y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2) + win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) + + valid_indices = tf.logical_and( + tf.logical_and(y >= win_y_min, y <= win_y_max), + tf.logical_and(x >= win_x_min, x <= win_x_max)) + + new_y = tf.where(valid_indices, y, np.nan * tf.ones_like(y)) + new_x = tf.where(valid_indices, x, np.nan * tf.ones_like(x)) + new_keypoints = tf.concat([new_y, new_x], 2) + + return new_keypoints + + +def change_coordinate_frame(keypoints, window, scope=None): + """Changes coordinate frame of the keypoints to be relative to window's frame. + + Given a window of the form [y_min, x_min, y_max, x_max], changes keypoint + coordinates from keypoints of shape [num_instances, num_keypoints, 2] + to be relative to this window. + + An example use case is data augmentation: where we are given groundtruth + keypoints and would like to randomly crop the image to some window. In this + case we need to change the coordinate frame of each groundtruth keypoint to be + relative to this new window. + + Args: + keypoints: a tensor of shape [num_instances, num_keypoints, 2] + window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max] + window we should change the coordinate frame to. + scope: name scope. + + Returns: + new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] + """ + with tf.name_scope(scope, 'ChangeCoordinateFrame'): + win_height = window[2] - window[0] + win_width = window[3] - window[1] + new_keypoints = scale(keypoints - [window[0], window[1]], 1.0 / win_height, + 1.0 / win_width) + return new_keypoints + + +def to_normalized_coordinates(keypoints, height, width, + check_range=True, scope=None): + """Converts absolute keypoint coordinates to normalized coordinates in [0, 1]. + + Usually one uses the dynamic shape of the image or conv-layer tensor: + keypoints = keypoint_ops.to_normalized_coordinates(keypoints, + tf.shape(images)[1], + tf.shape(images)[2]), + + This function raises an assertion failed error at graph execution time when + the maximum coordinate is smaller than 1.01 (which means that coordinates are + already normalized). The value 1.01 is to deal with small rounding errors. 
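+ + For example, with height=100 and width=200 the absolute keypoint + [50.0, 100.0] becomes the normalized keypoint [0.5, 0.5].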
+ + Args: + keypoints: A tensor of shape [num_instances, num_keypoints, 2]. + height: Maximum value for y coordinate of absolute keypoint coordinates. + width: Maximum value for x coordinate of absolute keypoint coordinates. + check_range: If True, checks if the coordinates are normalized. + scope: name scope. + + Returns: + tensor of shape [num_instances, num_keypoints, 2] with normalized + coordinates in [0, 1]. + """ + with tf.name_scope(scope, 'ToNormalizedCoordinates'): + height = tf.cast(height, tf.float32) + width = tf.cast(width, tf.float32) + + if check_range: + max_val = tf.reduce_max(keypoints) + max_assert = tf.Assert(tf.greater(max_val, 1.01), + ['max value is lower than 1.01: ', max_val]) + with tf.control_dependencies([max_assert]): + width = tf.identity(width) + + return scale(keypoints, 1.0 / height, 1.0 / width) + + +def to_absolute_coordinates(keypoints, height, width, + check_range=True, scope=None): + """Converts normalized keypoint coordinates to absolute pixel coordinates. + + This function raises an assertion failed error when the maximum keypoint + coordinate value is larger than 1.01 (in which case coordinates are already + absolute). + + Args: + keypoints: A tensor of shape [num_instances, num_keypoints, 2] + height: Maximum value for y coordinate of absolute keypoint coordinates. + width: Maximum value for x coordinate of absolute keypoint coordinates. + check_range: If True, checks if the coordinates are normalized or not. + scope: name scope. + + Returns: + tensor of shape [num_instances, num_keypoints, 2] with absolute coordinates + in terms of the image size. + + """ + with tf.name_scope(scope, 'ToAbsoluteCoordinates'): + height = tf.cast(height, tf.float32) + width = tf.cast(width, tf.float32) + + # Ensure range of input keypoints is correct. + if check_range: + max_val = tf.reduce_max(keypoints) + max_assert = tf.Assert(tf.greater_equal(1.01, max_val), + ['maximum keypoint coordinate value is larger ' + 'than 1.01: ', max_val]) + with tf.control_dependencies([max_assert]): + width = tf.identity(width) + + return scale(keypoints, height, width) + + +def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None): + """Flips the keypoints horizontally around the flip_point. + + This operation flips the x coordinate for each keypoint around the flip_point + and also permutes the keypoints in a manner specified by flip_permutation. + + Args: + keypoints: a tensor of shape [num_instances, num_keypoints, 2] + flip_point: (float) scalar tensor representing the x coordinate to flip the + keypoints around. + flip_permutation: rank 1 int32 tensor containing the keypoint flip + permutation. This specifies the mapping from original keypoint indices + to the flipped keypoint indices. This is used primarily for keypoints + that are not reflection invariant. E.g. Suppose there are 3 keypoints + representing ['head', 'right_eye', 'left_eye'], then a logical choice for + flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye' + and 'right_eye' after a horizontal flip. + scope: name scope. 
+ + Returns: + new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] + """ + with tf.name_scope(scope, 'FlipHorizontal'): + keypoints = tf.transpose(keypoints, [1, 0, 2]) + keypoints = tf.gather(keypoints, flip_permutation) + v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2) + u = flip_point * 2.0 - u + new_keypoints = tf.concat([v, u], 2) + new_keypoints = tf.transpose(new_keypoints, [1, 0, 2]) + return new_keypoints + + +def flip_vertical(keypoints, flip_point, flip_permutation, scope=None): + """Flips the keypoints vertically around the flip_point. + + This operation flips the y coordinate for each keypoint around the flip_point + and also permutes the keypoints in a manner specified by flip_permutation. + + Args: + keypoints: a tensor of shape [num_instances, num_keypoints, 2] + flip_point: (float) scalar tensor representing the y coordinate to flip the + keypoints around. + flip_permutation: rank 1 int32 tensor containing the keypoint flip + permutation. This specifies the mapping from original keypoint indices + to the flipped keypoint indices. This is used primarily for keypoints + that are not reflection invariant. E.g. Suppose there are 3 keypoints + representing ['head', 'right_eye', 'left_eye'], then a logical choice for + flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye' + and 'right_eye' after the flip. + scope: name scope. + + Returns: + new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] + """ + with tf.name_scope(scope, 'FlipVertical'): + keypoints = tf.transpose(keypoints, [1, 0, 2]) + keypoints = tf.gather(keypoints, flip_permutation) + v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2) + v = flip_point * 2.0 - v + new_keypoints = tf.concat([v, u], 2) + new_keypoints = tf.transpose(new_keypoints, [1, 0, 2]) + return new_keypoints + + +def rot90(keypoints, scope=None): + """Rotates the keypoints counter-clockwise by 90 degrees. + + Args: + keypoints: a tensor of shape [num_instances, num_keypoints, 2] + scope: name scope. + + Returns: + new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] + """ + with tf.name_scope(scope, 'Rot90'): + keypoints = tf.transpose(keypoints, [1, 0, 2]) + v, u = tf.split(value=keypoints[:, :, ::-1], num_or_size_splits=2, axis=2) + v = 1.0 - v + new_keypoints = tf.concat([v, u], 2) + new_keypoints = tf.transpose(new_keypoints, [1, 0, 2]) + return new_keypoints diff --git a/core/keypoint_ops_test.py b/core/keypoint_ops_test.py new file mode 100644 index 0000000..1c09c55 --- /dev/null +++ b/core/keypoint_ops_test.py @@ -0,0 +1,200 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# ============================================================================== + +"""Tests for object_detection.core.keypoint_ops.""" +import numpy as np +import tensorflow as tf + +from object_detection.core import keypoint_ops + + +class KeypointOpsTest(tf.test.TestCase): + """Tests for common keypoint operations.""" + + def test_scale(self): + keypoints = tf.constant([ + [[0.0, 0.0], [100.0, 200.0]], + [[50.0, 120.0], [100.0, 140.0]] + ]) + y_scale = tf.constant(1.0 / 100) + x_scale = tf.constant(1.0 / 200) + + expected_keypoints = tf.constant([ + [[0., 0.], [1.0, 1.0]], + [[0.5, 0.6], [1.0, 0.7]] + ]) + output = keypoint_ops.scale(keypoints, y_scale, x_scale) + + with self.test_session() as sess: + output_, expected_keypoints_ = sess.run([output, expected_keypoints]) + self.assertAllClose(output_, expected_keypoints_) + + def test_clip_to_window(self): + keypoints = tf.constant([ + [[0.25, 0.5], [0.75, 0.75]], + [[0.5, 0.0], [1.0, 1.0]] + ]) + window = tf.constant([0.25, 0.25, 0.75, 0.75]) + + expected_keypoints = tf.constant([ + [[0.25, 0.5], [0.75, 0.75]], + [[0.5, 0.25], [0.75, 0.75]] + ]) + output = keypoint_ops.clip_to_window(keypoints, window) + + with self.test_session() as sess: + output_, expected_keypoints_ = sess.run([output, expected_keypoints]) + self.assertAllClose(output_, expected_keypoints_) + + def test_prune_outside_window(self): + keypoints = tf.constant([ + [[0.25, 0.5], [0.75, 0.75]], + [[0.5, 0.0], [1.0, 1.0]] + ]) + window = tf.constant([0.25, 0.25, 0.75, 0.75]) + + expected_keypoints = tf.constant([[[0.25, 0.5], [0.75, 0.75]], + [[np.nan, np.nan], [np.nan, np.nan]]]) + output = keypoint_ops.prune_outside_window(keypoints, window) + + with self.test_session() as sess: + output_, expected_keypoints_ = sess.run([output, expected_keypoints]) + self.assertAllClose(output_, expected_keypoints_) + + def test_change_coordinate_frame(self): + keypoints = tf.constant([ + [[0.25, 0.5], [0.75, 0.75]], + [[0.5, 0.0], [1.0, 1.0]] + ]) + window = tf.constant([0.25, 0.25, 0.75, 0.75]) + + expected_keypoints = tf.constant([ + [[0, 0.5], [1.0, 1.0]], + [[0.5, -0.5], [1.5, 1.5]] + ]) + output = keypoint_ops.change_coordinate_frame(keypoints, window) + + with self.test_session() as sess: + output_, expected_keypoints_ = sess.run([output, expected_keypoints]) + self.assertAllClose(output_, expected_keypoints_) + + def test_to_normalized_coordinates(self): + keypoints = tf.constant([ + [[10., 30.], [30., 45.]], + [[20., 0.], [40., 60.]] + ]) + output = keypoint_ops.to_normalized_coordinates( + keypoints, 40, 60) + expected_keypoints = tf.constant([ + [[0.25, 0.5], [0.75, 0.75]], + [[0.5, 0.0], [1.0, 1.0]] + ]) + + with self.test_session() as sess: + output_, expected_keypoints_ = sess.run([output, expected_keypoints]) + self.assertAllClose(output_, expected_keypoints_) + + def test_to_normalized_coordinates_already_normalized(self): + keypoints = tf.constant([ + [[0.25, 0.5], [0.75, 0.75]], + [[0.5, 0.0], [1.0, 1.0]] + ]) + output = keypoint_ops.to_normalized_coordinates( + keypoints, 40, 60) + + with self.test_session() as sess: + with self.assertRaisesOpError('assertion failed'): + sess.run(output) + + def test_to_absolute_coordinates(self): + keypoints = tf.constant([ + [[0.25, 0.5], [0.75, 0.75]], + [[0.5, 0.0], [1.0, 1.0]] + ]) + output = keypoint_ops.to_absolute_coordinates( + keypoints, 40, 60) + expected_keypoints = tf.constant([ + [[10., 30.], [30., 45.]], + [[20., 0.], [40., 60.]] + ]) + + with self.test_session() as sess: + output_, expected_keypoints_ = 
sess.run([output, expected_keypoints]) + self.assertAllClose(output_, expected_keypoints_) + + def test_to_absolute_coordinates_already_absolute(self): + keypoints = tf.constant([ + [[10., 30.], [30., 45.]], + [[20., 0.], [40., 60.]] + ]) + output = keypoint_ops.to_absolute_coordinates( + keypoints, 40, 60) + + with self.test_session() as sess: + with self.assertRaisesOpError('assertion failed'): + sess.run(output) + + def test_flip_horizontal(self): + keypoints = tf.constant([ + [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]], + [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]] + ]) + flip_permutation = [0, 2, 1] + + expected_keypoints = tf.constant([ + [[0.1, 0.9], [0.3, 0.7], [0.2, 0.8]], + [[0.4, 0.6], [0.6, 0.4], [0.5, 0.5]], + ]) + output = keypoint_ops.flip_horizontal(keypoints, 0.5, flip_permutation) + + with self.test_session() as sess: + output_, expected_keypoints_ = sess.run([output, expected_keypoints]) + self.assertAllClose(output_, expected_keypoints_) + + def test_flip_vertical(self): + keypoints = tf.constant([ + [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]], + [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]] + ]) + flip_permutation = [0, 2, 1] + + expected_keypoints = tf.constant([ + [[0.9, 0.1], [0.7, 0.3], [0.8, 0.2]], + [[0.6, 0.4], [0.4, 0.6], [0.5, 0.5]], + ]) + output = keypoint_ops.flip_vertical(keypoints, 0.5, flip_permutation) + + with self.test_session() as sess: + output_, expected_keypoints_ = sess.run([output, expected_keypoints]) + self.assertAllClose(output_, expected_keypoints_) + + def test_rot90(self): + keypoints = tf.constant([ + [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]], + [[0.4, 0.6], [0.5, 0.6], [0.6, 0.7]] + ]) + expected_keypoints = tf.constant([ + [[0.9, 0.1], [0.8, 0.2], [0.7, 0.3]], + [[0.4, 0.4], [0.4, 0.5], [0.3, 0.6]], + ]) + output = keypoint_ops.rot90(keypoints) + + with self.test_session() as sess: + output_, expected_keypoints_ = sess.run([output, expected_keypoints]) + self.assertAllClose(output_, expected_keypoints_) + + +if __name__ == '__main__': + tf.test.main() diff --git a/core/losses.py b/core/losses.py new file mode 100644 index 0000000..b4fa428 --- /dev/null +++ b/core/losses.py @@ -0,0 +1,686 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Classification and regression loss functions for object detection. 
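+ +All of these losses share the `Loss` interface defined below: calling a loss +instance on prediction and target tensors (plus per-anchor weights) returns an +unreduced, per-anchor loss tensor; reducing it to a scalar is left to the +caller.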
+ +Localization losses: + * WeightedL2LocalizationLoss + * WeightedSmoothL1LocalizationLoss + * WeightedIOULocalizationLoss + +Classification losses: + * WeightedSigmoidClassificationLoss + * WeightedSoftmaxClassificationLoss + * WeightedSoftmaxClassificationAgainstLogitsLoss + * BootstrappedSigmoidClassificationLoss +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import abc +import six +import tensorflow as tf + +from object_detection.core import box_list +from object_detection.core import box_list_ops +from object_detection.utils import ops + +slim = tf.contrib.slim + + +class Loss(six.with_metaclass(abc.ABCMeta, object)): + """Abstract base class for loss functions.""" + + def __call__(self, + prediction_tensor, + target_tensor, + ignore_nan_targets=False, + losses_mask=None, + scope=None, + **params): + """Call the loss function. + + Args: + prediction_tensor: an N-d tensor of shape [batch, anchors, ...] + representing predicted quantities. + target_tensor: an N-d tensor of shape [batch, anchors, ...] representing + regression or classification targets. + ignore_nan_targets: whether to ignore nan targets in the loss computation. + E.g. can be used if the target tensor is missing groundtruth data that + shouldn't be factored into the loss. + losses_mask: A [batch] boolean tensor that indicates whether losses should + be applied to individual images in the batch. For elements that + are False, corresponding prediction, target, and weight tensors will not + contribute to loss computation. If None, no filtering will take place + prior to loss computation. + scope: Op scope name. Defaults to 'Loss' if None. + **params: Additional keyword arguments for specific implementations of + the Loss. + + Returns: + loss: a tensor representing the value of the loss function. + """ + with tf.name_scope(scope, 'Loss', + [prediction_tensor, target_tensor, params]) as scope: + if ignore_nan_targets: + target_tensor = tf.where(tf.is_nan(target_tensor), + prediction_tensor, + target_tensor) + if losses_mask is not None: + tensor_multiplier = self._get_loss_multiplier_for_tensor( + prediction_tensor, + losses_mask) + prediction_tensor *= tensor_multiplier + target_tensor *= tensor_multiplier + + if 'weights' in params: + params['weights'] = tf.convert_to_tensor(params['weights']) + weights_multiplier = self._get_loss_multiplier_for_tensor( + params['weights'], + losses_mask) + params['weights'] *= weights_multiplier + return self._compute_loss(prediction_tensor, target_tensor, **params) + + def _get_loss_multiplier_for_tensor(self, tensor, losses_mask): + loss_multiplier_shape = tf.stack([-1] + [1] * (len(tensor.shape) - 1)) + return tf.cast(tf.reshape(losses_mask, loss_multiplier_shape), tf.float32) + + @abc.abstractmethod + def _compute_loss(self, prediction_tensor, target_tensor, **params): + """Method to be overridden by implementations. + + Args: + prediction_tensor: a tensor representing predicted quantities + target_tensor: a tensor representing regression or classification targets + **params: Additional keyword arguments for specific implementations of + the Loss. + + Returns: + loss: an N-d tensor of shape [batch, anchors, ...] containing the loss per + anchor + """ + pass + + +class WeightedL2LocalizationLoss(Loss): + """L2 localization loss function with anchorwise output support. 
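The interplay of ignore_nan_targets and losses_mask in Loss.__call__ above is easiest to see with concrete values. A minimal TF 1.x sketch (all tensor values here are illustrative, not taken from the patch):

    import numpy as np
    import tensorflow as tf

    # Batch of 2 images, 3 anchors, code size 1; one target entry is NaN.
    predictions = tf.constant([[[1.0], [2.0], [3.0]], [[4.0], [5.0], [6.0]]])
    targets = tf.constant([[[1.5], [np.nan], [3.0]], [[0.0], [0.0], [0.0]]])

    # ignore_nan_targets: NaN targets are replaced by the predictions, so the
    # difference (and hence the loss) at those positions is exactly zero.
    targets = tf.where(tf.is_nan(targets), predictions, targets)

    # losses_mask: a [batch] boolean, reshaped to [batch, 1, 1], zeroes out
    # both tensors for masked images before the loss is computed.
    multiplier = tf.cast(tf.reshape(tf.constant([True, False]), [-1, 1, 1]),
                         tf.float32)
    loss = tf.reduce_sum(
        0.5 * tf.square(predictions * multiplier - targets * multiplier), 2)

    with tf.Session() as sess:
      print(sess.run(loss))  # [[0.125 0. 0.] [0. 0. 0.]]: image 2 contributes nothing.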
+
+  Loss[b,a] = .5 * ||weights[b,a] * (prediction[b,a,:] - target[b,a,:])||^2
+  """
+
+  def _compute_loss(self, prediction_tensor, target_tensor, weights):
+    """Compute loss function.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        code_size] representing the (encoded) predicted locations of objects.
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        code_size] representing the regression targets.
+      weights: a float tensor of shape [batch_size, num_anchors]
+
+    Returns:
+      loss: a float tensor of shape [batch_size, num_anchors]
+        representing the value of the loss function.
+    """
+    weighted_diff = (prediction_tensor - target_tensor) * tf.expand_dims(
+        weights, 2)
+    square_diff = 0.5 * tf.square(weighted_diff)
+    return tf.reduce_sum(square_diff, 2)
+
+
+class WeightedSmoothL1LocalizationLoss(Loss):
+  """Smooth L1 localization loss function, a.k.a. Huber loss.
+
+  The smooth L1 loss is defined elementwise as .5 x^2 if |x| <= delta and
+  delta * (|x| - 0.5*delta) otherwise, where x is the difference between
+  predictions and target.
+
+  See also Equation (3) in the Fast R-CNN paper by Ross Girshick (ICCV 2015).
+  """
+
+  def __init__(self, delta=1.0):
+    """Constructor.
+
+    Args:
+      delta: delta for smooth L1 loss.
+    """
+    super(WeightedSmoothL1LocalizationLoss, self).__init__()
+    self._delta = delta
+
+  def _compute_loss(self, prediction_tensor, target_tensor, weights):
+    """Compute loss function.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        code_size] representing the (encoded) predicted locations of objects.
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        code_size] representing the regression targets.
+      weights: a float tensor of shape [batch_size, num_anchors]
+
+    Returns:
+      loss: a float tensor of shape [batch_size, num_anchors]
+        representing the value of the loss function.
+    """
+    return tf.reduce_sum(tf.losses.huber_loss(
+        target_tensor,
+        prediction_tensor,
+        delta=self._delta,
+        weights=tf.expand_dims(weights, axis=2),
+        loss_collection=None,
+        reduction=tf.losses.Reduction.NONE
+    ), axis=2)
+
+
+class WeightedIOULocalizationLoss(Loss):
+  """IOU localization loss function.
+
+  Takes corresponding pairs of predicted/groundtruth boxes and assigns each
+  pair a loss of 1 - IOU. We then compute a weighted sum over all pairs
+  which is returned as the total loss.
+  """
+
+  def _compute_loss(self, prediction_tensor, target_tensor, weights):
+    """Compute loss function.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors, 4]
+        representing the decoded predicted boxes.
+      target_tensor: A float tensor of shape [batch_size, num_anchors, 4]
+        representing the decoded target boxes.
+      weights: a float tensor of shape [batch_size, num_anchors]
+
+    Returns:
+      loss: a float tensor of shape [batch_size, num_anchors]
+        representing the value of the loss function.
+    """
+    predicted_boxes = box_list.BoxList(tf.reshape(prediction_tensor, [-1, 4]))
+    target_boxes = box_list.BoxList(tf.reshape(target_tensor, [-1, 4]))
+    per_anchor_iou_loss = 1.0 - box_list_ops.matched_iou(predicted_boxes,
+                                                         target_boxes)
+    return tf.reshape(weights, [-1]) * per_anchor_iou_loss
+
+
+class WeightedSigmoidClassificationLoss(Loss):
+  """Sigmoid cross entropy classification loss function."""
+
+  def _compute_loss(self,
+                    prediction_tensor,
+                    target_tensor,
+                    weights,
+                    class_indices=None):
+    """Compute loss function.
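As a quick numeric check of the elementwise smooth L1 definition above, a plain NumPy sketch (not part of the patch) with delta = 1:

    import numpy as np

    def smooth_l1(x, delta=1.0):
        # 0.5 * x^2 for |x| <= delta, delta * (|x| - 0.5 * delta) otherwise.
        abs_x = np.abs(x)
        return np.where(abs_x <= delta,
                        0.5 * np.square(x),
                        delta * (abs_x - 0.5 * delta))

    print(smooth_l1(np.array([-2.0, -0.5, 0.0, 0.5, 2.0])))
    # [1.5 0.125 0. 0.125 1.5] -- quadratic near zero, linear in the tails.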
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing the predicted logits for each class
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing one-hot encoded classification targets
+      weights: a float tensor of shape, either [batch_size, num_anchors,
+        num_classes] or [batch_size, num_anchors, 1]. If the shape is
+        [batch_size, num_anchors, 1], all the classes are equally weighted.
+      class_indices: (Optional) A 1-D integer tensor of class indices.
+        If provided, computes loss only for the specified class indices.
+
+    Returns:
+      loss: a float tensor of shape [batch_size, num_anchors, num_classes]
+        representing the value of the loss function.
+    """
+    if class_indices is not None:
+      weights *= tf.reshape(
+          ops.indices_to_dense_vector(class_indices,
+                                      tf.shape(prediction_tensor)[2]),
+          [1, 1, -1])
+    per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
+        labels=target_tensor, logits=prediction_tensor))
+    return per_entry_cross_ent * weights
+
+
+class SigmoidFocalClassificationLoss(Loss):
+  """Sigmoid focal cross entropy loss.
+
+  Focal loss down-weights well-classified examples and focuses on the hard
+  examples. See https://arxiv.org/pdf/1708.02002.pdf for the loss definition.
+  """
+
+  def __init__(self, gamma=2.0, alpha=0.25):
+    """Constructor.
+
+    Args:
+      gamma: exponent of the modulating factor (1 - p_t) ^ gamma.
+      alpha: optional alpha weighting factor to balance positives vs negatives.
+    """
+    super(SigmoidFocalClassificationLoss, self).__init__()
+    self._alpha = alpha
+    self._gamma = gamma
+
+  def _compute_loss(self,
+                    prediction_tensor,
+                    target_tensor,
+                    weights,
+                    class_indices=None):
+    """Compute loss function.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing the predicted logits for each class
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing one-hot encoded classification targets
+      weights: a float tensor of shape, either [batch_size, num_anchors,
+        num_classes] or [batch_size, num_anchors, 1]. If the shape is
+        [batch_size, num_anchors, 1], all the classes are equally weighted.
+      class_indices: (Optional) A 1-D integer tensor of class indices.
+        If provided, computes loss only for the specified class indices.
+
+    Returns:
+      loss: a float tensor of shape [batch_size, num_anchors, num_classes]
+        representing the value of the loss function.
+    """
+    if class_indices is not None:
+      weights *= tf.reshape(
+          ops.indices_to_dense_vector(class_indices,
+                                      tf.shape(prediction_tensor)[2]),
+          [1, 1, -1])
+    per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
+        labels=target_tensor, logits=prediction_tensor))
+    prediction_probabilities = tf.sigmoid(prediction_tensor)
+    p_t = ((target_tensor * prediction_probabilities) +
+           ((1 - target_tensor) * (1 - prediction_probabilities)))
+    modulating_factor = 1.0
+    if self._gamma:
+      modulating_factor = tf.pow(1.0 - p_t, self._gamma)
+    alpha_weight_factor = 1.0
+    if self._alpha is not None:
+      alpha_weight_factor = (target_tensor * self._alpha +
+                             (1 - target_tensor) * (1 - self._alpha))
+    focal_cross_entropy_loss = (modulating_factor * alpha_weight_factor *
+                                per_entry_cross_ent)
+    return focal_cross_entropy_loss * weights
+
+
+class WeightedSoftmaxClassificationLoss(Loss):
+  """Softmax loss function."""
+
+  def __init__(self, logit_scale=1.0):
+    """Constructor.
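The down-weighting performed by the focal loss above follows directly from the modulating and alpha factors; a NumPy sketch with made-up logits (gamma=2, alpha=0.25, the defaults):

    import numpy as np

    def sigmoid_focal_weight(logits, targets, gamma=2.0, alpha=0.25):
        # Reproduces the modulating factor (1 - p_t)^gamma and the alpha factor.
        p = 1.0 / (1.0 + np.exp(-logits))
        p_t = targets * p + (1 - targets) * (1 - p)
        alpha_t = targets * alpha + (1 - targets) * (1 - alpha)
        return alpha_t * (1.0 - p_t) ** gamma

    # An easy positive (logit 4) vs. a hard positive (logit -2):
    print(sigmoid_focal_weight(np.array([4.0, -2.0]), np.array([1.0, 1.0])))
    # [~8.1e-05 ~0.194] -- the well-classified example is weighted ~2400x less.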
+
+    Args:
+      logit_scale: When this value is high, the prediction is "diffused" and
+        when this value is low, the prediction is made peakier.
+        (default 1.0)
+    """
+    super(WeightedSoftmaxClassificationLoss, self).__init__()
+    self._logit_scale = logit_scale
+
+  def _compute_loss(self, prediction_tensor, target_tensor, weights):
+    """Compute loss function.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing the predicted logits for each class
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing one-hot encoded classification targets
+      weights: a float tensor of shape, either [batch_size, num_anchors,
+        num_classes] or [batch_size, num_anchors, 1]. If the shape is
+        [batch_size, num_anchors, 1], all the classes are equally weighted.
+
+    Returns:
+      loss: a float tensor of shape [batch_size, num_anchors]
+        representing the value of the loss function.
+    """
+    weights = tf.reduce_mean(weights, axis=2)
+    num_classes = prediction_tensor.get_shape().as_list()[-1]
+    prediction_tensor = tf.divide(
+        prediction_tensor, self._logit_scale, name='scale_logit')
+    per_row_cross_ent = (tf.nn.softmax_cross_entropy_with_logits(
+        labels=tf.reshape(target_tensor, [-1, num_classes]),
+        logits=tf.reshape(prediction_tensor, [-1, num_classes])))
+    return tf.reshape(per_row_cross_ent, tf.shape(weights)) * weights
+
+
+class WeightedSoftmaxClassificationAgainstLogitsLoss(Loss):
+  """Softmax loss function against logits.
+
+  Targets are expected to be provided in logits space instead of "one hot" or
+  "probability distribution" space.
+  """
+
+  def __init__(self, logit_scale=1.0):
+    """Constructor.
+
+    Args:
+      logit_scale: When this value is high, the target is "diffused" and
+        when this value is low, the target is made peakier.
+        (default 1.0)
+    """
+    super(WeightedSoftmaxClassificationAgainstLogitsLoss, self).__init__()
+    self._logit_scale = logit_scale
+
+  def _scale_and_softmax_logits(self, logits):
+    """Scale logits then apply softmax."""
+    scaled_logits = tf.divide(logits, self._logit_scale, name='scale_logits')
+    return tf.nn.softmax(scaled_logits, name='convert_scores')
+
+  def _compute_loss(self, prediction_tensor, target_tensor, weights):
+    """Compute loss function.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing the predicted logits for each class
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing logit classification targets
+      weights: a float tensor of shape, either [batch_size, num_anchors,
+        num_classes] or [batch_size, num_anchors, 1]. If the shape is
+        [batch_size, num_anchors, 1], all the classes are equally weighted.
+
+    Returns:
+      loss: a float tensor of shape [batch_size, num_anchors]
+        representing the value of the loss function.
+    """
+    weights = tf.reduce_mean(weights, axis=2)
+    num_classes = prediction_tensor.get_shape().as_list()[-1]
+    target_tensor = self._scale_and_softmax_logits(target_tensor)
+    prediction_tensor = tf.divide(prediction_tensor, self._logit_scale,
+                                  name='scale_logits')
+
+    per_row_cross_ent = (tf.nn.softmax_cross_entropy_with_logits(
+        labels=tf.reshape(target_tensor, [-1, num_classes]),
+        logits=tf.reshape(prediction_tensor, [-1, num_classes])))
+    return tf.reshape(per_row_cross_ent, tf.shape(weights)) * weights
+
+
+class BootstrappedSigmoidClassificationLoss(Loss):
+  """Bootstrapped sigmoid cross entropy classification loss function.
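logit_scale above acts as a temperature: dividing the logits by a large scale before the softmax "diffuses" the distribution, while a small scale makes it peakier. A NumPy sketch:

    import numpy as np

    def softmax(z):
        e = np.exp(z - z.max())
        return e / e.sum()

    logits = np.array([2.0, 1.0, 0.0])
    for logit_scale in (1.0, 10.0):
        print(logit_scale, softmax(logits / logit_scale))
    # 1.0  -> [0.665 0.245 0.090]  (peaky)
    # 10.0 -> [0.367 0.332 0.301]  (diffused)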
+
+  This loss uses a convex combination of training labels and the current
+  model's predictions as training targets in the classification loss. The idea
+  is that as the model improves over time, its predictions can be trusted more
+  and we can use these predictions to mitigate the damage of noisy/incorrect
+  labels, because incorrect labels are likely to be eventually highly
+  inconsistent with other stimuli predicted to have the same label by the
+  model.
+
+  In "soft" bootstrapping, we use all predicted class probabilities, whereas in
+  "hard" bootstrapping, we use the single class favored by the model.
+
+  See also Training Deep Neural Networks On Noisy Labels with Bootstrapping by
+  Reed et al. (ICLR 2015).
+  """
+
+  def __init__(self, alpha, bootstrap_type='soft'):
+    """Constructor.
+
+    Args:
+      alpha: a float32 scalar tensor between 0 and 1 representing the
+        interpolation weight.
+      bootstrap_type: set to either 'hard' or 'soft' (default)
+
+    Raises:
+      ValueError: if bootstrap_type is not either 'hard' or 'soft'
+    """
+    super(BootstrappedSigmoidClassificationLoss, self).__init__()
+    if bootstrap_type != 'hard' and bootstrap_type != 'soft':
+      raise ValueError('Unrecognized bootstrap_type: must be one of '
+                       '\'hard\' or \'soft.\'')
+    self._alpha = alpha
+    self._bootstrap_type = bootstrap_type
+
+  def _compute_loss(self, prediction_tensor, target_tensor, weights):
+    """Compute loss function.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing the predicted logits for each class
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing one-hot encoded classification targets
+      weights: a float tensor of shape, either [batch_size, num_anchors,
+        num_classes] or [batch_size, num_anchors, 1]. If the shape is
+        [batch_size, num_anchors, 1], all the classes are equally weighted.
+
+    Returns:
+      loss: a float tensor of shape [batch_size, num_anchors, num_classes]
+        representing the value of the loss function.
+    """
+    if self._bootstrap_type == 'soft':
+      bootstrap_target_tensor = self._alpha * target_tensor + (
+          1.0 - self._alpha) * tf.sigmoid(prediction_tensor)
+    else:
+      bootstrap_target_tensor = self._alpha * target_tensor + (
+          1.0 - self._alpha) * tf.cast(
+              tf.sigmoid(prediction_tensor) > 0.5, tf.float32)
+    per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
+        labels=bootstrap_target_tensor, logits=prediction_tensor))
+    return per_entry_cross_ent * weights
+
+
+class HardExampleMiner(object):
+  """Hard example mining for regions in a list of images.
+
+  Implements hard example mining to select a subset of regions to be
+  back-propagated. For each image, selects the regions with highest losses,
+  subject to the condition that a newly selected region cannot have
+  an IOU > iou_threshold with any of the previously selected regions.
+  This can be achieved by re-using a greedy non-maximum suppression algorithm.
+  A constraint on the number of negatives mined per positive region can also be
+  enforced.
+
+  Reference papers: "Training Region-based Object Detectors with Online
+  Hard Example Mining" (CVPR 2016) by Srivastava et al., and
+  "SSD: Single Shot MultiBox Detector" (ECCV 2016) by Liu et al.
+  """
+
+  def __init__(self,
+               num_hard_examples=64,
+               iou_threshold=0.7,
+               loss_type='both',
+               cls_loss_weight=0.05,
+               loc_loss_weight=0.06,
+               max_negatives_per_positive=None,
+               min_negatives_per_image=0):
+    """Constructor.
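The soft/hard bootstrapped targets described above, computed on toy values (NumPy sketch; alpha and the predicted probabilities are made up):

    import numpy as np

    alpha = 0.9
    labels = np.array([1.0, 1.0, 0.0])
    probs = np.array([0.8, 0.2, 0.6])  # sigmoid(prediction_tensor)

    # Convex combination of labels with the predicted probabilities ('soft')
    # or with the model's favored class ('hard').
    soft_targets = alpha * labels + (1 - alpha) * probs
    hard_targets = alpha * labels + (1 - alpha) * (probs > 0.5).astype(float)
    print(soft_targets)  # [0.98 0.92 0.06]
    print(hard_targets)  # [1.  0.9 0.1]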
+
+    The hard example mining implemented by this class can replicate the
+    behavior in the two aforementioned papers (Srivastava et al., and Liu et
+    al.). To replicate the OHEM paper (Srivastava et al.), num_hard_examples is
+    set to a fixed parameter (64 by default) and iou_threshold is set to .7 for
+    running non-max-suppression on the predicted boxes prior to hard mining.
+    In order to replicate the SSD paper (Liu et al.), num_hard_examples should
+    be set to None, max_negatives_per_positive should be 3 and iou_threshold
+    should be 1.0 (in order to effectively turn off NMS).
+
+    Args:
+      num_hard_examples: maximum number of hard examples to be
+        selected per image (prior to enforcing the max negative to positive
+        ratio constraint). If set to None, all examples obtained after NMS are
+        considered.
+      iou_threshold: minimum intersection over union for an example
+        to be discarded during NMS.
+      loss_type: use only classification losses ('cls'), only localization
+        losses ('loc') or both losses ('both', default).
+        In the last case, cls_loss_weight and loc_loss_weight are used to
+        compute a weighted sum of the two losses.
+      cls_loss_weight: weight for classification loss.
+      loc_loss_weight: weight for location loss.
+      max_negatives_per_positive: maximum number of negatives to retain for
+        each positive anchor. By default, max_negatives_per_positive is None,
+        which means that we do not enforce a prespecified negative:positive
+        ratio. Note also that max_negatives_per_positive can be a float
+        (and will be converted to a float even if it is passed in otherwise).
+      min_negatives_per_image: minimum number of negative anchors to sample for
+        a given image. Setting this to a positive number allows sampling
+        negatives in an image without any positive anchors, so that mining is
+        not biased towards at least one detection per image.
+    """
+    self._num_hard_examples = num_hard_examples
+    self._iou_threshold = iou_threshold
+    self._loss_type = loss_type
+    self._cls_loss_weight = cls_loss_weight
+    self._loc_loss_weight = loc_loss_weight
+    self._max_negatives_per_positive = max_negatives_per_positive
+    self._min_negatives_per_image = min_negatives_per_image
+    if self._max_negatives_per_positive is not None:
+      self._max_negatives_per_positive = float(self._max_negatives_per_positive)
+    self._num_positives_list = None
+    self._num_negatives_list = None
+
+  def __call__(self,
+               location_losses,
+               cls_losses,
+               decoded_boxlist_list,
+               match_list=None):
+    """Computes localization and classification losses after hard mining.
+
+    Args:
+      location_losses: a float tensor of shape [num_images, num_anchors]
+        representing anchorwise localization losses.
+      cls_losses: a float tensor of shape [num_images, num_anchors]
+        representing anchorwise classification losses.
+      decoded_boxlist_list: a list of decoded BoxList representing location
+        predictions for each image.
+      match_list: an optional list of matcher.Match objects encoding the match
+        between anchors and groundtruth boxes for each image of the batch,
+        with rows of the Match objects corresponding to groundtruth boxes
+        and columns corresponding to anchors. Match objects in match_list are
+        used to reference which anchors are positive, negative or ignored. If
+        self._max_negatives_per_positive exists, these are then used to enforce
+        a prespecified negative to positive ratio.
+
+    Returns:
+      mined_location_loss: a float scalar with sum of localization losses from
+        selected hard examples.
+      mined_cls_loss: a float scalar with sum of classification losses from
+        selected hard examples.
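Based on the constructor docstring above, the two replication settings would look roughly as follows (a hypothetical sketch; it assumes the HardExampleMiner class defined in this file):

    # OHEM-style mining (Srivastava et al.): fixed budget, NMS at IOU 0.7.
    ohem_miner = HardExampleMiner(num_hard_examples=64, iou_threshold=0.7,
                                  loss_type='both')

    # SSD-style mining (Liu et al.): no fixed budget, at most 3 negatives per
    # positive; iou_threshold=1.0 effectively disables the NMS step.
    ssd_miner = HardExampleMiner(num_hard_examples=None, iou_threshold=1.0,
                                 loss_type='cls', max_negatives_per_positive=3)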
+    Raises:
+      ValueError: if location_losses, cls_losses and decoded_boxlist_list do
+        not have compatible shapes (i.e., they must correspond to the same
+        number of images).
+      ValueError: if match_list is specified but its length does not match
+        len(decoded_boxlist_list).
+    """
+    mined_location_losses = []
+    mined_cls_losses = []
+    location_losses = tf.unstack(location_losses)
+    cls_losses = tf.unstack(cls_losses)
+    num_images = len(decoded_boxlist_list)
+    if not match_list:
+      match_list = num_images * [None]
+    if not len(location_losses) == len(decoded_boxlist_list) == len(cls_losses):
+      raise ValueError('location_losses, cls_losses and decoded_boxlist_list '
+                       'do not have compatible shapes.')
+    if not isinstance(match_list, list):
+      raise ValueError('match_list must be a list.')
+    if len(match_list) != len(decoded_boxlist_list):
+      raise ValueError('match_list must either be None or have '
+                       'length=len(decoded_boxlist_list).')
+    num_positives_list = []
+    num_negatives_list = []
+    for ind, detection_boxlist in enumerate(decoded_boxlist_list):
+      box_locations = detection_boxlist.get()
+      match = match_list[ind]
+      image_losses = cls_losses[ind]
+      if self._loss_type == 'loc':
+        image_losses = location_losses[ind]
+      elif self._loss_type == 'both':
+        image_losses *= self._cls_loss_weight
+        image_losses += location_losses[ind] * self._loc_loss_weight
+      if self._num_hard_examples is not None:
+        num_hard_examples = self._num_hard_examples
+      else:
+        num_hard_examples = detection_boxlist.num_boxes()
+      selected_indices = tf.image.non_max_suppression(
+          box_locations, image_losses, num_hard_examples, self._iou_threshold)
+      if self._max_negatives_per_positive is not None and match:
+        (selected_indices, num_positives,
+         num_negatives) = self._subsample_selection_to_desired_neg_pos_ratio(
+             selected_indices, match, self._max_negatives_per_positive,
+             self._min_negatives_per_image)
+        num_positives_list.append(num_positives)
+        num_negatives_list.append(num_negatives)
+      mined_location_losses.append(
+          tf.reduce_sum(tf.gather(location_losses[ind], selected_indices)))
+      mined_cls_losses.append(
+          tf.reduce_sum(tf.gather(cls_losses[ind], selected_indices)))
+    location_loss = tf.reduce_sum(tf.stack(mined_location_losses))
+    cls_loss = tf.reduce_sum(tf.stack(mined_cls_losses))
+    if match and self._max_negatives_per_positive:
+      self._num_positives_list = num_positives_list
+      self._num_negatives_list = num_negatives_list
+    return (location_loss, cls_loss)
+
+  def summarize(self):
+    """Summarize the number of positives and negatives after mining."""
+    if self._num_positives_list and self._num_negatives_list:
+      avg_num_positives = tf.reduce_mean(
+          tf.cast(self._num_positives_list, dtype=tf.float32))
+      avg_num_negatives = tf.reduce_mean(
+          tf.cast(self._num_negatives_list, dtype=tf.float32))
+      tf.summary.scalar('HardExampleMiner/NumPositives', avg_num_positives)
+      tf.summary.scalar('HardExampleMiner/NumNegatives', avg_num_negatives)
+
+  def _subsample_selection_to_desired_neg_pos_ratio(self,
+                                                    indices,
+                                                    match,
+                                                    max_negatives_per_positive,
+                                                    min_negatives_per_image=0):
+    """Subsample a collection of selected indices to a desired neg:pos ratio.
+
+    This function takes a subset of M indices (indexing into a large anchor
+    collection of N anchors where M < N) that are labeled and sorted by
+    coefficient. Within this subset, we enforce a maximum negative to positive
+    ratio as well as a minimum number of negatives per image.
+
+    Args:
+      indices: An integer tensor of shape [M] representing a collection
+        of selected anchor indices.
+      match: A matcher.Match object encoding the match between anchors and
+        groundtruth boxes for a given image, with rows of the Match objects
+        corresponding to groundtruth boxes and columns corresponding to
+        anchors.
+      max_negatives_per_positive: (float) maximum number of negatives for
+        each positive anchor.
+      min_negatives_per_image: minimum number of negative anchors for a given
+        image, which allows sampling negatives in an image without any
+        positive anchors.
+
+    Returns:
+      selected_indices: An integer tensor of shape [M'] representing a
+        collection of selected anchor indices with M' <= M.
+      num_positives: An integer tensor representing the number of positive
+        examples in the selected set of indices.
+      num_negatives: An integer tensor representing the number of negative
+        examples in the selected set of indices.
+    """
+    positives_indicator = tf.gather(match.matched_column_indicator(), indices)
+    negatives_indicator = tf.gather(match.unmatched_column_indicator(),
+                                    indices)
+    num_positives = tf.reduce_sum(tf.cast(positives_indicator, dtype=tf.int32))
+    max_negatives = tf.maximum(
+        min_negatives_per_image,
+        tf.cast(max_negatives_per_positive *
+                tf.cast(num_positives, dtype=tf.float32), dtype=tf.int32))
+    topk_negatives_indicator = tf.less_equal(
+        tf.cumsum(tf.cast(negatives_indicator, dtype=tf.int32)), max_negatives)
+    subsampled_selection_indices = tf.where(
+        tf.logical_or(positives_indicator, topk_negatives_indicator))
+    num_negatives = tf.size(subsampled_selection_indices) - num_positives
+    return (tf.reshape(tf.gather(indices, subsampled_selection_indices), [-1]),
+            num_positives, num_negatives)
diff --git a/core/matcher.py b/core/matcher.py
new file mode 100644
--- /dev/null
+++ b/core/matcher.py
@@ -0,0 +1,270 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Matcher interface and Match class.
+
+This module defines the Matcher interface and the Match object. The job of the
+matcher is to match row and column indices based on the similarity matrix and
+other optional parameters. Each column is matched to at most one row. There
+are three possibilities for the matching:
+
+1) match: A column matches a row.
+2) no_match: A column does not match any row.
+3) ignore: A column that is neither 'match' nor 'no_match'.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import abc
+import six
+import tensorflow as tf
+
+from object_detection.utils import ops
+
+
+class Match(object):
+  """Class to store results from the matcher.
+
+  This class is used to store the match results from the matcher. It provides
+  convenient methods to query the matching results.
+  """
+
+  def __init__(self, match_results, use_matmul_gather=False):
+    """Constructs a Match object.
+
+    Args:
+      match_results: Integer tensor of shape [N] with (1) match_results[i]>=0,
+        meaning that column i is matched with row match_results[i].
+        (2) match_results[i]=-1, meaning that column i is not matched.
+        (3) match_results[i]=-2, meaning that column i is ignored.
+      use_matmul_gather: Use matrix multiplication based gather instead of
+        standard tf.gather. (Default: False).
+
+    Raises:
+      ValueError: if match_results does not have rank 1 or is not of type
+        int32.
+    """
+    if match_results.shape.ndims != 1:
+      raise ValueError('match_results should have rank 1')
+    if match_results.dtype != tf.int32:
+      raise ValueError('match_results should be an int32 tensor')
+    self._match_results = match_results
+    self._gather_op = tf.gather
+    if use_matmul_gather:
+      self._gather_op = ops.matmul_gather_on_zeroth_axis
+
+  @property
+  def match_results(self):
+    """The accessor for match results.
+
+    Returns:
+      the tensor which encodes the match results.
+    """
+    return self._match_results
+
+  def matched_column_indices(self):
+    """Returns column indices that match to some row.
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(tf.greater(self._match_results, -1)))
+
+  def matched_column_indicator(self):
+    """Returns a boolean indicator of which columns are matched.
+
+    Returns:
+      column_indicator: boolean tensor of shape [N], True for matched columns.
+    """
+    return tf.greater_equal(self._match_results, 0)
+
+  def num_matched_columns(self):
+    """Returns number (int32 scalar tensor) of matched columns."""
+    return tf.size(self.matched_column_indices())
+
+  def unmatched_column_indices(self):
+    """Returns column indices that do not match any row.
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(tf.equal(self._match_results, -1)))
+
+  def unmatched_column_indicator(self):
+    """Returns a boolean indicator of which columns are unmatched.
+
+    Returns:
+      column_indicator: boolean tensor of shape [N], True for unmatched
+        columns.
+    """
+    return tf.equal(self._match_results, -1)
+
+  def num_unmatched_columns(self):
+    """Returns number (int32 scalar tensor) of unmatched columns."""
+    return tf.size(self.unmatched_column_indices())
+
+  def ignored_column_indices(self):
+    """Returns column indices that are ignored (neither Matched nor Unmatched).
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(self.ignored_column_indicator()))
+
+  def ignored_column_indicator(self):
+    """Returns boolean column indicator where True means the column is ignored.
+
+    Returns:
+      column_indicator: boolean vector which is True for all ignored column
+        indices.
+    """
+    return tf.equal(self._match_results, -2)
+
+  def num_ignored_columns(self):
+    """Returns number (int32 scalar tensor) of ignored columns."""
+    return tf.size(self.ignored_column_indices())
+
+  def unmatched_or_ignored_column_indices(self):
+    """Returns column indices that are unmatched or ignored.
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(tf.greater(0, self._match_results)))
+
+  def matched_row_indices(self):
+    """Returns row indices that match some column.
+
+    The indices returned by this op are ordered so as to be in correspondence
+    with the output of matched_column_indices(). For example if
+    self.matched_column_indices() is [0,2], and self.matched_row_indices() is
+    [7, 3], then we know that column 0 was matched to row 7 and column 2 was
+    matched to row 3.
+
+    Returns:
+      row_indices: int32 tensor of shape [K] with row indices.
+    """
+    return self._reshape_and_cast(
+        self._gather_op(tf.cast(self._match_results, dtype=tf.float32),
+                        self.matched_column_indices()))
+
+  def num_matched_rows(self):
+    """Returns number (int32 scalar tensor) of matched rows."""
+    unique_rows, _ = tf.unique(self.matched_row_indices())
+    return tf.size(unique_rows)
+
+  def _reshape_and_cast(self, t):
+    return tf.cast(tf.reshape(t, [-1]), tf.int32)
+
+  def gather_based_on_match(self, input_tensor, unmatched_value,
+                            ignored_value):
+    """Gathers elements from `input_tensor` based on match results.
+
+    For columns that are matched to a row, gathered_tensor[col] is set to
+    input_tensor[match_results[col]]. For columns that are unmatched,
+    gathered_tensor[col] is set to unmatched_value. Finally, for columns that
+    are ignored gathered_tensor[col] is set to ignored_value.
+
+    Note that input_tensor.shape[1:] must match unmatched_value.shape
+    and ignored_value.shape.
+
+    Args:
+      input_tensor: Tensor to gather values from.
+      unmatched_value: Constant tensor value for unmatched columns.
+      ignored_value: Constant tensor value for ignored columns.
+
+    Returns:
+      gathered_tensor: A tensor containing values gathered from input_tensor.
+        The shape of the gathered tensor is [match_results.shape[0]] +
+        input_tensor.shape[1:].
+    """
+    input_tensor = tf.concat(
+        [tf.stack([ignored_value, unmatched_value]),
+         input_tensor],
+        axis=0)
+    gather_indices = tf.maximum(self.match_results + 2, 0)
+    gathered_tensor = self._gather_op(input_tensor, gather_indices)
+    return gathered_tensor
+
+
+class Matcher(six.with_metaclass(abc.ABCMeta, object)):
+  """Abstract base class for matcher."""
+
+  def __init__(self, use_matmul_gather=False):
+    """Constructs a Matcher.
+
+    Args:
+      use_matmul_gather: Force constructed match objects to use matrix
+        multiplication based gather instead of standard tf.gather.
+        (Default: False).
+    """
+    self._use_matmul_gather = use_matmul_gather
+
+  def match(self, similarity_matrix, valid_rows=None, scope=None):
+    """Computes matches among row and column indices and returns the result.
+
+    Computes matches among the row and column indices based on the similarity
+    matrix and optional arguments.
+
+    Args:
+      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
+        where higher value means more similar.
+      valid_rows: A boolean tensor of shape [N] indicating the rows that are
+        valid for matching.
+      scope: Op scope name. Defaults to 'Match' if None.
+
+    Returns:
+      A Match object with the results of matching.
+    """
+    with tf.name_scope(scope, 'Match') as scope:
+      if valid_rows is None:
+        valid_rows = tf.ones(tf.shape(similarity_matrix)[0], dtype=tf.bool)
+      return Match(self._match(similarity_matrix, valid_rows),
+                   self._use_matmul_gather)
+
+  @abc.abstractmethod
+  def _match(self, similarity_matrix, valid_rows):
+    """Method to be overridden by implementations.
+
+    Args:
+      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
+        where higher value means more similar.
+      valid_rows: A boolean tensor of shape [N] indicating the rows that are
+        valid for matching.
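The index arithmetic in gather_based_on_match above is compact; a NumPy sketch of the same trick, using the fill values that appear in the tests below (100 for unmatched, 200 for ignored):

    import numpy as np

    match_results = np.array([3, 1, -1, 0, -1, 5, -2])
    input_tensor = np.arange(8, dtype=np.float32)  # values for rows 0..7

    # Prepend the ignored and unmatched fill values, then shift indices by 2:
    # -2 -> slot 0 (ignored), -1 -> slot 1 (unmatched), r >= 0 -> r + 2.
    padded = np.concatenate([[200.0, 100.0], input_tensor])
    print(padded[np.maximum(match_results + 2, 0)])
    # [  3.   1. 100.   0. 100.   5. 200.]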
+ Returns: + match_results: Integer tensor of shape [M]: match_results[i]>=0 means + that column i is matched to row match_results[i], match_results[i]=-1 + means that the column is not matched. match_results[i]=-2 means that + the column is ignored (usually this happens when there is a very weak + match which one neither wants as positive nor negative example). + """ + pass diff --git a/core/matcher_test.py b/core/matcher_test.py new file mode 100644 index 0000000..1ed3216 --- /dev/null +++ b/core/matcher_test.py @@ -0,0 +1,197 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for object_detection.core.matcher.""" +import numpy as np +import tensorflow as tf + +from object_detection.core import matcher + + +class MatchTest(tf.test.TestCase): + + def test_get_correct_matched_columnIndices(self): + match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) + match = matcher.Match(match_results) + expected_column_indices = [0, 1, 3, 5] + matched_column_indices = match.matched_column_indices() + self.assertEqual(matched_column_indices.dtype, tf.int32) + with self.test_session() as sess: + matched_column_indices = sess.run(matched_column_indices) + self.assertAllEqual(matched_column_indices, expected_column_indices) + + def test_get_correct_counts(self): + match_results = tf.constant([3, 1, -1, 0, -1, 1, -2]) + match = matcher.Match(match_results) + exp_num_matched_columns = 4 + exp_num_unmatched_columns = 2 + exp_num_ignored_columns = 1 + exp_num_matched_rows = 3 + num_matched_columns = match.num_matched_columns() + num_unmatched_columns = match.num_unmatched_columns() + num_ignored_columns = match.num_ignored_columns() + num_matched_rows = match.num_matched_rows() + self.assertEqual(num_matched_columns.dtype, tf.int32) + self.assertEqual(num_unmatched_columns.dtype, tf.int32) + self.assertEqual(num_ignored_columns.dtype, tf.int32) + self.assertEqual(num_matched_rows.dtype, tf.int32) + with self.test_session() as sess: + (num_matched_columns_out, num_unmatched_columns_out, + num_ignored_columns_out, num_matched_rows_out) = sess.run( + [num_matched_columns, num_unmatched_columns, num_ignored_columns, + num_matched_rows]) + self.assertAllEqual(num_matched_columns_out, exp_num_matched_columns) + self.assertAllEqual(num_unmatched_columns_out, exp_num_unmatched_columns) + self.assertAllEqual(num_ignored_columns_out, exp_num_ignored_columns) + self.assertAllEqual(num_matched_rows_out, exp_num_matched_rows) + + def testGetCorrectUnmatchedColumnIndices(self): + match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) + match = matcher.Match(match_results) + expected_column_indices = [2, 4] + unmatched_column_indices = match.unmatched_column_indices() + self.assertEqual(unmatched_column_indices.dtype, tf.int32) + with self.test_session() as sess: + unmatched_column_indices = sess.run(unmatched_column_indices) + self.assertAllEqual(unmatched_column_indices, 
expected_column_indices) + + def testGetCorrectMatchedRowIndices(self): + match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) + match = matcher.Match(match_results) + expected_row_indices = [3, 1, 0, 5] + matched_row_indices = match.matched_row_indices() + self.assertEqual(matched_row_indices.dtype, tf.int32) + with self.test_session() as sess: + matched_row_inds = sess.run(matched_row_indices) + self.assertAllEqual(matched_row_inds, expected_row_indices) + + def test_get_correct_ignored_column_indices(self): + match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) + match = matcher.Match(match_results) + expected_column_indices = [6] + ignored_column_indices = match.ignored_column_indices() + self.assertEqual(ignored_column_indices.dtype, tf.int32) + with self.test_session() as sess: + ignored_column_indices = sess.run(ignored_column_indices) + self.assertAllEqual(ignored_column_indices, expected_column_indices) + + def test_get_correct_matched_column_indicator(self): + match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) + match = matcher.Match(match_results) + expected_column_indicator = [True, True, False, True, False, True, False] + matched_column_indicator = match.matched_column_indicator() + self.assertEqual(matched_column_indicator.dtype, tf.bool) + with self.test_session() as sess: + matched_column_indicator = sess.run(matched_column_indicator) + self.assertAllEqual(matched_column_indicator, expected_column_indicator) + + def test_get_correct_unmatched_column_indicator(self): + match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) + match = matcher.Match(match_results) + expected_column_indicator = [False, False, True, False, True, False, False] + unmatched_column_indicator = match.unmatched_column_indicator() + self.assertEqual(unmatched_column_indicator.dtype, tf.bool) + with self.test_session() as sess: + unmatched_column_indicator = sess.run(unmatched_column_indicator) + self.assertAllEqual(unmatched_column_indicator, expected_column_indicator) + + def test_get_correct_ignored_column_indicator(self): + match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) + match = matcher.Match(match_results) + expected_column_indicator = [False, False, False, False, False, False, True] + ignored_column_indicator = match.ignored_column_indicator() + self.assertEqual(ignored_column_indicator.dtype, tf.bool) + with self.test_session() as sess: + ignored_column_indicator = sess.run(ignored_column_indicator) + self.assertAllEqual(ignored_column_indicator, expected_column_indicator) + + def test_get_correct_unmatched_ignored_column_indices(self): + match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) + match = matcher.Match(match_results) + expected_column_indices = [2, 4, 6] + unmatched_ignored_column_indices = (match. 
+                                         unmatched_or_ignored_column_indices())
+    self.assertEqual(unmatched_ignored_column_indices.dtype, tf.int32)
+    with self.test_session() as sess:
+      unmatched_ignored_column_indices = sess.run(
+          unmatched_ignored_column_indices)
+      self.assertAllEqual(unmatched_ignored_column_indices,
+                          expected_column_indices)
+
+  def test_all_columns_accounted_for(self):
+    # Note: deliberately setting to small number so not always
+    # all possibilities appear (matched, unmatched, ignored)
+    num_matches = 10
+    match_results = tf.random_uniform(
+        [num_matches], minval=-2, maxval=5, dtype=tf.int32)
+    match = matcher.Match(match_results)
+    matched_column_indices = match.matched_column_indices()
+    unmatched_column_indices = match.unmatched_column_indices()
+    ignored_column_indices = match.ignored_column_indices()
+    with self.test_session() as sess:
+      matched, unmatched, ignored = sess.run([
+          matched_column_indices, unmatched_column_indices,
+          ignored_column_indices
+      ])
+      all_indices = np.hstack((matched, unmatched, ignored))
+      all_indices_sorted = np.sort(all_indices)
+      self.assertAllEqual(all_indices_sorted,
+                          np.arange(num_matches, dtype=np.int32))
+
+  def test_scalar_gather_based_on_match(self):
+    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+    input_tensor = tf.constant([0, 1, 2, 3, 4, 5, 6, 7], dtype=tf.float32)
+    expected_gathered_tensor = [3, 1, 100, 0, 100, 5, 200]
+    match = matcher.Match(match_results)
+    gathered_tensor = match.gather_based_on_match(input_tensor,
+                                                  unmatched_value=100.,
+                                                  ignored_value=200.)
+    self.assertEqual(gathered_tensor.dtype, tf.float32)
+    with self.test_session():
+      gathered_tensor_out = gathered_tensor.eval()
+      self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
+
+  def test_multidimensional_gather_based_on_match(self):
+    match_results = tf.constant([1, -1, -2])
+    input_tensor = tf.constant([[0, 0.5, 0, 0.5], [0, 0, 0.5, 0.5]],
+                               dtype=tf.float32)
+    expected_gathered_tensor = [[0, 0, 0.5, 0.5], [0, 0, 0, 0], [0, 0, 0, 0]]
+    match = matcher.Match(match_results)
+    gathered_tensor = match.gather_based_on_match(input_tensor,
+                                                  unmatched_value=tf.zeros(4),
+                                                  ignored_value=tf.zeros(4))
+    self.assertEqual(gathered_tensor.dtype, tf.float32)
+    with self.test_session():
+      gathered_tensor_out = gathered_tensor.eval()
+      self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
+
+  def test_multidimensional_gather_based_on_match_with_matmul_gather_op(self):
+    match_results = tf.constant([1, -1, -2])
+    input_tensor = tf.constant([[0, 0.5, 0, 0.5], [0, 0, 0.5, 0.5]],
+                               dtype=tf.float32)
+    expected_gathered_tensor = [[0, 0, 0.5, 0.5], [0, 0, 0, 0], [0, 0, 0, 0]]
+    match = matcher.Match(match_results, use_matmul_gather=True)
+    gathered_tensor = match.gather_based_on_match(input_tensor,
+                                                  unmatched_value=tf.zeros(4),
+                                                  ignored_value=tf.zeros(4))
+    self.assertEqual(gathered_tensor.dtype, tf.float32)
+    with self.test_session() as sess:
+      self.assertTrue(
+          all([op.name != 'Gather' for op in sess.graph.get_operations()]))
+      gathered_tensor_out = gathered_tensor.eval()
+      self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/core/minibatch_sampler.py b/core/minibatch_sampler.py
new file mode 100644
index 0000000..7628c8d
--- /dev/null
+++ b/core/minibatch_sampler.py
@@ -0,0 +1,94 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Base minibatch sampler module.
+
+The job of the minibatch_sampler is to subsample a minibatch based on some
+criterion.
+
+The main function call is:
+    subsample(indicator, batch_size, **params).
+Indicator is a 1d boolean tensor where True denotes which examples can be
+sampled. It returns a boolean indicator where True denotes an example has been
+sampled.
+
+Subclasses should implement the Subsample function and can make use of the
+@staticmethod SubsampleIndicator.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from abc import ABCMeta
+from abc import abstractmethod
+
+import six
+import tensorflow as tf
+
+from object_detection.utils import ops
+
+
+class MinibatchSampler(six.with_metaclass(ABCMeta, object)):
+  """Abstract base class for subsampling minibatches."""
+
+  def __init__(self):
+    """Constructs a minibatch sampler."""
+    pass
+
+  @abstractmethod
+  def subsample(self, indicator, batch_size, **params):
+    """Returns subsample of entries in indicator.
+
+    Args:
+      indicator: boolean tensor of shape [N] whose True entries can be sampled.
+      batch_size: desired batch size.
+      **params: additional keyword arguments for specific implementations of
+        the MinibatchSampler.
+
+    Returns:
+      sample_indicator: boolean tensor of shape [N] whose True entries have
+        been sampled. If sum(indicator) >= batch_size, then
+        sum(sample_indicator) == batch_size.
+    """
+    pass
+
+  @staticmethod
+  def subsample_indicator(indicator, num_samples):
+    """Subsample indicator vector.
+
+    Given a boolean indicator vector with M elements set to `True`, the function
+    assigns all but `num_samples` of these previously `True` elements to
+    `False`. If `num_samples` is greater than M, the original indicator vector
+    is returned.
+
+    Args:
+      indicator: a 1-dimensional boolean tensor indicating which elements
+        are allowed to be sampled and which are not.
+      num_samples: int32 scalar tensor
+
+    Returns:
+      a boolean tensor with the same shape as the input (indicator) tensor
+    """
+    indices = tf.where(indicator)
+    indices = tf.random_shuffle(indices)
+    indices = tf.reshape(indices, [-1])
+
+    num_samples = tf.minimum(tf.size(indices), num_samples)
+    selected_indices = tf.slice(indices, [0], tf.reshape(num_samples, [1]))
+
+    selected_indicator = ops.indices_to_dense_vector(selected_indices,
+                                                     tf.shape(indicator)[0])
+
+    return tf.equal(selected_indicator, 1)
diff --git a/core/minibatch_sampler_test.py b/core/minibatch_sampler_test.py
new file mode 100644
index 0000000..7420ae5
--- /dev/null
+++ b/core/minibatch_sampler_test.py
@@ -0,0 +1,82 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
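A NumPy sketch (not part of the patch) of what subsample_indicator above computes:

    import numpy as np

    def subsample_indicator(indicator, num_samples, rng=np.random):
        # Shuffle the positions that are True and keep at most num_samples.
        indices = np.flatnonzero(indicator)
        rng.shuffle(indices)
        out = np.zeros_like(indicator, dtype=bool)
        out[indices[:num_samples]] = True
        return out

    print(subsample_indicator(np.array([True, False, True, False, True]), 2))
    # e.g. [ True False False False  True] -- exactly two True entries survive.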
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for google3.research.vale.object_detection.minibatch_sampler."""
+
+import numpy as np
+import tensorflow as tf
+
+from object_detection.core import minibatch_sampler
+
+
+class MinibatchSamplerTest(tf.test.TestCase):
+
+  def test_subsample_indicator_when_more_true_elements_than_num_samples(self):
+    np_indicator = [True, False, True, False, True, True, False]
+    indicator = tf.constant(np_indicator)
+    samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
+        indicator, 3)
+    with self.test_session() as sess:
+      samples_out = sess.run(samples)
+      self.assertEqual(np.sum(samples_out), 3)
+      self.assertAllEqual(samples_out,
+                          np.logical_and(samples_out, np_indicator))
+
+  def test_subsample_when_more_true_elements_than_num_samples_no_shape(self):
+    np_indicator = [True, False, True, False, True, True, False]
+    indicator = tf.placeholder(tf.bool)
+    feed_dict = {indicator: np_indicator}
+
+    samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
+        indicator, 3)
+    with self.test_session() as sess:
+      samples_out = sess.run(samples, feed_dict=feed_dict)
+      self.assertEqual(np.sum(samples_out), 3)
+      self.assertAllEqual(samples_out,
+                          np.logical_and(samples_out, np_indicator))
+
+  def test_subsample_indicator_when_less_true_elements_than_num_samples(self):
+    np_indicator = [True, False, True, False, True, True, False]
+    indicator = tf.constant(np_indicator)
+    samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
+        indicator, 5)
+    with self.test_session() as sess:
+      samples_out = sess.run(samples)
+      self.assertEqual(np.sum(samples_out), 4)
+      self.assertAllEqual(samples_out,
+                          np.logical_and(samples_out, np_indicator))
+
+  def test_subsample_indicator_when_num_samples_is_zero(self):
+    np_indicator = [True, False, True, False, True, True, False]
+    indicator = tf.constant(np_indicator)
+    samples_none = minibatch_sampler.MinibatchSampler.subsample_indicator(
+        indicator, 0)
+    with self.test_session() as sess:
+      samples_none_out = sess.run(samples_none)
+      self.assertAllEqual(
+          np.zeros_like(samples_none_out, dtype=bool),
+          samples_none_out)
+
+  def test_subsample_indicator_when_indicator_all_false(self):
+    indicator_empty = tf.zeros([0], dtype=tf.bool)
+    samples_empty = minibatch_sampler.MinibatchSampler.subsample_indicator(
+        indicator_empty, 4)
+    with self.test_session() as sess:
+      samples_empty_out = sess.run(samples_empty)
+      self.assertEqual(0, samples_empty_out.size)
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/core/model.py b/core/model.py
new file mode 100644
index 0000000..b04d625
--- /dev/null
+++ b/core/model.py
@@ -0,0 +1,375 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Abstract detection model.
+
+This file defines a generic base class for detection models. Programs that are
+designed to work with arbitrary detection models should only depend on this
+class. We intend for the functions in this class to follow tensor-in/tensor-out
+design; thus all functions have tensors or lists/dictionaries holding tensors
+as inputs and outputs.
+
+Abstractly, detection models predict output tensors given input images
+which can be passed to a loss function at training time or passed to a
+postprocessing function at eval time. The computation graphs at a high level
+consequently look as follows:
+
+Training time:
+inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor)
+
+Evaluation time:
+inputs (images tensor) -> preprocess -> predict -> postprocess
+ -> outputs (boxes tensor, scores tensor, classes tensor, num_detections tensor)
+
+DetectionModels must thus implement four functions: (1) preprocess, (2) predict,
+(3) postprocess and (4) loss. DetectionModels should make no assumptions about
+the input size or aspect ratio --- they are responsible for doing any
+resize/reshaping necessary (see docstring for the preprocess function).
+Output classes are always integers in the range [0, num_classes). Any mapping
+of these integers to semantic labels is to be handled outside of this class.
+
+Images are resized in the `preprocess` method. All of `preprocess`, `predict`,
+and `postprocess` should be reentrant.
+
+The `preprocess` method runs `image_resizer_fn` that returns resized_images and
+`true_image_shapes`. Since `image_resizer_fn` can pad the images with zeros,
+true_image_shapes indicate the slices that contain the image without padding.
+This is useful for padding images to be a fixed size for batching.
+
+The `postprocess` method uses the true image shapes to clip predictions that lie
+outside of images.
+
+By default, DetectionModels produce bounding box detections; however, we support
+a handful of auxiliary annotations associated with each bounding box, namely,
+instance masks and keypoints.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import abc
+import six
+import tensorflow as tf
+
+from object_detection.core import standard_fields as fields
+
+
+# If using a new enough version of TensorFlow, detection models should be a
+# tf module or keras model for tracking.
+try:
+  _BaseClass = tf.Module
+except AttributeError:
+  _BaseClass = object
+
+
+class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
+  """Abstract base class for detection models.
+
+  Extends tf.Module to guarantee variable tracking.
+  """
+
+  def __init__(self, num_classes):
+    """Constructor.
+
+    Args:
+      num_classes: number of classes. Note that num_classes *does not* include
+        background categories that might be implicitly predicted in various
+        implementations.
+ """ + self._num_classes = num_classes + self._groundtruth_lists = {} + + @property + def num_classes(self): + return self._num_classes + + def groundtruth_lists(self, field): + """Access list of groundtruth tensors. + + Args: + field: a string key, options are + fields.BoxListFields.{boxes,classes,masks,keypoints} or + fields.InputDataFields.is_annotated. + + Returns: + a list of tensors holding groundtruth information (see also + provide_groundtruth function below), with one entry for each image in the + batch. + Raises: + RuntimeError: if the field has not been provided via provide_groundtruth. + """ + if field not in self._groundtruth_lists: + raise RuntimeError('Groundtruth tensor {} has not been provided'.format( + field)) + return self._groundtruth_lists[field] + + def groundtruth_has_field(self, field): + """Determines whether the groundtruth includes the given field. + + Args: + field: a string key, options are + fields.BoxListFields.{boxes,classes,masks,keypoints} or + fields.InputDataFields.is_annotated. + + Returns: + True if the groundtruth includes the given field, False otherwise. + """ + return field in self._groundtruth_lists + + @abc.abstractmethod + def preprocess(self, inputs): + """Input preprocessing. + + To be overridden by implementations. + + This function is responsible for any scaling/shifting of input values that + is necessary prior to running the detector on an input image. + It is also responsible for any resizing, padding that might be necessary + as images are assumed to arrive in arbitrary sizes. While this function + could conceivably be part of the predict method (below), it is often + convenient to keep these separate --- for example, we may want to preprocess + on one device, place onto a queue, and let another device (e.g., the GPU) + handle prediction. + + A few important notes about the preprocess function: + + We assume that this operation does not have any trainable variables nor + does it affect the groundtruth annotations in any way (thus data + augmentation operations such as random cropping should be performed + externally). + + There is no assumption that the batchsize in this function is the same as + the batch size in the predict function. In fact, we recommend calling the + preprocess function prior to calling any batching operations (which should + happen outside of the model) and thus assuming that batch sizes are equal + to 1 in the preprocess function. + + There is also no explicit assumption that the output resolutions + must be fixed across inputs --- this is to support "fully convolutional" + settings in which input images can have different shapes/resolutions. + + Args: + inputs: a [batch, height_in, width_in, channels] float32 tensor + representing a batch of images with values between 0 and 255.0. + + Returns: + preprocessed_inputs: a [batch, height_out, width_out, channels] float32 + tensor representing a batch of images. + true_image_shapes: int32 tensor of shape [batch, 3] where each row is + of the form [height, width, channels] indicating the shapes + of true images in the resized images, as resized images can be padded + with zeros. + """ + pass + + @abc.abstractmethod + def predict(self, preprocessed_inputs, true_image_shapes): + """Predict prediction tensors from inputs tensor. + + Outputs of this function can be passed to loss or postprocess functions. + + Args: + preprocessed_inputs: a [batch, height, width, channels] float32 tensor + representing a batch of images. 
+ true_image_shapes: int32 tensor of shape [batch, 3] where each row is + of the form [height, width, channels] indicating the shapes + of true images in the resized images, as resized images can be padded + with zeros. + + Returns: + prediction_dict: a dictionary holding prediction tensors to be + passed to the Loss or Postprocess functions. + """ + pass + + @abc.abstractmethod + def postprocess(self, prediction_dict, true_image_shapes, **params): + """Convert predicted output tensors to final detections. + + This stage typically performs a few things such as + * Non-Max Suppression to remove overlapping detection boxes. + * Score conversion and background class removal. + + Outputs adhere to the following conventions: + * Classes are integers in [0, num_classes); background classes are removed + and the first non-background class is mapped to 0. If the model produces + class-agnostic detections, then no output is produced for classes. + * Boxes are to be interpreted as being in [y_min, x_min, y_max, x_max] + format and normalized relative to the image window. + * `num_detections` is provided for settings where detections are padded to a + fixed number of boxes. + * We do not specifically assume any kind of probabilistic interpretation + of the scores --- the only important thing is their relative ordering. + Thus implementations of the postprocess function are free to output + logits, probabilities, calibrated probabilities, or anything else. + + Args: + prediction_dict: a dictionary holding prediction tensors. + true_image_shapes: int32 tensor of shape [batch, 3] where each row is + of the form [height, width, channels] indicating the shapes + of true images in the resized images, as resized images can be padded + with zeros. + **params: Additional keyword arguments for specific implementations of + DetectionModel. + + Returns: + detections: a dictionary containing the following fields + detection_boxes: [batch, max_detections, 4] + detection_scores: [batch, max_detections] + detection_classes: [batch, max_detections] + (If a model is producing class-agnostic detections, this field may be + missing) + instance_masks: [batch, max_detections, image_height, image_width] + (optional) + keypoints: [batch, max_detections, num_keypoints, 2] (optional) + num_detections: [batch] + + In addition to the above fields this stage also outputs the following + raw tensors: + + raw_detection_boxes: [batch, total_detections, 4] tensor containing + all detection boxes from `prediction_dict` in the format + [ymin, xmin, ymax, xmax] and normalized co-ordinates. + raw_detection_scores: [batch, total_detections, + num_classes_with_background] tensor of class score logits for + raw detection boxes. + """ + pass + + @abc.abstractmethod + def loss(self, prediction_dict, true_image_shapes): + """Compute scalar loss tensors with respect to provided groundtruth. + + Calling this function requires that groundtruth tensors have been + provided via the provide_groundtruth function. + + Args: + prediction_dict: a dictionary holding predicted tensors + true_image_shapes: int32 tensor of shape [batch, 3] where each row is + of the form [height, width, channels] indicating the shapes + of true images in the resized images, as resized images can be padded + with zeros. + + Returns: + a dictionary mapping strings (loss names) to scalar tensors representing + loss values. 
+ """ + pass + + def provide_groundtruth(self, + groundtruth_boxes_list, + groundtruth_classes_list, + groundtruth_masks_list=None, + groundtruth_keypoints_list=None, + groundtruth_weights_list=None, + groundtruth_confidences_list=None, + groundtruth_is_crowd_list=None, + is_annotated_list=None): + """Provide groundtruth tensors. + + Args: + groundtruth_boxes_list: a list of 2-D tf.float32 tensors of shape + [num_boxes, 4] containing coordinates of the groundtruth boxes. + Groundtruth boxes are provided in [y_min, x_min, y_max, x_max] + format and assumed to be normalized and clipped + relative to the image window with y_min <= y_max and x_min <= x_max. + groundtruth_classes_list: a list of 2-D tf.float32 one-hot (or k-hot) + tensors of shape [num_boxes, num_classes] containing the class targets + with the 0th index assumed to map to the first non-background class. + groundtruth_masks_list: a list of 3-D tf.float32 tensors of + shape [num_boxes, height_in, width_in] containing instance + masks with values in {0, 1}. If None, no masks are provided. + Mask resolution `height_in`x`width_in` must agree with the resolution + of the input image tensor provided to the `preprocess` function. + groundtruth_keypoints_list: a list of 3-D tf.float32 tensors of + shape [num_boxes, num_keypoints, 2] containing keypoints. + Keypoints are assumed to be provided in normalized coordinates and + missing keypoints should be encoded as NaN. + groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape + [num_boxes] containing weights for groundtruth boxes. + groundtruth_confidences_list: A list of 2-D tf.float32 tensors of shape + [num_boxes, num_classes] containing class confidences for groundtruth + boxes. + groundtruth_is_crowd_list: A list of 1-D tf.bool tensors of shape + [num_boxes] containing is_crowd annotations + is_annotated_list: A list of scalar tf.bool tensors indicating whether + images have been labeled or not. + """ + self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list + self._groundtruth_lists[ + fields.BoxListFields.classes] = groundtruth_classes_list + if groundtruth_weights_list: + self._groundtruth_lists[fields.BoxListFields. + weights] = groundtruth_weights_list + if groundtruth_confidences_list: + self._groundtruth_lists[fields.BoxListFields. + confidences] = groundtruth_confidences_list + if groundtruth_masks_list: + self._groundtruth_lists[ + fields.BoxListFields.masks] = groundtruth_masks_list + if groundtruth_keypoints_list: + self._groundtruth_lists[ + fields.BoxListFields.keypoints] = groundtruth_keypoints_list + if groundtruth_is_crowd_list: + self._groundtruth_lists[ + fields.BoxListFields.is_crowd] = groundtruth_is_crowd_list + if is_annotated_list: + self._groundtruth_lists[ + fields.InputDataFields.is_annotated] = is_annotated_list + + @abc.abstractmethod + def regularization_losses(self): + """Returns a list of regularization losses for this model. + + Returns a list of regularization losses for this model that the estimator + needs to use during training/optimization. + + Returns: + A list of regularization loss tensors. + """ + pass + + @abc.abstractmethod + def restore_map(self, fine_tune_checkpoint_type='detection'): + """Returns a map of variables to load from a foreign checkpoint. + + Returns a map of variable names to load from a checkpoint to variables in + the model graph. This enables the model to initialize based on weights from + another task. 
For example, the feature extractor variables from a
+    classification model can be used to bootstrap training of an object
+    detector. When loading from an object detection model, the checkpoint model
+    should have the same parameters as this detection model with the exception
+    of the num_classes parameter.
+
+    Args:
+      fine_tune_checkpoint_type: whether to restore from a full detection
+        checkpoint (with compatible variable names) or to restore from a
+        classification checkpoint for initialization prior to training.
+        Valid values: `detection`, `classification`. Default 'detection'.
+
+    Returns:
+      A dict mapping variable names (to load from a checkpoint) to variables in
+      the model graph.
+    """
+    pass
+
+  @abc.abstractmethod
+  def updates(self):
+    """Returns a list of update operators for this model.
+
+    Returns a list of update operators for this model that must be executed at
+    each training step. The estimator's train op needs to have a control
+    dependency on these updates.
+
+    Returns:
+      A list of update operators.
+    """
+    pass
diff --git a/core/multiclass_nms_test.py b/core/multiclass_nms_test.py
new file mode 100644
index 0000000..7cc01c6
--- /dev/null
+++ b/core/multiclass_nms_test.py
@@ -0,0 +1,526 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================== + +"""Tests for tensorflow_models.object_detection.core.post_processing.""" +import numpy as np +import tensorflow as tf +from object_detection.core import post_processing +from object_detection.core import standard_fields as fields +from object_detection.utils import test_case + + +class MulticlassNonMaxSuppressionTest(test_case.TestCase): + + def test_multiclass_nms_select_with_shared_boxes(self): + boxes = tf.constant([[[0, 0, 1, 1]], + [[0, 0.1, 1, 1.1]], + [[0, -0.1, 1, 0.9]], + [[0, 10, 1, 11]], + [[0, 10.1, 1, 11.1]], + [[0, 100, 1, 101]], + [[0, 1000, 1, 1002]], + [[0, 1000, 1, 1002.1]]], tf.float32) + scores = tf.constant([[.9, 0.01], [.75, 0.05], + [.6, 0.01], [.95, 0], + [.5, 0.01], [.3, 0.01], + [.01, .85], [.01, .5]]) + score_thresh = 0.1 + iou_thresh = .5 + max_output_size = 4 + + exp_nms_corners = [[0, 10, 1, 11], + [0, 0, 1, 1], + [0, 1000, 1, 1002], + [0, 100, 1, 101]] + exp_nms_scores = [.95, .9, .85, .3] + exp_nms_classes = [0, 0, 1, 0] + + nms, _ = post_processing.multiclass_non_max_suppression( + boxes, scores, score_thresh, iou_thresh, max_output_size) + with self.test_session() as sess: + nms_corners_output, nms_scores_output, nms_classes_output = sess.run( + [nms.get(), nms.get_field(fields.BoxListFields.scores), + nms.get_field(fields.BoxListFields.classes)]) + self.assertAllClose(nms_corners_output, exp_nms_corners) + self.assertAllClose(nms_scores_output, exp_nms_scores) + self.assertAllClose(nms_classes_output, exp_nms_classes) + + def test_multiclass_nms_select_with_shared_boxes_pad_to_max_output_size(self): + boxes = np.array([[[0, 0, 1, 1]], + [[0, 0.1, 1, 1.1]], + [[0, -0.1, 1, 0.9]], + [[0, 10, 1, 11]], + [[0, 10.1, 1, 11.1]], + [[0, 100, 1, 101]], + [[0, 1000, 1, 1002]], + [[0, 1000, 1, 1002.1]]], np.float32) + scores = np.array([[.9, 0.01], [.75, 0.05], + [.6, 0.01], [.95, 0], + [.5, 0.01], [.3, 0.01], + [.01, .85], [.01, .5]], np.float32) + score_thresh = 0.1 + iou_thresh = .5 + max_size_per_class = 4 + max_output_size = 5 + + exp_nms_corners = [[0, 10, 1, 11], + [0, 0, 1, 1], + [0, 1000, 1, 1002], + [0, 100, 1, 101]] + exp_nms_scores = [.95, .9, .85, .3] + exp_nms_classes = [0, 0, 1, 0] + + def graph_fn(boxes, scores): + nms, num_valid_nms_boxes = post_processing.multiclass_non_max_suppression( + boxes, + scores, + score_thresh, + iou_thresh, + max_size_per_class, + max_total_size=max_output_size, + pad_to_max_output_size=True) + return [nms.get(), nms.get_field(fields.BoxListFields.scores), + nms.get_field(fields.BoxListFields.classes), num_valid_nms_boxes] + + [nms_corners_output, nms_scores_output, nms_classes_output, + num_valid_nms_boxes] = self.execute(graph_fn, [boxes, scores]) + + self.assertEqual(num_valid_nms_boxes, 4) + self.assertAllClose(nms_corners_output[0:num_valid_nms_boxes], + exp_nms_corners) + self.assertAllClose(nms_scores_output[0:num_valid_nms_boxes], + exp_nms_scores) + self.assertAllClose(nms_classes_output[0:num_valid_nms_boxes], + exp_nms_classes) + + def test_multiclass_nms_select_with_shared_boxes_given_keypoints(self): + boxes = tf.constant([[[0, 0, 1, 1]], + [[0, 0.1, 1, 1.1]], + [[0, -0.1, 1, 0.9]], + [[0, 10, 1, 11]], + [[0, 10.1, 1, 11.1]], + [[0, 100, 1, 101]], + [[0, 1000, 1, 1002]], + [[0, 1000, 1, 1002.1]]], tf.float32) + scores = tf.constant([[.9, 0.01], [.75, 0.05], + [.6, 0.01], [.95, 0], + [.5, 0.01], [.3, 0.01], + [.01, .85], [.01, .5]]) + num_keypoints = 6 + keypoints = tf.tile( + tf.reshape(tf.range(8), [8, 1, 1]), + [1, 
num_keypoints, 2]) + score_thresh = 0.1 + iou_thresh = .5 + max_output_size = 4 + + exp_nms_corners = [[0, 10, 1, 11], + [0, 0, 1, 1], + [0, 1000, 1, 1002], + [0, 100, 1, 101]] + exp_nms_scores = [.95, .9, .85, .3] + exp_nms_classes = [0, 0, 1, 0] + exp_nms_keypoints_tensor = tf.tile( + tf.reshape(tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1]), + [1, num_keypoints, 2]) + + nms, _ = post_processing.multiclass_non_max_suppression( + boxes, + scores, + score_thresh, + iou_thresh, + max_output_size, + additional_fields={fields.BoxListFields.keypoints: keypoints}) + + with self.test_session() as sess: + (nms_corners_output, + nms_scores_output, + nms_classes_output, + nms_keypoints, + exp_nms_keypoints) = sess.run([ + nms.get(), + nms.get_field(fields.BoxListFields.scores), + nms.get_field(fields.BoxListFields.classes), + nms.get_field(fields.BoxListFields.keypoints), + exp_nms_keypoints_tensor + ]) + self.assertAllClose(nms_corners_output, exp_nms_corners) + self.assertAllClose(nms_scores_output, exp_nms_scores) + self.assertAllClose(nms_classes_output, exp_nms_classes) + self.assertAllEqual(nms_keypoints, exp_nms_keypoints) + + def test_multiclass_nms_with_shared_boxes_given_keypoint_heatmaps(self): + boxes = tf.constant([[[0, 0, 1, 1]], + [[0, 0.1, 1, 1.1]], + [[0, -0.1, 1, 0.9]], + [[0, 10, 1, 11]], + [[0, 10.1, 1, 11.1]], + [[0, 100, 1, 101]], + [[0, 1000, 1, 1002]], + [[0, 1000, 1, 1002.1]]], tf.float32) + + scores = tf.constant([[.9, 0.01], [.75, 0.05], + [.6, 0.01], [.95, 0], + [.5, 0.01], [.3, 0.01], + [.01, .85], [.01, .5]]) + + num_boxes = tf.shape(boxes)[0] + heatmap_height = 5 + heatmap_width = 5 + num_keypoints = 17 + keypoint_heatmaps = tf.ones( + [num_boxes, heatmap_height, heatmap_width, num_keypoints], + dtype=tf.float32) + + score_thresh = 0.1 + iou_thresh = .5 + max_output_size = 4 + exp_nms_corners = [[0, 10, 1, 11], + [0, 0, 1, 1], + [0, 1000, 1, 1002], + [0, 100, 1, 101]] + + exp_nms_scores = [.95, .9, .85, .3] + exp_nms_classes = [0, 0, 1, 0] + exp_nms_keypoint_heatmaps = np.ones( + (4, heatmap_height, heatmap_width, num_keypoints), dtype=np.float32) + + nms, _ = post_processing.multiclass_non_max_suppression( + boxes, + scores, + score_thresh, + iou_thresh, + max_output_size, + additional_fields={ + fields.BoxListFields.keypoint_heatmaps: keypoint_heatmaps + }) + + with self.test_session() as sess: + (nms_corners_output, + nms_scores_output, + nms_classes_output, + nms_keypoint_heatmaps) = sess.run( + [nms.get(), + nms.get_field(fields.BoxListFields.scores), + nms.get_field(fields.BoxListFields.classes), + nms.get_field(fields.BoxListFields.keypoint_heatmaps)]) + + self.assertAllClose(nms_corners_output, exp_nms_corners) + self.assertAllClose(nms_scores_output, exp_nms_scores) + self.assertAllClose(nms_classes_output, exp_nms_classes) + self.assertAllEqual(nms_keypoint_heatmaps, exp_nms_keypoint_heatmaps) + + def test_multiclass_nms_with_additional_fields(self): + boxes = tf.constant([[[0, 0, 1, 1]], + [[0, 0.1, 1, 1.1]], + [[0, -0.1, 1, 0.9]], + [[0, 10, 1, 11]], + [[0, 10.1, 1, 11.1]], + [[0, 100, 1, 101]], + [[0, 1000, 1, 1002]], + [[0, 1000, 1, 1002.1]]], tf.float32) + + scores = tf.constant([[.9, 0.01], [.75, 0.05], + [.6, 0.01], [.95, 0], + [.5, 0.01], [.3, 0.01], + [.01, .85], [.01, .5]]) + + coarse_boxes_key = 'coarse_boxes' + coarse_boxes = tf.constant([[0.1, 0.1, 1.1, 1.1], + [0.1, 0.2, 1.1, 1.2], + [0.1, -0.2, 1.1, 1.0], + [0.1, 10.1, 1.1, 11.1], + [0.1, 10.2, 1.1, 11.2], + [0.1, 100.1, 1.1, 101.1], + [0.1, 1000.1, 1.1, 1002.1], + [0.1, 1000.1, 1.1, 
1002.2]], tf.float32) + + score_thresh = 0.1 + iou_thresh = .5 + max_output_size = 4 + + exp_nms_corners = np.array([[0, 10, 1, 11], + [0, 0, 1, 1], + [0, 1000, 1, 1002], + [0, 100, 1, 101]], dtype=np.float32) + + exp_nms_coarse_corners = np.array([[0.1, 10.1, 1.1, 11.1], + [0.1, 0.1, 1.1, 1.1], + [0.1, 1000.1, 1.1, 1002.1], + [0.1, 100.1, 1.1, 101.1]], + dtype=np.float32) + + exp_nms_scores = [.95, .9, .85, .3] + exp_nms_classes = [0, 0, 1, 0] + + nms, _ = post_processing.multiclass_non_max_suppression( + boxes, + scores, + score_thresh, + iou_thresh, + max_output_size, + additional_fields={coarse_boxes_key: coarse_boxes}) + + with self.test_session() as sess: + (nms_corners_output, + nms_scores_output, + nms_classes_output, + nms_coarse_corners) = sess.run( + [nms.get(), + nms.get_field(fields.BoxListFields.scores), + nms.get_field(fields.BoxListFields.classes), + nms.get_field(coarse_boxes_key)]) + + self.assertAllClose(nms_corners_output, exp_nms_corners) + self.assertAllClose(nms_scores_output, exp_nms_scores) + self.assertAllClose(nms_classes_output, exp_nms_classes) + self.assertAllEqual(nms_coarse_corners, exp_nms_coarse_corners) + + def test_multiclass_nms_select_with_shared_boxes_given_masks(self): + boxes = tf.constant([[[0, 0, 1, 1]], + [[0, 0.1, 1, 1.1]], + [[0, -0.1, 1, 0.9]], + [[0, 10, 1, 11]], + [[0, 10.1, 1, 11.1]], + [[0, 100, 1, 101]], + [[0, 1000, 1, 1002]], + [[0, 1000, 1, 1002.1]]], tf.float32) + scores = tf.constant([[.9, 0.01], [.75, 0.05], + [.6, 0.01], [.95, 0], + [.5, 0.01], [.3, 0.01], + [.01, .85], [.01, .5]]) + num_classes = 2 + mask_height = 3 + mask_width = 3 + masks = tf.tile( + tf.reshape(tf.range(8), [8, 1, 1, 1]), + [1, num_classes, mask_height, mask_width]) + score_thresh = 0.1 + iou_thresh = .5 + max_output_size = 4 + + exp_nms_corners = [[0, 10, 1, 11], + [0, 0, 1, 1], + [0, 1000, 1, 1002], + [0, 100, 1, 101]] + exp_nms_scores = [.95, .9, .85, .3] + exp_nms_classes = [0, 0, 1, 0] + exp_nms_masks_tensor = tf.tile( + tf.reshape(tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1]), + [1, mask_height, mask_width]) + + nms, _ = post_processing.multiclass_non_max_suppression( + boxes, scores, score_thresh, iou_thresh, max_output_size, masks=masks) + with self.test_session() as sess: + (nms_corners_output, + nms_scores_output, + nms_classes_output, + nms_masks, + exp_nms_masks) = sess.run([nms.get(), + nms.get_field(fields.BoxListFields.scores), + nms.get_field(fields.BoxListFields.classes), + nms.get_field(fields.BoxListFields.masks), + exp_nms_masks_tensor]) + self.assertAllClose(nms_corners_output, exp_nms_corners) + self.assertAllClose(nms_scores_output, exp_nms_scores) + self.assertAllClose(nms_classes_output, exp_nms_classes) + self.assertAllEqual(nms_masks, exp_nms_masks) + + def test_multiclass_nms_select_with_clip_window(self): + boxes = tf.constant([[[0, 0, 10, 10]], + [[1, 1, 11, 11]]], tf.float32) + scores = tf.constant([[.9], [.75]]) + clip_window = tf.constant([5, 4, 8, 7], tf.float32) + score_thresh = 0.0 + iou_thresh = 0.5 + max_output_size = 100 + + exp_nms_corners = [[5, 4, 8, 7]] + exp_nms_scores = [.9] + exp_nms_classes = [0] + + nms, _ = post_processing.multiclass_non_max_suppression( + boxes, + scores, + score_thresh, + iou_thresh, + max_output_size, + clip_window=clip_window) + with self.test_session() as sess: + nms_corners_output, nms_scores_output, nms_classes_output = sess.run( + [nms.get(), nms.get_field(fields.BoxListFields.scores), + nms.get_field(fields.BoxListFields.classes)]) + self.assertAllClose(nms_corners_output, 
exp_nms_corners) + self.assertAllClose(nms_scores_output, exp_nms_scores) + self.assertAllClose(nms_classes_output, exp_nms_classes) + + def test_multiclass_nms_select_with_clip_window_change_coordinate_frame(self): + boxes = tf.constant([[[0, 0, 10, 10]], + [[1, 1, 11, 11]]], tf.float32) + scores = tf.constant([[.9], [.75]]) + clip_window = tf.constant([5, 4, 8, 7], tf.float32) + score_thresh = 0.0 + iou_thresh = 0.5 + max_output_size = 100 + + exp_nms_corners = [[0, 0, 1, 1]] + exp_nms_scores = [.9] + exp_nms_classes = [0] + + nms, _ = post_processing.multiclass_non_max_suppression( + boxes, + scores, + score_thresh, + iou_thresh, + max_output_size, + clip_window=clip_window, + change_coordinate_frame=True) + with self.test_session() as sess: + nms_corners_output, nms_scores_output, nms_classes_output = sess.run( + [nms.get(), nms.get_field(fields.BoxListFields.scores), + nms.get_field(fields.BoxListFields.classes)]) + self.assertAllClose(nms_corners_output, exp_nms_corners) + self.assertAllClose(nms_scores_output, exp_nms_scores) + self.assertAllClose(nms_classes_output, exp_nms_classes) + + def test_multiclass_nms_select_with_per_class_cap(self): + boxes = tf.constant([[[0, 0, 1, 1]], + [[0, 0.1, 1, 1.1]], + [[0, -0.1, 1, 0.9]], + [[0, 10, 1, 11]], + [[0, 10.1, 1, 11.1]], + [[0, 100, 1, 101]], + [[0, 1000, 1, 1002]], + [[0, 1000, 1, 1002.1]]], tf.float32) + scores = tf.constant([[.9, 0.01], [.75, 0.05], + [.6, 0.01], [.95, 0], + [.5, 0.01], [.3, 0.01], + [.01, .85], [.01, .5]]) + score_thresh = 0.1 + iou_thresh = .5 + max_size_per_class = 2 + + exp_nms_corners = [[0, 10, 1, 11], + [0, 0, 1, 1], + [0, 1000, 1, 1002]] + exp_nms_scores = [.95, .9, .85] + exp_nms_classes = [0, 0, 1] + + nms, _ = post_processing.multiclass_non_max_suppression( + boxes, scores, score_thresh, iou_thresh, max_size_per_class) + with self.test_session() as sess: + nms_corners_output, nms_scores_output, nms_classes_output = sess.run( + [nms.get(), nms.get_field(fields.BoxListFields.scores), + nms.get_field(fields.BoxListFields.classes)]) + self.assertAllClose(nms_corners_output, exp_nms_corners) + self.assertAllClose(nms_scores_output, exp_nms_scores) + self.assertAllClose(nms_classes_output, exp_nms_classes) + + def test_multiclass_nms_select_with_total_cap(self): + boxes = tf.constant([[[0, 0, 1, 1]], + [[0, 0.1, 1, 1.1]], + [[0, -0.1, 1, 0.9]], + [[0, 10, 1, 11]], + [[0, 10.1, 1, 11.1]], + [[0, 100, 1, 101]], + [[0, 1000, 1, 1002]], + [[0, 1000, 1, 1002.1]]], tf.float32) + scores = tf.constant([[.9, 0.01], [.75, 0.05], + [.6, 0.01], [.95, 0], + [.5, 0.01], [.3, 0.01], + [.01, .85], [.01, .5]]) + score_thresh = 0.1 + iou_thresh = .5 + max_size_per_class = 4 + max_total_size = 2 + + exp_nms_corners = [[0, 10, 1, 11], + [0, 0, 1, 1]] + exp_nms_scores = [.95, .9] + exp_nms_classes = [0, 0] + + nms, _ = post_processing.multiclass_non_max_suppression( + boxes, scores, score_thresh, iou_thresh, max_size_per_class, + max_total_size) + with self.test_session() as sess: + nms_corners_output, nms_scores_output, nms_classes_output = sess.run( + [nms.get(), nms.get_field(fields.BoxListFields.scores), + nms.get_field(fields.BoxListFields.classes)]) + self.assertAllClose(nms_corners_output, exp_nms_corners) + self.assertAllClose(nms_scores_output, exp_nms_scores) + self.assertAllClose(nms_classes_output, exp_nms_classes) + + def test_multiclass_nms_threshold_then_select_with_shared_boxes(self): + boxes = tf.constant([[[0, 0, 1, 1]], + [[0, 0.1, 1, 1.1]], + [[0, -0.1, 1, 0.9]], + [[0, 10, 1, 11]], + [[0, 10.1, 1, 11.1]], + 
[[0, 100, 1, 101]], + [[0, 1000, 1, 1002]], + [[0, 1000, 1, 1002.1]]], tf.float32) + scores = tf.constant([[.9], [.75], [.6], [.95], [.5], [.3], [.01], [.01]]) + score_thresh = 0.1 + iou_thresh = .5 + max_output_size = 3 + + exp_nms = [[0, 10, 1, 11], + [0, 0, 1, 1], + [0, 100, 1, 101]] + nms, _ = post_processing.multiclass_non_max_suppression( + boxes, scores, score_thresh, iou_thresh, max_output_size) + with self.test_session() as sess: + nms_output = sess.run(nms.get()) + self.assertAllClose(nms_output, exp_nms) + + def test_multiclass_nms_select_with_separate_boxes(self): + boxes = tf.constant([[[0, 0, 1, 1], [0, 0, 4, 5]], + [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], + [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], + [[0, 10, 1, 11], [0, 10, 1, 11]], + [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], + [[0, 100, 1, 101], [0, 100, 1, 101]], + [[0, 1000, 1, 1002], [0, 999, 2, 1004]], + [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]], + tf.float32) + scores = tf.constant([[.9, 0.01], [.75, 0.05], + [.6, 0.01], [.95, 0], + [.5, 0.01], [.3, 0.01], + [.01, .85], [.01, .5]]) + score_thresh = 0.1 + iou_thresh = .5 + max_output_size = 4 + + exp_nms_corners = [[0, 10, 1, 11], + [0, 0, 1, 1], + [0, 999, 2, 1004], + [0, 100, 1, 101]] + exp_nms_scores = [.95, .9, .85, .3] + exp_nms_classes = [0, 0, 1, 0] + + nms, _ = post_processing.multiclass_non_max_suppression( + boxes, scores, score_thresh, iou_thresh, max_output_size) + with self.test_session() as sess: + nms_corners_output, nms_scores_output, nms_classes_output = sess.run( + [nms.get(), nms.get_field(fields.BoxListFields.scores), + nms.get_field(fields.BoxListFields.classes)]) + self.assertAllClose(nms_corners_output, exp_nms_corners) + self.assertAllClose(nms_scores_output, exp_nms_scores) + self.assertAllClose(nms_classes_output, exp_nms_classes) + + + +if __name__ == '__main__': + tf.test.main() diff --git a/core/post_processing.py b/core/post_processing.py new file mode 100644 index 0000000..90f1e06 --- /dev/null +++ b/core/post_processing.py @@ -0,0 +1,1223 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Post-processing operations on detected boxes.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import numpy as np +from six.moves import range +from six.moves import zip +import tensorflow as tf + +from object_detection.core import box_list +from object_detection.core import box_list_ops +from object_detection.core import standard_fields as fields +from object_detection.utils import shape_utils + +_NMS_TILE_SIZE = 512 + + +def batch_iou(boxes1, boxes2): + """Calculates the overlap between proposal and ground truth boxes. + + Some `boxes2` may have been padded. The returned `iou` tensor for these + boxes will be -1. + + Args: + boxes1: a tensor with a shape of [batch_size, N, 4]. 
N is the number of
+      proposals before groundtruth assignment. The last dimension is the pixel
+      coordinates in [ymin, xmin, ymax, xmax] form.
+    boxes2: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES, 4]. This
+      tensor might have paddings with a negative value.
+
+  Returns:
+    iou: a tensor with a shape of [batch_size, N, MAX_NUM_INSTANCES].
+  """
+  with tf.name_scope('BatchIOU'):
+    y1_min, x1_min, y1_max, x1_max = tf.split(
+        value=boxes1, num_or_size_splits=4, axis=2)
+    y2_min, x2_min, y2_max, x2_max = tf.split(
+        value=boxes2, num_or_size_splits=4, axis=2)
+
+    # Calculates the intersection area.
+    intersection_xmin = tf.maximum(x1_min, tf.transpose(x2_min, [0, 2, 1]))
+    intersection_xmax = tf.minimum(x1_max, tf.transpose(x2_max, [0, 2, 1]))
+    intersection_ymin = tf.maximum(y1_min, tf.transpose(y2_min, [0, 2, 1]))
+    intersection_ymax = tf.minimum(y1_max, tf.transpose(y2_max, [0, 2, 1]))
+    intersection_area = tf.maximum(
+        (intersection_xmax - intersection_xmin), 0) * tf.maximum(
+            (intersection_ymax - intersection_ymin), 0)
+
+    # Calculates the union area.
+    area1 = (y1_max - y1_min) * (x1_max - x1_min)
+    area2 = (y2_max - y2_min) * (x2_max - x2_min)
+    # Adds a small epsilon to avoid divide-by-zero.
+    union_area = area1 + tf.transpose(area2,
+                                      [0, 2, 1]) - intersection_area + 1e-8
+
+    # Calculates IoU.
+    iou = intersection_area / union_area
+
+    # Fills -1 for padded ground truth boxes.
+    padding_mask = tf.logical_and(
+        tf.less(intersection_xmax, 0), tf.less(intersection_ymax, 0))
+    iou = tf.where(padding_mask, -tf.ones_like(iou), iou)
+
+    return iou
+
+
+def _self_suppression(iou, iou_threshold, loop_condition, iou_sum):
+  """Bounding-box self-suppression loop body.
+
+  Args:
+    iou: A float Tensor with shape [1, num_boxes, max_num_instance] of IOUs.
+    iou_threshold: A scalar, representing the IOU threshold.
+    loop_condition: The loop condition returned from the last iteration.
+    iou_sum: The iou_sum_new value returned from the last iteration.
+
+  Returns:
+    iou_suppressed: A float Tensor with shape [1, num_boxes, max_num_instance],
+      IOU after suppression.
+    iou_threshold: A scalar, representing the IOU threshold.
+    loop_condition: Bool Tensor of shape [], the loop condition.
+    iou_sum_new: The new IOU sum.
+  """
+  del loop_condition
+  can_suppress_others = tf.cast(
+      tf.reshape(tf.reduce_max(iou, 1) <= iou_threshold, [1, -1, 1]), iou.dtype)
+  iou_suppressed = tf.reshape(
+      tf.cast(
+          tf.reduce_max(can_suppress_others * iou, 1) <= iou_threshold,
+          iou.dtype), [1, -1, 1]) * iou
+  iou_sum_new = tf.reduce_sum(iou_suppressed, [1, 2])
+  return [
+      iou_suppressed, iou_threshold,
+      tf.reduce_any(iou_sum - iou_sum_new > iou_threshold), iou_sum_new
+  ]
+
+
+def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx):
+  """Bounding-box cross-suppression loop body.
+
+  Args:
+    boxes: A float Tensor of shape [1, anchors, 4], representing boxes.
+    box_slice: A float Tensor of shape [1, _NMS_TILE_SIZE, 4], the box tile
+      returned from the last iteration.
+    iou_threshold: A scalar, representing the IOU threshold.
+    inner_idx: A scalar, representing the inner index.
+
+  Returns:
+    boxes: A float Tensor of shape [1, anchors, 4], representing boxes.
+    ret_slice: A float Tensor of shape [1, _NMS_TILE_SIZE, 4], the box tile
+      after suppression.
+    iou_threshold: A scalar, representing the IOU threshold.
+    inner_idx: A scalar, the inner index incremented by one.
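+
+  For example (illustrative): with _NMS_TILE_SIZE = 512 and idx = 2, the
+  enclosing while_loop applies this body for inner_idx = 0 and 1, zeroing out
+  every box in box_slice that overlaps some box in an earlier tile with
+  IOU >= iou_threshold.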
+ """ + new_slice = tf.slice(boxes, [0, inner_idx * _NMS_TILE_SIZE, 0], + [1, _NMS_TILE_SIZE, 4]) + iou = batch_iou(new_slice, box_slice) + ret_slice = tf.expand_dims( + tf.cast(tf.reduce_all(iou < iou_threshold, [1]), box_slice.dtype), + 2) * box_slice + return boxes, ret_slice, iou_threshold, inner_idx + 1 + + +def _suppression_loop_body(boxes, iou_threshold, output_size, idx): + """Process boxes in the range [idx*_NMS_TILE_SIZE, (idx+1)*_NMS_TILE_SIZE). + + Args: + boxes: a tensor with a shape of [1, anchors, 4]. + iou_threshold: a float representing the threshold for deciding whether boxes + overlap too much with respect to IOU. + output_size: an int32 tensor of size [1]. Representing the number of + selected boxes. + idx: an integer scalar representing induction variable. + + Returns: + boxes: updated boxes. + iou_threshold: pass down iou_threshold to the next iteration. + output_size: the updated output_size. + idx: the updated induction variable. + """ + num_tiles = tf.shape(boxes)[1] // _NMS_TILE_SIZE + + # Iterates over tiles that can possibly suppress the current tile. + box_slice = tf.slice(boxes, [0, idx * _NMS_TILE_SIZE, 0], + [1, _NMS_TILE_SIZE, 4]) + _, box_slice, _, _ = tf.while_loop( + lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx, + _cross_suppression, [boxes, box_slice, iou_threshold, + tf.constant(0)]) + + # Iterates over the current tile to compute self-suppression. + iou = batch_iou(box_slice, box_slice) + mask = tf.expand_dims( + tf.reshape(tf.range(_NMS_TILE_SIZE), [1, -1]) > tf.reshape( + tf.range(_NMS_TILE_SIZE), [-1, 1]), 0) + iou *= tf.cast(tf.logical_and(mask, iou >= iou_threshold), iou.dtype) + suppressed_iou, _, _, _ = tf.while_loop( + lambda _iou, _threshold, loop_condition, _iou_sum: loop_condition, + _self_suppression, + [iou, iou_threshold, + tf.constant(True), + tf.reduce_sum(iou, [1, 2])]) + suppressed_box = tf.reduce_sum(suppressed_iou, 1) > 0 + box_slice *= tf.expand_dims(1.0 - tf.cast(suppressed_box, box_slice.dtype), 2) + + # Uses box_slice to update the input boxes. + mask = tf.reshape( + tf.cast(tf.equal(tf.range(num_tiles), idx), boxes.dtype), [1, -1, 1, 1]) + boxes = tf.tile(tf.expand_dims(box_slice, [1]), + [1, num_tiles, 1, 1]) * mask + tf.reshape( + boxes, [1, num_tiles, _NMS_TILE_SIZE, 4]) * (1 - mask) + boxes = tf.reshape(boxes, [1, -1, 4]) + + # Updates output_size. + output_size += tf.reduce_sum( + tf.cast(tf.reduce_any(box_slice > 0, [2]), tf.int32), [1]) + return boxes, iou_threshold, output_size, idx + 1 + + +def partitioned_non_max_suppression_padded(boxes, + scores, + max_output_size, + iou_threshold=0.5, + score_threshold=float('-inf')): + """A tiled version of [`tf.image.non_max_suppression_padded`](https://www.tensorflow.org/api_docs/python/tf/image/non_max_suppression_padded). + + The overall design of the algorithm is to handle boxes tile-by-tile: + + boxes = boxes.pad_to_multiple_of(tile_size) + num_tiles = len(boxes) // tile_size + output_boxes = [] + for i in range(num_tiles): + box_tile = boxes[i*tile_size : (i+1)*tile_size] + for j in range(i - 1): + suppressing_tile = boxes[j*tile_size : (j+1)*tile_size] + iou = batch_iou(box_tile, suppressing_tile) + # if the box is suppressed in iou, clear it to a dot + box_tile *= _update_boxes(iou) + # Iteratively handle the diagonal tile. 
+    iou = _box_overlap(box_tile, box_tile)
+    iou_changed = True
+    while iou_changed:
+      # boxes that are not suppressed by anything else
+      suppressing_boxes = _get_suppressing_boxes(iou)
+      # boxes that are suppressed by suppressing_boxes
+      suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes)
+      # clear iou to 0 for boxes that are suppressed, as they cannot be used
+      # to suppress other boxes any more
+      new_iou = _clear_iou(iou, suppressed_boxes)
+      iou_changed = (new_iou != iou)
+      iou = new_iou
+    # Remaining boxes that can still suppress others are the selected boxes.
+    output_boxes.append(_get_suppressing_boxes(iou))
+    if len(output_boxes) >= max_output_size:
+      break
+
+  Args:
+    boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
+    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
+      score corresponding to each box (each row of boxes).
+    max_output_size: a scalar integer `Tensor` representing the maximum number
+      of boxes to be selected by non max suppression.
+    iou_threshold: a float representing the threshold for deciding whether boxes
+      overlap too much with respect to IOU.
+    score_threshold: A float representing the threshold for deciding when to
+      remove boxes based on score.
+
+  Returns:
+    selected_indices: a tensor of shape [anchors].
+    num_valid_boxes: a scalar int tensor.
+    nms_proposals: a tensor with a shape of [anchors, 4]. It has the same
+      dtype as the input boxes.
+    nms_scores: a tensor with a shape of [anchors]. It has the same dtype as
+      the input scores.
+    argsort_ids: a tensor of shape [anchors], mapping from input order of boxes
+      to output order of boxes.
+  """
+  num_boxes = tf.shape(boxes)[0]
+  pad = tf.cast(
+      tf.ceil(tf.cast(num_boxes, tf.float32) / _NMS_TILE_SIZE),
+      tf.int32) * _NMS_TILE_SIZE - num_boxes
+
+  scores, argsort_ids = tf.nn.top_k(scores, k=num_boxes, sorted=True)
+  boxes = tf.gather(boxes, argsort_ids)
+  num_boxes = tf.shape(boxes)[0]
+  num_boxes += pad
+  boxes = tf.pad(
+      tf.cast(boxes, tf.float32), [[0, pad], [0, 0]], constant_values=-1)
+  scores = tf.pad(tf.cast(scores, tf.float32), [[0, pad]])
+
+  # Masks boxes whose score does not exceed score_threshold to -1.
+  scores_mask = tf.expand_dims(
+      tf.cast(scores > score_threshold, boxes.dtype), axis=1)
+  boxes = ((boxes + 1.) * scores_mask) - 1.
+
+  boxes = tf.expand_dims(boxes, axis=0)
+  scores = tf.expand_dims(scores, axis=0)
+
+  def _loop_cond(unused_boxes, unused_threshold, output_size, idx):
+    return tf.logical_and(
+        tf.reduce_min(output_size) < max_output_size,
+        idx < num_boxes // _NMS_TILE_SIZE)
+
+  selected_boxes, _, output_size, _ = tf.while_loop(
+      _loop_cond, _suppression_loop_body,
+      [boxes, iou_threshold,
+       tf.zeros([1], tf.int32),
+       tf.constant(0)])
+  idx = num_boxes - tf.cast(
+      tf.nn.top_k(
+          tf.cast(tf.reduce_any(selected_boxes > 0, [2]), tf.int32) *
+          tf.expand_dims(tf.range(num_boxes, 0, -1), 0), max_output_size)[0],
+      tf.int32)
+  idx = tf.minimum(idx, num_boxes - 1 - pad)
+  idx = tf.reshape(idx + tf.reshape(tf.range(1) * num_boxes, [-1, 1]), [-1])
+  num_valid_boxes = tf.reduce_sum(output_size)
+  return (idx, num_valid_boxes, tf.reshape(boxes, [-1, 4]),
+          tf.reshape(scores, [-1]), argsort_ids)
+
+
+def _validate_boxes_scores_iou_thresh(boxes, scores, iou_thresh,
+                                      change_coordinate_frame, clip_window):
+  """Validates boxes, scores and iou_thresh.
+
+  This function validates boxes, scores and iou_thresh, and checks that
+  clip_window is specified whenever change_coordinate_frame is True.
+
+  Args:
+    boxes: A [k, q, 4] float32 tensor containing k detections.
`q` can be either
+      number of classes or 1 depending on whether a separate box is predicted
+      per class.
+    scores: A [k, num_classes] float32 tensor containing the scores for each of
+      the k detections. The scores have to be non-negative when
+      pad_to_max_output_size is True.
+    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
+      with previously selected boxes are removed).
+    change_coordinate_frame: Whether to normalize coordinates after clipping
+      relative to clip_window (this can only be set to True if a clip_window is
+      provided).
+    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
+      representing the window to clip and normalize boxes to before performing
+      non-max suppression.
+
+  Raises:
+    ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not
+      have a valid scores field.
+  """
+  if not 0 <= iou_thresh <= 1.0:
+    raise ValueError('iou_thresh must be between 0 and 1')
+  if scores.shape.ndims != 2:
+    raise ValueError('scores field must be of rank 2')
+  if shape_utils.get_dim_as_int(scores.shape[1]) is None:
+    raise ValueError('scores must have statically defined second dimension')
+  if boxes.shape.ndims != 3:
+    raise ValueError('boxes must be of rank 3.')
+  if not (shape_utils.get_dim_as_int(
+      boxes.shape[1]) == shape_utils.get_dim_as_int(scores.shape[1]) or
+          shape_utils.get_dim_as_int(boxes.shape[1]) == 1):
+    raise ValueError('second dimension of boxes must be either 1 or equal '
+                     'to the second dimension of scores')
+  if shape_utils.get_dim_as_int(boxes.shape[2]) != 4:
+    raise ValueError('last dimension of boxes must be of size 4.')
+  if change_coordinate_frame and clip_window is None:
+    raise ValueError('if change_coordinate_frame is True, then a clip_window '
+                     'must be specified.')
+
+
+def _clip_window_prune_boxes(sorted_boxes, clip_window, pad_to_max_output_size,
+                             change_coordinate_frame):
+  """Prune boxes with zero area.
+
+  Args:
+    sorted_boxes: A BoxList containing k detections.
+    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
+      representing the window to clip and normalize boxes to before performing
+      non-max suppression.
+    pad_to_max_output_size: flag indicating whether to pad to max output size
+      or not.
+    change_coordinate_frame: Whether to normalize coordinates after clipping
+      relative to clip_window (this can only be set to True if a clip_window is
+      provided).
+
+  Returns:
+    sorted_boxes: A BoxList containing k detections after pruning.
+    num_valid_nms_boxes_cumulative: Number of valid NMS boxes.
+  """
+  sorted_boxes = box_list_ops.clip_to_window(
+      sorted_boxes,
+      clip_window,
+      filter_nonoverlapping=not pad_to_max_output_size)
+  # Set the scores of boxes with zero area to -1 to keep the default
+  # behaviour of pruning out zero area boxes.
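+  # (Boxes that were clipped away end up with zero area; giving them score -1
+  # sorts them behind every real detection and keeps them out of the valid
+  # count computed below.)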
+ sorted_boxes_size = tf.shape(sorted_boxes.get())[0] + non_zero_box_area = tf.cast(box_list_ops.area(sorted_boxes), tf.bool) + sorted_boxes_scores = tf.where( + non_zero_box_area, sorted_boxes.get_field(fields.BoxListFields.scores), + -1 * tf.ones(sorted_boxes_size)) + sorted_boxes.add_field(fields.BoxListFields.scores, sorted_boxes_scores) + num_valid_nms_boxes_cumulative = tf.reduce_sum( + tf.cast(tf.greater_equal(sorted_boxes_scores, 0), tf.int32)) + sorted_boxes = box_list_ops.sort_by_field(sorted_boxes, + fields.BoxListFields.scores) + if change_coordinate_frame: + sorted_boxes = box_list_ops.change_coordinate_frame(sorted_boxes, + clip_window) + return sorted_boxes, num_valid_nms_boxes_cumulative + + +def multiclass_non_max_suppression(boxes, + scores, + score_thresh, + iou_thresh, + max_size_per_class, + max_total_size=0, + clip_window=None, + change_coordinate_frame=False, + masks=None, + boundaries=None, + pad_to_max_output_size=False, + use_partitioned_nms=False, + additional_fields=None, + soft_nms_sigma=0.0, + scope=None): + """Multi-class version of non maximum suppression. + + This op greedily selects a subset of detection bounding boxes, pruning + away boxes that have high IOU (intersection over union) overlap (> thresh) + with already selected boxes. It operates independently for each class for + which scores are provided (via the scores field of the input box_list), + pruning boxes with score less than a provided threshold prior to + applying NMS. + + Please note that this operation is performed on *all* classes, therefore any + background classes should be removed prior to calling this function. + + Selected boxes are guaranteed to be sorted in decreasing order by score (but + the sort is not guaranteed to be stable). + + Args: + boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either + number of classes or 1 depending on whether a separate box is predicted + per class. + scores: A [k, num_classes] float32 tensor containing the scores for each of + the k detections. The scores have to be non-negative when + pad_to_max_output_size is True. + score_thresh: scalar threshold for score (low scoring boxes are removed). + iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap + with previously selected boxes are removed). + max_size_per_class: maximum number of retained boxes per class. + max_total_size: maximum number of boxes retained over all classes. By + default returns all boxes retained after capping boxes per class. + clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max] + representing the window to clip and normalize boxes to before performing + non-max suppression. + change_coordinate_frame: Whether to normalize coordinates after clipping + relative to clip_window (this can only be set to True if a clip_window + is provided) + masks: (optional) a [k, q, mask_height, mask_width] float32 tensor + containing box masks. `q` can be either number of classes or 1 depending + on whether a separate mask is predicted per class. + boundaries: (optional) a [k, q, boundary_height, boundary_width] float32 + tensor containing box boundaries. `q` can be either number of classes or 1 + depending on whether a separate boundary is predicted per class. + pad_to_max_output_size: If true, the output nmsed boxes are padded to be of + length `max_size_per_class`. Defaults to false. + use_partitioned_nms: If true, use partitioned version of + non_max_suppression. 
+    additional_fields: (optional) If not None, a dictionary that maps keys to
+      tensors whose first dimensions are all of size `k`. After non-maximum
+      suppression, all tensors corresponding to the selected boxes will be
+      added to the resulting BoxList.
+    soft_nms_sigma: A scalar float representing the Soft NMS sigma parameter
+      (see Bodla et al, https://arxiv.org/abs/1704.04503). When
+      `soft_nms_sigma=0.0` (which is the default), we fall back to standard
+      (hard) NMS. Soft NMS is currently only supported when
+      pad_to_max_output_size is False.
+    scope: name scope.
+
+  Returns:
+    A tuple of sorted_boxes and num_valid_nms_boxes. The sorted_boxes is a
+    BoxList that holds M boxes with a rank-1 scores field representing
+    corresponding scores for each box with scores sorted in decreasing order
+    and a rank-1 classes field representing a class label for each box. The
+    num_valid_nms_boxes is a 0-D integer tensor representing the number of
+    valid elements in `BoxList`, with the valid elements appearing first.
+
+  Raises:
+    ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
+      a valid scores field.
+    ValueError: if Soft NMS (tf.image.non_max_suppression_with_scores) is not
+      supported in the current TF version and `soft_nms_sigma` is nonzero.
+  """
+  _validate_boxes_scores_iou_thresh(boxes, scores, iou_thresh,
+                                    change_coordinate_frame, clip_window)
+  if pad_to_max_output_size and soft_nms_sigma != 0.0:
+    raise ValueError('Soft NMS (soft_nms_sigma != 0.0) is currently not '
+                     'supported when pad_to_max_output_size is True.')
+
+  with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
+    num_scores = tf.shape(scores)[0]
+    num_classes = shape_utils.get_dim_as_int(scores.get_shape()[1])
+
+    selected_boxes_list = []
+    num_valid_nms_boxes_cumulative = tf.constant(0)
+    per_class_boxes_list = tf.unstack(boxes, axis=1)
+    if masks is not None:
+      per_class_masks_list = tf.unstack(masks, axis=1)
+    if boundaries is not None:
+      per_class_boundaries_list = tf.unstack(boundaries, axis=1)
+    boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1
+                 else [0] * num_classes)
+    for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
+      per_class_boxes = per_class_boxes_list[boxes_idx]
+      boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
+      class_scores = tf.reshape(
+          tf.slice(scores, [0, class_idx], tf.stack([num_scores, 1])), [-1])
+
+      boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
+                                         class_scores)
+      if masks is not None:
+        per_class_masks = per_class_masks_list[boxes_idx]
+        boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
+                                           per_class_masks)
+      if boundaries is not None:
+        per_class_boundaries = per_class_boundaries_list[boxes_idx]
+        boxlist_and_class_scores.add_field(fields.BoxListFields.boundaries,
+                                           per_class_boundaries)
+      if additional_fields is not None:
+        for key, tensor in additional_fields.items():
+          boxlist_and_class_scores.add_field(key, tensor)
+
+      nms_result = None
+      selected_scores = None
+      if pad_to_max_output_size:
+        max_selection_size = max_size_per_class
+        if use_partitioned_nms:
+          (selected_indices, num_valid_nms_boxes,
+           boxlist_and_class_scores.data['boxes'],
+           boxlist_and_class_scores.data['scores'],
+           _) = partitioned_non_max_suppression_padded(
+               boxlist_and_class_scores.get(),
+               boxlist_and_class_scores.get_field(fields.BoxListFields.scores),
+               max_selection_size,
+               iou_threshold=iou_thresh,
+               score_threshold=score_thresh)
+        else:
+          selected_indices, num_valid_nms_boxes = (
+              tf.image.non_max_suppression_padded(
boxlist_and_class_scores.get(), + boxlist_and_class_scores.get_field( + fields.BoxListFields.scores), + max_selection_size, + iou_threshold=iou_thresh, + score_threshold=score_thresh, + pad_to_max_output_size=True)) + nms_result = box_list_ops.gather(boxlist_and_class_scores, + selected_indices) + selected_scores = nms_result.get_field(fields.BoxListFields.scores) + else: + max_selection_size = tf.minimum(max_size_per_class, + boxlist_and_class_scores.num_boxes()) + if (hasattr(tf.image, 'non_max_suppression_with_scores') and + tf.compat.forward_compatible(2019, 6, 6)): + (selected_indices, selected_scores + ) = tf.image.non_max_suppression_with_scores( + boxlist_and_class_scores.get(), + boxlist_and_class_scores.get_field(fields.BoxListFields.scores), + max_selection_size, + iou_threshold=iou_thresh, + score_threshold=score_thresh, + soft_nms_sigma=soft_nms_sigma) + num_valid_nms_boxes = tf.shape(selected_indices)[0] + selected_indices = tf.concat( + [selected_indices, + tf.zeros(max_selection_size-num_valid_nms_boxes, tf.int32)], 0) + selected_scores = tf.concat( + [selected_scores, + tf.zeros(max_selection_size-num_valid_nms_boxes, + tf.float32)], -1) + nms_result = box_list_ops.gather(boxlist_and_class_scores, + selected_indices) + else: + if soft_nms_sigma != 0: + raise ValueError('Soft NMS not supported in current TF version!') + selected_indices = tf.image.non_max_suppression( + boxlist_and_class_scores.get(), + boxlist_and_class_scores.get_field(fields.BoxListFields.scores), + max_selection_size, + iou_threshold=iou_thresh, + score_threshold=score_thresh) + num_valid_nms_boxes = tf.shape(selected_indices)[0] + selected_indices = tf.concat( + [selected_indices, + tf.zeros(max_selection_size-num_valid_nms_boxes, tf.int32)], 0) + nms_result = box_list_ops.gather(boxlist_and_class_scores, + selected_indices) + selected_scores = nms_result.get_field(fields.BoxListFields.scores) + # Make the scores -1 for invalid boxes. + valid_nms_boxes_indices = tf.less( + tf.range(max_selection_size), num_valid_nms_boxes) + + nms_result.add_field( + fields.BoxListFields.scores, + tf.where(valid_nms_boxes_indices, + selected_scores, -1*tf.ones(max_selection_size))) + num_valid_nms_boxes_cumulative += num_valid_nms_boxes + + nms_result.add_field( + fields.BoxListFields.classes, (tf.zeros_like( + nms_result.get_field(fields.BoxListFields.scores)) + class_idx)) + selected_boxes_list.append(nms_result) + selected_boxes = box_list_ops.concatenate(selected_boxes_list) + sorted_boxes = box_list_ops.sort_by_field(selected_boxes, + fields.BoxListFields.scores) + if clip_window is not None: + # When pad_to_max_output_size is False, it prunes the boxes with zero + # area. + sorted_boxes, num_valid_nms_boxes_cumulative = _clip_window_prune_boxes( + sorted_boxes, clip_window, pad_to_max_output_size, + change_coordinate_frame) + + if max_total_size: + max_total_size = tf.minimum(max_total_size, sorted_boxes.num_boxes()) + sorted_boxes = box_list_ops.gather(sorted_boxes, tf.range(max_total_size)) + num_valid_nms_boxes_cumulative = tf.where( + max_total_size > num_valid_nms_boxes_cumulative, + num_valid_nms_boxes_cumulative, max_total_size) + # Select only the valid boxes if pad_to_max_output_size is False. 
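+    # (When padding is requested instead, callers rely on the fixed-size
+    # output and use the returned num_valid_nms_boxes_cumulative to slice out
+    # the real detections.)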
+    if not pad_to_max_output_size:
+      sorted_boxes = box_list_ops.gather(
+          sorted_boxes, tf.range(num_valid_nms_boxes_cumulative))
+
+    return sorted_boxes, num_valid_nms_boxes_cumulative
+
+
+def class_agnostic_non_max_suppression(boxes,
+                                       scores,
+                                       score_thresh,
+                                       iou_thresh,
+                                       max_classes_per_detection=1,
+                                       max_total_size=0,
+                                       clip_window=None,
+                                       change_coordinate_frame=False,
+                                       masks=None,
+                                       boundaries=None,
+                                       pad_to_max_output_size=False,
+                                       use_partitioned_nms=False,
+                                       additional_fields=None,
+                                       soft_nms_sigma=0.0,
+                                       scope=None):
+  """Class-agnostic version of non maximum suppression.
+
+  This op greedily selects a subset of detection bounding boxes, pruning
+  away boxes that have high IOU (intersection over union) overlap (> thresh)
+  with already selected boxes. It operates on all the boxes using
+  max scores across all classes for which scores are provided (via the scores
+  field of the input box_list), pruning boxes with score less than a provided
+  threshold prior to applying NMS.
+
+  Please note that this operation is performed in a class-agnostic way,
+  therefore any background classes should be removed prior to calling this
+  function.
+
+  Selected boxes are guaranteed to be sorted in decreasing order by score (but
+  the sort is not guaranteed to be stable).
+
+  Args:
+    boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be
+      either number of classes or 1 depending on whether a separate box is
+      predicted per class.
+    scores: A [k, num_classes] float32 tensor containing the scores for each of
+      the k detections. The scores have to be non-negative when
+      pad_to_max_output_size is True.
+    score_thresh: scalar threshold for score (low scoring boxes are removed).
+    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
+      with previously selected boxes are removed).
+    max_classes_per_detection: maximum number of retained classes per detection
+      box in class-agnostic NMS.
+    max_total_size: maximum number of boxes retained over all classes. By
+      default returns all boxes retained after capping boxes per class.
+    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
+      representing the window to clip and normalize boxes to before performing
+      non-max suppression.
+    change_coordinate_frame: Whether to normalize coordinates after clipping
+      relative to clip_window (this can only be set to True if a clip_window is
+      provided).
+    masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
+      containing box masks. `q` can be either number of classes or 1 depending
+      on whether a separate mask is predicted per class.
+    boundaries: (optional) a [k, q, boundary_height, boundary_width] float32
+      tensor containing box boundaries. `q` can be either number of classes or
+      1 depending on whether a separate boundary is predicted per class.
+    pad_to_max_output_size: If true, the output nmsed boxes are padded to be of
+      length `max_total_size`. Defaults to false.
+    use_partitioned_nms: If true, use partitioned version of
+      non_max_suppression.
+    additional_fields: (optional) If not None, a dictionary that maps keys to
+      tensors whose first dimensions are all of size `k`. After non-maximum
+      suppression, all tensors corresponding to the selected boxes will be
+      added to the resulting BoxList.
+    soft_nms_sigma: A scalar float representing the Soft NMS sigma parameter
+      (see Bodla et al, https://arxiv.org/abs/1704.04503). When
+      `soft_nms_sigma=0.0` (which is the default), we fall back to standard
+      (hard) NMS.
Soft NMS is currently only supported when pad_to_max_output_size is
+      False.
+    scope: name scope.
+
+  Returns:
+    A tuple of sorted_boxes and num_valid_nms_boxes. The sorted_boxes is a
+    BoxList that holds M boxes with a rank-1 scores field representing
+    corresponding scores for each box with scores sorted in decreasing order
+    and a rank-1 classes field representing a class label for each box. The
+    num_valid_nms_boxes is a 0-D integer tensor representing the number of
+    valid elements in `BoxList`, with the valid elements appearing first.
+
+  Raises:
+    ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
+      a valid scores field or if non-zero soft_nms_sigma is provided when
+      pad_to_max_output_size is True.
+  """
+  _validate_boxes_scores_iou_thresh(boxes, scores, iou_thresh,
+                                    change_coordinate_frame, clip_window)
+  if pad_to_max_output_size and soft_nms_sigma != 0.0:
+    raise ValueError('Soft NMS (soft_nms_sigma != 0.0) is currently not '
+                     'supported when pad_to_max_output_size is True.')
+
+  if max_classes_per_detection > 1:
+    raise ValueError('Max classes per detection box >1 not supported.')
+  q = shape_utils.get_dim_as_int(boxes.shape[1])
+  if q > 1:
+    class_ids = tf.expand_dims(
+        tf.argmax(scores, axis=1, output_type=tf.int32), axis=1)
+    boxes = tf.batch_gather(boxes, class_ids)
+    if masks is not None:
+      masks = tf.batch_gather(masks, class_ids)
+    if boundaries is not None:
+      boundaries = tf.batch_gather(boundaries, class_ids)
+    boxes = tf.squeeze(boxes, axis=[1])
+    if masks is not None:
+      masks = tf.squeeze(masks, axis=[1])
+    if boundaries is not None:
+      boundaries = tf.squeeze(boundaries, axis=[1])
+
+  with tf.name_scope(scope, 'ClassAgnosticNonMaxSuppression'):
+    boxlist_and_class_scores = box_list.BoxList(boxes)
+    max_scores = tf.reduce_max(scores, axis=-1)
+    classes_with_max_scores = tf.argmax(scores, axis=-1)
+    boxlist_and_class_scores.add_field(fields.BoxListFields.scores, max_scores)
+    if masks is not None:
+      boxlist_and_class_scores.add_field(fields.BoxListFields.masks, masks)
+    if boundaries is not None:
+      boxlist_and_class_scores.add_field(fields.BoxListFields.boundaries,
+                                         boundaries)
+
+    if additional_fields is not None:
+      for key, tensor in additional_fields.items():
+        boxlist_and_class_scores.add_field(key, tensor)
+
+    nms_result = None
+    selected_scores = None
+    if pad_to_max_output_size:
+      max_selection_size = max_total_size
+      if use_partitioned_nms:
+        (selected_indices, num_valid_nms_boxes,
+         boxlist_and_class_scores.data['boxes'],
+         boxlist_and_class_scores.data['scores'],
+         argsort_ids) = partitioned_non_max_suppression_padded(
+             boxlist_and_class_scores.get(),
+             boxlist_and_class_scores.get_field(fields.BoxListFields.scores),
+             max_selection_size,
+             iou_threshold=iou_thresh,
+             score_threshold=score_thresh)
+        classes_with_max_scores = tf.gather(classes_with_max_scores,
+                                            argsort_ids)
+      else:
+        selected_indices, num_valid_nms_boxes = (
+            tf.image.non_max_suppression_padded(
+                boxlist_and_class_scores.get(),
+                boxlist_and_class_scores.get_field(fields.BoxListFields.scores),
+                max_selection_size,
+                iou_threshold=iou_thresh,
+                score_threshold=score_thresh,
+                pad_to_max_output_size=True))
+      nms_result = box_list_ops.gather(boxlist_and_class_scores,
+                                       selected_indices)
+      selected_scores = nms_result.get_field(fields.BoxListFields.scores)
+    else:
+      max_selection_size = tf.minimum(max_total_size,
+                                      boxlist_and_class_scores.num_boxes())
+      if (hasattr(tf.image, 'non_max_suppression_with_scores') and
+          tf.compat.forward_compatible(2019, 6, 6)):
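+        # Prefers tf.image.non_max_suppression_with_scores when this TF build
+        # provides it: unlike plain non_max_suppression, it also returns the
+        # (possibly Soft-NMS-rescaled) scores of the selected boxes.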
(selected_indices, selected_scores + ) = tf.image.non_max_suppression_with_scores( + boxlist_and_class_scores.get(), + boxlist_and_class_scores.get_field(fields.BoxListFields.scores), + max_selection_size, + iou_threshold=iou_thresh, + score_threshold=score_thresh, + soft_nms_sigma=soft_nms_sigma) + num_valid_nms_boxes = tf.shape(selected_indices)[0] + selected_indices = tf.concat([ + selected_indices, + tf.zeros(max_selection_size - num_valid_nms_boxes, tf.int32) + ], 0) + selected_scores = tf.concat( + [selected_scores, + tf.zeros(max_selection_size-num_valid_nms_boxes, tf.float32)], -1) + nms_result = box_list_ops.gather(boxlist_and_class_scores, + selected_indices) + else: + if soft_nms_sigma != 0: + raise ValueError('Soft NMS not supported in current TF version!') + selected_indices = tf.image.non_max_suppression( + boxlist_and_class_scores.get(), + boxlist_and_class_scores.get_field(fields.BoxListFields.scores), + max_selection_size, + iou_threshold=iou_thresh, + score_threshold=score_thresh) + num_valid_nms_boxes = tf.shape(selected_indices)[0] + selected_indices = tf.concat( + [selected_indices, + tf.zeros(max_selection_size-num_valid_nms_boxes, tf.int32)], 0) + nms_result = box_list_ops.gather(boxlist_and_class_scores, + selected_indices) + selected_scores = nms_result.get_field(fields.BoxListFields.scores) + valid_nms_boxes_indices = tf.less( + tf.range(max_selection_size), num_valid_nms_boxes) + nms_result.add_field( + fields.BoxListFields.scores, + tf.where(valid_nms_boxes_indices, + selected_scores, -1*tf.ones(max_selection_size))) + + selected_classes = tf.gather(classes_with_max_scores, selected_indices) + selected_classes = tf.cast(selected_classes, tf.float32) + nms_result.add_field(fields.BoxListFields.classes, selected_classes) + selected_boxes = nms_result + sorted_boxes = box_list_ops.sort_by_field(selected_boxes, + fields.BoxListFields.scores) + + if clip_window is not None: + # When pad_to_max_output_size is False, it prunes the boxes with zero + # area. + sorted_boxes, num_valid_nms_boxes = _clip_window_prune_boxes( + sorted_boxes, clip_window, pad_to_max_output_size, + change_coordinate_frame) + + if max_total_size: + max_total_size = tf.minimum(max_total_size, sorted_boxes.num_boxes()) + sorted_boxes = box_list_ops.gather(sorted_boxes, tf.range(max_total_size)) + num_valid_nms_boxes = tf.where(max_total_size > num_valid_nms_boxes, + num_valid_nms_boxes, max_total_size) + # Select only the valid boxes if pad_to_max_output_size is False. + if not pad_to_max_output_size: + sorted_boxes = box_list_ops.gather(sorted_boxes, + tf.range(num_valid_nms_boxes)) + + return sorted_boxes, num_valid_nms_boxes + + +def batch_multiclass_non_max_suppression(boxes, + scores, + score_thresh, + iou_thresh, + max_size_per_class, + max_total_size=0, + clip_window=None, + change_coordinate_frame=False, + num_valid_boxes=None, + masks=None, + additional_fields=None, + soft_nms_sigma=0.0, + scope=None, + use_static_shapes=False, + use_partitioned_nms=False, + parallel_iterations=32, + use_class_agnostic_nms=False, + max_classes_per_detection=1, + use_dynamic_map_fn=False, + use_combined_nms=False): + """Multi-class version of non maximum suppression that operates on a batch. + + This op is similar to `multiclass_non_max_suppression` but operates on a batch + of boxes and scores. See documentation for `multiclass_non_max_suppression` + for details. + + Args: + boxes: A [batch_size, num_anchors, q, 4] float32 tensor containing + detections. 
If `q` is 1 then the same boxes are used for all
+      classes; otherwise, if `q` is equal to the number of classes,
+      class-specific boxes are used.
+    scores: A [batch_size, num_anchors, num_classes] float32 tensor containing
+      the scores for each of the `num_anchors` detections. The scores have to
+      be non-negative when use_static_shapes is set to True.
+    score_thresh: scalar threshold for score (low scoring boxes are removed).
+    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
+      with previously selected boxes are removed).
+    max_size_per_class: maximum number of retained boxes per class.
+    max_total_size: maximum number of boxes retained over all classes. By
+      default returns all boxes retained after capping boxes per class.
+    clip_window: A float32 tensor of shape [batch_size, 4] where each entry is
+      of the form [y_min, x_min, y_max, x_max] representing the window to clip
+      boxes to before performing non-max suppression. This argument can also be
+      a tensor of shape [4] in which case, the same clip window is applied to
+      all images in the batch. If clip_window is None, all boxes are used to
+      perform non-max suppression.
+    change_coordinate_frame: Whether to normalize coordinates after clipping
+      relative to clip_window (this can only be set to True if a clip_window is
+      provided).
+    num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape
+      [batch_size] representing the number of valid boxes to be considered for
+      each image in the batch. This parameter allows for ignoring zero
+      paddings.
+    masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width]
+      float32 tensor containing box masks. `q` can be either number of classes
+      or 1 depending on whether a separate mask is predicted per class.
+    additional_fields: (optional) If not None, a dictionary that maps keys to
+      tensors whose dimensions are [batch_size, num_anchors, ...].
+    soft_nms_sigma: A scalar float representing the Soft NMS sigma parameter
+      (see Bodla et al, https://arxiv.org/abs/1704.04503). When
+      `soft_nms_sigma=0.0` (which is the default), we fall back to standard
+      (hard) NMS. Soft NMS is currently only supported when
+      pad_to_max_output_size is False.
+    scope: tf scope name.
+    use_static_shapes: If true, the output nmsed boxes are padded to be of
+      length `max_size_per_class` and it doesn't clip boxes to max_total_size.
+      Defaults to false.
+    use_partitioned_nms: If true, use partitioned version of
+      non_max_suppression.
+    parallel_iterations: (optional) number of batch items to process in
+      parallel.
+    use_class_agnostic_nms: If true, this uses class-agnostic non max
+      suppression.
+    max_classes_per_detection: Maximum number of retained classes per detection
+      box in class-agnostic NMS.
+    use_dynamic_map_fn: If true, images in the batch will be processed within a
+      dynamic loop. Otherwise, a static loop will be used if possible.
+    use_combined_nms: If true, it uses tf.image.combined_non_max_suppression
+      (a multi-class version of NMS that operates on a batch).
+      It greedily selects a subset of detection bounding boxes, pruning away
+      boxes that have high IOU (intersection over union) overlap (> thresh)
+      with already selected boxes. It operates independently for each batch
+      item. Within each batch item, it operates independently for each class
+      for which scores are provided (via the scores field of the input
+      box_list), pruning boxes with score less than a provided threshold prior
+      to applying
+      NMS.
This operation is performed on *all* batches and *all* classes + in the batch, therefore any background classes should be removed prior to + calling this function. + Masks and additional fields are not supported. + See argument checks in the code below for unsupported arguments. + + Returns: + 'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor + containing the non-max suppressed boxes. + 'nmsed_scores': A [batch_size, max_detections] float32 tensor containing + the scores for the boxes. + 'nmsed_classes': A [batch_size, max_detections] float32 tensor + containing the class for boxes. + 'nmsed_masks': (optional) a + [batch_size, max_detections, mask_height, mask_width] float32 tensor + containing masks for each selected box. This is set to None if input + `masks` is None. + 'nmsed_additional_fields': (optional) a dictionary of + [batch_size, max_detections, ...] float32 tensors corresponding to the + tensors specified in the input `additional_fields`. This is not returned + if input `additional_fields` is None. + 'num_detections': A [batch_size] int32 tensor indicating the number of + valid detections per batch item. Only the top num_detections[i] entries in + nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the + entries are zero paddings. + + Raises: + ValueError: if `q` in boxes.shape is not 1 or not equal to number of + classes as inferred from scores.shape. + """ + if use_combined_nms: + if change_coordinate_frame: + raise ValueError( + 'change_coordinate_frame (normalizing coordinates' + ' relative to clip_window) is not supported by combined_nms.') + if num_valid_boxes is not None: + raise ValueError('num_valid_boxes is not supported by combined_nms.') + if masks is not None: + raise ValueError('masks is not supported by combined_nms.') + if soft_nms_sigma != 0.0: + raise ValueError('Soft NMS is not supported by combined_nms.') + if use_class_agnostic_nms: + raise ValueError('class-agnostic NMS is not supported by combined_nms.') + if clip_window is not None: + tf.compat.v1.logging.warning( + 'clip_window is not supported by combined_nms unless it is' + ' [0. 0. 1. 1.] for each image.') + if additional_fields is not None: + tf.compat.v1.logging.warning( + 'additional_fields is not supported by combined_nms.') + if parallel_iterations != 32: + tf.compat.v1.logging.warning( + 'Number of batch items to be processed in parallel is' + ' not configurable by combined_nms.') + if max_classes_per_detection > 1: + tf.compat.v1.logging.warning( + 'max_classes_per_detection is not configurable by combined_nms.') + + with tf.name_scope(scope, 'CombinedNonMaxSuppression'): + (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes, + batch_num_detections) = tf.image.combined_non_max_suppression( + boxes=boxes, + scores=scores, + max_output_size_per_class=max_size_per_class, + max_total_size=max_total_size, + iou_threshold=iou_thresh, + score_threshold=score_thresh, + pad_per_class=use_static_shapes) + # Not supported by combined_non_max_suppression. + batch_nmsed_masks = None + # Not supported by combined_non_max_suppression. 
+      batch_nmsed_additional_fields = None
+      return (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes,
+              batch_nmsed_masks, batch_nmsed_additional_fields,
+              batch_num_detections)
+
+  q = shape_utils.get_dim_as_int(boxes.shape[2])
+  num_classes = shape_utils.get_dim_as_int(scores.shape[2])
+  if q != 1 and q != num_classes:
+    raise ValueError('third dimension of boxes must be either 1 or equal '
+                     'to the third dimension of scores.')
+  if change_coordinate_frame and clip_window is None:
+    raise ValueError('if change_coordinate_frame is True, then a clip_window '
+                     'must be specified.')
+  original_masks = masks
+
+  # Create ordered dictionary using the sorted keys from
+  # additional fields to ensure getting the same key value assignment
+  # in _single_image_nms_fn(). The dictionary is thus a sorted version of
+  # additional_fields.
+  if additional_fields is None:
+    ordered_additional_fields = {}
+  else:
+    ordered_additional_fields = collections.OrderedDict(
+        sorted(additional_fields.items(), key=lambda item: item[0]))
+  del additional_fields
+  with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
+    boxes_shape = boxes.shape
+    batch_size = shape_utils.get_dim_as_int(boxes_shape[0])
+    num_anchors = shape_utils.get_dim_as_int(boxes_shape[1])
+
+    if batch_size is None:
+      batch_size = tf.shape(boxes)[0]
+    if num_anchors is None:
+      num_anchors = tf.shape(boxes)[1]
+
+    # If num_valid_boxes isn't provided, create one and mark all boxes as
+    # valid.
+    if num_valid_boxes is None:
+      num_valid_boxes = tf.ones([batch_size], dtype=tf.int32) * num_anchors
+
+    # If masks aren't provided, create dummy masks so we only need one copy
+    # of _single_image_nms_fn and can discard the dummy masks after map_fn.
+    if masks is None:
+      masks_shape = tf.stack([batch_size, num_anchors, q, 1, 1])
+      masks = tf.zeros(masks_shape)
+
+    if clip_window is None:
+      clip_window = tf.stack([
+          tf.reduce_min(boxes[:, :, :, 0]),
+          tf.reduce_min(boxes[:, :, :, 1]),
+          tf.reduce_max(boxes[:, :, :, 2]),
+          tf.reduce_max(boxes[:, :, :, 3])
+      ])
+    if clip_window.shape.ndims == 1:
+      clip_window = tf.tile(tf.expand_dims(clip_window, 0), [batch_size, 1])
+
+    def _single_image_nms_fn(args):
+      """Runs NMS on a single image and returns padded output.
+
+      Args:
+        args: A list of tensors consisting of the following:
+          per_image_boxes - A [num_anchors, q, 4] float32 tensor containing
+            detections. If `q` is 1 then same boxes are used for all classes
+            otherwise, if `q` is equal to number of classes, class-specific
+            boxes are used.
+          per_image_scores - A [num_anchors, num_classes] float32 tensor
+            containing the scores for each of the `num_anchors` detections.
+          per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
+            tensor containing box masks. `q` can be either number of classes
+            or 1 depending on whether a separate mask is predicted per class.
+          per_image_clip_window - A 1D float32 tensor of the form
+            [ymin, xmin, ymax, xmax] representing the window to clip the boxes
+            to.
+          per_image_additional_fields - (optional) A variable number of float32
+            tensors each with size [num_anchors, ...].
+          per_image_num_valid_boxes - A scalar tensor of type `int32`
+            representing the number of valid boxes in this image. This
+            parameter allows for ignoring zero paddings.
+
+      Returns:
+        'nmsed_boxes': A [max_detections, 4] float32 tensor containing the
+          non-max suppressed boxes.
+ 'nmsed_scores': A [max_detections] float32 tensor containing the scores + for the boxes. + 'nmsed_classes': A [max_detections] float32 tensor containing the class + for boxes. + 'nmsed_masks': (optional) a [max_detections, mask_height, mask_width] + float32 tensor containing masks for each selected box. This is set to + None if input `masks` is None. + 'nmsed_additional_fields': (optional) A variable number of float32 + tensors each with size [max_detections, ...] corresponding to the + input `per_image_additional_fields`. + 'num_detections': A [batch_size] int32 tensor indicating the number of + valid detections per batch item. Only the top num_detections[i] + entries in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The + rest of the entries are zero paddings. + """ + per_image_boxes = args[0] + per_image_scores = args[1] + per_image_masks = args[2] + per_image_clip_window = args[3] + # Make sure that the order of elements passed in args is aligned with + # the iteration order of ordered_additional_fields + per_image_additional_fields = { + key: value + for key, value in zip(ordered_additional_fields, args[4:-1]) + } + per_image_num_valid_boxes = args[-1] + if use_static_shapes: + total_proposals = tf.shape(per_image_scores) + per_image_scores = tf.where( + tf.less(tf.range(total_proposals[0]), per_image_num_valid_boxes), + per_image_scores, + tf.fill(total_proposals, np.finfo('float32').min)) + else: + per_image_boxes = tf.reshape( + tf.slice(per_image_boxes, 3 * [0], + tf.stack([per_image_num_valid_boxes, -1, -1])), [-1, q, 4]) + per_image_scores = tf.reshape( + tf.slice(per_image_scores, [0, 0], + tf.stack([per_image_num_valid_boxes, -1])), + [-1, num_classes]) + per_image_masks = tf.reshape( + tf.slice(per_image_masks, 4 * [0], + tf.stack([per_image_num_valid_boxes, -1, -1, -1])), + [-1, q, shape_utils.get_dim_as_int(per_image_masks.shape[2]), + shape_utils.get_dim_as_int(per_image_masks.shape[3])]) + if per_image_additional_fields is not None: + for key, tensor in per_image_additional_fields.items(): + additional_field_shape = tensor.get_shape() + additional_field_dim = len(additional_field_shape) + per_image_additional_fields[key] = tf.reshape( + tf.slice( + per_image_additional_fields[key], + additional_field_dim * [0], + tf.stack([per_image_num_valid_boxes] + + (additional_field_dim - 1) * [-1])), [-1] + [ + shape_utils.get_dim_as_int(dim) + for dim in additional_field_shape[1:] + ]) + if use_class_agnostic_nms: + nmsed_boxlist, num_valid_nms_boxes = class_agnostic_non_max_suppression( + per_image_boxes, + per_image_scores, + score_thresh, + iou_thresh, + max_classes_per_detection, + max_total_size, + clip_window=per_image_clip_window, + change_coordinate_frame=change_coordinate_frame, + masks=per_image_masks, + pad_to_max_output_size=use_static_shapes, + use_partitioned_nms=use_partitioned_nms, + additional_fields=per_image_additional_fields, + soft_nms_sigma=soft_nms_sigma) + else: + nmsed_boxlist, num_valid_nms_boxes = multiclass_non_max_suppression( + per_image_boxes, + per_image_scores, + score_thresh, + iou_thresh, + max_size_per_class, + max_total_size, + clip_window=per_image_clip_window, + change_coordinate_frame=change_coordinate_frame, + masks=per_image_masks, + pad_to_max_output_size=use_static_shapes, + use_partitioned_nms=use_partitioned_nms, + additional_fields=per_image_additional_fields, + soft_nms_sigma=soft_nms_sigma) + + if not use_static_shapes: + nmsed_boxlist = box_list_ops.pad_or_clip_box_list( + nmsed_boxlist, max_total_size) + num_detections = 
num_valid_nms_boxes + nmsed_boxes = nmsed_boxlist.get() + nmsed_scores = nmsed_boxlist.get_field(fields.BoxListFields.scores) + nmsed_classes = nmsed_boxlist.get_field(fields.BoxListFields.classes) + nmsed_masks = nmsed_boxlist.get_field(fields.BoxListFields.masks) + nmsed_additional_fields = [] + # Sorting is needed here to ensure that the values stored in + # nmsed_additional_fields are always kept in the same order + # across different execution runs. + for key in sorted(per_image_additional_fields.keys()): + nmsed_additional_fields.append(nmsed_boxlist.get_field(key)) + return ([nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks] + + nmsed_additional_fields + [num_detections]) + + num_additional_fields = 0 + if ordered_additional_fields: + num_additional_fields = len(ordered_additional_fields) + num_nmsed_outputs = 4 + num_additional_fields + + if use_dynamic_map_fn: + map_fn = tf.map_fn + else: + map_fn = shape_utils.static_or_dynamic_map_fn + + batch_outputs = map_fn( + _single_image_nms_fn, + elems=([boxes, scores, masks, clip_window] + + list(ordered_additional_fields.values()) + [num_valid_boxes]), + dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]), + parallel_iterations=parallel_iterations) + + batch_nmsed_boxes = batch_outputs[0] + batch_nmsed_scores = batch_outputs[1] + batch_nmsed_classes = batch_outputs[2] + batch_nmsed_masks = batch_outputs[3] + batch_nmsed_values = batch_outputs[4:-1] + + batch_nmsed_additional_fields = {} + if num_additional_fields > 0: + # Sort the keys to ensure arranging elements in same order as + # in _single_image_nms_fn. + batch_nmsed_keys = list(ordered_additional_fields.keys()) + for i in range(len(batch_nmsed_keys)): + batch_nmsed_additional_fields[ + batch_nmsed_keys[i]] = batch_nmsed_values[i] + + batch_num_detections = batch_outputs[-1] + + if original_masks is None: + batch_nmsed_masks = None + + if not ordered_additional_fields: + batch_nmsed_additional_fields = None + + return (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes, + batch_nmsed_masks, batch_nmsed_additional_fields, + batch_num_detections) diff --git a/core/prefetcher.py b/core/prefetcher.py new file mode 100644 index 0000000..9bb7d65 --- /dev/null +++ b/core/prefetcher.py @@ -0,0 +1,61 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Provides functions to prefetch tensors to feed into models.""" +import tensorflow as tf + + +def prefetch(tensor_dict, capacity): + """Creates a prefetch queue for tensors. + + Creates a FIFO queue to asynchronously enqueue tensor_dicts and returns a + dequeue op that evaluates to a tensor_dict. This function is useful in + prefetching preprocessed tensors so that the data is readily available for + consumers. + + Example input pipeline when you don't need batching: + ---------------------------------------------------- + key, string_tensor = slim.parallel_reader.parallel_read(...) 
+ tensor_dict = decoder.decode(string_tensor) + tensor_dict = preprocessor.preprocess(tensor_dict, ...) + prefetch_queue = prefetcher.prefetch(tensor_dict, capacity=20) + tensor_dict = prefetch_queue.dequeue() + outputs = Model(tensor_dict) + ... + ---------------------------------------------------- + + For input pipelines with batching, refer to core/batcher.py + + Args: + tensor_dict: a dictionary of tensors to prefetch. + capacity: the size of the prefetch queue. + + Returns: + a FIFO prefetcher queue + """ + names = list(tensor_dict.keys()) + dtypes = [t.dtype for t in tensor_dict.values()] + shapes = [t.get_shape() for t in tensor_dict.values()] + prefetch_queue = tf.PaddingFIFOQueue(capacity, dtypes=dtypes, + shapes=shapes, + names=names, + name='prefetch_queue') + enqueue_op = prefetch_queue.enqueue(tensor_dict) + tf.train.queue_runner.add_queue_runner(tf.train.queue_runner.QueueRunner( + prefetch_queue, [enqueue_op])) + tf.summary.scalar( + 'queue/%s/fraction_of_%d_full' % (prefetch_queue.name, capacity), + tf.cast(prefetch_queue.size(), dtype=tf.float32) * (1. / capacity)) + return prefetch_queue diff --git a/core/prefetcher_test.py b/core/prefetcher_test.py new file mode 100644 index 0000000..83782d3 --- /dev/null +++ b/core/prefetcher_test.py @@ -0,0 +1,106 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for object_detection.core.prefetcher.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from six.moves import range +import tensorflow as tf + +from object_detection.core import prefetcher + +slim = tf.contrib.slim + + +class PrefetcherTest(tf.test.TestCase): + + def test_prefetch_tensors_with_fully_defined_shapes(self): + with self.test_session() as sess: + batch_size = 10 + image_size = 32 + num_batches = 5 + examples = tf.Variable(tf.constant(0, dtype=tf.int64)) + counter = examples.count_up_to(num_batches) + image = tf.random_normal([batch_size, image_size, + image_size, 3], + dtype=tf.float32, + name='images') + label = tf.random_uniform([batch_size, 1], 0, 10, + dtype=tf.int32, name='labels') + + prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter, + 'image': image, + 'label': label}, + capacity=100) + tensor_dict = prefetch_queue.dequeue() + + self.assertAllEqual(tensor_dict['image'].get_shape().as_list(), + [batch_size, image_size, image_size, 3]) + self.assertAllEqual(tensor_dict['label'].get_shape().as_list(), + [batch_size, 1]) + + tf.initialize_all_variables().run() + with slim.queues.QueueRunners(sess): + for _ in range(num_batches): + results = sess.run(tensor_dict) + self.assertEquals(results['image'].shape, + (batch_size, image_size, image_size, 3)) + self.assertEquals(results['label'].shape, (batch_size, 1)) + with self.assertRaises(tf.errors.OutOfRangeError): + sess.run(tensor_dict) + + def test_prefetch_tensors_with_partially_defined_shapes(self): + with self.test_session() as sess: + batch_size = 10 + image_size = 32 + num_batches = 5 + examples = tf.Variable(tf.constant(0, dtype=tf.int64)) + counter = examples.count_up_to(num_batches) + image = tf.random_normal([batch_size, + tf.Variable(image_size), + tf.Variable(image_size), 3], + dtype=tf.float32, + name='image') + image.set_shape([batch_size, None, None, 3]) + label = tf.random_uniform([batch_size, tf.Variable(1)], 0, + 10, dtype=tf.int32, name='label') + label.set_shape([batch_size, None]) + + prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter, + 'image': image, + 'label': label}, + capacity=100) + tensor_dict = prefetch_queue.dequeue() + + self.assertAllEqual(tensor_dict['image'].get_shape().as_list(), + [batch_size, None, None, 3]) + self.assertAllEqual(tensor_dict['label'].get_shape().as_list(), + [batch_size, None]) + + tf.initialize_all_variables().run() + with slim.queues.QueueRunners(sess): + for _ in range(num_batches): + results = sess.run(tensor_dict) + self.assertEquals(results['image'].shape, + (batch_size, image_size, image_size, 3)) + self.assertEquals(results['label'].shape, (batch_size, 1)) + with self.assertRaises(tf.errors.OutOfRangeError): + sess.run(tensor_dict) + + +if __name__ == '__main__': + tf.test.main() diff --git a/core/preprocessor.py b/core/preprocessor.py new file mode 100644 index 0000000..1c74a58 --- /dev/null +++ b/core/preprocessor.py @@ -0,0 +1,4008 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Preprocess images and bounding boxes for detection.
+
+We perform two sets of operations in the preprocessing stage:
+(a) operations that are applied to both training and testing data,
+(b) operations that are applied only to training data for the purpose of
+    data augmentation.
+
+A preprocessing function receives a set of inputs,
+e.g. an image and bounding boxes,
+performs an operation on them, and returns them.
+Some examples are: randomly cropping the image, randomly mirroring the image,
+                   randomly changing the brightness, contrast and hue, and
+                   randomly jittering the bounding boxes.
+
+The preprocess function receives a tensor_dict which is a dictionary that maps
+different field names to their tensors. For example,
+tensor_dict[fields.InputDataFields.image] holds the image tensor.
+The image is a rank 4 tensor: [1, height, width, channels] with
+dtype=tf.float32. The groundtruth_boxes is a rank 2 tensor: [N, 4] where
+in each row there is a box with [ymin xmin ymax xmax].
+Boxes are in normalized coordinates, meaning
+their coordinate values range in [0, 1].
+
+To preprocess multiple images with the same operations in cases where
+nondeterministic operations are used, a preprocessor_cache.PreprocessorCache
+object can be passed into the preprocess function or individual operations.
+All nondeterministic operations except random_jitter_boxes support caching.
+E.g.
+Let tensor_dict{1,2,3,4,5} be copies of the same inputs.
+Let preprocess_options contain nondeterministic operation(s) excluding
+random_jitter_boxes.
+
+cache1 = preprocessor_cache.PreprocessorCache()
+cache2 = preprocessor_cache.PreprocessorCache()
+a = preprocess(tensor_dict1, preprocess_options, preprocess_vars_cache=cache1)
+b = preprocess(tensor_dict2, preprocess_options, preprocess_vars_cache=cache1)
+c = preprocess(tensor_dict3, preprocess_options, preprocess_vars_cache=cache2)
+d = preprocess(tensor_dict4, preprocess_options, preprocess_vars_cache=cache2)
+e = preprocess(tensor_dict5, preprocess_options)
+
+Then the corresponding tensors of object pairs (a,b) and (c,d)
+are guaranteed to be equal element-wise, but the equality of any other object
+pair cannot be determined.
+
+Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing
+functions receive a rank 3 tensor for processing the image. Thus, inside the
+preprocess function we squeeze the image to become a rank 3 tensor and then
+we pass it to the functions. At the end of the preprocess we expand the image
+back to rank 4.
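+
+A minimal sketch of a single call (illustrative only; it assumes tensor_dict
+has already been produced by a decoder, and builds preprocess_options by
+hand as a list of (function, kwargs) tuples):
+
+  options = [(random_horizontal_flip, {}),
+             (random_adjust_brightness, {'max_delta': 0.2})]
+  tensor_dict = preprocess(tensor_dict, options)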
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import functools +import inspect +import sys + +import six +from six.moves import range +from six.moves import zip +import tensorflow as tf + +from tensorflow.python.ops import control_flow_ops +from object_detection.core import box_list +from object_detection.core import box_list_ops +from object_detection.core import keypoint_ops +from object_detection.core import preprocessor_cache +from object_detection.core import standard_fields as fields +from object_detection.utils import autoaugment_utils +from object_detection.utils import patch_ops +from object_detection.utils import shape_utils + + +def _apply_with_random_selector(x, + func, + num_cases, + preprocess_vars_cache=None, + key=''): + """Computes func(x, sel), with sel sampled from [0...num_cases-1]. + + If both preprocess_vars_cache AND key are the same between two calls, sel will + be the same value in both calls. + + Args: + x: input Tensor. + func: Python function to apply. + num_cases: Python int32, number of cases to sample sel from. + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + key: variable identifier for preprocess_vars_cache. + + Returns: + The result of func(x, sel), where func receives the value of the + selector as a python integer, but sel is sampled dynamically. + """ + generator_func = functools.partial( + tf.random_uniform, [], maxval=num_cases, dtype=tf.int32) + rand_sel = _get_or_create_preprocess_rand_vars( + generator_func, preprocessor_cache.PreprocessorCache.SELECTOR, + preprocess_vars_cache, key) + + # Pass the real x only to one of the func calls. + return control_flow_ops.merge([func( + control_flow_ops.switch(x, tf.equal(rand_sel, case))[1], case) + for case in range(num_cases)])[0] + + +def _apply_with_random_selector_tuples(x, + func, + num_cases, + preprocess_vars_cache=None, + key=''): + """Computes func(x, sel), with sel sampled from [0...num_cases-1]. + + If both preprocess_vars_cache AND key are the same between two calls, sel will + be the same value in both calls. + + Args: + x: A tuple of input tensors. + func: Python function to apply. + num_cases: Python int32, number of cases to sample sel from. + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + key: variable identifier for preprocess_vars_cache. + + Returns: + The result of func(x, sel), where func receives the value of the + selector as a python integer, but sel is sampled dynamically. + """ + num_inputs = len(x) + generator_func = functools.partial( + tf.random_uniform, [], maxval=num_cases, dtype=tf.int32) + rand_sel = _get_or_create_preprocess_rand_vars( + generator_func, preprocessor_cache.PreprocessorCache.SELECTOR_TUPLES, + preprocess_vars_cache, key) + + # Pass the real x only to one of the func calls. 
+  tuples = [list() for t in x]
+  for case in range(num_cases):
+    new_x = [control_flow_ops.switch(t, tf.equal(rand_sel, case))[1] for t in x]
+    output = func(tuple(new_x), case)
+    for j in range(num_inputs):
+      tuples[j].append(output[j])
+
+  for i in range(num_inputs):
+    tuples[i] = control_flow_ops.merge(tuples[i])[0]
+  return tuple(tuples)
+
+
+def _get_or_create_preprocess_rand_vars(generator_func,
+                                        function_id,
+                                        preprocess_vars_cache,
+                                        key=''):
+  """Returns a tensor stored in preprocess_vars_cache or using generator_func.
+
+  If the tensor was previously generated and appears in the PreprocessorCache,
+  the previously generated tensor will be returned. Otherwise, a new tensor
+  is generated using generator_func and stored in the cache.
+
+  Args:
+    generator_func: A 0-argument function that generates a tensor.
+    function_id: identifier for the preprocessing function used.
+    preprocess_vars_cache: PreprocessorCache object that records previously
+                           performed augmentations. Updated in-place. If this
+                           function is called multiple times with the same
+                           non-null cache, it will perform deterministically.
+    key: identifier for the variable stored.
+
+  Returns:
+    The generated tensor.
+  """
+  if preprocess_vars_cache is not None:
+    var = preprocess_vars_cache.get(function_id, key)
+    if var is None:
+      var = generator_func()
+      preprocess_vars_cache.update(function_id, key, var)
+  else:
+    var = generator_func()
+  return var
+
+
+def _random_integer(minval, maxval, seed):
+  """Returns a random 0-D tensor between minval and maxval.
+
+  Args:
+    minval: minimum value of the random tensor.
+    maxval: maximum value of the random tensor.
+    seed: random seed.
+
+  Returns:
+    A random 0-D tensor between minval (inclusive) and maxval (exclusive).
+  """
+  return tf.random_uniform(
+      [], minval=minval, maxval=maxval, dtype=tf.int32, seed=seed)
+
+
+# TODO(mttang): This method is needed because the current
+# tf.image.rgb_to_grayscale method does not support quantization. Replace with
+# tf.image.rgb_to_grayscale after quantization support is added.
+def _rgb_to_grayscale(images, name=None):
+  """Converts one or more images from RGB to Grayscale.
+
+  Outputs a tensor of the same `DType` and rank as `images`. The size of the
+  last dimension of the output is 1, containing the Grayscale value of the
+  pixels.
+
+  Args:
+    images: The RGB tensor to convert. Last dimension must have size 3 and
+      should contain RGB values.
+    name: A name for the operation (optional).
+
+  Returns:
+    The converted grayscale image(s).
+  """
+  with tf.name_scope(name, 'rgb_to_grayscale', [images]) as name:
+    images = tf.convert_to_tensor(images, name='images')
+    # Remember the original dtype so we can convert back if needed
+    orig_dtype = images.dtype
+    flt_image = tf.image.convert_image_dtype(images, tf.float32)
+
+    # Reference for converting between RGB and grayscale.
+    # https://en.wikipedia.org/wiki/Luma_%28video%29
+    rgb_weights = [0.2989, 0.5870, 0.1140]
+    rank_1 = tf.expand_dims(tf.rank(images) - 1, 0)
+    gray_float = tf.reduce_sum(
+        flt_image * rgb_weights, rank_1, keep_dims=True)
+    gray_float.set_shape(images.get_shape()[:-1].concatenate([1]))
+    return tf.image.convert_image_dtype(gray_float, orig_dtype, name=name)
+
+
+def normalize_image(image, original_minval, original_maxval, target_minval,
+                    target_maxval):
+  """Normalizes pixel values in the image.
+
+  Moves the pixel values from the current [original_minval, original_maxval]
+  range to the [target_minval, target_maxval] range.
+ + Args: + image: rank 3 float32 tensor containing 1 + image -> [height, width, channels]. + original_minval: current image minimum value. + original_maxval: current image maximum value. + target_minval: target image minimum value. + target_maxval: target image maximum value. + + Returns: + image: image which is the same shape as input image. + """ + with tf.name_scope('NormalizeImage', values=[image]): + original_minval = float(original_minval) + original_maxval = float(original_maxval) + target_minval = float(target_minval) + target_maxval = float(target_maxval) + image = tf.cast(image, dtype=tf.float32) + image = tf.subtract(image, original_minval) + image = tf.multiply(image, (target_maxval - target_minval) / + (original_maxval - original_minval)) + image = tf.add(image, target_minval) + return image + + +def retain_boxes_above_threshold(boxes, + labels, + label_weights, + label_confidences=None, + multiclass_scores=None, + masks=None, + keypoints=None, + threshold=0.0): + """Retains boxes whose label weight is above a given threshold. + + If the label weight for a box is missing (represented by NaN), the box is + retained. The boxes that don't pass the threshold will not appear in the + returned tensor. + + Args: + boxes: float32 tensor of shape [num_instance, 4] representing boxes + location in normalized coordinates. + labels: rank 1 int32 tensor of shape [num_instance] containing the object + classes. + label_weights: float32 tensor of shape [num_instance] representing the + weight for each box. + label_confidences: float32 tensor of shape [num_instance] representing the + confidence for each box. + multiclass_scores: (optional) float32 tensor of shape + [num_instances, num_classes] representing the score for each box for each + class. + masks: (optional) rank 3 float32 tensor with shape + [num_instances, height, width] containing instance masks. The masks are of + the same height, width as the input `image`. + keypoints: (optional) rank 3 float32 tensor with shape + [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized + coordinates. + threshold: scalar python float. 
+
+  Returns:
+    retained_boxes: [num_retained_instance, 4]
+    retained_labels: [num_retained_instance]
+    retained_label_weights: [num_retained_instance]
+
+    If multiclass_scores, masks, or keypoints are not None, the function also
+      returns:
+
+    retained_multiclass_scores: [num_retained_instance, num_classes]
+    retained_masks: [num_retained_instance, height, width]
+    retained_keypoints: [num_retained_instance, num_keypoints, 2]
+  """
+  with tf.name_scope('RetainBoxesAboveThreshold',
+                     values=[boxes, labels, label_weights]):
+    indices = tf.where(
+        tf.logical_or(label_weights > threshold, tf.is_nan(label_weights)))
+    indices = tf.squeeze(indices, axis=1)
+    retained_boxes = tf.gather(boxes, indices)
+    retained_labels = tf.gather(labels, indices)
+    retained_label_weights = tf.gather(label_weights, indices)
+    result = [retained_boxes, retained_labels, retained_label_weights]
+
+    if label_confidences is not None:
+      retained_label_confidences = tf.gather(label_confidences, indices)
+      result.append(retained_label_confidences)
+
+    if multiclass_scores is not None:
+      retained_multiclass_scores = tf.gather(multiclass_scores, indices)
+      result.append(retained_multiclass_scores)
+
+    if masks is not None:
+      retained_masks = tf.gather(masks, indices)
+      result.append(retained_masks)
+
+    if keypoints is not None:
+      retained_keypoints = tf.gather(keypoints, indices)
+      result.append(retained_keypoints)
+
+    return result
+
+
+def drop_label_probabilistically(boxes,
+                                 labels,
+                                 label_weights,
+                                 label_confidences=None,
+                                 multiclass_scores=None,
+                                 masks=None,
+                                 keypoints=None,
+                                 dropped_label=None,
+                                 drop_probability=0.0,
+                                 seed=None):
+  """Drops boxes of a certain label with probability drop_probability.
+
+  Boxes of the label dropped_label will not appear in the returned tensor.
+
+  Args:
+    boxes: float32 tensor of shape [num_instance, 4] representing boxes
+      location in normalized coordinates.
+    labels: rank 1 int32 tensor of shape [num_instance] containing the object
+      classes.
+    label_weights: float32 tensor of shape [num_instance] representing the
+      weight for each box.
+    label_confidences: float32 tensor of shape [num_instance] representing the
+      confidence for each box.
+    multiclass_scores: (optional) float32 tensor of shape
+      [num_instances, num_classes] representing the score for each box for each
+      class.
+    masks: (optional) rank 3 float32 tensor with shape
+      [num_instances, height, width] containing instance masks. The masks are of
+      the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+      [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
+      coordinates.
+    dropped_label: int32 id of label to drop.
+    drop_probability: float32 probability of dropping a label.
+    seed: random seed.
+
+  Returns:
+    retained_boxes: [num_retained_instance, 4]
+    retained_labels: [num_retained_instance]
+    retained_label_weights: [num_retained_instance]
+
+    If multiclass_scores, masks, or keypoints are not None, the function also
+      returns:
+
+    retained_multiclass_scores: [num_retained_instance, num_classes]
+    retained_masks: [num_retained_instance, height, width]
+    retained_keypoints: [num_retained_instance, num_keypoints, 2]
+  """
+  with tf.name_scope('DropLabelProbabilistically',
+                     values=[boxes, labels]):
+    indices = tf.where(
+        tf.logical_or(
+            tf.random_uniform(tf.shape(labels), seed=seed) > drop_probability,
+            tf.not_equal(labels, dropped_label)))
+    indices = tf.squeeze(indices, axis=1)
+
+    retained_boxes = tf.gather(boxes, indices)
+    retained_labels = tf.gather(labels, indices)
+    retained_label_weights = tf.gather(label_weights, indices)
+    result = [retained_boxes, retained_labels, retained_label_weights]
+
+    if label_confidences is not None:
+      retained_label_confidences = tf.gather(label_confidences, indices)
+      result.append(retained_label_confidences)
+
+    if multiclass_scores is not None:
+      retained_multiclass_scores = tf.gather(multiclass_scores, indices)
+      result.append(retained_multiclass_scores)
+
+    if masks is not None:
+      retained_masks = tf.gather(masks, indices)
+      result.append(retained_masks)
+
+    if keypoints is not None:
+      retained_keypoints = tf.gather(keypoints, indices)
+      result.append(retained_keypoints)
+
+    return result
+
+
+def remap_labels(labels,
+                 original_labels=None,
+                 new_label=None):
+  """Remaps labels that have an id in original_labels to new_label.
+
+  Args:
+    labels: rank 1 int32 tensor of shape [num_instance] containing the object
+      classes.
+    original_labels: int list of original labels that should be mapped from.
+    new_label: int label to map to.
+
+  Returns:
+    Remapped labels.
+  """
+  new_labels = labels
+  for original_label in original_labels:
+    change = tf.where(
+        tf.equal(new_labels, original_label),
+        tf.add(tf.zeros_like(new_labels), new_label - original_label),
+        tf.zeros_like(new_labels))
+    new_labels = tf.add(
+        new_labels,
+        change)
+  new_labels = tf.reshape(new_labels, tf.shape(labels))
+  return new_labels
+
+
+def _flip_boxes_left_right(boxes):
+  """Left-right flip the boxes.
+
+  Args:
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+
+  Returns:
+    Flipped boxes.
+  """
+  ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
+  flipped_xmin = tf.subtract(1.0, xmax)
+  flipped_xmax = tf.subtract(1.0, xmin)
+  flipped_boxes = tf.concat([ymin, flipped_xmin, ymax, flipped_xmax], 1)
+  return flipped_boxes
+
+
+def _flip_boxes_up_down(boxes):
+  """Up-down flip the boxes.
+
+  Args:
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+
+  Returns:
+    Flipped boxes.
+  """
+  ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
+  flipped_ymin = tf.subtract(1.0, ymax)
+  flipped_ymax = tf.subtract(1.0, ymin)
+  flipped_boxes = tf.concat([flipped_ymin, xmin, flipped_ymax, xmax], 1)
+  return flipped_boxes
+
+
+def _rot90_boxes(boxes):
+  """Rotate boxes counter-clockwise by 90 degrees.
+
+  Args:
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+ Boxes are in normalized form meaning their coordinates vary + between [0, 1]. + Each row is in the form of [ymin, xmin, ymax, xmax]. + + Returns: + Rotated boxes. + """ + ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1) + rotated_ymin = tf.subtract(1.0, xmax) + rotated_ymax = tf.subtract(1.0, xmin) + rotated_xmin = ymin + rotated_xmax = ymax + rotated_boxes = tf.concat( + [rotated_ymin, rotated_xmin, rotated_ymax, rotated_xmax], 1) + return rotated_boxes + + +def _flip_masks_left_right(masks): + """Left-right flip masks. + + Args: + masks: rank 3 float32 tensor with shape + [num_instances, height, width] representing instance masks. + + Returns: + flipped masks: rank 3 float32 tensor with shape + [num_instances, height, width] representing instance masks. + """ + return masks[:, :, ::-1] + + +def _flip_masks_up_down(masks): + """Up-down flip masks. + + Args: + masks: rank 3 float32 tensor with shape + [num_instances, height, width] representing instance masks. + + Returns: + flipped masks: rank 3 float32 tensor with shape + [num_instances, height, width] representing instance masks. + """ + return masks[:, ::-1, :] + + +def _rot90_masks(masks): + """Rotate masks counter-clockwise by 90 degrees. + + Args: + masks: rank 3 float32 tensor with shape + [num_instances, height, width] representing instance masks. + + Returns: + rotated masks: rank 3 float32 tensor with shape + [num_instances, height, width] representing instance masks. + """ + masks = tf.transpose(masks, [0, 2, 1]) + return masks[:, ::-1, :] + + +def random_horizontal_flip(image, + boxes=None, + masks=None, + keypoints=None, + keypoint_flip_permutation=None, + seed=None, + preprocess_vars_cache=None): + """Randomly flips the image and detections horizontally. + + The probability of flipping the image is 50%. + + Args: + image: rank 3 float32 tensor with shape [height, width, channels]. + boxes: (optional) rank 2 float32 tensor with shape [N, 4] + containing the bounding boxes. + Boxes are in normalized form meaning their coordinates vary + between [0, 1]. + Each row is in the form of [ymin, xmin, ymax, xmax]. + masks: (optional) rank 3 float32 tensor with shape + [num_instances, height, width] containing instance masks. The masks + are of the same height, width as the input `image`. + keypoints: (optional) rank 3 float32 tensor with shape + [num_instances, num_keypoints, 2]. The keypoints are in y-x + normalized coordinates. + keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip + permutation. + seed: random seed + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + image: image which is the same shape as input image. + + If boxes, masks, keypoints, and keypoint_flip_permutation are not None, + the function also returns the following tensors. + + boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. + Boxes are in normalized form meaning their coordinates vary + between [0, 1]. + masks: rank 3 float32 tensor with shape [num_instances, height, width] + containing instance masks. + keypoints: rank 3 float32 tensor with shape + [num_instances, num_keypoints, 2] + + Raises: + ValueError: if keypoints are provided but keypoint_flip_permutation is not. 
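+
+  Example (illustrative; it assumes `image`, `boxes`, `keypoints` and
+  `permutation` already hold tensors of the shapes documented above):
+
+    image, boxes, keypoints = random_horizontal_flip(
+        image, boxes=boxes, keypoints=keypoints,
+        keypoint_flip_permutation=permutation)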
+ """ + + def _flip_image(image): + # flip image + image_flipped = tf.image.flip_left_right(image) + return image_flipped + + if keypoints is not None and keypoint_flip_permutation is None: + raise ValueError( + 'keypoints are provided but keypoints_flip_permutation is not provided') + + with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]): + result = [] + # random variable defining whether to do flip or not + generator_func = functools.partial(tf.random_uniform, [], seed=seed) + do_a_flip_random = _get_or_create_preprocess_rand_vars( + generator_func, + preprocessor_cache.PreprocessorCache.HORIZONTAL_FLIP, + preprocess_vars_cache) + do_a_flip_random = tf.greater(do_a_flip_random, 0.5) + + # flip image + image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image) + result.append(image) + + # flip boxes + if boxes is not None: + boxes = tf.cond(do_a_flip_random, lambda: _flip_boxes_left_right(boxes), + lambda: boxes) + result.append(boxes) + + # flip masks + if masks is not None: + masks = tf.cond(do_a_flip_random, lambda: _flip_masks_left_right(masks), + lambda: masks) + result.append(masks) + + # flip keypoints + if keypoints is not None and keypoint_flip_permutation is not None: + permutation = keypoint_flip_permutation + keypoints = tf.cond( + do_a_flip_random, + lambda: keypoint_ops.flip_horizontal(keypoints, 0.5, permutation), + lambda: keypoints) + result.append(keypoints) + + return tuple(result) + + +def random_vertical_flip(image, + boxes=None, + masks=None, + keypoints=None, + keypoint_flip_permutation=None, + seed=None, + preprocess_vars_cache=None): + """Randomly flips the image and detections vertically. + + The probability of flipping the image is 50%. + + Args: + image: rank 3 float32 tensor with shape [height, width, channels]. + boxes: (optional) rank 2 float32 tensor with shape [N, 4] + containing the bounding boxes. + Boxes are in normalized form meaning their coordinates vary + between [0, 1]. + Each row is in the form of [ymin, xmin, ymax, xmax]. + masks: (optional) rank 3 float32 tensor with shape + [num_instances, height, width] containing instance masks. The masks + are of the same height, width as the input `image`. + keypoints: (optional) rank 3 float32 tensor with shape + [num_instances, num_keypoints, 2]. The keypoints are in y-x + normalized coordinates. + keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip + permutation. + seed: random seed + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + image: image which is the same shape as input image. + + If boxes, masks, keypoints, and keypoint_flip_permutation are not None, + the function also returns the following tensors. + + boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. + Boxes are in normalized form meaning their coordinates vary + between [0, 1]. + masks: rank 3 float32 tensor with shape [num_instances, height, width] + containing instance masks. + keypoints: rank 3 float32 tensor with shape + [num_instances, num_keypoints, 2] + + Raises: + ValueError: if keypoints are provided but keypoint_flip_permutation is not. 
+ """ + + def _flip_image(image): + # flip image + image_flipped = tf.image.flip_up_down(image) + return image_flipped + + if keypoints is not None and keypoint_flip_permutation is None: + raise ValueError( + 'keypoints are provided but keypoints_flip_permutation is not provided') + + with tf.name_scope('RandomVerticalFlip', values=[image, boxes]): + result = [] + # random variable defining whether to do flip or not + generator_func = functools.partial(tf.random_uniform, [], seed=seed) + do_a_flip_random = _get_or_create_preprocess_rand_vars( + generator_func, preprocessor_cache.PreprocessorCache.VERTICAL_FLIP, + preprocess_vars_cache) + do_a_flip_random = tf.greater(do_a_flip_random, 0.5) + + # flip image + image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image) + result.append(image) + + # flip boxes + if boxes is not None: + boxes = tf.cond(do_a_flip_random, lambda: _flip_boxes_up_down(boxes), + lambda: boxes) + result.append(boxes) + + # flip masks + if masks is not None: + masks = tf.cond(do_a_flip_random, lambda: _flip_masks_up_down(masks), + lambda: masks) + result.append(masks) + + # flip keypoints + if keypoints is not None and keypoint_flip_permutation is not None: + permutation = keypoint_flip_permutation + keypoints = tf.cond( + do_a_flip_random, + lambda: keypoint_ops.flip_vertical(keypoints, 0.5, permutation), + lambda: keypoints) + result.append(keypoints) + + return tuple(result) + + +def random_rotation90(image, + boxes=None, + masks=None, + keypoints=None, + seed=None, + preprocess_vars_cache=None): + """Randomly rotates the image and detections 90 degrees counter-clockwise. + + The probability of rotating the image is 50%. This can be combined with + random_horizontal_flip and random_vertical_flip to produce an output with a + uniform distribution of the eight possible 90 degree rotation / reflection + combinations. + + Args: + image: rank 3 float32 tensor with shape [height, width, channels]. + boxes: (optional) rank 2 float32 tensor with shape [N, 4] + containing the bounding boxes. + Boxes are in normalized form meaning their coordinates vary + between [0, 1]. + Each row is in the form of [ymin, xmin, ymax, xmax]. + masks: (optional) rank 3 float32 tensor with shape + [num_instances, height, width] containing instance masks. The masks + are of the same height, width as the input `image`. + keypoints: (optional) rank 3 float32 tensor with shape + [num_instances, num_keypoints, 2]. The keypoints are in y-x + normalized coordinates. + seed: random seed + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + image: image which is the same shape as input image. + + If boxes, masks, and keypoints, are not None, + the function also returns the following tensors. + + boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. + Boxes are in normalized form meaning their coordinates vary + between [0, 1]. + masks: rank 3 float32 tensor with shape [num_instances, height, width] + containing instance masks. 
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+  """
+
+  def _rot90_image(image):
+    # rotate image
+    image_rotated = tf.image.rot90(image)
+    return image_rotated
+
+  with tf.name_scope('RandomRotation90', values=[image, boxes]):
+    result = []
+
+    # random variable defining whether to rotate by 90 degrees or not
+    generator_func = functools.partial(tf.random_uniform, [], seed=seed)
+    do_a_rot90_random = _get_or_create_preprocess_rand_vars(
+        generator_func, preprocessor_cache.PreprocessorCache.ROTATION90,
+        preprocess_vars_cache)
+    do_a_rot90_random = tf.greater(do_a_rot90_random, 0.5)
+
+    # rotate image
+    image = tf.cond(do_a_rot90_random, lambda: _rot90_image(image),
+                    lambda: image)
+    result.append(image)
+
+    # rotate boxes
+    if boxes is not None:
+      boxes = tf.cond(do_a_rot90_random, lambda: _rot90_boxes(boxes),
+                      lambda: boxes)
+      result.append(boxes)
+
+    # rotate masks
+    if masks is not None:
+      masks = tf.cond(do_a_rot90_random, lambda: _rot90_masks(masks),
+                      lambda: masks)
+      result.append(masks)
+
+    # rotate keypoints
+    if keypoints is not None:
+      keypoints = tf.cond(
+          do_a_rot90_random,
+          lambda: keypoint_ops.rot90(keypoints),
+          lambda: keypoints)
+      result.append(keypoints)
+
+    return tuple(result)
+
+
+def random_pixel_value_scale(image,
+                             minval=0.9,
+                             maxval=1.1,
+                             seed=None,
+                             preprocess_vars_cache=None):
+  """Scales each value in the pixels of the image.
+
+  This function scales each pixel independently of the other ones.
+  For each value in the image tensor, it draws a random number between
+  minval and maxval and multiplies the value by it.
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 255].
+    minval: lower ratio of scaling pixel values.
+    maxval: upper ratio of scaling pixel values.
+    seed: random seed.
+    preprocess_vars_cache: PreprocessorCache object that records previously
+                           performed augmentations. Updated in-place. If this
+                           function is called multiple times with the same
+                           non-null cache, it will perform deterministically.
+
+  Returns:
+    image: image which is the same shape as input image.
+  """
+  with tf.name_scope('RandomPixelValueScale', values=[image]):
+    generator_func = functools.partial(
+        tf.random_uniform, tf.shape(image),
+        minval=minval, maxval=maxval,
+        dtype=tf.float32, seed=seed)
+    color_coef = _get_or_create_preprocess_rand_vars(
+        generator_func,
+        preprocessor_cache.PreprocessorCache.PIXEL_VALUE_SCALE,
+        preprocess_vars_cache)
+
+    image = tf.multiply(image, color_coef)
+    image = tf.clip_by_value(image, 0.0, 255.0)
+
+  return image
+
+
+def random_image_scale(image,
+                       masks=None,
+                       min_scale_ratio=0.5,
+                       max_scale_ratio=2.0,
+                       seed=None,
+                       preprocess_vars_cache=None):
+  """Scales the image size.
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels].
+    masks: (optional) rank 3 float32 tensor containing masks with
+      size [height, width, num_masks]. The value is set to None if there are no
+      masks.
+    min_scale_ratio: minimum scaling ratio.
+    max_scale_ratio: maximum scaling ratio.
+    seed: random seed.
+    preprocess_vars_cache: PreprocessorCache object that records previously
+                           performed augmentations. Updated in-place. If this
+                           function is called multiple times with the same
+                           non-null cache, it will perform deterministically.
+
+  Returns:
+    image: image which is the same rank as input image.
+    masks: If masks is not None, resized masks which are the same rank as input
+      masks will be returned.
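+
+  Example (illustrative; `image` is a [height, width, 3] tensor and `masks`
+  a [height, width, num_masks] tensor):
+
+    image, masks = random_image_scale(image, masks=masks,
+                                      min_scale_ratio=0.8,
+                                      max_scale_ratio=1.2)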
+ """ + with tf.name_scope('RandomImageScale', values=[image]): + result = [] + image_shape = tf.shape(image) + image_height = image_shape[0] + image_width = image_shape[1] + generator_func = functools.partial( + tf.random_uniform, [], + minval=min_scale_ratio, maxval=max_scale_ratio, + dtype=tf.float32, seed=seed) + size_coef = _get_or_create_preprocess_rand_vars( + generator_func, preprocessor_cache.PreprocessorCache.IMAGE_SCALE, + preprocess_vars_cache) + + image_newysize = tf.cast( + tf.multiply(tf.cast(image_height, dtype=tf.float32), size_coef), + dtype=tf.int32) + image_newxsize = tf.cast( + tf.multiply(tf.cast(image_width, dtype=tf.float32), size_coef), + dtype=tf.int32) + image = tf.image.resize_images( + image, [image_newysize, image_newxsize], align_corners=True) + result.append(image) + if masks is not None: + masks = tf.image.resize_images( + masks, [image_newysize, image_newxsize], + method=tf.image.ResizeMethod.NEAREST_NEIGHBOR, + align_corners=True) + result.append(masks) + return tuple(result) + + +def _augment_only_rgb_channels(image, augment_function): + """Augments only the RGB slice of an image with additional channels.""" + rgb_slice = image[:, :, :3] + augmented_rgb_slice = augment_function(rgb_slice) + image = tf.concat([augmented_rgb_slice, image[:, :, 3:]], -1) + return image + + +def random_rgb_to_gray(image, + probability=0.1, + seed=None, + preprocess_vars_cache=None): + """Changes the image from RGB to Grayscale with the given probability. + + Args: + image: rank 3 float32 tensor contains 1 image -> [height, width, channels] + with pixel values varying between [0, 255]. + probability: the probability of returning a grayscale image. + The probability should be a number between [0, 1]. + seed: random seed. + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + image: image which is the same shape as input image. + """ + def _image_to_gray(image): + image_gray1 = _rgb_to_grayscale(image) + image_gray3 = tf.image.grayscale_to_rgb(image_gray1) + return image_gray3 + + with tf.name_scope('RandomRGBtoGray', values=[image]): + # random variable defining whether to change to grayscale or not + generator_func = functools.partial(tf.random_uniform, [], seed=seed) + do_gray_random = _get_or_create_preprocess_rand_vars( + generator_func, preprocessor_cache.PreprocessorCache.RGB_TO_GRAY, + preprocess_vars_cache) + + image = tf.cond( + tf.greater(do_gray_random, probability), lambda: image, + lambda: _augment_only_rgb_channels(image, _image_to_gray)) + + return image + + +def random_adjust_brightness(image, + max_delta=0.2, + seed=None, + preprocess_vars_cache=None): + """Randomly adjusts brightness. + + Makes sure the output image is still between 0 and 255. + + Args: + image: rank 3 float32 tensor contains 1 image -> [height, width, channels] + with pixel values varying between [0, 255]. + max_delta: how much to change the brightness. A value between [0, 1). + seed: random seed. + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + image: image which is the same shape as input image. + boxes: boxes which is the same shape as input boxes. 
+ """ + with tf.name_scope('RandomAdjustBrightness', values=[image]): + generator_func = functools.partial(tf.random_uniform, [], + -max_delta, max_delta, seed=seed) + delta = _get_or_create_preprocess_rand_vars( + generator_func, + preprocessor_cache.PreprocessorCache.ADJUST_BRIGHTNESS, + preprocess_vars_cache) + + def _adjust_brightness(image): + image = tf.image.adjust_brightness(image / 255, delta) * 255 + image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0) + return image + + image = _augment_only_rgb_channels(image, _adjust_brightness) + return image + + +def random_adjust_contrast(image, + min_delta=0.8, + max_delta=1.25, + seed=None, + preprocess_vars_cache=None): + """Randomly adjusts contrast. + + Makes sure the output image is still between 0 and 255. + + Args: + image: rank 3 float32 tensor contains 1 image -> [height, width, channels] + with pixel values varying between [0, 255]. + min_delta: see max_delta. + max_delta: how much to change the contrast. Contrast will change with a + value between min_delta and max_delta. This value will be + multiplied to the current contrast of the image. + seed: random seed. + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + image: image which is the same shape as input image. + """ + with tf.name_scope('RandomAdjustContrast', values=[image]): + generator_func = functools.partial(tf.random_uniform, [], + min_delta, max_delta, seed=seed) + contrast_factor = _get_or_create_preprocess_rand_vars( + generator_func, + preprocessor_cache.PreprocessorCache.ADJUST_CONTRAST, + preprocess_vars_cache) + + def _adjust_contrast(image): + image = tf.image.adjust_contrast(image / 255, contrast_factor) * 255 + image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0) + return image + image = _augment_only_rgb_channels(image, _adjust_contrast) + return image + + +def random_adjust_hue(image, + max_delta=0.02, + seed=None, + preprocess_vars_cache=None): + """Randomly adjusts hue. + + Makes sure the output image is still between 0 and 255. + + Args: + image: rank 3 float32 tensor contains 1 image -> [height, width, channels] + with pixel values varying between [0, 255]. + max_delta: change hue randomly with a value between 0 and max_delta. + seed: random seed. + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + image: image which is the same shape as input image. + """ + with tf.name_scope('RandomAdjustHue', values=[image]): + generator_func = functools.partial(tf.random_uniform, [], + -max_delta, max_delta, seed=seed) + delta = _get_or_create_preprocess_rand_vars( + generator_func, preprocessor_cache.PreprocessorCache.ADJUST_HUE, + preprocess_vars_cache) + def _adjust_hue(image): + image = tf.image.adjust_hue(image / 255, delta) * 255 + image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0) + return image + image = _augment_only_rgb_channels(image, _adjust_hue) + return image + + +def random_adjust_saturation(image, + min_delta=0.8, + max_delta=1.25, + seed=None, + preprocess_vars_cache=None): + """Randomly adjusts saturation. + + Makes sure the output image is still between 0 and 255. 
+ + Args: + image: rank 3 float32 tensor contains 1 image -> [height, width, channels] + with pixel values varying between [0, 255]. + min_delta: see max_delta. + max_delta: how much to change the saturation. Saturation will change with a + value between min_delta and max_delta. This value will be + multiplied to the current saturation of the image. + seed: random seed. + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + image: image which is the same shape as input image. + """ + with tf.name_scope('RandomAdjustSaturation', values=[image]): + generator_func = functools.partial(tf.random_uniform, [], + min_delta, max_delta, seed=seed) + saturation_factor = _get_or_create_preprocess_rand_vars( + generator_func, + preprocessor_cache.PreprocessorCache.ADJUST_SATURATION, + preprocess_vars_cache) + def _adjust_saturation(image): + image = tf.image.adjust_saturation(image / 255, saturation_factor) * 255 + image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0) + return image + image = _augment_only_rgb_channels(image, _adjust_saturation) + return image + + +def random_distort_color(image, color_ordering=0, preprocess_vars_cache=None): + """Randomly distorts color. + + Randomly distorts color using a combination of brightness, hue, contrast and + saturation changes. Makes sure the output image is still between 0 and 255. + + Args: + image: rank 3 float32 tensor contains 1 image -> [height, width, channels] + with pixel values varying between [0, 255]. + color_ordering: Python int, a type of distortion (valid values: 0, 1). + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + image: image which is the same shape as input image. + + Raises: + ValueError: if color_ordering is not in {0, 1}. + """ + with tf.name_scope('RandomDistortColor', values=[image]): + if color_ordering == 0: + image = random_adjust_brightness( + image, max_delta=32. / 255., + preprocess_vars_cache=preprocess_vars_cache) + image = random_adjust_saturation( + image, min_delta=0.5, max_delta=1.5, + preprocess_vars_cache=preprocess_vars_cache) + image = random_adjust_hue( + image, max_delta=0.2, + preprocess_vars_cache=preprocess_vars_cache) + image = random_adjust_contrast( + image, min_delta=0.5, max_delta=1.5, + preprocess_vars_cache=preprocess_vars_cache) + + elif color_ordering == 1: + image = random_adjust_brightness( + image, max_delta=32. / 255., + preprocess_vars_cache=preprocess_vars_cache) + image = random_adjust_contrast( + image, min_delta=0.5, max_delta=1.5, + preprocess_vars_cache=preprocess_vars_cache) + image = random_adjust_saturation( + image, min_delta=0.5, max_delta=1.5, + preprocess_vars_cache=preprocess_vars_cache) + image = random_adjust_hue( + image, max_delta=0.2, + preprocess_vars_cache=preprocess_vars_cache) + else: + raise ValueError('color_ordering must be in {0, 1}') + return image + + +def random_jitter_boxes(boxes, ratio=0.05, seed=None): + """Randomly jitter boxes in image. + + Args: + boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. + Boxes are in normalized form meaning their coordinates vary + between [0, 1]. + Each row is in the form of [ymin, xmin, ymax, xmax]. 
+ ratio: The ratio of the box width and height that the corners can jitter. + For example if the width is 100 pixels and ratio is 0.05, + the corners can jitter up to 5 pixels in the x direction. + seed: random seed. + + Returns: + boxes: boxes which is the same shape as input boxes. + """ + def random_jitter_box(box, ratio, seed): + """Randomly jitter box. + + Args: + box: bounding box [1, 1, 4]. + ratio: max ratio between jittered box and original box, + a number between [0, 0.5]. + seed: random seed. + + Returns: + jittered_box: jittered box. + """ + rand_numbers = tf.random_uniform( + [1, 1, 4], minval=-ratio, maxval=ratio, dtype=tf.float32, seed=seed) + box_width = tf.subtract(box[0, 0, 3], box[0, 0, 1]) + box_height = tf.subtract(box[0, 0, 2], box[0, 0, 0]) + hw_coefs = tf.stack([box_height, box_width, box_height, box_width]) + hw_rand_coefs = tf.multiply(hw_coefs, rand_numbers) + jittered_box = tf.add(box, hw_rand_coefs) + jittered_box = tf.clip_by_value(jittered_box, 0.0, 1.0) + return jittered_box + + with tf.name_scope('RandomJitterBoxes', values=[boxes]): + # boxes are [N, 4]. Lets first make them [N, 1, 1, 4] + boxes_shape = tf.shape(boxes) + boxes = tf.expand_dims(boxes, 1) + boxes = tf.expand_dims(boxes, 2) + + distorted_boxes = tf.map_fn( + lambda x: random_jitter_box(x, ratio, seed), boxes, dtype=tf.float32) + + distorted_boxes = tf.reshape(distorted_boxes, boxes_shape) + + return distorted_boxes + + +def _strict_random_crop_image(image, + boxes, + labels, + label_weights, + label_confidences=None, + multiclass_scores=None, + masks=None, + keypoints=None, + min_object_covered=1.0, + aspect_ratio_range=(0.75, 1.33), + area_range=(0.1, 1.0), + overlap_thresh=0.3, + clip_boxes=True, + preprocess_vars_cache=None): + """Performs random crop. + + Note: Keypoint coordinates that are outside the crop will be set to NaN, which + is consistent with the original keypoint encoding for non-existing keypoints. + This function always crops the image and is supposed to be used by + `random_crop_image` function which sometimes returns the image unchanged. + + Args: + image: rank 3 float32 tensor containing 1 image -> [height, width, channels] + with pixel values varying between [0, 1]. + boxes: rank 2 float32 tensor containing the bounding boxes with shape + [num_instances, 4]. + Boxes are in normalized form meaning their coordinates vary + between [0, 1]. + Each row is in the form of [ymin, xmin, ymax, xmax]. + labels: rank 1 int32 tensor containing the object classes. + label_weights: float32 tensor of shape [num_instances] representing the + weight for each box. + label_confidences: (optional) float32 tensor of shape [num_instances] + representing the confidence for each box. + multiclass_scores: (optional) float32 tensor of shape + [num_instances, num_classes] representing the score for each box for each + class. + masks: (optional) rank 3 float32 tensor with shape + [num_instances, height, width] containing instance masks. The masks + are of the same height, width as the input `image`. + keypoints: (optional) rank 3 float32 tensor with shape + [num_instances, num_keypoints, 2]. The keypoints are in y-x + normalized coordinates. + min_object_covered: the cropped image must cover at least this fraction of + at least one of the input bounding boxes. + aspect_ratio_range: allowed range for aspect ratio of cropped image. + area_range: allowed range for area ratio between cropped image and the + original image. 
+ overlap_thresh: minimum overlap thresh with new cropped + image to keep the box. + clip_boxes: whether to clip the boxes to the cropped image. + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + image: image which is the same rank as input image. + boxes: boxes which is the same rank as input boxes. + Boxes are in normalized form. + labels: new labels. + + If label_weights, multiclass_scores, masks, or keypoints is not None, the + function also returns: + label_weights: rank 1 float32 tensor with shape [num_instances]. + multiclass_scores: rank 2 float32 tensor with shape + [num_instances, num_classes] + masks: rank 3 float32 tensor with shape [num_instances, height, width] + containing instance masks. + keypoints: rank 3 float32 tensor with shape + [num_instances, num_keypoints, 2] + """ + with tf.name_scope('RandomCropImage', values=[image, boxes]): + image_shape = tf.shape(image) + + # boxes are [N, 4]. Lets first make them [N, 1, 4]. + boxes_expanded = tf.expand_dims( + tf.clip_by_value( + boxes, clip_value_min=0.0, clip_value_max=1.0), 1) + + generator_func = functools.partial( + tf.image.sample_distorted_bounding_box, + image_shape, + bounding_boxes=boxes_expanded, + min_object_covered=min_object_covered, + aspect_ratio_range=aspect_ratio_range, + area_range=area_range, + max_attempts=100, + use_image_if_no_bounding_boxes=True) + + # for ssd cropping, each value of min_object_covered has its own + # cached random variable + sample_distorted_bounding_box = _get_or_create_preprocess_rand_vars( + generator_func, + preprocessor_cache.PreprocessorCache.STRICT_CROP_IMAGE, + preprocess_vars_cache, key=min_object_covered) + + im_box_begin, im_box_size, im_box = sample_distorted_bounding_box + + new_image = tf.slice(image, im_box_begin, im_box_size) + new_image.set_shape([None, None, image.get_shape()[2]]) + + # [1, 4] + im_box_rank2 = tf.squeeze(im_box, axis=[0]) + # [4] + im_box_rank1 = tf.squeeze(im_box) + + boxlist = box_list.BoxList(boxes) + boxlist.add_field('labels', labels) + + if label_weights is not None: + boxlist.add_field('label_weights', label_weights) + + if label_confidences is not None: + boxlist.add_field('label_confidences', label_confidences) + + if multiclass_scores is not None: + boxlist.add_field('multiclass_scores', multiclass_scores) + + im_boxlist = box_list.BoxList(im_box_rank2) + + # remove boxes that are outside cropped image + boxlist, inside_window_ids = box_list_ops.prune_completely_outside_window( + boxlist, im_box_rank1) + + # remove boxes that are outside image + overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes( + boxlist, im_boxlist, overlap_thresh) + + # change the coordinate of the remaining boxes + new_labels = overlapping_boxlist.get_field('labels') + new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist, + im_box_rank1) + new_boxes = new_boxlist.get() + if clip_boxes: + new_boxes = tf.clip_by_value( + new_boxes, clip_value_min=0.0, clip_value_max=1.0) + + result = [new_image, new_boxes, new_labels] + + if label_weights is not None: + new_label_weights = overlapping_boxlist.get_field('label_weights') + result.append(new_label_weights) + + if label_confidences is not None: + new_label_confidences = overlapping_boxlist.get_field('label_confidences') + result.append(new_label_confidences) + + if multiclass_scores is not 
None: + new_multiclass_scores = overlapping_boxlist.get_field('multiclass_scores') + result.append(new_multiclass_scores) + + if masks is not None: + masks_of_boxes_inside_window = tf.gather(masks, inside_window_ids) + masks_of_boxes_completely_inside_window = tf.gather( + masks_of_boxes_inside_window, keep_ids) + masks_box_begin = [0, im_box_begin[0], im_box_begin[1]] + masks_box_size = [-1, im_box_size[0], im_box_size[1]] + new_masks = tf.slice( + masks_of_boxes_completely_inside_window, + masks_box_begin, masks_box_size) + result.append(new_masks) + + if keypoints is not None: + keypoints_of_boxes_inside_window = tf.gather(keypoints, inside_window_ids) + keypoints_of_boxes_completely_inside_window = tf.gather( + keypoints_of_boxes_inside_window, keep_ids) + new_keypoints = keypoint_ops.change_coordinate_frame( + keypoints_of_boxes_completely_inside_window, im_box_rank1) + if clip_boxes: + new_keypoints = keypoint_ops.prune_outside_window(new_keypoints, + [0.0, 0.0, 1.0, 1.0]) + result.append(new_keypoints) + + return tuple(result) + + +def random_crop_image(image, + boxes, + labels, + label_weights, + label_confidences=None, + multiclass_scores=None, + masks=None, + keypoints=None, + min_object_covered=1.0, + aspect_ratio_range=(0.75, 1.33), + area_range=(0.1, 1.0), + overlap_thresh=0.3, + clip_boxes=True, + random_coef=0.0, + seed=None, + preprocess_vars_cache=None): + """Randomly crops the image. + + Given the input image and its bounding boxes, this op randomly + crops a subimage. Given a user-provided set of input constraints, + the crop window is resampled until it satisfies these constraints. + If within 100 trials it is unable to find a valid crop, the original + image is returned. See the Args section for a description of the input + constraints. Both input boxes and returned Boxes are in normalized + form (e.g., lie in the unit square [0, 1]). + This function will return the original image with probability random_coef. + + Note: Keypoint coordinates that are outside the crop will be set to NaN, which + is consistent with the original keypoint encoding for non-existing keypoints. + + Args: + image: rank 3 float32 tensor contains 1 image -> [height, width, channels] + with pixel values varying between [0, 1]. + boxes: rank 2 float32 tensor containing the bounding boxes with shape + [num_instances, 4]. + Boxes are in normalized form meaning their coordinates vary + between [0, 1]. + Each row is in the form of [ymin, xmin, ymax, xmax]. + labels: rank 1 int32 tensor containing the object classes. + label_weights: float32 tensor of shape [num_instances] representing the + weight for each box. + label_confidences: (optional) float32 tensor of shape [num_instances]. + representing the confidence for each box. + multiclass_scores: (optional) float32 tensor of shape + [num_instances, num_classes] representing the score for each box for each + class. + masks: (optional) rank 3 float32 tensor with shape + [num_instances, height, width] containing instance masks. The masks + are of the same height, width as the input `image`. + keypoints: (optional) rank 3 float32 tensor with shape + [num_instances, num_keypoints, 2]. The keypoints are in y-x + normalized coordinates. + min_object_covered: the cropped image must cover at least this fraction of + at least one of the input bounding boxes. + aspect_ratio_range: allowed range for aspect ratio of cropped image. + area_range: allowed range for area ratio between cropped image and the + original image. 
+ overlap_thresh: minimum overlap thresh with new cropped + image to keep the box. + clip_boxes: whether to clip the boxes to the cropped image. + random_coef: a random coefficient that defines the chance of getting the + original image. If random_coef is 0, we will always get the + cropped image, and if it is 1.0, we will always get the + original image. + seed: random seed. + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + image: Image shape will be [new_height, new_width, channels]. + boxes: boxes which is the same rank as input boxes. Boxes are in normalized + form. + labels: new labels. + + If label_weights, multiclass_scores, masks, or keypoints is not None, the + function also returns: + label_weights: rank 1 float32 tensor with shape [num_instances]. + multiclass_scores: rank 2 float32 tensor with shape + [num_instances, num_classes] + masks: rank 3 float32 tensor with shape [num_instances, height, width] + containing instance masks. + keypoints: rank 3 float32 tensor with shape + [num_instances, num_keypoints, 2] + """ + + def strict_random_crop_image_fn(): + return _strict_random_crop_image( + image, + boxes, + labels, + label_weights, + label_confidences=label_confidences, + multiclass_scores=multiclass_scores, + masks=masks, + keypoints=keypoints, + min_object_covered=min_object_covered, + aspect_ratio_range=aspect_ratio_range, + area_range=area_range, + overlap_thresh=overlap_thresh, + clip_boxes=clip_boxes, + preprocess_vars_cache=preprocess_vars_cache) + + # avoids tf.cond to make faster RCNN training on borg. See b/140057645. + if random_coef < sys.float_info.min: + result = strict_random_crop_image_fn() + else: + generator_func = functools.partial(tf.random_uniform, [], seed=seed) + do_a_crop_random = _get_or_create_preprocess_rand_vars( + generator_func, preprocessor_cache.PreprocessorCache.CROP_IMAGE, + preprocess_vars_cache) + do_a_crop_random = tf.greater(do_a_crop_random, random_coef) + + outputs = [image, boxes, labels] + + if label_weights is not None: + outputs.append(label_weights) + if label_confidences is not None: + outputs.append(label_confidences) + if multiclass_scores is not None: + outputs.append(multiclass_scores) + if masks is not None: + outputs.append(masks) + if keypoints is not None: + outputs.append(keypoints) + + result = tf.cond(do_a_crop_random, strict_random_crop_image_fn, + lambda: tuple(outputs)) + return result + + +def random_pad_image(image, + boxes, + keypoints=None, + min_image_size=None, + max_image_size=None, + pad_color=None, + seed=None, + preprocess_vars_cache=None): + """Randomly pads the image. + + This function randomly pads the image with zeros. The final size of the + padded image will be between min_image_size and max_image_size. + if min_image_size is smaller than the input image size, min_image_size will + be set to the input image size. The same for max_image_size. The input image + will be located at a uniformly random location inside the padded image. + The relative location of the boxes to the original image will remain the same. + + Args: + image: rank 3 float32 tensor containing 1 image -> [height, width, channels] + with pixel values varying between [0, 1]. + boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. + Boxes are in normalized form meaning their coordinates vary + between [0, 1]. 
+ Each row is in the form of [ymin, xmin, ymax, xmax]. + keypoints: (optional) rank 3 float32 tensor with shape + [N, num_keypoints, 2]. The keypoints are in y-x normalized + coordinates. + min_image_size: a tensor of size [min_height, min_width], type tf.int32. + If passed as None, will be set to image size + [height, width]. + max_image_size: a tensor of size [max_height, max_width], type tf.int32. + If passed as None, will be set to twice the + image [height * 2, width * 2]. + pad_color: padding color. A rank 1 tensor of [channels] with dtype= + tf.float32. if set as None, it will be set to average color of + the input image. + seed: random seed. + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + image: Image shape will be [new_height, new_width, channels]. + boxes: boxes which is the same rank as input boxes. Boxes are in normalized + form. + + if keypoints is not None, the function also returns: + keypoints: rank 3 float32 tensor with shape [N, num_keypoints, 2] + """ + if pad_color is None: + pad_color = tf.reduce_mean(image, axis=[0, 1]) + + image_shape = tf.shape(image) + image_height = image_shape[0] + image_width = image_shape[1] + + if max_image_size is None: + max_image_size = tf.stack([image_height * 2, image_width * 2]) + max_image_size = tf.maximum(max_image_size, + tf.stack([image_height, image_width])) + + if min_image_size is None: + min_image_size = tf.stack([image_height, image_width]) + min_image_size = tf.maximum(min_image_size, + tf.stack([image_height, image_width])) + + target_height = tf.cond( + max_image_size[0] > min_image_size[0], + lambda: _random_integer(min_image_size[0], max_image_size[0], seed), + lambda: max_image_size[0]) + + target_width = tf.cond( + max_image_size[1] > min_image_size[1], + lambda: _random_integer(min_image_size[1], max_image_size[1], seed), + lambda: max_image_size[1]) + + offset_height = tf.cond( + target_height > image_height, + lambda: _random_integer(0, target_height - image_height, seed), + lambda: tf.constant(0, dtype=tf.int32)) + + offset_width = tf.cond( + target_width > image_width, + lambda: _random_integer(0, target_width - image_width, seed), + lambda: tf.constant(0, dtype=tf.int32)) + + gen_func = lambda: (target_height, target_width, offset_height, offset_width) + params = _get_or_create_preprocess_rand_vars( + gen_func, preprocessor_cache.PreprocessorCache.PAD_IMAGE, + preprocess_vars_cache) + target_height, target_width, offset_height, offset_width = params + + new_image = tf.image.pad_to_bounding_box( + image, + offset_height=offset_height, + offset_width=offset_width, + target_height=target_height, + target_width=target_width) + + # Setting color of the padded pixels + image_ones = tf.ones_like(image) + image_ones_padded = tf.image.pad_to_bounding_box( + image_ones, + offset_height=offset_height, + offset_width=offset_width, + target_height=target_height, + target_width=target_width) + image_color_padded = (1.0 - image_ones_padded) * pad_color + new_image += image_color_padded + + # setting boxes + new_window = tf.cast( + tf.stack([ + -offset_height, -offset_width, target_height - offset_height, + target_width - offset_width + ]), + dtype=tf.float32) + new_window /= tf.cast( + tf.stack([image_height, image_width, image_height, image_width]), + dtype=tf.float32) + boxlist = box_list.BoxList(boxes) + new_boxlist = 
box_list_ops.change_coordinate_frame(boxlist, new_window) + new_boxes = new_boxlist.get() + + result = [new_image, new_boxes] + + if keypoints is not None: + new_keypoints = keypoint_ops.change_coordinate_frame(keypoints, new_window) + result.append(new_keypoints) + + return tuple(result) + + +def random_absolute_pad_image(image, + boxes, + max_height_padding, + max_width_padding, + pad_color=None, + seed=None, + preprocess_vars_cache=None): + """Randomly pads the image by small absolute amounts. + + As random_pad_image above, but the padding is of size [0, max_height_padding] + or [0, max_width_padding] instead of padding to a fixed size of + max_height_padding for all images. + + Args: + image: rank 3 float32 tensor containing 1 image -> [height, width, channels] + with pixel values varying between [0, 1]. + boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. + Boxes are in normalized form meaning their coordinates vary + between [0, 1]. + Each row is in the form of [ymin, xmin, ymax, xmax]. + max_height_padding: a scalar tf.int32 tensor denoting the maximum amount of + height padding. The padding will be chosen uniformly at + random from [0, max_height_padding). + max_width_padding: a scalar tf.int32 tensor denoting the maximum amount of + width padding. The padding will be chosen uniformly at + random from [0, max_width_padding). + pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32. + if set as None, it will be set to average color of the input + image. + seed: random seed. + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + image: Image shape will be [new_height, new_width, channels]. + boxes: boxes which is the same rank as input boxes. Boxes are in normalized + form. + """ + min_image_size = tf.shape(image)[:2] + max_image_size = min_image_size + tf.cast( + [max_height_padding, max_width_padding], dtype=tf.int32) + return random_pad_image(image, boxes, min_image_size=min_image_size, + max_image_size=max_image_size, pad_color=pad_color, + seed=seed, + preprocess_vars_cache=preprocess_vars_cache) + + +def random_crop_pad_image(image, + boxes, + labels, + label_weights, + label_confidences=None, + multiclass_scores=None, + min_object_covered=1.0, + aspect_ratio_range=(0.75, 1.33), + area_range=(0.1, 1.0), + overlap_thresh=0.3, + clip_boxes=True, + random_coef=0.0, + min_padded_size_ratio=(1.0, 1.0), + max_padded_size_ratio=(2.0, 2.0), + pad_color=None, + seed=None, + preprocess_vars_cache=None): + """Randomly crops and pads the image. + + Given an input image and its bounding boxes, this op first randomly crops + the image and then randomly pads the image with background values. Parameters + min_padded_size_ratio and max_padded_size_ratio, determine the range of the + final output image size. Specifically, the final image size will have a size + in the range of min_padded_size_ratio * tf.shape(image) and + max_padded_size_ratio * tf.shape(image). Note that these ratios are with + respect to the size of the original image, so we can't capture the same + effect easily by independently applying RandomCropImage + followed by RandomPadImage. + + Args: + image: rank 3 float32 tensor containing 1 image -> [height, width, channels] + with pixel values varying between [0, 1]. + boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. 
+ Boxes are in normalized form meaning their coordinates vary + between [0, 1]. + Each row is in the form of [ymin, xmin, ymax, xmax]. + labels: rank 1 int32 tensor containing the object classes. + label_weights: rank 1 float32 containing the label weights. + label_confidences: rank 1 float32 containing the label confidences. + multiclass_scores: (optional) float32 tensor of shape + [num_instances, num_classes] representing the score for each box for each + class. + min_object_covered: the cropped image must cover at least this fraction of + at least one of the input bounding boxes. + aspect_ratio_range: allowed range for aspect ratio of cropped image. + area_range: allowed range for area ratio between cropped image and the + original image. + overlap_thresh: minimum overlap thresh with new cropped + image to keep the box. + clip_boxes: whether to clip the boxes to the cropped image. + random_coef: a random coefficient that defines the chance of getting the + original image. If random_coef is 0, we will always get the + cropped image, and if it is 1.0, we will always get the + original image. + min_padded_size_ratio: min ratio of padded image height and width to the + input image's height and width. + max_padded_size_ratio: max ratio of padded image height and width to the + input image's height and width. + pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32. + if set as None, it will be set to average color of the randomly + cropped image. + seed: random seed. + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + padded_image: padded image. + padded_boxes: boxes which is the same rank as input boxes. Boxes are in + normalized form. + cropped_labels: cropped labels. + if label_weights is not None also returns: + cropped_label_weights: cropped label weights. + if multiclass_scores is not None also returns: + cropped_multiclass_scores: cropped_multiclass_scores. 
+ + """ + image_size = tf.shape(image) + image_height = image_size[0] + image_width = image_size[1] + result = random_crop_image( + image=image, + boxes=boxes, + labels=labels, + label_weights=label_weights, + label_confidences=label_confidences, + multiclass_scores=multiclass_scores, + min_object_covered=min_object_covered, + aspect_ratio_range=aspect_ratio_range, + area_range=area_range, + overlap_thresh=overlap_thresh, + clip_boxes=clip_boxes, + random_coef=random_coef, + seed=seed, + preprocess_vars_cache=preprocess_vars_cache) + + cropped_image, cropped_boxes, cropped_labels = result[:3] + + min_image_size = tf.cast( + tf.cast(tf.stack([image_height, image_width]), dtype=tf.float32) * + min_padded_size_ratio, + dtype=tf.int32) + max_image_size = tf.cast( + tf.cast(tf.stack([image_height, image_width]), dtype=tf.float32) * + max_padded_size_ratio, + dtype=tf.int32) + + padded_image, padded_boxes = random_pad_image( + cropped_image, + cropped_boxes, + min_image_size=min_image_size, + max_image_size=max_image_size, + pad_color=pad_color, + seed=seed, + preprocess_vars_cache=preprocess_vars_cache) + + cropped_padded_output = (padded_image, padded_boxes, cropped_labels) + + index = 3 + if label_weights is not None: + cropped_label_weights = result[index] + cropped_padded_output += (cropped_label_weights,) + index += 1 + + if label_confidences is not None: + cropped_label_confidences = result[index] + cropped_padded_output += (cropped_label_confidences,) + index += 1 + + if multiclass_scores is not None: + cropped_multiclass_scores = result[index] + cropped_padded_output += (cropped_multiclass_scores,) + + return cropped_padded_output + + +def random_crop_to_aspect_ratio(image, + boxes, + labels, + label_weights, + label_confidences=None, + multiclass_scores=None, + masks=None, + keypoints=None, + aspect_ratio=1.0, + overlap_thresh=0.3, + clip_boxes=True, + seed=None, + preprocess_vars_cache=None): + """Randomly crops an image to the specified aspect ratio. + + Randomly crops the a portion of the image such that the crop is of the + specified aspect ratio, and the crop is as large as possible. If the specified + aspect ratio is larger than the aspect ratio of the image, this op will + randomly remove rows from the top and bottom of the image. If the specified + aspect ratio is less than the aspect ratio of the image, this op will randomly + remove cols from the left and right of the image. If the specified aspect + ratio is the same as the aspect ratio of the image, this op will return the + image. + + Args: + image: rank 3 float32 tensor contains 1 image -> [height, width, channels] + with pixel values varying between [0, 1]. + boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. + Boxes are in normalized form meaning their coordinates vary + between [0, 1]. + Each row is in the form of [ymin, xmin, ymax, xmax]. + labels: rank 1 int32 tensor containing the object classes. + label_weights: float32 tensor of shape [num_instances] representing the + weight for each box. + label_confidences: (optional) float32 tensor of shape [num_instances] + representing the confidence for each box. + multiclass_scores: (optional) float32 tensor of shape + [num_instances, num_classes] representing the score for each box for each + class. + masks: (optional) rank 3 float32 tensor with shape + [num_instances, height, width] containing instance masks. The masks + are of the same height, width as the input `image`. 
+    keypoints: (optional) rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]. The keypoints are in y-x
+               normalized coordinates.
+    aspect_ratio: the aspect ratio of cropped image.
+    overlap_thresh: minimum overlap thresh with new cropped
+                    image to keep the box.
+    clip_boxes: whether to clip the boxes to the cropped image.
+    seed: random seed.
+    preprocess_vars_cache: PreprocessorCache object that records previously
+                           performed augmentations. Updated in-place. If this
+                           function is called multiple times with the same
+                           non-null cache, it will perform deterministically.
+
+  Returns:
+    image: image which is the same rank as input image.
+    boxes: boxes which is the same rank as input boxes.
+           Boxes are in normalized form.
+    labels: new labels.
+
+    If label_weights, masks, keypoints, or multiclass_scores is not None, the
+    function also returns:
+    label_weights: rank 1 float32 tensor with shape [num_instances].
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+           containing instance masks.
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+    multiclass_scores: rank 2 float32 tensor with shape
+                       [num_instances, num_classes]
+
+  Raises:
+    ValueError: If image is not a 3D tensor.
+  """
+  if len(image.get_shape()) != 3:
+    raise ValueError('Image should be 3D tensor')
+
+  with tf.name_scope('RandomCropToAspectRatio', values=[image]):
+    image_shape = tf.shape(image)
+    orig_height = image_shape[0]
+    orig_width = image_shape[1]
+    orig_aspect_ratio = tf.cast(
+        orig_width, dtype=tf.float32) / tf.cast(
+            orig_height, dtype=tf.float32)
+    new_aspect_ratio = tf.constant(aspect_ratio, dtype=tf.float32)
+
+    def target_height_fn():
+      return tf.cast(
+          tf.round(tf.cast(orig_width, dtype=tf.float32) / new_aspect_ratio),
+          dtype=tf.int32)
+
+    target_height = tf.cond(orig_aspect_ratio >= new_aspect_ratio,
+                            lambda: orig_height, target_height_fn)
+
+    def target_width_fn():
+      return tf.cast(
+          tf.round(tf.cast(orig_height, dtype=tf.float32) * new_aspect_ratio),
+          dtype=tf.int32)
+
+    target_width = tf.cond(orig_aspect_ratio <= new_aspect_ratio,
+                           lambda: orig_width, target_width_fn)
+
+    # either offset_height = 0 and offset_width is randomly chosen from
+    # [0, orig_width - target_width), or else offset_width = 0 and
+    # offset_height is randomly chosen from [0, orig_height - target_height)
+    offset_height = _random_integer(0, orig_height - target_height + 1, seed)
+    offset_width = _random_integer(0, orig_width - target_width + 1, seed)
+
+    generator_func = lambda: (offset_height, offset_width)
+    offset_height, offset_width = _get_or_create_preprocess_rand_vars(
+        generator_func,
+        preprocessor_cache.PreprocessorCache.CROP_TO_ASPECT_RATIO,
+        preprocess_vars_cache)
+
+    new_image = tf.image.crop_to_bounding_box(
+        image, offset_height, offset_width, target_height, target_width)
+
+    im_box = tf.stack([
+        tf.cast(offset_height, dtype=tf.float32) /
+        tf.cast(orig_height, dtype=tf.float32),
+        tf.cast(offset_width, dtype=tf.float32) /
+        tf.cast(orig_width, dtype=tf.float32),
+        tf.cast(offset_height + target_height, dtype=tf.float32) /
+        tf.cast(orig_height, dtype=tf.float32),
+        tf.cast(offset_width + target_width, dtype=tf.float32) /
+        tf.cast(orig_width, dtype=tf.float32)
+    ])
+
+    boxlist = box_list.BoxList(boxes)
+    boxlist.add_field('labels', labels)
+
+    boxlist.add_field('label_weights', label_weights)
+
+    if label_confidences is not None:
+      boxlist.add_field('label_confidences', label_confidences)
+
+    if multiclass_scores is not None:
boxlist.add_field('multiclass_scores', multiclass_scores) + + im_boxlist = box_list.BoxList(tf.expand_dims(im_box, 0)) + + # remove boxes whose overlap with the image is less than overlap_thresh + overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes( + boxlist, im_boxlist, overlap_thresh) + + # change the coordinate of the remaining boxes + new_labels = overlapping_boxlist.get_field('labels') + new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist, + im_box) + if clip_boxes: + new_boxlist = box_list_ops.clip_to_window( + new_boxlist, tf.constant([0.0, 0.0, 1.0, 1.0], tf.float32)) + new_boxes = new_boxlist.get() + + result = [new_image, new_boxes, new_labels] + + new_label_weights = overlapping_boxlist.get_field('label_weights') + result.append(new_label_weights) + + if label_confidences is not None: + new_label_confidences = ( + overlapping_boxlist.get_field('label_confidences')) + result.append(new_label_confidences) + + if multiclass_scores is not None: + new_multiclass_scores = overlapping_boxlist.get_field('multiclass_scores') + result.append(new_multiclass_scores) + + if masks is not None: + masks_inside_window = tf.gather(masks, keep_ids) + masks_box_begin = tf.stack([0, offset_height, offset_width]) + masks_box_size = tf.stack([-1, target_height, target_width]) + new_masks = tf.slice(masks_inside_window, masks_box_begin, masks_box_size) + result.append(new_masks) + + if keypoints is not None: + keypoints_inside_window = tf.gather(keypoints, keep_ids) + new_keypoints = keypoint_ops.change_coordinate_frame( + keypoints_inside_window, im_box) + if clip_boxes: + new_keypoints = keypoint_ops.prune_outside_window(new_keypoints, + [0.0, 0.0, 1.0, 1.0]) + result.append(new_keypoints) + + return tuple(result) + + +def random_pad_to_aspect_ratio(image, + boxes, + masks=None, + keypoints=None, + aspect_ratio=1.0, + min_padded_size_ratio=(1.0, 1.0), + max_padded_size_ratio=(2.0, 2.0), + seed=None, + preprocess_vars_cache=None): + """Randomly zero pads an image to the specified aspect ratio. + + Pads the image so that the resulting image will have the specified aspect + ratio without scaling less than the min_padded_size_ratio or more than the + max_padded_size_ratio. If the min_padded_size_ratio or max_padded_size_ratio + is lower than what is possible to maintain the aspect ratio, then this method + will use the least padding to achieve the specified aspect ratio. + + Args: + image: rank 3 float32 tensor contains 1 image -> [height, width, channels] + with pixel values varying between [0, 1]. + boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. + Boxes are in normalized form meaning their coordinates vary + between [0, 1]. + Each row is in the form of [ymin, xmin, ymax, xmax]. + masks: (optional) rank 3 float32 tensor with shape + [num_instances, height, width] containing instance masks. The masks + are of the same height, width as the input `image`. + keypoints: (optional) rank 3 float32 tensor with shape + [num_instances, num_keypoints, 2]. The keypoints are in y-x + normalized coordinates. + aspect_ratio: aspect ratio of the final image. + min_padded_size_ratio: min ratio of padded image height and width to the + input image's height and width. + max_padded_size_ratio: max ratio of padded image height and width to the + input image's height and width. + seed: random seed. + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. 
If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + image: image which is the same rank as input image. + boxes: boxes which is the same rank as input boxes. + Boxes are in normalized form. + labels: new labels. + + If masks, or keypoints is not None, the function also returns: + masks: rank 3 float32 tensor with shape [num_instances, height, width] + containing instance masks. + keypoints: rank 3 float32 tensor with shape + [num_instances, num_keypoints, 2] + + Raises: + ValueError: If image is not a 3D tensor. + """ + if len(image.get_shape()) != 3: + raise ValueError('Image should be 3D tensor') + + with tf.name_scope('RandomPadToAspectRatio', values=[image]): + image_shape = tf.shape(image) + image_height = tf.cast(image_shape[0], dtype=tf.float32) + image_width = tf.cast(image_shape[1], dtype=tf.float32) + image_aspect_ratio = image_width / image_height + new_aspect_ratio = tf.constant(aspect_ratio, dtype=tf.float32) + target_height = tf.cond( + image_aspect_ratio <= new_aspect_ratio, + lambda: image_height, + lambda: image_width / new_aspect_ratio) + target_width = tf.cond( + image_aspect_ratio >= new_aspect_ratio, + lambda: image_width, + lambda: image_height * new_aspect_ratio) + + min_height = tf.maximum( + min_padded_size_ratio[0] * image_height, target_height) + min_width = tf.maximum( + min_padded_size_ratio[1] * image_width, target_width) + max_height = tf.maximum( + max_padded_size_ratio[0] * image_height, target_height) + max_width = tf.maximum( + max_padded_size_ratio[1] * image_width, target_width) + + max_scale = tf.minimum(max_height / target_height, max_width / target_width) + min_scale = tf.minimum( + max_scale, + tf.maximum(min_height / target_height, min_width / target_width)) + + generator_func = functools.partial(tf.random_uniform, [], + min_scale, max_scale, seed=seed) + scale = _get_or_create_preprocess_rand_vars( + generator_func, + preprocessor_cache.PreprocessorCache.PAD_TO_ASPECT_RATIO, + preprocess_vars_cache) + + target_height = tf.round(scale * target_height) + target_width = tf.round(scale * target_width) + + new_image = tf.image.pad_to_bounding_box( + image, 0, 0, tf.cast(target_height, dtype=tf.int32), + tf.cast(target_width, dtype=tf.int32)) + + im_box = tf.stack([ + 0.0, + 0.0, + target_height / image_height, + target_width / image_width + ]) + boxlist = box_list.BoxList(boxes) + new_boxlist = box_list_ops.change_coordinate_frame(boxlist, im_box) + new_boxes = new_boxlist.get() + + result = [new_image, new_boxes] + + if masks is not None: + new_masks = tf.expand_dims(masks, -1) + new_masks = tf.image.pad_to_bounding_box( + new_masks, 0, 0, tf.cast(target_height, dtype=tf.int32), + tf.cast(target_width, dtype=tf.int32)) + new_masks = tf.squeeze(new_masks, [-1]) + result.append(new_masks) + + if keypoints is not None: + new_keypoints = keypoint_ops.change_coordinate_frame(keypoints, im_box) + result.append(new_keypoints) + + return tuple(result) + + +def random_black_patches(image, + max_black_patches=10, + probability=0.5, + size_to_image_ratio=0.1, + random_seed=None, + preprocess_vars_cache=None): + """Randomly adds some black patches to the image. + + This op adds up to max_black_patches square black patches of a fixed size + to the image where size is specified via the size_to_image_ratio parameter. + + Args: + image: rank 3 float32 tensor containing 1 image -> [height, width, channels] + with pixel values varying between [0, 1]. 
+ max_black_patches: number of times that the function tries to add a + black box to the image. + probability: at each try, what is the chance of adding a box. + size_to_image_ratio: Determines the ratio of the size of the black patches + to the size of the image. + box_size = size_to_image_ratio * + min(image_width, image_height) + random_seed: random seed. + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + image + """ + def add_black_patch_to_image(image, idx): + """Function for adding one patch to the image. + + Args: + image: image + idx: counter for number of patches that could have been added + + Returns: + image with a randomly added black box + """ + image_shape = tf.shape(image) + image_height = image_shape[0] + image_width = image_shape[1] + box_size = tf.cast( + tf.multiply( + tf.minimum( + tf.cast(image_height, dtype=tf.float32), + tf.cast(image_width, dtype=tf.float32)), size_to_image_ratio), + dtype=tf.int32) + + generator_func = functools.partial(tf.random_uniform, [], minval=0.0, + maxval=(1.0 - size_to_image_ratio), + seed=random_seed) + normalized_y_min = _get_or_create_preprocess_rand_vars( + generator_func, + preprocessor_cache.PreprocessorCache.ADD_BLACK_PATCH, + preprocess_vars_cache, key=str(idx) + 'y') + normalized_x_min = _get_or_create_preprocess_rand_vars( + generator_func, + preprocessor_cache.PreprocessorCache.ADD_BLACK_PATCH, + preprocess_vars_cache, key=str(idx) + 'x') + + y_min = tf.cast( + normalized_y_min * tf.cast(image_height, dtype=tf.float32), + dtype=tf.int32) + x_min = tf.cast( + normalized_x_min * tf.cast(image_width, dtype=tf.float32), + dtype=tf.int32) + black_box = tf.ones([box_size, box_size, 3], dtype=tf.float32) + mask = 1.0 - tf.image.pad_to_bounding_box(black_box, y_min, x_min, + image_height, image_width) + image = tf.multiply(image, mask) + return image + + with tf.name_scope('RandomBlackPatchInImage', values=[image]): + for idx in range(max_black_patches): + generator_func = functools.partial(tf.random_uniform, [], + minval=0.0, maxval=1.0, + dtype=tf.float32, seed=random_seed) + random_prob = _get_or_create_preprocess_rand_vars( + generator_func, + preprocessor_cache.PreprocessorCache.BLACK_PATCHES, + preprocess_vars_cache, key=idx) + image = tf.cond( + tf.greater(random_prob, probability), lambda: image, + functools.partial(add_black_patch_to_image, image=image, idx=idx)) + return image + + +def random_jpeg_quality(image, + min_jpeg_quality=0, + max_jpeg_quality=100, + random_coef=0.0, + seed=None, + preprocess_vars_cache=None): + """Randomly encode the image to a random JPEG quality level. + + Args: + image: rank 3 float32 tensor with shape [height, width, channels] and + values in the range [0, 255]. + min_jpeg_quality: An int for the lower bound for selecting a random jpeg + quality level. + max_jpeg_quality: An int for the upper bound for selecting a random jpeg + quality level. + random_coef: a random coefficient that defines the chance of getting the + original image. If random_coef is 0, we will always get the encoded image, + and if it is 1.0, we will always get the original image. + seed: random seed. + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this function is called + multiple times with the same non-null cache, it will perform + deterministically. 
+
+  Returns:
+    image: image which is the same shape as input image.
+  """
+  def _adjust_jpeg_quality():
+    """Encodes the image as jpeg with a random quality and then decodes."""
+    generator_func = functools.partial(
+        tf.random_uniform, [],
+        minval=min_jpeg_quality,
+        maxval=max_jpeg_quality,
+        dtype=tf.int32,
+        seed=seed)
+    quality = _get_or_create_preprocess_rand_vars(
+        generator_func, preprocessor_cache.PreprocessorCache.JPEG_QUALITY,
+        preprocess_vars_cache, key='quality')
+
+    # Need to convert to uint8 before calling adjust_jpeg_quality since it
+    # assumes that float features are in the range [0, 1], whereas here the
+    # range is [0, 255].
+    image_uint8 = tf.cast(image, tf.uint8)
+    adjusted_image = tf.image.adjust_jpeg_quality(image_uint8, quality)
+    return tf.cast(adjusted_image, tf.float32)
+
+  with tf.name_scope('RandomJpegQuality', values=[image]):
+    generator_func = functools.partial(tf.random_uniform, [], seed=seed)
+    do_encoding_random = _get_or_create_preprocess_rand_vars(
+        generator_func, preprocessor_cache.PreprocessorCache.JPEG_QUALITY,
+        preprocess_vars_cache)
+    do_encoding_random = tf.greater_equal(do_encoding_random, random_coef)
+    image = tf.cond(do_encoding_random, _adjust_jpeg_quality,
+                    lambda: tf.cast(image, tf.float32))
+
+  return image
+
+
+def random_downscale_to_target_pixels(image,
+                                      masks=None,
+                                      min_target_pixels=300000,
+                                      max_target_pixels=800000,
+                                      random_coef=0.0,
+                                      seed=None,
+                                      preprocess_vars_cache=None):
+  """Randomly downscales the image to a target number of pixels.
+
+  If the image contains fewer than the chosen target number of pixels, it will
+  not be downscaled.
+
+  Args:
+    image: Rank 3 float32 tensor with shape [height, width, channels] and
+      values in the range [0, 255].
+    masks: (optional) Rank 3 float32 tensor with shape
+      [num_instances, height, width] containing instance masks. The masks are
+      of the same height, width as the input `image`.
+    min_target_pixels: Integer. An inclusive lower bound for the target
+      number of pixels.
+    max_target_pixels: Integer. An exclusive upper bound for the target
+      number of pixels.
+    random_coef: Float. Random coefficient that defines the chance of getting
+      the original image. If random_coef is 0, we will always apply
+      downscaling, and if it is 1.0, we will always get the original image.
+    seed: (optional) Integer. Random seed.
+    preprocess_vars_cache: (optional) PreprocessorCache object that records
+      previously performed augmentations. Updated in-place. If this function is
+      called multiple times with the same non-null cache, it will perform
+      deterministically.
+
+  Returns:
+    Tuple with elements:
+      image: Resized image which is the same rank as input image.
+      masks: If masks is not None, resized masks which are the same rank as
+        the input masks.
+
+  Raises:
+    ValueError: If min_target_pixels or max_target_pixels are not positive.
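+
+  Example (an illustrative sketch; the 1000x1000 input is an assumption):
+    image = tf.zeros([1000, 1000, 3], dtype=tf.float32)  # 1e6 pixels
+    # With the defaults, a target in [300000, 800000) is sampled and, since
+    # the image is larger than the target, each side is scaled by
+    # sqrt(target_pixels / 1e6).
+    (image,) = random_downscale_to_target_pixels(image)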
+ """ + if min_target_pixels <= 0: + raise ValueError('Minimum target pixels must be positive') + if max_target_pixels <= 0: + raise ValueError('Maximum target pixels must be positive') + + def _resize_image_to_target(target_height, target_width): + # pylint: disable=unbalanced-tuple-unpacking + new_image, _ = resize_image(image, None, target_height, target_width) + return (new_image,) + + def _resize_image_and_masks_to_target(target_height, target_width): + # pylint: disable=unbalanced-tuple-unpacking + new_image, new_masks, _ = resize_image(image, masks, target_height, + target_width) + return new_image, new_masks + + with tf.name_scope('RandomDownscaleToTargetPixels', values=[image]): + generator_fn = functools.partial(tf.random_uniform, [], seed=seed) + do_downscale_random = _get_or_create_preprocess_rand_vars( + generator_fn, + preprocessor_cache.PreprocessorCache.DOWNSCALE_TO_TARGET_PIXELS, + preprocess_vars_cache) + do_downscale_random = tf.greater_equal(do_downscale_random, random_coef) + + generator_fn = functools.partial( + tf.random_uniform, [], + minval=min_target_pixels, + maxval=max_target_pixels, + dtype=tf.int32, + seed=seed) + target_pixels = _get_or_create_preprocess_rand_vars( + generator_fn, + preprocessor_cache.PreprocessorCache.DOWNSCALE_TO_TARGET_PIXELS, + preprocess_vars_cache, + key='target_pixels') + + image_shape = tf.shape(image) + image_height = image_shape[0] + image_width = image_shape[1] + image_pixels = image_height * image_width + scale_factor = tf.sqrt( + tf.cast(target_pixels, dtype=tf.float32) / + tf.cast(image_pixels, dtype=tf.float32)) + target_height = tf.cast( + scale_factor * tf.cast(image_height, dtype=tf.float32), dtype=tf.int32) + target_width = tf.cast( + scale_factor * tf.cast(image_width, dtype=tf.float32), dtype=tf.int32) + image_larger_than_target = tf.greater(image_pixels, target_pixels) + + should_apply_resize = tf.logical_and(do_downscale_random, + image_larger_than_target) + if masks is not None: + resize_fn = functools.partial(_resize_image_and_masks_to_target, + target_height, target_width) + return tf.cond(should_apply_resize, resize_fn, + lambda: (tf.cast(image, dtype=tf.float32), masks)) + else: + resize_fn = lambda: _resize_image_to_target(target_height, target_width) + return tf.cond(should_apply_resize, resize_fn, + lambda: (tf.cast(image, dtype=tf.float32),)) + + +def random_patch_gaussian(image, + min_patch_size=1, + max_patch_size=250, + min_gaussian_stddev=0.0, + max_gaussian_stddev=1.0, + random_coef=0.0, + seed=None, + preprocess_vars_cache=None): + """Randomly applies gaussian noise to a random patch on the image. + + The gaussian noise is applied to the image with values scaled to the range + [0.0, 1.0]. The result of applying gaussian noise to the scaled image is + clipped to be within the range [0.0, 1.0], equivalent to the range + [0.0, 255.0] after rescaling the image back. + + See "Improving Robustness Without Sacrificing Accuracy with Patch Gaussian + Augmentation " by Lopes et al., 2019, for further details. + https://arxiv.org/abs/1906.02611 + + Args: + image: Rank 3 float32 tensor with shape [height, width, channels] and + values in the range [0.0, 255.0]. + min_patch_size: Integer. An inclusive lower bound for the patch size. + max_patch_size: Integer. An exclusive upper bound for the patch size. + min_gaussian_stddev: Float. An inclusive lower bound for the standard + deviation of the gaussian noise. + max_gaussian_stddev: Float. An exclusive upper bound for the standard + deviation of the gaussian noise. 
+ random_coef: Float. Random coefficient that defines the chance of getting + the original image. If random_coef is 0.0, we will always apply + downscaling, and if it is 1.0, we will always get the original image. + seed: (optional) Integer. Random seed. + preprocess_vars_cache: (optional) PreprocessorCache object that records + previously performed augmentations. Updated in-place. If this function is + called multiple times with the same non-null cache, it will perform + deterministically. + + Returns: + Rank 3 float32 tensor with same shape as the input image and with gaussian + noise applied within a random patch. + + Raises: + ValueError: If min_patch_size is < 1. + """ + if min_patch_size < 1: + raise ValueError('Minimum patch size must be >= 1.') + + get_or_create_rand_vars_fn = functools.partial( + _get_or_create_preprocess_rand_vars, + function_id=preprocessor_cache.PreprocessorCache.PATCH_GAUSSIAN, + preprocess_vars_cache=preprocess_vars_cache) + + def _apply_patch_gaussian(image): + """Applies a patch gaussian with random size, location, and stddev.""" + patch_size = get_or_create_rand_vars_fn( + functools.partial( + tf.random_uniform, [], + minval=min_patch_size, + maxval=max_patch_size, + dtype=tf.int32, + seed=seed), + key='patch_size') + gaussian_stddev = get_or_create_rand_vars_fn( + functools.partial( + tf.random_uniform, [], + minval=min_gaussian_stddev, + maxval=max_gaussian_stddev, + dtype=tf.float32, + seed=seed), + key='gaussian_stddev') + + image_shape = tf.shape(image) + y = get_or_create_rand_vars_fn( + functools.partial( + tf.random_uniform, [], + minval=0, + maxval=image_shape[0], + dtype=tf.int32, + seed=seed), + key='y') + x = get_or_create_rand_vars_fn( + functools.partial( + tf.random_uniform, [], + minval=0, + maxval=image_shape[1], + dtype=tf.int32, + seed=seed), + key='x') + gaussian = get_or_create_rand_vars_fn( + functools.partial( + tf.random.normal, + image_shape, + stddev=gaussian_stddev, + dtype=tf.float32, + seed=seed), + key='gaussian') + + scaled_image = image / 255.0 + image_plus_gaussian = tf.clip_by_value(scaled_image + gaussian, 0.0, 1.0) + patch_mask = patch_ops.get_patch_mask(y, x, patch_size, image_shape) + patch_mask = tf.expand_dims(patch_mask, -1) + patch_mask = tf.tile(patch_mask, [1, 1, image_shape[2]]) + patched_image = tf.where(patch_mask, image_plus_gaussian, scaled_image) + return patched_image * 255.0 + + with tf.name_scope('RandomPatchGaussian', values=[image]): + image = tf.cast(image, tf.float32) + patch_gaussian_random = get_or_create_rand_vars_fn( + functools.partial(tf.random_uniform, [], seed=seed)) + do_patch_gaussian = tf.greater_equal(patch_gaussian_random, random_coef) + image = tf.cond(do_patch_gaussian, + lambda: _apply_patch_gaussian(image), + lambda: image) + return image + + +# TODO(barretzoph): Put in AutoAugment Paper link when paper is live. +def autoaugment_image(image, boxes, policy_name='v0'): + """Apply an autoaugment policy to the image and boxes. + + + Args: + image: rank 3 float32 tensor contains 1 image -> [height, width, channels] + with pixel values varying between [0, 255]. + boxes: rank 2 float32 tensor containing the bounding boxes with shape + [num_instances, 4]. + Boxes are in normalized form meaning their coordinates vary + between [0, 1]. + Each row is in the form of [ymin, xmin, ymax, xmax]. + policy_name: The name of the AutoAugment policy to use. The available + options are `v0`, `v1`, `v2`, `v3` and `test`. 
`v0` is the policy used for + all of the results in the paper and was found to achieve the best results + on the COCO dataset. `v1`, `v2` and `v3` are additional good policies + found on the COCO dataset that have slight variation in what operations + were used during the search procedure along with how many operations are + applied in parallel to a single image (2 vs 3). + + + Returns: + image: the augmented image. + boxes: boxes which is the same rank as input boxes. Boxes are in normalized + form. boxes will have been augmented along with image. + """ + return autoaugment_utils.distort_image_with_autoaugment( + image, boxes, policy_name) + + +def image_to_float(image): + """Used in Faster R-CNN. Casts image pixel values to float. + + Args: + image: input image which might be in tf.uint8 or sth else format + + Returns: + image: image in tf.float32 format. + """ + with tf.name_scope('ImageToFloat', values=[image]): + image = tf.cast(image, dtype=tf.float32) + return image + + +def random_resize_method(image, target_size, preprocess_vars_cache=None): + """Uses a random resize method to resize the image to target size. + + Args: + image: a rank 3 tensor. + target_size: a list of [target_height, target_width] + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + resized image. + """ + + resized_image = _apply_with_random_selector( + image, + lambda x, method: tf.image.resize_images(x, target_size, method), + num_cases=4, + preprocess_vars_cache=preprocess_vars_cache, + key=preprocessor_cache.PreprocessorCache.RESIZE_METHOD) + + return resized_image + + +def resize_to_range(image, + masks=None, + min_dimension=None, + max_dimension=None, + method=tf.image.ResizeMethod.BILINEAR, + align_corners=False, + pad_to_max_dimension=False, + per_channel_pad_value=(0, 0, 0)): + """Resizes an image so its dimensions are within the provided value. + + The output size can be described by two cases: + 1. If the image can be rescaled so its minimum dimension is equal to the + provided value without the other dimension exceeding max_dimension, + then do so. + 2. Otherwise, resize so the largest dimension is equal to max_dimension. + + Args: + image: A 3D tensor of shape [height, width, channels] + masks: (optional) rank 3 float32 tensor with shape + [num_instances, height, width] containing instance masks. + min_dimension: (optional) (scalar) desired size of the smaller image + dimension. + max_dimension: (optional) (scalar) maximum allowed size + of the larger image dimension. + method: (optional) interpolation method used in resizing. Defaults to + BILINEAR. + align_corners: bool. If true, exactly align all 4 corners of the input + and output. Defaults to False. + pad_to_max_dimension: Whether to resize the image and pad it with zeros + so the resulting image is of the spatial size + [max_dimension, max_dimension]. If masks are included they are padded + similarly. + per_channel_pad_value: A tuple of per-channel scalar value to use for + padding. By default pads zeros. + + Returns: + Note that the position of the resized_image_shape changes based on whether + masks are present. + resized_image: A 3D tensor of shape [new_height, new_width, channels], + where the image has been resized (with bilinear interpolation) so that + min(new_height, new_width) == min_dimension or + max(new_height, new_width) == max_dimension. 
+ resized_masks: If masks is not None, also outputs masks. A 3D tensor of + shape [num_instances, new_height, new_width]. + resized_image_shape: A 1D tensor of shape [3] containing shape of the + resized image. + + Raises: + ValueError: if the image is not a 3D tensor. + """ + if len(image.get_shape()) != 3: + raise ValueError('Image should be 3D tensor') + + def _resize_landscape_image(image): + # resize a landscape image + return tf.image.resize_images( + image, tf.stack([min_dimension, max_dimension]), method=method, + align_corners=align_corners, preserve_aspect_ratio=True) + + def _resize_portrait_image(image): + # resize a portrait image + return tf.image.resize_images( + image, tf.stack([max_dimension, min_dimension]), method=method, + align_corners=align_corners, preserve_aspect_ratio=True) + + with tf.name_scope('ResizeToRange', values=[image, min_dimension]): + if image.get_shape().is_fully_defined(): + if image.get_shape()[0] < image.get_shape()[1]: + new_image = _resize_landscape_image(image) + else: + new_image = _resize_portrait_image(image) + new_size = tf.constant(new_image.get_shape().as_list()) + else: + new_image = tf.cond( + tf.less(tf.shape(image)[0], tf.shape(image)[1]), + lambda: _resize_landscape_image(image), + lambda: _resize_portrait_image(image)) + new_size = tf.shape(new_image) + + if pad_to_max_dimension: + channels = tf.unstack(new_image, axis=2) + if len(channels) != len(per_channel_pad_value): + raise ValueError('Number of channels must be equal to the length of ' + 'per-channel pad value.') + new_image = tf.stack( + [ + tf.pad( + channels[i], [[0, max_dimension - new_size[0]], + [0, max_dimension - new_size[1]]], + constant_values=per_channel_pad_value[i]) + for i in range(len(channels)) + ], + axis=2) + new_image.set_shape([max_dimension, max_dimension, 3]) + + result = [new_image] + if masks is not None: + new_masks = tf.expand_dims(masks, 3) + new_masks = tf.image.resize_images( + new_masks, + new_size[:-1], + method=tf.image.ResizeMethod.NEAREST_NEIGHBOR, + align_corners=align_corners) + if pad_to_max_dimension: + new_masks = tf.image.pad_to_bounding_box( + new_masks, 0, 0, max_dimension, max_dimension) + new_masks = tf.squeeze(new_masks, 3) + result.append(new_masks) + + result.append(new_size) + return result + + +def _get_image_info(image): + """Returns the height, width and number of channels in the image.""" + image_height = tf.shape(image)[0] + image_width = tf.shape(image)[1] + num_channels = tf.shape(image)[2] + return (image_height, image_width, num_channels) + + +# TODO(alirezafathi): Make sure the static shapes are preserved. +def resize_to_min_dimension(image, masks=None, min_dimension=600, + method=tf.image.ResizeMethod.BILINEAR): + """Resizes image and masks given the min size maintaining the aspect ratio. + + If one of the image dimensions is smaller than min_dimension, it will scale + the image such that its smallest dimension is equal to min_dimension. + Otherwise, will keep the image size as is. + + Args: + image: a tensor of size [height, width, channels]. + masks: (optional) a tensors of size [num_instances, height, width]. + min_dimension: minimum image dimension. + method: (optional) interpolation method used in resizing. Defaults to + BILINEAR. + + Returns: + An array containing resized_image, resized_masks, and resized_image_shape. + Note that the position of the resized_image_shape changes based on whether + masks are present. + resized_image: A tensor of size [new_height, new_width, channels]. 
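+      (Worked example, added for illustration: with min_dimension=600, a
+      400x800 image is upscaled by a factor of 600 / 400 = 1.5 to 600x1200,
+      while a 700x900 image is already large enough and is kept as is.)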
+ resized_masks: If masks is not None, also outputs masks. A 3D tensor of + shape [num_instances, new_height, new_width] + resized_image_shape: A 1D tensor of shape [3] containing the shape of the + resized image. + + Raises: + ValueError: if the image is not a 3D tensor. + """ + if len(image.get_shape()) != 3: + raise ValueError('Image should be 3D tensor') + + with tf.name_scope('ResizeGivenMinDimension', values=[image, min_dimension]): + (image_height, image_width, num_channels) = _get_image_info(image) + min_image_dimension = tf.minimum(image_height, image_width) + min_target_dimension = tf.maximum(min_image_dimension, min_dimension) + target_ratio = tf.cast(min_target_dimension, dtype=tf.float32) / tf.cast( + min_image_dimension, dtype=tf.float32) + target_height = tf.cast( + tf.cast(image_height, dtype=tf.float32) * target_ratio, dtype=tf.int32) + target_width = tf.cast( + tf.cast(image_width, dtype=tf.float32) * target_ratio, dtype=tf.int32) + image = tf.image.resize_images( + tf.expand_dims(image, axis=0), size=[target_height, target_width], + method=method, + align_corners=True) + result = [tf.squeeze(image, axis=0)] + + if masks is not None: + masks = tf.image.resize_nearest_neighbor( + tf.expand_dims(masks, axis=3), + size=[target_height, target_width], + align_corners=True) + result.append(tf.squeeze(masks, axis=3)) + + result.append(tf.stack([target_height, target_width, num_channels])) + return result + + +def resize_to_max_dimension(image, masks=None, max_dimension=600, + method=tf.image.ResizeMethod.BILINEAR): + """Resizes image and masks given the max size maintaining the aspect ratio. + + If one of the image dimensions is greater than max_dimension, it will scale + the image such that its largest dimension is equal to max_dimension. + Otherwise, will keep the image size as is. + + Args: + image: a tensor of size [height, width, channels]. + masks: (optional) a tensors of size [num_instances, height, width]. + max_dimension: maximum image dimension. + method: (optional) interpolation method used in resizing. Defaults to + BILINEAR. + + Returns: + An array containing resized_image, resized_masks, and resized_image_shape. + Note that the position of the resized_image_shape changes based on whether + masks are present. + resized_image: A tensor of size [new_height, new_width, channels]. + resized_masks: If masks is not None, also outputs masks. A 3D tensor of + shape [num_instances, new_height, new_width] + resized_image_shape: A 1D tensor of shape [3] containing the shape of the + resized image. + + Raises: + ValueError: if the image is not a 3D tensor. 
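+
+  Worked example (added for illustration): with max_dimension=600, a
+  1200x800 image gives target_ratio = 600 / 1200 = 0.5 and is downscaled to
+  600x400, while a 500x400 image is already within the limit and is kept
+  as is.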
+ """ + if len(image.get_shape()) != 3: + raise ValueError('Image should be 3D tensor') + + with tf.name_scope('ResizeGivenMaxDimension', values=[image, max_dimension]): + (image_height, image_width, num_channels) = _get_image_info(image) + max_image_dimension = tf.maximum(image_height, image_width) + max_target_dimension = tf.minimum(max_image_dimension, max_dimension) + target_ratio = tf.cast(max_target_dimension, dtype=tf.float32) / tf.cast( + max_image_dimension, dtype=tf.float32) + target_height = tf.cast( + tf.cast(image_height, dtype=tf.float32) * target_ratio, dtype=tf.int32) + target_width = tf.cast( + tf.cast(image_width, dtype=tf.float32) * target_ratio, dtype=tf.int32) + image = tf.image.resize_images( + tf.expand_dims(image, axis=0), size=[target_height, target_width], + method=method, + align_corners=True) + result = [tf.squeeze(image, axis=0)] + + if masks is not None: + masks = tf.image.resize_nearest_neighbor( + tf.expand_dims(masks, axis=3), + size=[target_height, target_width], + align_corners=True) + result.append(tf.squeeze(masks, axis=3)) + + result.append(tf.stack([target_height, target_width, num_channels])) + return result + + +def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None): + """Scales boxes from normalized to pixel coordinates. + + Args: + image: A 3D float32 tensor of shape [height, width, channels]. + boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding + boxes in normalized coordinates. Each row is of the form + [ymin, xmin, ymax, xmax]. + keypoints: (optional) rank 3 float32 tensor with shape + [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized + coordinates. + + Returns: + image: unchanged input image. + scaled_boxes: a 2D float32 tensor of shape [num_boxes, 4] containing the + bounding boxes in pixel coordinates. + scaled_keypoints: a 3D float32 tensor with shape + [num_instances, num_keypoints, 2] containing the keypoints in pixel + coordinates. + """ + boxlist = box_list.BoxList(boxes) + image_height = tf.shape(image)[0] + image_width = tf.shape(image)[1] + scaled_boxes = box_list_ops.scale(boxlist, image_height, image_width).get() + result = [image, scaled_boxes] + if keypoints is not None: + scaled_keypoints = keypoint_ops.scale(keypoints, image_height, image_width) + result.append(scaled_keypoints) + return tuple(result) + + +# TODO(alirezafathi): Investigate if instead the function should return None if +# masks is None. +# pylint: disable=g-doc-return-or-yield +def resize_image(image, + masks=None, + new_height=600, + new_width=1024, + method=tf.image.ResizeMethod.BILINEAR, + align_corners=False): + """Resizes images to the given height and width. + + Args: + image: A 3D tensor of shape [height, width, channels] + masks: (optional) rank 3 float32 tensor with shape + [num_instances, height, width] containing instance masks. + new_height: (optional) (scalar) desired height of the image. + new_width: (optional) (scalar) desired width of the image. + method: (optional) interpolation method used in resizing. Defaults to + BILINEAR. + align_corners: bool. If true, exactly align all 4 corners of the input + and output. Defaults to False. + + Returns: + Note that the position of the resized_image_shape changes based on whether + masks are present. + resized_image: A tensor of size [new_height, new_width, channels]. + resized_masks: If masks is not None, also outputs masks. 
A 3D tensor of
+      shape [num_instances, new_height, new_width]
+    resized_image_shape: A 1D tensor of shape [3] containing the shape of the
+      resized image.
+  """
+  with tf.name_scope(
+      'ResizeImage',
+      values=[image, new_height, new_width, method, align_corners]):
+    new_image = tf.image.resize_images(
+        image, tf.stack([new_height, new_width]),
+        method=method,
+        align_corners=align_corners)
+    image_shape = shape_utils.combined_static_and_dynamic_shape(image)
+    result = [new_image]
+    if masks is not None:
+      num_instances = tf.shape(masks)[0]
+      new_size = tf.stack([new_height, new_width])
+      def resize_masks_branch():
+        new_masks = tf.expand_dims(masks, 3)
+        new_masks = tf.image.resize_nearest_neighbor(
+            new_masks, new_size, align_corners=align_corners)
+        new_masks = tf.squeeze(new_masks, axis=3)
+        return new_masks
+
+      def reshape_masks_branch():
+        # The shape function will be computed for both branches of the
+        # condition, regardless of which branch is actually taken. Make sure
+        # that we don't trigger an assertion in the shape function when trying
+        # to reshape a non empty tensor into an empty one.
+        new_masks = tf.reshape(masks, [-1, new_size[0], new_size[1]])
+        return new_masks
+
+      masks = tf.cond(num_instances > 0, resize_masks_branch,
+                      reshape_masks_branch)
+      result.append(masks)
+
+    result.append(tf.stack([new_height, new_width, image_shape[2]]))
+    return result
+
+
+def subtract_channel_mean(image, means=None):
+  """Normalizes an image by subtracting a mean from each channel.
+
+  Args:
+    image: A 3D tensor of shape [height, width, channels]
+    means: float list containing a mean for each channel
+  Returns:
+    normalized_images: a tensor of shape [height, width, channels]
+  Raises:
+    ValueError: if image is not a 3D tensor or if the number of means is not
+      equal to the number of channels.
+  """
+  with tf.name_scope('SubtractChannelMean', values=[image, means]):
+    if len(image.get_shape()) != 3:
+      raise ValueError('Input must be of size [height, width, channels]')
+    if len(means) != image.get_shape()[-1]:
+      raise ValueError('len(means) must match the number of channels')
+    return image - [[means]]
+
+
+def one_hot_encoding(labels, num_classes=None):
+  """One-hot encodes the multiclass labels.
+
+  Example usage:
+    labels = tf.constant([1, 4], dtype=tf.int32)
+    one_hot = one_hot_encoding(labels, num_classes=5)
+    one_hot.eval()    # evaluates to [0, 1, 0, 0, 1]
+
+  Args:
+    labels: A tensor of shape [None] corresponding to the labels.
+    num_classes: Number of classes in the dataset.
+  Returns:
+    onehot_labels: a tensor of shape [num_classes] corresponding to the one hot
+      encoding of the labels.
+  Raises:
+    ValueError: if num_classes is not specified.
+  """
+  with tf.name_scope('OneHotEncoding', values=[labels]):
+    if num_classes is None:
+      raise ValueError('num_classes must be specified')
+
+    labels = tf.one_hot(labels, num_classes, 1, 0)
+    return tf.reduce_max(labels, 0)
+
+
+def rgb_to_gray(image):
+  """Converts a 3 channel RGB image to a 1 channel grayscale image.
+
+  Args:
+    image: Rank 3 float32 tensor containing 1 image -> [height, width, 3]
+      with pixel values varying between [0, 1].
+
+  Returns:
+    image: A single channel grayscale image -> [height, width, 1].
+  """
+  return _rgb_to_grayscale(image)
+
+
+def random_self_concat_image(
+    image, boxes, labels, label_weights, label_confidences=None,
+    multiclass_scores=None, concat_vertical_probability=0.1,
+    concat_horizontal_probability=0.1, seed=None,
+    preprocess_vars_cache=None):
+  """Randomly concatenates the image with itself.
+
+  This function randomly concatenates the image with itself; the random
+  variables for vertical and horizontal concatenation are independent.
+  Afterwards, we adjust the old bounding boxes, and add new bounding boxes
+  for the new objects.
+
+  Args:
+    image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
+      with pixel values varying between [0, 1].
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+      Boxes are in normalized form meaning their coordinates vary
+      between [0, 1].
+      Each row is in the form of [ymin, xmin, ymax, xmax].
+    labels: rank 1 int32 tensor containing the object classes.
+    label_weights: rank 1 float32 tensor containing the label weights.
+    label_confidences: (optional) rank 1 float32 tensor containing the label
+      confidences.
+    multiclass_scores: (optional) float32 tensor of shape
+      [num_instances, num_classes] representing the score for
+      each box for each class.
+    concat_vertical_probability: (optional) a tf.float32 scalar denoting the
+      probability of a vertical concatenation.
+    concat_horizontal_probability: (optional) a tf.float32 scalar denoting the
+      probability of a horizontal concatenation.
+    seed: random seed.
+    preprocess_vars_cache: PreprocessorCache object that records previously
+                           performed augmentations. Updated in-place. If this
+                           function is called multiple times with the same
+                           non-null cache, it will perform deterministically.
+
+  Returns:
+    image: Image shape will be [new_height, new_width, channels].
+    boxes: boxes which is the same rank as input boxes. Boxes are in normalized
+      form.
+    if label_confidences is not None also returns:
+    maybe_concat_label_confidences: concatenated label confidences.
+    if multiclass_scores is not None also returns:
+    maybe_concat_multiclass_scores: concatenated multiclass scores.
+  """
+
+  concat_vertical = (tf.random_uniform([], seed=seed) <
+                     concat_vertical_probability)
+  # Note the seed + 1 so we get some semblance of independence even with
+  # fixed seeds.
+  concat_horizontal = (tf.random_uniform([], seed=seed + 1 if seed else None)
+                       < concat_horizontal_probability)
+
+  gen_func = lambda: (concat_vertical, concat_horizontal)
+  params = _get_or_create_preprocess_rand_vars(
+      gen_func, preprocessor_cache.PreprocessorCache.SELF_CONCAT_IMAGE,
+      preprocess_vars_cache)
+  concat_vertical, concat_horizontal = params
+
+  def _concat_image(image, boxes, labels, label_weights, axis):
+    """Concats the image to itself on `axis`."""
+    output_images = tf.concat([image, image], axis=axis)
+
+    if axis == 0:
+      # Concat vertically, so need to reduce the y coordinates.
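+      # Illustrative note (added): after concatenating on axis 0 the image
+      # height doubles, so normalized y-coordinates are halved via
+      # old_scaling, and the duplicated boxes are shifted down by 0.5 via
+      # new_translation. E.g. a box [0.2, 0.1, 0.6, 0.9] becomes
+      # [0.1, 0.1, 0.3, 0.9] (top copy) and [0.6, 0.1, 0.8, 0.9]
+      # (bottom copy).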
+ old_scaling = tf.constant([0.5, 1.0, 0.5, 1.0]) + new_translation = tf.constant([0.5, 0.0, 0.5, 0.0]) + elif axis == 1: + old_scaling = tf.constant([1.0, 0.5, 1.0, 0.5]) + new_translation = tf.constant([0.0, 0.5, 0.0, 0.5]) + + old_boxes = old_scaling * boxes + new_boxes = old_boxes + new_translation + all_boxes = tf.concat([old_boxes, new_boxes], axis=0) + + return [output_images, all_boxes, tf.tile(labels, [2]), tf.tile( + label_weights, [2])] + + image, boxes, labels, label_weights = tf.cond( + concat_vertical, + lambda: _concat_image(image, boxes, labels, label_weights, axis=0), + lambda: [image, boxes, labels, label_weights], + strict=True) + + outputs = tf.cond( + concat_horizontal, + lambda: _concat_image(image, boxes, labels, label_weights, axis=1), + lambda: [image, boxes, labels, label_weights], + strict=True) + + if label_confidences is not None: + label_confidences = tf.cond(concat_vertical, + lambda: tf.tile(label_confidences, [2]), + lambda: label_confidences) + outputs.append(tf.cond(concat_horizontal, + lambda: tf.tile(label_confidences, [2]), + lambda: label_confidences)) + + if multiclass_scores is not None: + multiclass_scores = tf.cond(concat_vertical, + lambda: tf.tile(multiclass_scores, [2, 1]), + lambda: multiclass_scores) + outputs.append(tf.cond(concat_horizontal, + lambda: tf.tile(multiclass_scores, [2, 1]), + lambda: multiclass_scores)) + + return outputs + + +def ssd_random_crop(image, + boxes, + labels, + label_weights, + label_confidences=None, + multiclass_scores=None, + masks=None, + keypoints=None, + min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0), + aspect_ratio_range=((0.5, 2.0),) * 7, + area_range=((0.1, 1.0),) * 7, + overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0), + clip_boxes=(True,) * 7, + random_coef=(0.15,) * 7, + seed=None, + preprocess_vars_cache=None): + """Random crop preprocessing with default parameters as in SSD paper. + + Liu et al., SSD: Single shot multibox detector. + For further information on random crop preprocessing refer to RandomCrop + function above. + + Args: + image: rank 3 float32 tensor contains 1 image -> [height, width, channels] + with pixel values varying between [0, 1]. + boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. + Boxes are in normalized form meaning their coordinates vary + between [0, 1]. + Each row is in the form of [ymin, xmin, ymax, xmax]. + labels: rank 1 int32 tensor containing the object classes. + label_weights: rank 1 float32 tensor containing the weights. + label_confidences: rank 1 float32 tensor containing the confidences. + multiclass_scores: (optional) float32 tensor of shape + [num_instances, num_classes] representing the score for each box for each + class. + masks: (optional) rank 3 float32 tensor with shape + [num_instances, height, width] containing instance masks. The masks + are of the same height, width as the input `image`. + keypoints: (optional) rank 3 float32 tensor with shape + [num_instances, num_keypoints, 2]. The keypoints are in y-x + normalized coordinates. + min_object_covered: the cropped image must cover at least this fraction of + at least one of the input bounding boxes. + aspect_ratio_range: allowed range for aspect ratio of cropped image. + area_range: allowed range for area ratio between cropped image and the + original image. + overlap_thresh: minimum overlap thresh with new cropped + image to keep the box. + clip_boxes: whether to clip the boxes to the cropped image. 
+ random_coef: a random coefficient that defines the chance of getting the + original image. If random_coef is 0, we will always get the + cropped image, and if it is 1.0, we will always get the + original image. + seed: random seed. + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + image: image which is the same rank as input image. + boxes: boxes which is the same rank as input boxes. + Boxes are in normalized form. + labels: new labels. + + If label_weights, multiclass_scores, masks, or keypoints is not None, the + function also returns: + label_weights: rank 1 float32 tensor with shape [num_instances]. + multiclass_scores: rank 2 float32 tensor with shape + [num_instances, num_classes] + masks: rank 3 float32 tensor with shape [num_instances, height, width] + containing instance masks. + keypoints: rank 3 float32 tensor with shape + [num_instances, num_keypoints, 2] + """ + + def random_crop_selector(selected_result, index): + """Applies random_crop_image to selected result. + + Args: + selected_result: A tuple containing image, boxes, labels, keypoints (if + not None), and masks (if not None). + index: The index that was randomly selected. + + Returns: A tuple containing image, boxes, labels, keypoints (if not None), + and masks (if not None). + """ + + i = 3 + image, boxes, labels = selected_result[:i] + selected_label_weights = None + selected_label_confidences = None + selected_multiclass_scores = None + selected_masks = None + selected_keypoints = None + if label_weights is not None: + selected_label_weights = selected_result[i] + i += 1 + if label_confidences is not None: + selected_label_confidences = selected_result[i] + i += 1 + if multiclass_scores is not None: + selected_multiclass_scores = selected_result[i] + i += 1 + if masks is not None: + selected_masks = selected_result[i] + i += 1 + if keypoints is not None: + selected_keypoints = selected_result[i] + + return random_crop_image( + image=image, + boxes=boxes, + labels=labels, + label_weights=selected_label_weights, + label_confidences=selected_label_confidences, + multiclass_scores=selected_multiclass_scores, + masks=selected_masks, + keypoints=selected_keypoints, + min_object_covered=min_object_covered[index], + aspect_ratio_range=aspect_ratio_range[index], + area_range=area_range[index], + overlap_thresh=overlap_thresh[index], + clip_boxes=clip_boxes[index], + random_coef=random_coef[index], + seed=seed, + preprocess_vars_cache=preprocess_vars_cache) + + result = _apply_with_random_selector_tuples( + tuple( + t for t in (image, boxes, labels, label_weights, label_confidences, + multiclass_scores, masks, keypoints) if t is not None), + random_crop_selector, + num_cases=len(min_object_covered), + preprocess_vars_cache=preprocess_vars_cache, + key=preprocessor_cache.PreprocessorCache.SSD_CROP_SELECTOR_ID) + return result + + +def ssd_random_crop_pad(image, + boxes, + labels, + label_weights, + label_confidences=None, + multiclass_scores=None, + min_object_covered=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0), + aspect_ratio_range=((0.5, 2.0),) * 6, + area_range=((0.1, 1.0),) * 6, + overlap_thresh=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0), + clip_boxes=(True,) * 6, + random_coef=(0.15,) * 6, + min_padded_size_ratio=((1.0, 1.0),) * 6, + max_padded_size_ratio=((2.0, 2.0),) * 6, + pad_color=(None,) * 6, + seed=None, + preprocess_vars_cache=None): + """Random 
crop preprocessing with default parameters as in SSD paper. + + Liu et al., SSD: Single shot multibox detector. + For further information on random crop preprocessing refer to RandomCrop + function above. + + Args: + image: rank 3 float32 tensor containing 1 image -> [height, width, channels] + with pixel values varying between [0, 1]. + boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. + Boxes are in normalized form meaning their coordinates vary + between [0, 1]. + Each row is in the form of [ymin, xmin, ymax, xmax]. + labels: rank 1 int32 tensor containing the object classes. + label_weights: float32 tensor of shape [num_instances] representing the + weight for each box. + label_confidences: float32 tensor of shape [num_instances] representing the + confidences for each box. + multiclass_scores: (optional) float32 tensor of shape + [num_instances, num_classes] representing the score for each box for each + class. + min_object_covered: the cropped image must cover at least this fraction of + at least one of the input bounding boxes. + aspect_ratio_range: allowed range for aspect ratio of cropped image. + area_range: allowed range for area ratio between cropped image and the + original image. + overlap_thresh: minimum overlap thresh with new cropped + image to keep the box. + clip_boxes: whether to clip the boxes to the cropped image. + random_coef: a random coefficient that defines the chance of getting the + original image. If random_coef is 0, we will always get the + cropped image, and if it is 1.0, we will always get the + original image. + min_padded_size_ratio: min ratio of padded image height and width to the + input image's height and width. + max_padded_size_ratio: max ratio of padded image height and width to the + input image's height and width. + pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32. + if set as None, it will be set to average color of the randomly + cropped image. + seed: random seed. + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + image: Image shape will be [new_height, new_width, channels]. + boxes: boxes which is the same rank as input boxes. Boxes are in normalized + form. + new_labels: new labels. + new_label_weights: new label weights. 
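+
+    Note (added for clarity): each tuple-valued argument above supplies one
+    value per crop/pad case; a single case index is chosen at random and the
+    corresponding entries (min_object_covered[i], aspect_ratio_range[i],
+    etc.) are applied together.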
+ """ + + def random_crop_pad_selector(image_boxes_labels, index): + """Random crop preprocessing helper.""" + i = 3 + image, boxes, labels = image_boxes_labels[:i] + selected_label_weights = None + selected_label_confidences = None + selected_multiclass_scores = None + if label_weights is not None: + selected_label_weights = image_boxes_labels[i] + i += 1 + if label_confidences is not None: + selected_label_confidences = image_boxes_labels[i] + i += 1 + if multiclass_scores is not None: + selected_multiclass_scores = image_boxes_labels[i] + + return random_crop_pad_image( + image, + boxes, + labels, + label_weights=selected_label_weights, + label_confidences=selected_label_confidences, + multiclass_scores=selected_multiclass_scores, + min_object_covered=min_object_covered[index], + aspect_ratio_range=aspect_ratio_range[index], + area_range=area_range[index], + overlap_thresh=overlap_thresh[index], + clip_boxes=clip_boxes[index], + random_coef=random_coef[index], + min_padded_size_ratio=min_padded_size_ratio[index], + max_padded_size_ratio=max_padded_size_ratio[index], + pad_color=pad_color[index], + seed=seed, + preprocess_vars_cache=preprocess_vars_cache) + + return _apply_with_random_selector_tuples( + tuple(t for t in (image, boxes, labels, label_weights, label_confidences, + multiclass_scores) if t is not None), + random_crop_pad_selector, + num_cases=len(min_object_covered), + preprocess_vars_cache=preprocess_vars_cache, + key=preprocessor_cache.PreprocessorCache.SSD_CROP_PAD_SELECTOR_ID) + + +def ssd_random_crop_fixed_aspect_ratio( + image, + boxes, + labels, + label_weights, + label_confidences=None, + multiclass_scores=None, + masks=None, + keypoints=None, + min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0), + aspect_ratio=1.0, + area_range=((0.1, 1.0),) * 7, + overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0), + clip_boxes=(True,) * 7, + random_coef=(0.15,) * 7, + seed=None, + preprocess_vars_cache=None): + """Random crop preprocessing with default parameters as in SSD paper. + + Liu et al., SSD: Single shot multibox detector. + For further information on random crop preprocessing refer to RandomCrop + function above. + + The only difference is that the aspect ratio of the crops are fixed. + + Args: + image: rank 3 float32 tensor contains 1 image -> [height, width, channels] + with pixel values varying between [0, 1]. + boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. + Boxes are in normalized form meaning their coordinates vary + between [0, 1]. + Each row is in the form of [ymin, xmin, ymax, xmax]. + labels: rank 1 int32 tensor containing the object classes. + label_weights: float32 tensor of shape [num_instances] representing the + weight for each box. + label_confidences: (optional) float32 tensor of shape [num_instances] + representing the confidences for each box. + multiclass_scores: (optional) float32 tensor of shape + [num_instances, num_classes] representing the score for each box for each + class. + masks: (optional) rank 3 float32 tensor with shape + [num_instances, height, width] containing instance masks. The masks + are of the same height, width as the input `image`. + keypoints: (optional) rank 3 float32 tensor with shape + [num_instances, num_keypoints, 2]. The keypoints are in y-x + normalized coordinates. + min_object_covered: the cropped image must cover at least this fraction of + at least one of the input bounding boxes. + aspect_ratio: aspect ratio of the cropped image. 
+ area_range: allowed range for area ratio between cropped image and the + original image. + overlap_thresh: minimum overlap thresh with new cropped + image to keep the box. + clip_boxes: whether to clip the boxes to the cropped image. + random_coef: a random coefficient that defines the chance of getting the + original image. If random_coef is 0, we will always get the + cropped image, and if it is 1.0, we will always get the + original image. + seed: random seed. + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + image: image which is the same rank as input image. + boxes: boxes which is the same rank as input boxes. + Boxes are in normalized form. + labels: new labels. + + If multiclass_scores, masks, or keypoints is not None, the function also + returns: + + multiclass_scores: rank 2 float32 tensor with shape + [num_instances, num_classes] + masks: rank 3 float32 tensor with shape [num_instances, height, width] + containing instance masks. + keypoints: rank 3 float32 tensor with shape + [num_instances, num_keypoints, 2] + """ + aspect_ratio_range = ((aspect_ratio, aspect_ratio),) * len(area_range) + + crop_result = ssd_random_crop( + image, + boxes, + labels, + label_weights=label_weights, + label_confidences=label_confidences, + multiclass_scores=multiclass_scores, + masks=masks, + keypoints=keypoints, + min_object_covered=min_object_covered, + aspect_ratio_range=aspect_ratio_range, + area_range=area_range, + overlap_thresh=overlap_thresh, + clip_boxes=clip_boxes, + random_coef=random_coef, + seed=seed, + preprocess_vars_cache=preprocess_vars_cache) + i = 3 + new_image, new_boxes, new_labels = crop_result[:i] + new_label_weights = None + new_label_confidences = None + new_multiclass_scores = None + new_masks = None + new_keypoints = None + if label_weights is not None: + new_label_weights = crop_result[i] + i += 1 + if label_confidences is not None: + new_label_confidences = crop_result[i] + i += 1 + if multiclass_scores is not None: + new_multiclass_scores = crop_result[i] + i += 1 + if masks is not None: + new_masks = crop_result[i] + i += 1 + if keypoints is not None: + new_keypoints = crop_result[i] + + result = random_crop_to_aspect_ratio( + new_image, + new_boxes, + new_labels, + label_weights=new_label_weights, + label_confidences=new_label_confidences, + multiclass_scores=new_multiclass_scores, + masks=new_masks, + keypoints=new_keypoints, + aspect_ratio=aspect_ratio, + clip_boxes=clip_boxes, + seed=seed, + preprocess_vars_cache=preprocess_vars_cache) + + return result + + +def ssd_random_crop_pad_fixed_aspect_ratio( + image, + boxes, + labels, + label_weights, + label_confidences=None, + multiclass_scores=None, + masks=None, + keypoints=None, + min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0), + aspect_ratio=1.0, + aspect_ratio_range=((0.5, 2.0),) * 7, + area_range=((0.1, 1.0),) * 7, + overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0), + clip_boxes=(True,) * 7, + random_coef=(0.15,) * 7, + min_padded_size_ratio=(1.0, 1.0), + max_padded_size_ratio=(2.0, 2.0), + seed=None, + preprocess_vars_cache=None): + """Random crop and pad preprocessing with default parameters as in SSD paper. + + Liu et al., SSD: Single shot multibox detector. + For further information on random crop preprocessing refer to RandomCrop + function above. 
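+
+  (Added note: internally this runs ssd_random_crop first and then
+  random_pad_to_aspect_ratio on the cropped output.)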
+ + The only difference is that after the initial crop, images are zero-padded + to a fixed aspect ratio instead of being resized to that aspect ratio. + + Args: + image: rank 3 float32 tensor contains 1 image -> [height, width, channels] + with pixel values varying between [0, 1]. + boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. + Boxes are in normalized form meaning their coordinates vary + between [0, 1]. + Each row is in the form of [ymin, xmin, ymax, xmax]. + labels: rank 1 int32 tensor containing the object classes. + label_weights: float32 tensor of shape [num_instances] representing the + weight for each box. + label_confidences: (optional) float32 tensor of shape [num_instances] + representing the confidence for each box. + multiclass_scores: (optional) float32 tensor of shape + [num_instances, num_classes] representing the score for each box for each + class. + masks: (optional) rank 3 float32 tensor with shape + [num_instances, height, width] containing instance masks. The masks + are of the same height, width as the input `image`. + keypoints: (optional) rank 3 float32 tensor with shape + [num_instances, num_keypoints, 2]. The keypoints are in y-x + normalized coordinates. + min_object_covered: the cropped image must cover at least this fraction of + at least one of the input bounding boxes. + aspect_ratio: the final aspect ratio to pad to. + aspect_ratio_range: allowed range for aspect ratio of cropped image. + area_range: allowed range for area ratio between cropped image and the + original image. + overlap_thresh: minimum overlap thresh with new cropped + image to keep the box. + clip_boxes: whether to clip the boxes to the cropped image. + random_coef: a random coefficient that defines the chance of getting the + original image. If random_coef is 0, we will always get the + cropped image, and if it is 1.0, we will always get the + original image. + min_padded_size_ratio: min ratio of padded image height and width to the + input image's height and width. + max_padded_size_ratio: max ratio of padded image height and width to the + input image's height and width. + seed: random seed. + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + image: image which is the same rank as input image. + boxes: boxes which is the same rank as input boxes. + Boxes are in normalized form. + labels: new labels. + + If multiclass_scores, masks, or keypoints is not None, the function also + returns: + + multiclass_scores: rank 2 with shape [num_instances, num_classes] + masks: rank 3 float32 tensor with shape [num_instances, height, width] + containing instance masks. 
+    keypoints: rank 3 float32 tensor with shape
+      [num_instances, num_keypoints, 2]
+  """
+  crop_result = ssd_random_crop(
+      image,
+      boxes,
+      labels,
+      label_weights=label_weights,
+      label_confidences=label_confidences,
+      multiclass_scores=multiclass_scores,
+      masks=masks,
+      keypoints=keypoints,
+      min_object_covered=min_object_covered,
+      aspect_ratio_range=aspect_ratio_range,
+      area_range=area_range,
+      overlap_thresh=overlap_thresh,
+      clip_boxes=clip_boxes,
+      random_coef=random_coef,
+      seed=seed,
+      preprocess_vars_cache=preprocess_vars_cache)
+  i = 3
+  new_image, new_boxes, new_labels = crop_result[:i]
+  new_label_weights = None
+  new_label_confidences = None
+  new_multiclass_scores = None
+  new_masks = None
+  new_keypoints = None
+  if label_weights is not None:
+    new_label_weights = crop_result[i]
+    i += 1
+  if label_confidences is not None:
+    new_label_confidences = crop_result[i]
+    i += 1
+  if multiclass_scores is not None:
+    new_multiclass_scores = crop_result[i]
+    i += 1
+  if masks is not None:
+    new_masks = crop_result[i]
+    i += 1
+  if keypoints is not None:
+    new_keypoints = crop_result[i]
+
+  result = random_pad_to_aspect_ratio(
+      new_image,
+      new_boxes,
+      masks=new_masks,
+      keypoints=new_keypoints,
+      aspect_ratio=aspect_ratio,
+      min_padded_size_ratio=min_padded_size_ratio,
+      max_padded_size_ratio=max_padded_size_ratio,
+      seed=seed,
+      preprocess_vars_cache=preprocess_vars_cache)
+
+  result = list(result)
+  i = 3
+  result.insert(2, new_labels)
+  if new_label_weights is not None:
+    result.insert(i, new_label_weights)
+    i += 1
+  if new_label_confidences is not None:
+    result.insert(i, new_label_confidences)
+    i += 1
+  if multiclass_scores is not None:
+    result.insert(i, new_multiclass_scores)
+  result = tuple(result)
+
+  return result
+
+
+def convert_class_logits_to_softmax(multiclass_scores, temperature=1.0):
+  """Converts multiclass logits to softmax scores after applying temperature.
+
+  Args:
+    multiclass_scores: float32 tensor of shape
+      [num_instances, num_classes] representing the score for each box for each
+      class.
+    temperature: Scale factor to use prior to applying softmax. Larger
+      temperatures give more uniform distributions after softmax.
+
+  Returns:
+    multiclass_scores: float32 tensor of shape
+      [num_instances, num_classes] with scaling and softmax applied.
+  """
+
+  # Multiclass scores must be stored as logits. Apply temperature scaling and
+  # softmax.
+  multiclass_scores_scaled = tf.divide(
+      multiclass_scores, temperature, name='scale_logits')
+  multiclass_scores = tf.nn.softmax(multiclass_scores_scaled, name='softmax')
+
+  return multiclass_scores
+
+
+def get_default_func_arg_map(include_label_weights=True,
+                             include_label_confidences=False,
+                             include_multiclass_scores=False,
+                             include_instance_masks=False,
+                             include_keypoints=False):
+  """Returns the default mapping from a preprocessor function to its args.
+
+  Args:
+    include_label_weights: If True, preprocessing functions will modify the
+      label weights, too.
+    include_label_confidences: If True, preprocessing functions will modify the
+      label confidences, too.
+    include_multiclass_scores: If True, preprocessing functions will modify the
+      multiclass scores, too.
+    include_instance_masks: If True, preprocessing functions will modify the
+      instance masks, too.
+    include_keypoints: If True, preprocessing functions will modify the
+      keypoints, too.
+
+  Returns:
+    A map from preprocessing functions to the arguments they receive.
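+
+    Example (illustrative): with the default flags,
+    prep_func_arg_map[random_horizontal_flip] is
+    (fields.InputDataFields.image, fields.InputDataFields.groundtruth_boxes,
+    None, None); preprocess() passes None for the None entries and skips them
+    when writing results back into the tensor dictionary.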
+ """ + groundtruth_label_weights = None + if include_label_weights: + groundtruth_label_weights = ( + fields.InputDataFields.groundtruth_weights) + + groundtruth_label_confidences = None + if include_label_confidences: + groundtruth_label_confidences = ( + fields.InputDataFields.groundtruth_confidences) + + multiclass_scores = None + if include_multiclass_scores: + multiclass_scores = (fields.InputDataFields.multiclass_scores) + + groundtruth_instance_masks = None + if include_instance_masks: + groundtruth_instance_masks = ( + fields.InputDataFields.groundtruth_instance_masks) + + groundtruth_keypoints = None + if include_keypoints: + groundtruth_keypoints = fields.InputDataFields.groundtruth_keypoints + + prep_func_arg_map = { + normalize_image: (fields.InputDataFields.image,), + random_horizontal_flip: ( + fields.InputDataFields.image, + fields.InputDataFields.groundtruth_boxes, + groundtruth_instance_masks, + groundtruth_keypoints, + ), + random_vertical_flip: ( + fields.InputDataFields.image, + fields.InputDataFields.groundtruth_boxes, + groundtruth_instance_masks, + groundtruth_keypoints, + ), + random_rotation90: ( + fields.InputDataFields.image, + fields.InputDataFields.groundtruth_boxes, + groundtruth_instance_masks, + groundtruth_keypoints, + ), + random_pixel_value_scale: (fields.InputDataFields.image,), + random_image_scale: ( + fields.InputDataFields.image, + groundtruth_instance_masks, + ), + random_rgb_to_gray: (fields.InputDataFields.image,), + random_adjust_brightness: (fields.InputDataFields.image,), + random_adjust_contrast: (fields.InputDataFields.image,), + random_adjust_hue: (fields.InputDataFields.image,), + random_adjust_saturation: (fields.InputDataFields.image,), + random_distort_color: (fields.InputDataFields.image,), + random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,), + random_crop_image: (fields.InputDataFields.image, + fields.InputDataFields.groundtruth_boxes, + fields.InputDataFields.groundtruth_classes, + groundtruth_label_weights, + groundtruth_label_confidences, + multiclass_scores, + groundtruth_instance_masks, + groundtruth_keypoints), + random_pad_image: (fields.InputDataFields.image, + fields.InputDataFields.groundtruth_boxes, + groundtruth_keypoints), + random_absolute_pad_image: (fields.InputDataFields.image, + fields.InputDataFields.groundtruth_boxes), + random_crop_pad_image: (fields.InputDataFields.image, + fields.InputDataFields.groundtruth_boxes, + fields.InputDataFields.groundtruth_classes, + groundtruth_label_weights, + groundtruth_label_confidences, + multiclass_scores), + random_crop_to_aspect_ratio: ( + fields.InputDataFields.image, + fields.InputDataFields.groundtruth_boxes, + fields.InputDataFields.groundtruth_classes, + groundtruth_label_weights, + groundtruth_label_confidences, + multiclass_scores, + groundtruth_instance_masks, + groundtruth_keypoints, + ), + random_pad_to_aspect_ratio: ( + fields.InputDataFields.image, + fields.InputDataFields.groundtruth_boxes, + groundtruth_instance_masks, + groundtruth_keypoints, + ), + random_black_patches: (fields.InputDataFields.image,), + random_jpeg_quality: (fields.InputDataFields.image,), + random_downscale_to_target_pixels: ( + fields.InputDataFields.image, + groundtruth_instance_masks, + ), + random_patch_gaussian: (fields.InputDataFields.image,), + autoaugment_image: (fields.InputDataFields.image, + fields.InputDataFields.groundtruth_boxes,), + retain_boxes_above_threshold: ( + fields.InputDataFields.groundtruth_boxes, + fields.InputDataFields.groundtruth_classes, + 
groundtruth_label_weights, + groundtruth_label_confidences, + multiclass_scores, + groundtruth_instance_masks, + groundtruth_keypoints, + ), + drop_label_probabilistically: ( + fields.InputDataFields.groundtruth_boxes, + fields.InputDataFields.groundtruth_classes, + groundtruth_label_weights, + groundtruth_label_confidences, + multiclass_scores, + groundtruth_instance_masks, + groundtruth_keypoints, + ), + remap_labels: (fields.InputDataFields.groundtruth_classes,), + image_to_float: (fields.InputDataFields.image,), + random_resize_method: (fields.InputDataFields.image,), + resize_to_range: ( + fields.InputDataFields.image, + groundtruth_instance_masks, + ), + resize_to_min_dimension: ( + fields.InputDataFields.image, + groundtruth_instance_masks, + ), + scale_boxes_to_pixel_coordinates: ( + fields.InputDataFields.image, + fields.InputDataFields.groundtruth_boxes, + groundtruth_keypoints, + ), + resize_image: ( + fields.InputDataFields.image, + groundtruth_instance_masks, + ), + subtract_channel_mean: (fields.InputDataFields.image,), + one_hot_encoding: (fields.InputDataFields.groundtruth_image_classes,), + rgb_to_gray: (fields.InputDataFields.image,), + random_self_concat_image: (fields.InputDataFields.image, + fields.InputDataFields.groundtruth_boxes, + fields.InputDataFields.groundtruth_classes, + groundtruth_label_weights, + groundtruth_label_confidences, + multiclass_scores), + ssd_random_crop: (fields.InputDataFields.image, + fields.InputDataFields.groundtruth_boxes, + fields.InputDataFields.groundtruth_classes, + groundtruth_label_weights, + groundtruth_label_confidences, + multiclass_scores, + groundtruth_instance_masks, + groundtruth_keypoints), + ssd_random_crop_pad: (fields.InputDataFields.image, + fields.InputDataFields.groundtruth_boxes, + fields.InputDataFields.groundtruth_classes, + groundtruth_label_weights, + groundtruth_label_confidences, + multiclass_scores), + ssd_random_crop_fixed_aspect_ratio: ( + fields.InputDataFields.image, + fields.InputDataFields.groundtruth_boxes, + fields.InputDataFields.groundtruth_classes, + groundtruth_label_weights, + groundtruth_label_confidences, + multiclass_scores, + groundtruth_instance_masks, + groundtruth_keypoints), + ssd_random_crop_pad_fixed_aspect_ratio: ( + fields.InputDataFields.image, + fields.InputDataFields.groundtruth_boxes, + fields.InputDataFields.groundtruth_classes, + groundtruth_label_weights, + groundtruth_label_confidences, + multiclass_scores, + groundtruth_instance_masks, + groundtruth_keypoints, + ), + convert_class_logits_to_softmax: (multiclass_scores,), + } + + return prep_func_arg_map + + +def preprocess(tensor_dict, + preprocess_options, + func_arg_map=None, + preprocess_vars_cache=None): + """Preprocess images and bounding boxes. + + Various types of preprocessing (to be implemented) based on the + preprocess_options dictionary e.g. "crop image" (affects image and possibly + boxes), "white balance image" (affects only image), etc. If self._options + is None, no preprocessing is done. + + Args: + tensor_dict: dictionary that contains images, boxes, and can contain other + things as well. + images-> rank 4 float32 tensor contains + 1 image -> [1, height, width, 3]. + with pixel values varying between [0, 1] + boxes-> rank 2 float32 tensor containing + the bounding boxes -> [N, 4]. + Boxes are in normalized form meaning + their coordinates vary between [0, 1]. + Each row is in the form + of [ymin, xmin, ymax, xmax]. 
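+                 (Added note: fields of tensor_dict that no chosen
+                 preprocessing function lists in func_arg_map pass through
+                 unchanged.)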
+ preprocess_options: It is a list of tuples, where each tuple contains a + function and a dictionary that contains arguments and + their values. + func_arg_map: mapping from preprocessing functions to arguments that they + expect to receive and return. + preprocess_vars_cache: PreprocessorCache object that records previously + performed augmentations. Updated in-place. If this + function is called multiple times with the same + non-null cache, it will perform deterministically. + + Returns: + tensor_dict: which contains the preprocessed images, bounding boxes, etc. + + Raises: + ValueError: (a) If the functions passed to Preprocess + are not in func_arg_map. + (b) If the arguments that a function needs + do not exist in tensor_dict. + (c) If image in tensor_dict is not rank 4 + """ + if func_arg_map is None: + func_arg_map = get_default_func_arg_map() + + # changes the images to image (rank 4 to rank 3) since the functions + # receive rank 3 tensor for image + if fields.InputDataFields.image in tensor_dict: + images = tensor_dict[fields.InputDataFields.image] + if len(images.get_shape()) != 4: + raise ValueError('images in tensor_dict should be rank 4') + image = tf.squeeze(images, axis=0) + tensor_dict[fields.InputDataFields.image] = image + + # Preprocess inputs based on preprocess_options + for option in preprocess_options: + func, params = option + if func not in func_arg_map: + raise ValueError('The function %s does not exist in func_arg_map' % + (func.__name__)) + arg_names = func_arg_map[func] + for a in arg_names: + if a is not None and a not in tensor_dict: + raise ValueError('The function %s requires argument %s' % + (func.__name__, a)) + + def get_arg(key): + return tensor_dict[key] if key is not None else None + + args = [get_arg(a) for a in arg_names] + if preprocess_vars_cache is not None: + if six.PY2: + # pylint: disable=deprecated-method + arg_spec = inspect.getargspec(func) + # pylint: enable=deprecated-method + else: + arg_spec = inspect.getfullargspec(func) + if 'preprocess_vars_cache' in arg_spec.args: + params['preprocess_vars_cache'] = preprocess_vars_cache + + results = func(*args, **params) + if not isinstance(results, (list, tuple)): + results = (results,) + # Removes None args since the return values will not contain those. + arg_names = [arg_name for arg_name in arg_names if arg_name is not None] + for res, arg_name in zip(results, arg_names): + tensor_dict[arg_name] = res + + # changes the image to images (rank 3 to rank 4) to be compatible to what + # we received in the first place + if fields.InputDataFields.image in tensor_dict: + image = tensor_dict[fields.InputDataFields.image] + images = tf.expand_dims(image, 0) + tensor_dict[fields.InputDataFields.image] = images + + return tensor_dict diff --git a/core/preprocessor_cache.py b/core/preprocessor_cache.py new file mode 100644 index 0000000..706d44c --- /dev/null +++ b/core/preprocessor_cache.py @@ -0,0 +1,107 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Records previous preprocessing operations and allows them to be repeated.
+
+Used with object_detection.core.preprocessor. Passing a PreprocessorCache
+into individual data augmentation functions or the general preprocess()
+function will store all randomly generated variables in the
+PreprocessorCache. When a preprocessor function is called multiple times
+with the same PreprocessorCache object, that function will perform the same
+augmentation on all calls.
+"""
+
+from collections import defaultdict
+
+
+class PreprocessorCache(object):
+  """Dictionary wrapper storing random variables generated during preprocessing.
+  """
+
+  # Constant keys representing different preprocessing functions
+  ROTATION90 = 'rotation90'
+  HORIZONTAL_FLIP = 'horizontal_flip'
+  VERTICAL_FLIP = 'vertical_flip'
+  PIXEL_VALUE_SCALE = 'pixel_value_scale'
+  IMAGE_SCALE = 'image_scale'
+  RGB_TO_GRAY = 'rgb_to_gray'
+  ADJUST_BRIGHTNESS = 'adjust_brightness'
+  ADJUST_CONTRAST = 'adjust_contrast'
+  ADJUST_HUE = 'adjust_hue'
+  ADJUST_SATURATION = 'adjust_saturation'
+  DISTORT_COLOR = 'distort_color'
+  STRICT_CROP_IMAGE = 'strict_crop_image'
+  CROP_IMAGE = 'crop_image'
+  PAD_IMAGE = 'pad_image'
+  CROP_TO_ASPECT_RATIO = 'crop_to_aspect_ratio'
+  RESIZE_METHOD = 'resize_method'
+  PAD_TO_ASPECT_RATIO = 'pad_to_aspect_ratio'
+  BLACK_PATCHES = 'black_patches'
+  ADD_BLACK_PATCH = 'add_black_patch'
+  SELECTOR = 'selector'
+  SELECTOR_TUPLES = 'selector_tuples'
+  SELF_CONCAT_IMAGE = 'self_concat_image'
+  SSD_CROP_SELECTOR_ID = 'ssd_crop_selector_id'
+  SSD_CROP_PAD_SELECTOR_ID = 'ssd_crop_pad_selector_id'
+  JPEG_QUALITY = 'jpeg_quality'
+  DOWNSCALE_TO_TARGET_PIXELS = 'downscale_to_target_pixels'
+  PATCH_GAUSSIAN = 'patch_gaussian'
+
+  # 27 permitted function ids
+  _VALID_FNS = [ROTATION90, HORIZONTAL_FLIP, VERTICAL_FLIP, PIXEL_VALUE_SCALE,
+                IMAGE_SCALE, RGB_TO_GRAY, ADJUST_BRIGHTNESS, ADJUST_CONTRAST,
+                ADJUST_HUE, ADJUST_SATURATION, DISTORT_COLOR, STRICT_CROP_IMAGE,
+                CROP_IMAGE, PAD_IMAGE, CROP_TO_ASPECT_RATIO, RESIZE_METHOD,
+                PAD_TO_ASPECT_RATIO, BLACK_PATCHES, ADD_BLACK_PATCH, SELECTOR,
+                SELECTOR_TUPLES, SELF_CONCAT_IMAGE, SSD_CROP_SELECTOR_ID,
+                SSD_CROP_PAD_SELECTOR_ID, JPEG_QUALITY,
+                DOWNSCALE_TO_TARGET_PIXELS, PATCH_GAUSSIAN]
+
+  def __init__(self):
+    self._history = defaultdict(dict)
+
+  def clear(self):
+    """Resets cache."""
+    self._history = defaultdict(dict)
+
+  def get(self, function_id, key):
+    """Gets stored value given a function id and key.
+
+    Args:
+      function_id: identifier for the preprocessing function used.
+      key: identifier for the variable stored.
+    Returns:
+      value: the corresponding value, expected to be a tensor or
+        nested structure of tensors.
+    Raises:
+      ValueError: if function_id is not one of the 27 valid function ids.
+    """
+    if function_id not in self._VALID_FNS:
+      raise ValueError('Function id not recognized: %s.' % str(function_id))
+    return self._history[function_id].get(key)
+
+  def update(self, function_id, key, value):
+    """Adds a value to the dictionary.
+
+    Args:
+      function_id: identifier for the preprocessing function used.
+      key: identifier for the variable stored.
+      value: the value to store, expected to be a tensor or nested structure
+        of tensors.
+    Raises:
+      ValueError: if function_id is not one of the 27 valid function ids.
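+
+    Example (illustrative; 'flip' is a made-up key): after
+    cache.update(PreprocessorCache.HORIZONTAL_FLIP, 'flip', tensor), a later
+    cache.get(PreprocessorCache.HORIZONTAL_FLIP, 'flip') returns the same
+    tensor, so the same flip can be replayed deterministically.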
+ """ + if function_id not in self._VALID_FNS: + raise ValueError('Function id not recognized: %s.' % str(function_id)) + self._history[function_id][key] = value diff --git a/core/preprocessor_test.py b/core/preprocessor_test.py new file mode 100644 index 0000000..3916774 --- /dev/null +++ b/core/preprocessor_test.py @@ -0,0 +1,3585 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for object_detection.core.preprocessor.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized +import numpy as np +import six +from six.moves import range +from six.moves import zip +import tensorflow as tf + +from object_detection.core import preprocessor +from object_detection.core import preprocessor_cache +from object_detection.core import standard_fields as fields + +if six.PY2: + import mock # pylint: disable=g-import-not-at-top +else: + from unittest import mock # pylint: disable=g-import-not-at-top + + +class PreprocessorTest(tf.test.TestCase, parameterized.TestCase): + + def createColorfulTestImage(self): + ch255 = tf.fill([1, 100, 200, 1], tf.constant(255, dtype=tf.uint8)) + ch128 = tf.fill([1, 100, 200, 1], tf.constant(128, dtype=tf.uint8)) + ch0 = tf.fill([1, 100, 200, 1], tf.constant(0, dtype=tf.uint8)) + imr = tf.concat([ch255, ch0, ch0], 3) + img = tf.concat([ch255, ch255, ch0], 3) + imb = tf.concat([ch255, ch0, ch255], 3) + imw = tf.concat([ch128, ch128, ch128], 3) + imu = tf.concat([imr, img], 2) + imd = tf.concat([imb, imw], 2) + im = tf.concat([imu, imd], 1) + return im + + def createTestImages(self): + images_r = tf.constant([[[128, 128, 128, 128], [0, 0, 128, 128], + [0, 128, 128, 128], [192, 192, 128, 128]]], + dtype=tf.uint8) + images_r = tf.expand_dims(images_r, 3) + images_g = tf.constant([[[0, 0, 128, 128], [0, 0, 128, 128], + [0, 128, 192, 192], [192, 192, 128, 192]]], + dtype=tf.uint8) + images_g = tf.expand_dims(images_g, 3) + images_b = tf.constant([[[128, 128, 192, 0], [0, 0, 128, 192], + [0, 128, 128, 0], [192, 192, 192, 128]]], + dtype=tf.uint8) + images_b = tf.expand_dims(images_b, 3) + images = tf.concat([images_r, images_g, images_b], 3) + return images + + def createEmptyTestBoxes(self): + boxes = tf.constant([[]], dtype=tf.float32) + return boxes + + def createTestBoxes(self): + boxes = tf.constant( + [[0.0, 0.25, 0.75, 1.0], [0.25, 0.5, 0.75, 1.0]], dtype=tf.float32) + return boxes + + def createTestGroundtruthWeights(self): + return tf.constant([1.0, 0.5], dtype=tf.float32) + + def createTestMasks(self): + mask = np.array([ + [[255.0, 0.0, 0.0], + [255.0, 0.0, 0.0], + [255.0, 0.0, 0.0]], + [[255.0, 255.0, 0.0], + [255.0, 255.0, 0.0], + [255.0, 255.0, 0.0]]]) + return tf.constant(mask, dtype=tf.float32) + + def createTestKeypoints(self): + keypoints = np.array([ + [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]], + [[0.4, 0.4], 
[0.5, 0.5], [0.6, 0.6]], + ]) + return tf.constant(keypoints, dtype=tf.float32) + + def createTestKeypointsInsideCrop(self): + keypoints = np.array([ + [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]], + [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]], + ]) + return tf.constant(keypoints, dtype=tf.float32) + + def createTestKeypointsOutsideCrop(self): + keypoints = np.array([ + [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]], + [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]], + ]) + return tf.constant(keypoints, dtype=tf.float32) + + def createKeypointFlipPermutation(self): + return np.array([0, 2, 1], dtype=np.int32) + + def createTestLabels(self): + labels = tf.constant([1, 2], dtype=tf.int32) + return labels + + def createTestLabelsLong(self): + labels = tf.constant([1, 2, 4], dtype=tf.int32) + return labels + + def createTestBoxesOutOfImage(self): + boxes = tf.constant( + [[-0.1, 0.25, 0.75, 1], [0.25, 0.5, 0.75, 1.1]], dtype=tf.float32) + return boxes + + def createTestMultiClassScores(self): + return tf.constant([[1.0, 0.0], [0.5, 0.5]], dtype=tf.float32) + + def expectedImagesAfterNormalization(self): + images_r = tf.constant([[[0, 0, 0, 0], [-1, -1, 0, 0], + [-1, 0, 0, 0], [0.5, 0.5, 0, 0]]], + dtype=tf.float32) + images_r = tf.expand_dims(images_r, 3) + images_g = tf.constant([[[-1, -1, 0, 0], [-1, -1, 0, 0], + [-1, 0, 0.5, 0.5], [0.5, 0.5, 0, 0.5]]], + dtype=tf.float32) + images_g = tf.expand_dims(images_g, 3) + images_b = tf.constant([[[0, 0, 0.5, -1], [-1, -1, 0, 0.5], + [-1, 0, 0, -1], [0.5, 0.5, 0.5, 0]]], + dtype=tf.float32) + images_b = tf.expand_dims(images_b, 3) + images = tf.concat([images_r, images_g, images_b], 3) + return images + + def expectedMaxImageAfterColorScale(self): + images_r = tf.constant([[[0.1, 0.1, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1], + [-0.9, 0.1, 0.1, 0.1], [0.6, 0.6, 0.1, 0.1]]], + dtype=tf.float32) + images_r = tf.expand_dims(images_r, 3) + images_g = tf.constant([[[-0.9, -0.9, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1], + [-0.9, 0.1, 0.6, 0.6], [0.6, 0.6, 0.1, 0.6]]], + dtype=tf.float32) + images_g = tf.expand_dims(images_g, 3) + images_b = tf.constant([[[0.1, 0.1, 0.6, -0.9], [-0.9, -0.9, 0.1, 0.6], + [-0.9, 0.1, 0.1, -0.9], [0.6, 0.6, 0.6, 0.1]]], + dtype=tf.float32) + images_b = tf.expand_dims(images_b, 3) + images = tf.concat([images_r, images_g, images_b], 3) + return images + + def expectedMinImageAfterColorScale(self): + images_r = tf.constant([[[-0.1, -0.1, -0.1, -0.1], [-1, -1, -0.1, -0.1], + [-1, -0.1, -0.1, -0.1], [0.4, 0.4, -0.1, -0.1]]], + dtype=tf.float32) + images_r = tf.expand_dims(images_r, 3) + images_g = tf.constant([[[-1, -1, -0.1, -0.1], [-1, -1, -0.1, -0.1], + [-1, -0.1, 0.4, 0.4], [0.4, 0.4, -0.1, 0.4]]], + dtype=tf.float32) + images_g = tf.expand_dims(images_g, 3) + images_b = tf.constant([[[-0.1, -0.1, 0.4, -1], [-1, -1, -0.1, 0.4], + [-1, -0.1, -0.1, -1], [0.4, 0.4, 0.4, -0.1]]], + dtype=tf.float32) + images_b = tf.expand_dims(images_b, 3) + images = tf.concat([images_r, images_g, images_b], 3) + return images + + def expectedImagesAfterLeftRightFlip(self): + images_r = tf.constant([[[0, 0, 0, 0], [0, 0, -1, -1], + [0, 0, 0, -1], [0, 0, 0.5, 0.5]]], + dtype=tf.float32) + images_r = tf.expand_dims(images_r, 3) + images_g = tf.constant([[[0, 0, -1, -1], [0, 0, -1, -1], + [0.5, 0.5, 0, -1], [0.5, 0, 0.5, 0.5]]], + dtype=tf.float32) + images_g = tf.expand_dims(images_g, 3) + images_b = tf.constant([[[-1, 0.5, 0, 0], [0.5, 0, -1, -1], + [-1, 0, 0, -1], [0, 0.5, 0.5, 0.5]]], + dtype=tf.float32) + images_b = tf.expand_dims(images_b, 3) + images = tf.concat([images_r, images_g, 
images_b], 3) + return images + + def expectedImagesAfterUpDownFlip(self): + images_r = tf.constant([[[0.5, 0.5, 0, 0], [-1, 0, 0, 0], + [-1, -1, 0, 0], [0, 0, 0, 0]]], + dtype=tf.float32) + images_r = tf.expand_dims(images_r, 3) + images_g = tf.constant([[[0.5, 0.5, 0, 0.5], [-1, 0, 0.5, 0.5], + [-1, -1, 0, 0], [-1, -1, 0, 0]]], + dtype=tf.float32) + images_g = tf.expand_dims(images_g, 3) + images_b = tf.constant([[[0.5, 0.5, 0.5, 0], [-1, 0, 0, -1], + [-1, -1, 0, 0.5], [0, 0, 0.5, -1]]], + dtype=tf.float32) + images_b = tf.expand_dims(images_b, 3) + images = tf.concat([images_r, images_g, images_b], 3) + return images + + def expectedImagesAfterRot90(self): + images_r = tf.constant([[[0, 0, 0, 0], [0, 0, 0, 0], + [0, -1, 0, 0.5], [0, -1, -1, 0.5]]], + dtype=tf.float32) + images_r = tf.expand_dims(images_r, 3) + images_g = tf.constant([[[0, 0, 0.5, 0.5], [0, 0, 0.5, 0], + [-1, -1, 0, 0.5], [-1, -1, -1, 0.5]]], + dtype=tf.float32) + images_g = tf.expand_dims(images_g, 3) + images_b = tf.constant([[[-1, 0.5, -1, 0], [0.5, 0, 0, 0.5], + [0, -1, 0, 0.5], [0, -1, -1, 0.5]]], + dtype=tf.float32) + images_b = tf.expand_dims(images_b, 3) + images = tf.concat([images_r, images_g, images_b], 3) + return images + + def expectedBoxesAfterLeftRightFlip(self): + boxes = tf.constant([[0.0, 0.0, 0.75, 0.75], [0.25, 0.0, 0.75, 0.5]], + dtype=tf.float32) + return boxes + + def expectedBoxesAfterUpDownFlip(self): + boxes = tf.constant([[0.25, 0.25, 1.0, 1.0], [0.25, 0.5, 0.75, 1.0]], + dtype=tf.float32) + return boxes + + def expectedBoxesAfterRot90(self): + boxes = tf.constant( + [[0.0, 0.0, 0.75, 0.75], [0.0, 0.25, 0.5, 0.75]], dtype=tf.float32) + return boxes + + def expectedMasksAfterLeftRightFlip(self): + mask = np.array([ + [[0.0, 0.0, 255.0], + [0.0, 0.0, 255.0], + [0.0, 0.0, 255.0]], + [[0.0, 255.0, 255.0], + [0.0, 255.0, 255.0], + [0.0, 255.0, 255.0]]]) + return tf.constant(mask, dtype=tf.float32) + + def expectedMasksAfterUpDownFlip(self): + mask = np.array([ + [[255.0, 0.0, 0.0], + [255.0, 0.0, 0.0], + [255.0, 0.0, 0.0]], + [[255.0, 255.0, 0.0], + [255.0, 255.0, 0.0], + [255.0, 255.0, 0.0]]]) + return tf.constant(mask, dtype=tf.float32) + + def expectedMasksAfterRot90(self): + mask = np.array([ + [[0.0, 0.0, 0.0], + [0.0, 0.0, 0.0], + [255.0, 255.0, 255.0]], + [[0.0, 0.0, 0.0], + [255.0, 255.0, 255.0], + [255.0, 255.0, 255.0]]]) + return tf.constant(mask, dtype=tf.float32) + + def expectedLabelScoresAfterThresholding(self): + return tf.constant([1.0], dtype=tf.float32) + + def expectedBoxesAfterThresholding(self): + return tf.constant([[0.0, 0.25, 0.75, 1.0]], dtype=tf.float32) + + def expectedLabelsAfterThresholding(self): + return tf.constant([1], dtype=tf.float32) + + def expectedMultiClassScoresAfterThresholding(self): + return tf.constant([[1.0, 0.0]], dtype=tf.float32) + + def expectedMasksAfterThresholding(self): + mask = np.array([ + [[255.0, 0.0, 0.0], + [255.0, 0.0, 0.0], + [255.0, 0.0, 0.0]]]) + return tf.constant(mask, dtype=tf.float32) + + def expectedKeypointsAfterThresholding(self): + keypoints = np.array([ + [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]] + ]) + return tf.constant(keypoints, dtype=tf.float32) + + def expectedLabelScoresAfterThresholdingWithMissingScore(self): + return tf.constant([np.nan], dtype=tf.float32) + + def expectedBoxesAfterThresholdingWithMissingScore(self): + return tf.constant([[0.25, 0.5, 0.75, 1]], dtype=tf.float32) + + def expectedLabelsAfterThresholdingWithMissingScore(self): + return tf.constant([2], dtype=tf.float32) + + def 
expectedLabelScoresAfterDropping(self): + return tf.constant([0.5], dtype=tf.float32) + + def expectedBoxesAfterDropping(self): + return tf.constant([[0.25, 0.5, 0.75, 1.0]], dtype=tf.float32) + + def expectedLabelsAfterDropping(self): + return tf.constant([2], dtype=tf.float32) + + def expectedMultiClassScoresAfterDropping(self): + return tf.constant([[0.5, 0.5]], dtype=tf.float32) + + def expectedMasksAfterDropping(self): + masks = np.array([[[255.0, 255.0, 0.0], [255.0, 255.0, 0.0], + [255.0, 255.0, 0.0]]]) + return tf.constant(masks, dtype=tf.float32) + + def expectedKeypointsAfterDropping(self): + keypoints = np.array([[[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]]) + return tf.constant(keypoints, dtype=tf.float32) + + def expectedLabelsAfterRemapping(self): + return tf.constant([3, 3, 4], dtype=tf.float32) + + def testRgbToGrayscale(self): + images = self.createTestImages() + grayscale_images = preprocessor._rgb_to_grayscale(images) + expected_images = tf.image.rgb_to_grayscale(images) + with self.test_session() as sess: + (grayscale_images, expected_images) = sess.run( + [grayscale_images, expected_images]) + self.assertAllEqual(expected_images, grayscale_images) + + def testNormalizeImage(self): + preprocess_options = [(preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 256, + 'target_minval': -1, + 'target_maxval': 1 + })] + images = self.createTestImages() + tensor_dict = {fields.InputDataFields.image: images} + tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) + images = tensor_dict[fields.InputDataFields.image] + images_expected = self.expectedImagesAfterNormalization() + + with self.test_session() as sess: + (images_, images_expected_) = sess.run( + [images, images_expected]) + images_shape_ = images_.shape + images_expected_shape_ = images_expected_.shape + expected_shape = [1, 4, 4, 3] + self.assertAllEqual(images_expected_shape_, images_shape_) + self.assertAllEqual(images_shape_, expected_shape) + self.assertAllClose(images_, images_expected_) + + def testRetainBoxesAboveThreshold(self): + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + (retained_boxes, retained_labels, + retained_weights) = preprocessor.retain_boxes_above_threshold( + boxes, labels, weights, threshold=0.6) + with self.test_session() as sess: + (retained_boxes_, retained_labels_, retained_weights_, + expected_retained_boxes_, expected_retained_labels_, + expected_retained_weights_) = sess.run([ + retained_boxes, retained_labels, retained_weights, + self.expectedBoxesAfterThresholding(), + self.expectedLabelsAfterThresholding(), + self.expectedLabelScoresAfterThresholding()]) + self.assertAllClose( + retained_boxes_, expected_retained_boxes_) + self.assertAllClose( + retained_labels_, expected_retained_labels_) + self.assertAllClose( + retained_weights_, expected_retained_weights_) + + def testRetainBoxesAboveThresholdWithMultiClassScores(self): + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + multiclass_scores = self.createTestMultiClassScores() + (_, _, _, + retained_multiclass_scores) = preprocessor.retain_boxes_above_threshold( + boxes, + labels, + weights, + multiclass_scores=multiclass_scores, + threshold=0.6) + with self.test_session() as sess: + (retained_multiclass_scores_, + expected_retained_multiclass_scores_) = sess.run([ + retained_multiclass_scores, + self.expectedMultiClassScoresAfterThresholding() + ]) + + 
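+      # retain_boxes_above_threshold keeps only the rows whose groundtruth
+      # weight is above the threshold. With weights [1.0, 0.5] and
+      # threshold=0.6, only the first box survives, so only its multiclass
+      # scores ([1.0, 0.0]) should remain.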
self.assertAllClose(retained_multiclass_scores_, + expected_retained_multiclass_scores_) + + def testRetainBoxesAboveThresholdWithMasks(self): + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + masks = self.createTestMasks() + _, _, _, retained_masks = preprocessor.retain_boxes_above_threshold( + boxes, labels, weights, masks, threshold=0.6) + with self.test_session() as sess: + retained_masks_, expected_retained_masks_ = sess.run([ + retained_masks, self.expectedMasksAfterThresholding()]) + + self.assertAllClose( + retained_masks_, expected_retained_masks_) + + def testRetainBoxesAboveThresholdWithKeypoints(self): + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + keypoints = self.createTestKeypoints() + (_, _, _, retained_keypoints) = preprocessor.retain_boxes_above_threshold( + boxes, labels, weights, keypoints=keypoints, threshold=0.6) + with self.test_session() as sess: + (retained_keypoints_, + expected_retained_keypoints_) = sess.run([ + retained_keypoints, + self.expectedKeypointsAfterThresholding()]) + + self.assertAllClose( + retained_keypoints_, expected_retained_keypoints_) + + def testDropLabelProbabilistically(self): + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + (retained_boxes, retained_labels, + retained_weights) = preprocessor.drop_label_probabilistically( + boxes, labels, weights, dropped_label=1, drop_probability=1.0) + with self.test_session() as sess: + (retained_boxes_, retained_labels_, retained_weights_, + expected_retained_boxes_, expected_retained_labels_, + expected_retained_weights_) = sess.run([ + retained_boxes, retained_labels, retained_weights, + self.expectedBoxesAfterDropping(), + self.expectedLabelsAfterDropping(), + self.expectedLabelScoresAfterDropping() + ]) + self.assertAllClose(retained_boxes_, expected_retained_boxes_) + self.assertAllClose(retained_labels_, expected_retained_labels_) + self.assertAllClose(retained_weights_, expected_retained_weights_) + + def testDropLabelProbabilisticallyWithProbabilityHalf(self): + # Boxes contain one box of label 2 and one box of label 1 which should be + # dropped ~50% of the time. 
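+    # Label 2 is always retained, while the label-1 box survives each run
+    # with probability 0.5, so the expected number of retained labels per
+    # run is 1 + 0.5 = 1.5; the average over num_tests runs should land
+    # near that value, within the window asserted below.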
+ num_tests = 100 + total = 0 + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + (_, retained_labels, _) = preprocessor.drop_label_probabilistically( + boxes, labels, weights, dropped_label=1, drop_probability=0.5) + for _ in range(num_tests): + with self.test_session() as sess: + retained_labels_ = sess.run(retained_labels) + total += len(retained_labels_) + self.assertIn(2, retained_labels_) + av = total * 1.0 / num_tests + self.assertGreater(av, 1.40) + self.assertLess(av, 1.50) + + def testDropLabelProbabilisticallyWithMultiClassScores(self): + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + multiclass_scores = self.createTestMultiClassScores() + (_, _, _, + retained_multiclass_scores) = preprocessor.drop_label_probabilistically( + boxes, + labels, + weights, + multiclass_scores=multiclass_scores, + dropped_label=1, + drop_probability=1.0) + with self.test_session() as sess: + (retained_multiclass_scores_, + expected_retained_multiclass_scores_) = sess.run([ + retained_multiclass_scores, + self.expectedMultiClassScoresAfterDropping() + ]) + self.assertAllClose(retained_multiclass_scores_, + expected_retained_multiclass_scores_) + + def testDropLabelProbabilisticallyWithMasks(self): + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + masks = self.createTestMasks() + (_, _, _, retained_masks) = preprocessor.drop_label_probabilistically( + boxes, + labels, + weights, + masks=masks, + dropped_label=1, + drop_probability=1.0) + with self.test_session() as sess: + (retained_masks_, expected_retained_masks_) = sess.run( + [retained_masks, self.expectedMasksAfterDropping()]) + self.assertAllClose(retained_masks_, expected_retained_masks_) + + def testDropLabelProbabilisticallyWithKeypoints(self): + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + keypoints = self.createTestKeypoints() + (_, _, _, retained_keypoints) = preprocessor.drop_label_probabilistically( + boxes, + labels, + weights, + keypoints=keypoints, + dropped_label=1, + drop_probability=1.0) + with self.test_session() as sess: + (retained_keypoints_, expected_retained_keypoints_) = sess.run( + [retained_keypoints, + self.expectedKeypointsAfterDropping()]) + self.assertAllClose(retained_keypoints_, expected_retained_keypoints_) + + def testRemapLabels(self): + labels = self.createTestLabelsLong() + remapped_labels = preprocessor.remap_labels(labels, [1, 2], 3) + with self.test_session() as sess: + (remapped_labels_, expected_remapped_labels_) = sess.run( + [remapped_labels, self.expectedLabelsAfterRemapping()]) + self.assertAllClose(remapped_labels_, expected_remapped_labels_) + + def testFlipBoxesLeftRight(self): + boxes = self.createTestBoxes() + flipped_boxes = preprocessor._flip_boxes_left_right(boxes) + expected_boxes = self.expectedBoxesAfterLeftRightFlip() + with self.test_session() as sess: + flipped_boxes, expected_boxes = sess.run([flipped_boxes, expected_boxes]) + self.assertAllEqual(flipped_boxes.flatten(), expected_boxes.flatten()) + + def testFlipBoxesUpDown(self): + boxes = self.createTestBoxes() + flipped_boxes = preprocessor._flip_boxes_up_down(boxes) + expected_boxes = self.expectedBoxesAfterUpDownFlip() + with self.test_session() as sess: + flipped_boxes, expected_boxes = sess.run([flipped_boxes, expected_boxes]) + 
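+      # For normalized boxes in [ymin, xmin, ymax, xmax] order, an up-down
+      # flip maps ymin -> 1 - ymax and ymax -> 1 - ymin while leaving the
+      # x-coordinates untouched, e.g. [0.0, 0.25, 0.75, 1.0] becomes
+      # [0.25, 0.25, 1.0, 1.0].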
self.assertAllEqual(flipped_boxes.flatten(), expected_boxes.flatten()) + + def testRot90Boxes(self): + boxes = self.createTestBoxes() + rotated_boxes = preprocessor._rot90_boxes(boxes) + expected_boxes = self.expectedBoxesAfterRot90() + with self.test_session() as sess: + rotated_boxes, expected_boxes = sess.run([rotated_boxes, expected_boxes]) + self.assertAllEqual(rotated_boxes.flatten(), expected_boxes.flatten()) + + def testFlipMasksLeftRight(self): + test_mask = self.createTestMasks() + flipped_mask = preprocessor._flip_masks_left_right(test_mask) + expected_mask = self.expectedMasksAfterLeftRightFlip() + with self.test_session() as sess: + flipped_mask, expected_mask = sess.run([flipped_mask, expected_mask]) + self.assertAllEqual(flipped_mask.flatten(), expected_mask.flatten()) + + def testFlipMasksUpDown(self): + test_mask = self.createTestMasks() + flipped_mask = preprocessor._flip_masks_up_down(test_mask) + expected_mask = self.expectedMasksAfterUpDownFlip() + with self.test_session() as sess: + flipped_mask, expected_mask = sess.run([flipped_mask, expected_mask]) + self.assertAllEqual(flipped_mask.flatten(), expected_mask.flatten()) + + def testRot90Masks(self): + test_mask = self.createTestMasks() + rotated_mask = preprocessor._rot90_masks(test_mask) + expected_mask = self.expectedMasksAfterRot90() + with self.test_session() as sess: + rotated_mask, expected_mask = sess.run([rotated_mask, expected_mask]) + self.assertAllEqual(rotated_mask.flatten(), expected_mask.flatten()) + + def _testPreprocessorCache(self, + preprocess_options, + test_boxes=False, + test_masks=False, + test_keypoints=False, + num_runs=4): + cache = preprocessor_cache.PreprocessorCache() + images = self.createTestImages() + boxes = self.createTestBoxes() + weights = self.createTestGroundtruthWeights() + classes = self.createTestLabels() + masks = self.createTestMasks() + keypoints = self.createTestKeypoints() + preprocessor_arg_map = preprocessor.get_default_func_arg_map( + include_instance_masks=test_masks, include_keypoints=test_keypoints) + out = [] + for i in range(num_runs): + tensor_dict = { + fields.InputDataFields.image: images, + fields.InputDataFields.groundtruth_weights: weights + } + num_outputs = 1 + if test_boxes: + tensor_dict[fields.InputDataFields.groundtruth_boxes] = boxes + tensor_dict[fields.InputDataFields.groundtruth_classes] = classes + num_outputs += 1 + if test_masks: + tensor_dict[fields.InputDataFields.groundtruth_instance_masks] = masks + num_outputs += 1 + if test_keypoints: + tensor_dict[fields.InputDataFields.groundtruth_keypoints] = keypoints + num_outputs += 1 + out.append(preprocessor.preprocess( + tensor_dict, preprocess_options, preprocessor_arg_map, cache)) + + with self.test_session() as sess: + to_run = [] + for i in range(num_runs): + to_run.append(out[i][fields.InputDataFields.image]) + if test_boxes: + to_run.append(out[i][fields.InputDataFields.groundtruth_boxes]) + if test_masks: + to_run.append( + out[i][fields.InputDataFields.groundtruth_instance_masks]) + if test_keypoints: + to_run.append(out[i][fields.InputDataFields.groundtruth_keypoints]) + + out_array = sess.run(to_run) + for i in range(num_outputs, len(out_array)): + self.assertAllClose(out_array[i], out_array[i - num_outputs]) + + def testRandomHorizontalFlip(self): + preprocess_options = [(preprocessor.random_horizontal_flip, {})] + images = self.expectedImagesAfterNormalization() + boxes = self.createTestBoxes() + tensor_dict = {fields.InputDataFields.image: images, + 
fields.InputDataFields.groundtruth_boxes: boxes} + images_expected1 = self.expectedImagesAfterLeftRightFlip() + boxes_expected1 = self.expectedBoxesAfterLeftRightFlip() + images_expected2 = images + boxes_expected2 = boxes + tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) + images = tensor_dict[fields.InputDataFields.image] + boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] + + boxes_diff1 = tf.squared_difference(boxes, boxes_expected1) + boxes_diff2 = tf.squared_difference(boxes, boxes_expected2) + boxes_diff = tf.multiply(boxes_diff1, boxes_diff2) + boxes_diff_expected = tf.zeros_like(boxes_diff) + + images_diff1 = tf.squared_difference(images, images_expected1) + images_diff2 = tf.squared_difference(images, images_expected2) + images_diff = tf.multiply(images_diff1, images_diff2) + images_diff_expected = tf.zeros_like(images_diff) + + with self.test_session() as sess: + (images_diff_, images_diff_expected_, boxes_diff_, + boxes_diff_expected_) = sess.run([images_diff, images_diff_expected, + boxes_diff, boxes_diff_expected]) + self.assertAllClose(boxes_diff_, boxes_diff_expected_) + self.assertAllClose(images_diff_, images_diff_expected_) + + def testRandomHorizontalFlipWithEmptyBoxes(self): + preprocess_options = [(preprocessor.random_horizontal_flip, {})] + images = self.expectedImagesAfterNormalization() + boxes = self.createEmptyTestBoxes() + tensor_dict = {fields.InputDataFields.image: images, + fields.InputDataFields.groundtruth_boxes: boxes} + images_expected1 = self.expectedImagesAfterLeftRightFlip() + boxes_expected = self.createEmptyTestBoxes() + images_expected2 = images + tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) + images = tensor_dict[fields.InputDataFields.image] + boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] + + images_diff1 = tf.squared_difference(images, images_expected1) + images_diff2 = tf.squared_difference(images, images_expected2) + images_diff = tf.multiply(images_diff1, images_diff2) + images_diff_expected = tf.zeros_like(images_diff) + + with self.test_session() as sess: + (images_diff_, images_diff_expected_, boxes_, + boxes_expected_) = sess.run([images_diff, images_diff_expected, boxes, + boxes_expected]) + self.assertAllClose(boxes_, boxes_expected_) + self.assertAllClose(images_diff_, images_diff_expected_) + + def testRandomHorizontalFlipWithCache(self): + keypoint_flip_permutation = self.createKeypointFlipPermutation() + preprocess_options = [ + (preprocessor.random_horizontal_flip, + {'keypoint_flip_permutation': keypoint_flip_permutation})] + self._testPreprocessorCache(preprocess_options, + test_boxes=True, + test_masks=True, + test_keypoints=True) + + def testRunRandomHorizontalFlipWithMaskAndKeypoints(self): + preprocess_options = [(preprocessor.random_horizontal_flip, {})] + image_height = 3 + image_width = 3 + images = tf.random_uniform([1, image_height, image_width, 3]) + boxes = self.createTestBoxes() + masks = self.createTestMasks() + keypoints = self.createTestKeypoints() + keypoint_flip_permutation = self.createKeypointFlipPermutation() + tensor_dict = { + fields.InputDataFields.image: images, + fields.InputDataFields.groundtruth_boxes: boxes, + fields.InputDataFields.groundtruth_instance_masks: masks, + fields.InputDataFields.groundtruth_keypoints: keypoints + } + preprocess_options = [ + (preprocessor.random_horizontal_flip, + {'keypoint_flip_permutation': keypoint_flip_permutation})] + preprocessor_arg_map = preprocessor.get_default_func_arg_map( + 
include_instance_masks=True, include_keypoints=True) + tensor_dict = preprocessor.preprocess( + tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map) + boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] + masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks] + keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints] + with self.test_session() as sess: + boxes, masks, keypoints = sess.run([boxes, masks, keypoints]) + self.assertTrue(boxes is not None) + self.assertTrue(masks is not None) + self.assertTrue(keypoints is not None) + + def testRandomVerticalFlip(self): + preprocess_options = [(preprocessor.random_vertical_flip, {})] + images = self.expectedImagesAfterNormalization() + boxes = self.createTestBoxes() + tensor_dict = {fields.InputDataFields.image: images, + fields.InputDataFields.groundtruth_boxes: boxes} + images_expected1 = self.expectedImagesAfterUpDownFlip() + boxes_expected1 = self.expectedBoxesAfterUpDownFlip() + images_expected2 = images + boxes_expected2 = boxes + tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) + images = tensor_dict[fields.InputDataFields.image] + boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] + + boxes_diff1 = tf.squared_difference(boxes, boxes_expected1) + boxes_diff2 = tf.squared_difference(boxes, boxes_expected2) + boxes_diff = tf.multiply(boxes_diff1, boxes_diff2) + boxes_diff_expected = tf.zeros_like(boxes_diff) + + images_diff1 = tf.squared_difference(images, images_expected1) + images_diff2 = tf.squared_difference(images, images_expected2) + images_diff = tf.multiply(images_diff1, images_diff2) + images_diff_expected = tf.zeros_like(images_diff) + + with self.test_session() as sess: + (images_diff_, images_diff_expected_, boxes_diff_, + boxes_diff_expected_) = sess.run([images_diff, images_diff_expected, + boxes_diff, boxes_diff_expected]) + self.assertAllClose(boxes_diff_, boxes_diff_expected_) + self.assertAllClose(images_diff_, images_diff_expected_) + + def testRandomVerticalFlipWithEmptyBoxes(self): + preprocess_options = [(preprocessor.random_vertical_flip, {})] + images = self.expectedImagesAfterNormalization() + boxes = self.createEmptyTestBoxes() + tensor_dict = {fields.InputDataFields.image: images, + fields.InputDataFields.groundtruth_boxes: boxes} + images_expected1 = self.expectedImagesAfterUpDownFlip() + boxes_expected = self.createEmptyTestBoxes() + images_expected2 = images + tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) + images = tensor_dict[fields.InputDataFields.image] + boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] + + images_diff1 = tf.squared_difference(images, images_expected1) + images_diff2 = tf.squared_difference(images, images_expected2) + images_diff = tf.multiply(images_diff1, images_diff2) + images_diff_expected = tf.zeros_like(images_diff) + + with self.test_session() as sess: + (images_diff_, images_diff_expected_, boxes_, + boxes_expected_) = sess.run([images_diff, images_diff_expected, boxes, + boxes_expected]) + self.assertAllClose(boxes_, boxes_expected_) + self.assertAllClose(images_diff_, images_diff_expected_) + + def testRandomVerticalFlipWithCache(self): + keypoint_flip_permutation = self.createKeypointFlipPermutation() + preprocess_options = [ + (preprocessor.random_vertical_flip, + {'keypoint_flip_permutation': keypoint_flip_permutation})] + self._testPreprocessorCache(preprocess_options, + test_boxes=True, + test_masks=True, + test_keypoints=True) + + def 
testRunRandomVerticalFlipWithMaskAndKeypoints(self): + preprocess_options = [(preprocessor.random_vertical_flip, {})] + image_height = 3 + image_width = 3 + images = tf.random_uniform([1, image_height, image_width, 3]) + boxes = self.createTestBoxes() + masks = self.createTestMasks() + keypoints = self.createTestKeypoints() + keypoint_flip_permutation = self.createKeypointFlipPermutation() + tensor_dict = { + fields.InputDataFields.image: images, + fields.InputDataFields.groundtruth_boxes: boxes, + fields.InputDataFields.groundtruth_instance_masks: masks, + fields.InputDataFields.groundtruth_keypoints: keypoints + } + preprocess_options = [ + (preprocessor.random_vertical_flip, + {'keypoint_flip_permutation': keypoint_flip_permutation})] + preprocessor_arg_map = preprocessor.get_default_func_arg_map( + include_instance_masks=True, include_keypoints=True) + tensor_dict = preprocessor.preprocess( + tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map) + boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] + masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks] + keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints] + with self.test_session() as sess: + boxes, masks, keypoints = sess.run([boxes, masks, keypoints]) + self.assertTrue(boxes is not None) + self.assertTrue(masks is not None) + self.assertTrue(keypoints is not None) + + def testRandomRotation90(self): + preprocess_options = [(preprocessor.random_rotation90, {})] + images = self.expectedImagesAfterNormalization() + boxes = self.createTestBoxes() + tensor_dict = {fields.InputDataFields.image: images, + fields.InputDataFields.groundtruth_boxes: boxes} + images_expected1 = self.expectedImagesAfterRot90() + boxes_expected1 = self.expectedBoxesAfterRot90() + images_expected2 = images + boxes_expected2 = boxes + tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) + images = tensor_dict[fields.InputDataFields.image] + boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] + + boxes_diff1 = tf.squared_difference(boxes, boxes_expected1) + boxes_diff2 = tf.squared_difference(boxes, boxes_expected2) + boxes_diff = tf.multiply(boxes_diff1, boxes_diff2) + boxes_diff_expected = tf.zeros_like(boxes_diff) + + images_diff1 = tf.squared_difference(images, images_expected1) + images_diff2 = tf.squared_difference(images, images_expected2) + images_diff = tf.multiply(images_diff1, images_diff2) + images_diff_expected = tf.zeros_like(images_diff) + + with self.test_session() as sess: + (images_diff_, images_diff_expected_, boxes_diff_, + boxes_diff_expected_) = sess.run([images_diff, images_diff_expected, + boxes_diff, boxes_diff_expected]) + self.assertAllClose(boxes_diff_, boxes_diff_expected_) + self.assertAllClose(images_diff_, images_diff_expected_) + + def testRandomRotation90WithEmptyBoxes(self): + preprocess_options = [(preprocessor.random_rotation90, {})] + images = self.expectedImagesAfterNormalization() + boxes = self.createEmptyTestBoxes() + tensor_dict = {fields.InputDataFields.image: images, + fields.InputDataFields.groundtruth_boxes: boxes} + images_expected1 = self.expectedImagesAfterRot90() + boxes_expected = self.createEmptyTestBoxes() + images_expected2 = images + tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) + images = tensor_dict[fields.InputDataFields.image] + boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] + + images_diff1 = tf.squared_difference(images, images_expected1) + images_diff2 = 
tf.squared_difference(images, images_expected2) + images_diff = tf.multiply(images_diff1, images_diff2) + images_diff_expected = tf.zeros_like(images_diff) + + with self.test_session() as sess: + (images_diff_, images_diff_expected_, boxes_, + boxes_expected_) = sess.run([images_diff, images_diff_expected, boxes, + boxes_expected]) + self.assertAllClose(boxes_, boxes_expected_) + self.assertAllClose(images_diff_, images_diff_expected_) + + def testRandomRotation90WithCache(self): + preprocess_options = [(preprocessor.random_rotation90, {})] + self._testPreprocessorCache(preprocess_options, + test_boxes=True, + test_masks=True, + test_keypoints=True) + + def testRunRandomRotation90WithMaskAndKeypoints(self): + preprocess_options = [(preprocessor.random_rotation90, {})] + image_height = 3 + image_width = 3 + images = tf.random_uniform([1, image_height, image_width, 3]) + boxes = self.createTestBoxes() + masks = self.createTestMasks() + keypoints = self.createTestKeypoints() + tensor_dict = { + fields.InputDataFields.image: images, + fields.InputDataFields.groundtruth_boxes: boxes, + fields.InputDataFields.groundtruth_instance_masks: masks, + fields.InputDataFields.groundtruth_keypoints: keypoints + } + preprocessor_arg_map = preprocessor.get_default_func_arg_map( + include_instance_masks=True, include_keypoints=True) + tensor_dict = preprocessor.preprocess( + tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map) + boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] + masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks] + keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints] + with self.test_session() as sess: + boxes, masks, keypoints = sess.run([boxes, masks, keypoints]) + self.assertTrue(boxes is not None) + self.assertTrue(masks is not None) + self.assertTrue(keypoints is not None) + + def testRandomPixelValueScale(self): + preprocessing_options = [] + preprocessing_options.append((preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + })) + preprocessing_options.append((preprocessor.random_pixel_value_scale, {})) + images = self.createTestImages() + tensor_dict = {fields.InputDataFields.image: images} + tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) + images_min = tf.cast(images, dtype=tf.float32) * 0.9 / 255.0 + images_max = tf.cast(images, dtype=tf.float32) * 1.1 / 255.0 + images = tensor_dict[fields.InputDataFields.image] + values_greater = tf.greater_equal(images, images_min) + values_less = tf.less_equal(images, images_max) + values_true = tf.fill([1, 4, 4, 3], True) + with self.test_session() as sess: + (values_greater_, values_less_, values_true_) = sess.run( + [values_greater, values_less, values_true]) + self.assertAllClose(values_greater_, values_true_) + self.assertAllClose(values_less_, values_true_) + + def testRandomPixelValueScaleWithCache(self): + preprocess_options = [] + preprocess_options.append((preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + })) + preprocess_options.append((preprocessor.random_pixel_value_scale, {})) + self._testPreprocessorCache(preprocess_options, + test_boxes=True, + test_masks=False, + test_keypoints=False) + + def testRandomImageScale(self): + preprocess_options = [(preprocessor.random_image_scale, {})] + images_original = self.createTestImages() + tensor_dict = {fields.InputDataFields.image: images_original} + 
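+    # random_image_scale resizes the image by a randomly drawn factor, so
+    # the checks below only bound the scaled height and width to within
+    # [0.5x, 2.0x] of the original, which presumably mirrors the op's
+    # default minimum/maximum scale ratios.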
tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) + images_scaled = tensor_dict[fields.InputDataFields.image] + images_original_shape = tf.shape(images_original) + images_scaled_shape = tf.shape(images_scaled) + with self.test_session() as sess: + (images_original_shape_, images_scaled_shape_) = sess.run( + [images_original_shape, images_scaled_shape]) + self.assertTrue( + images_original_shape_[1] * 0.5 <= images_scaled_shape_[1]) + self.assertTrue( + images_original_shape_[1] * 2.0 >= images_scaled_shape_[1]) + self.assertTrue( + images_original_shape_[2] * 0.5 <= images_scaled_shape_[2]) + self.assertTrue( + images_original_shape_[2] * 2.0 >= images_scaled_shape_[2]) + + def testRandomImageScaleWithCache(self): + preprocess_options = [(preprocessor.random_image_scale, {})] + self._testPreprocessorCache(preprocess_options, + test_boxes=False, + test_masks=False, + test_keypoints=False) + + def testRandomRGBtoGray(self): + preprocess_options = [(preprocessor.random_rgb_to_gray, {})] + images_original = self.createTestImages() + tensor_dict = {fields.InputDataFields.image: images_original} + tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) + images_gray = tensor_dict[fields.InputDataFields.image] + images_gray_r, images_gray_g, images_gray_b = tf.split( + value=images_gray, num_or_size_splits=3, axis=3) + images_r, images_g, images_b = tf.split( + value=images_original, num_or_size_splits=3, axis=3) + images_r_diff1 = tf.squared_difference( + tf.cast(images_r, dtype=tf.float32), + tf.cast(images_gray_r, dtype=tf.float32)) + images_r_diff2 = tf.squared_difference( + tf.cast(images_gray_r, dtype=tf.float32), + tf.cast(images_gray_g, dtype=tf.float32)) + images_r_diff = tf.multiply(images_r_diff1, images_r_diff2) + images_g_diff1 = tf.squared_difference( + tf.cast(images_g, dtype=tf.float32), + tf.cast(images_gray_g, dtype=tf.float32)) + images_g_diff2 = tf.squared_difference( + tf.cast(images_gray_g, dtype=tf.float32), + tf.cast(images_gray_b, dtype=tf.float32)) + images_g_diff = tf.multiply(images_g_diff1, images_g_diff2) + images_b_diff1 = tf.squared_difference( + tf.cast(images_b, dtype=tf.float32), + tf.cast(images_gray_b, dtype=tf.float32)) + images_b_diff2 = tf.squared_difference( + tf.cast(images_gray_b, dtype=tf.float32), + tf.cast(images_gray_r, dtype=tf.float32)) + images_b_diff = tf.multiply(images_b_diff1, images_b_diff2) + image_zero1 = tf.constant(0, dtype=tf.float32, shape=[1, 4, 4, 1]) + with self.test_session() as sess: + (images_r_diff_, images_g_diff_, images_b_diff_, image_zero1_) = sess.run( + [images_r_diff, images_g_diff, images_b_diff, image_zero1]) + self.assertAllClose(images_r_diff_, image_zero1_) + self.assertAllClose(images_g_diff_, image_zero1_) + self.assertAllClose(images_b_diff_, image_zero1_) + + def testRandomRGBtoGrayWithCache(self): + preprocess_options = [( + preprocessor.random_rgb_to_gray, {'probability': 0.5})] + self._testPreprocessorCache(preprocess_options, + test_boxes=False, + test_masks=False, + test_keypoints=False) + + def testRandomAdjustBrightness(self): + preprocessing_options = [] + preprocessing_options.append((preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + })) + preprocessing_options.append((preprocessor.random_adjust_brightness, {})) + images_original = self.createTestImages() + tensor_dict = {fields.InputDataFields.image: images_original} + tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) + 
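+    # The brightness delta is drawn at random, so exact pixel values are
+    # nondeterministic; the test only asserts that the image shape is
+    # unchanged.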
images_bright = tensor_dict[fields.InputDataFields.image] + image_original_shape = tf.shape(images_original) + image_bright_shape = tf.shape(images_bright) + with self.test_session() as sess: + (image_original_shape_, image_bright_shape_) = sess.run( + [image_original_shape, image_bright_shape]) + self.assertAllEqual(image_original_shape_, image_bright_shape_) + + def testRandomAdjustBrightnessWithCache(self): + preprocess_options = [] + preprocess_options.append((preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + })) + preprocess_options.append((preprocessor.random_adjust_brightness, {})) + self._testPreprocessorCache(preprocess_options, + test_boxes=False, + test_masks=False, + test_keypoints=False) + + def testRandomAdjustContrast(self): + preprocessing_options = [] + preprocessing_options.append((preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + })) + preprocessing_options.append((preprocessor.random_adjust_contrast, {})) + images_original = self.createTestImages() + tensor_dict = {fields.InputDataFields.image: images_original} + tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) + images_contrast = tensor_dict[fields.InputDataFields.image] + image_original_shape = tf.shape(images_original) + image_contrast_shape = tf.shape(images_contrast) + with self.test_session() as sess: + (image_original_shape_, image_contrast_shape_) = sess.run( + [image_original_shape, image_contrast_shape]) + self.assertAllEqual(image_original_shape_, image_contrast_shape_) + + def testRandomAdjustContrastWithCache(self): + preprocess_options = [] + preprocess_options.append((preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + })) + preprocess_options.append((preprocessor.random_adjust_contrast, {})) + self._testPreprocessorCache(preprocess_options, + test_boxes=False, + test_masks=False, + test_keypoints=False) + + def testRandomAdjustHue(self): + preprocessing_options = [] + preprocessing_options.append((preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + })) + preprocessing_options.append((preprocessor.random_adjust_hue, {})) + images_original = self.createTestImages() + tensor_dict = {fields.InputDataFields.image: images_original} + tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) + images_hue = tensor_dict[fields.InputDataFields.image] + image_original_shape = tf.shape(images_original) + image_hue_shape = tf.shape(images_hue) + with self.test_session() as sess: + (image_original_shape_, image_hue_shape_) = sess.run( + [image_original_shape, image_hue_shape]) + self.assertAllEqual(image_original_shape_, image_hue_shape_) + + def testRandomAdjustHueWithCache(self): + preprocess_options = [] + preprocess_options.append((preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + })) + preprocess_options.append((preprocessor.random_adjust_hue, {})) + self._testPreprocessorCache(preprocess_options, + test_boxes=False, + test_masks=False, + test_keypoints=False) + + def testRandomDistortColor(self): + preprocessing_options = [] + preprocessing_options.append((preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + })) + 
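+    # Normalize the uint8 test images to [0, 1] floats first, as in the
+    # other color tests above; the distortion itself is random, so the test
+    # below only verifies that the output shape matches the input.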
preprocessing_options.append((preprocessor.random_distort_color, {})) + images_original = self.createTestImages() + images_original_shape = tf.shape(images_original) + tensor_dict = {fields.InputDataFields.image: images_original} + tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) + images_distorted_color = tensor_dict[fields.InputDataFields.image] + images_distorted_color_shape = tf.shape(images_distorted_color) + with self.test_session() as sess: + (images_original_shape_, images_distorted_color_shape_) = sess.run( + [images_original_shape, images_distorted_color_shape]) + self.assertAllEqual(images_original_shape_, images_distorted_color_shape_) + + def testRandomDistortColorWithCache(self): + preprocess_options = [] + preprocess_options.append((preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + })) + preprocess_options.append((preprocessor.random_distort_color, {})) + self._testPreprocessorCache(preprocess_options, + test_boxes=False, + test_masks=False, + test_keypoints=False) + + def testRandomJitterBoxes(self): + preprocessing_options = [] + preprocessing_options.append((preprocessor.random_jitter_boxes, {})) + boxes = self.createTestBoxes() + boxes_shape = tf.shape(boxes) + tensor_dict = {fields.InputDataFields.groundtruth_boxes: boxes} + tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) + distorted_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] + distorted_boxes_shape = tf.shape(distorted_boxes) + + with self.test_session() as sess: + (boxes_shape_, distorted_boxes_shape_) = sess.run( + [boxes_shape, distorted_boxes_shape]) + self.assertAllEqual(boxes_shape_, distorted_boxes_shape_) + + def testRandomCropImage(self): + preprocessing_options = [] + preprocessing_options.append((preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + })) + preprocessing_options.append((preprocessor.random_crop_image, {})) + images = self.createTestImages() + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + tensor_dict = { + fields.InputDataFields.image: images, + fields.InputDataFields.groundtruth_boxes: boxes, + fields.InputDataFields.groundtruth_classes: labels, + fields.InputDataFields.groundtruth_weights: weights, + } + distorted_tensor_dict = preprocessor.preprocess(tensor_dict, + preprocessing_options) + distorted_images = distorted_tensor_dict[fields.InputDataFields.image] + distorted_boxes = distorted_tensor_dict[ + fields.InputDataFields.groundtruth_boxes] + boxes_rank = tf.rank(boxes) + distorted_boxes_rank = tf.rank(distorted_boxes) + images_rank = tf.rank(images) + distorted_images_rank = tf.rank(distorted_images) + self.assertEqual(3, distorted_images.get_shape()[3]) + + with self.test_session() as sess: + (boxes_rank_, distorted_boxes_rank_, images_rank_, + distorted_images_rank_) = sess.run([ + boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank + ]) + self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) + self.assertAllEqual(images_rank_, distorted_images_rank_) + + def testRandomCropImageWithCache(self): + preprocess_options = [(preprocessor.random_rgb_to_gray, + {'probability': 0.5}), + (preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1, + }), + (preprocessor.random_crop_image, {})] + 
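+    # _testPreprocessorCache (defined above) runs the same pipeline several
+    # times with a shared PreprocessorCache and asserts that each run
+    # reproduces the previous one, i.e. the cached random draws (the
+    # grayscale coin flip and the crop window here) are replayed.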
self._testPreprocessorCache(preprocess_options, + test_boxes=True, + test_masks=False, + test_keypoints=False) + + def testRandomCropImageGrayscale(self): + preprocessing_options = [(preprocessor.rgb_to_gray, {}), + (preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1, + }), + (preprocessor.random_crop_image, {})] + images = self.createTestImages() + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + tensor_dict = { + fields.InputDataFields.image: images, + fields.InputDataFields.groundtruth_boxes: boxes, + fields.InputDataFields.groundtruth_classes: labels, + fields.InputDataFields.groundtruth_weights: weights, + } + distorted_tensor_dict = preprocessor.preprocess( + tensor_dict, preprocessing_options) + distorted_images = distorted_tensor_dict[fields.InputDataFields.image] + distorted_boxes = distorted_tensor_dict[ + fields.InputDataFields.groundtruth_boxes] + boxes_rank = tf.rank(boxes) + distorted_boxes_rank = tf.rank(distorted_boxes) + images_rank = tf.rank(images) + distorted_images_rank = tf.rank(distorted_images) + self.assertEqual(1, distorted_images.get_shape()[3]) + + with self.test_session() as sess: + session_results = sess.run([ + boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank + ]) + (boxes_rank_, distorted_boxes_rank_, images_rank_, + distorted_images_rank_) = session_results + self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) + self.assertAllEqual(images_rank_, distorted_images_rank_) + + def testRandomCropImageWithBoxOutOfImage(self): + preprocessing_options = [] + preprocessing_options.append((preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + })) + preprocessing_options.append((preprocessor.random_crop_image, {})) + images = self.createTestImages() + boxes = self.createTestBoxesOutOfImage() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + tensor_dict = { + fields.InputDataFields.image: images, + fields.InputDataFields.groundtruth_boxes: boxes, + fields.InputDataFields.groundtruth_classes: labels, + fields.InputDataFields.groundtruth_weights: weights, + } + distorted_tensor_dict = preprocessor.preprocess(tensor_dict, + preprocessing_options) + distorted_images = distorted_tensor_dict[fields.InputDataFields.image] + distorted_boxes = distorted_tensor_dict[ + fields.InputDataFields.groundtruth_boxes] + boxes_rank = tf.rank(boxes) + distorted_boxes_rank = tf.rank(distorted_boxes) + images_rank = tf.rank(images) + distorted_images_rank = tf.rank(distorted_images) + + with self.test_session() as sess: + (boxes_rank_, distorted_boxes_rank_, images_rank_, + distorted_images_rank_) = sess.run( + [boxes_rank, distorted_boxes_rank, images_rank, + distorted_images_rank]) + self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) + self.assertAllEqual(images_rank_, distorted_images_rank_) + + def testRandomCropImageWithRandomCoefOne(self): + preprocessing_options = [(preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + })] + + images = self.createTestImages() + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + tensor_dict = { + fields.InputDataFields.image: images, + fields.InputDataFields.groundtruth_boxes: boxes, + fields.InputDataFields.groundtruth_classes: labels, + 
fields.InputDataFields.groundtruth_weights: weights + } + tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) + images = tensor_dict[fields.InputDataFields.image] + + preprocessing_options = [(preprocessor.random_crop_image, { + 'random_coef': 1.0 + })] + distorted_tensor_dict = preprocessor.preprocess(tensor_dict, + preprocessing_options) + + distorted_images = distorted_tensor_dict[fields.InputDataFields.image] + distorted_boxes = distorted_tensor_dict[ + fields.InputDataFields.groundtruth_boxes] + distorted_labels = distorted_tensor_dict[ + fields.InputDataFields.groundtruth_classes] + distorted_weights = distorted_tensor_dict[ + fields.InputDataFields.groundtruth_weights] + boxes_shape = tf.shape(boxes) + distorted_boxes_shape = tf.shape(distorted_boxes) + images_shape = tf.shape(images) + distorted_images_shape = tf.shape(distorted_images) + + with self.test_session() as sess: + (boxes_shape_, distorted_boxes_shape_, images_shape_, + distorted_images_shape_, images_, distorted_images_, + boxes_, distorted_boxes_, labels_, distorted_labels_, + weights_, distorted_weights_) = sess.run( + [boxes_shape, distorted_boxes_shape, images_shape, + distorted_images_shape, images, distorted_images, + boxes, distorted_boxes, labels, distorted_labels, + weights, distorted_weights]) + self.assertAllEqual(boxes_shape_, distorted_boxes_shape_) + self.assertAllEqual(images_shape_, distorted_images_shape_) + self.assertAllClose(images_, distorted_images_) + self.assertAllClose(boxes_, distorted_boxes_) + self.assertAllEqual(labels_, distorted_labels_) + self.assertAllEqual(weights_, distorted_weights_) + + def testRandomCropWithMockSampleDistortedBoundingBox(self): + preprocessing_options = [(preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + })] + + images = self.createColorfulTestImage() + boxes = tf.constant([[0.1, 0.1, 0.8, 0.3], + [0.2, 0.4, 0.75, 0.75], + [0.3, 0.1, 0.4, 0.7]], dtype=tf.float32) + labels = tf.constant([1, 7, 11], dtype=tf.int32) + weights = tf.constant([1.0, 0.5, 0.6], dtype=tf.float32) + + tensor_dict = { + fields.InputDataFields.image: images, + fields.InputDataFields.groundtruth_boxes: boxes, + fields.InputDataFields.groundtruth_classes: labels, + fields.InputDataFields.groundtruth_weights: weights, + } + tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) + images = tensor_dict[fields.InputDataFields.image] + + preprocessing_options = [(preprocessor.random_crop_image, {})] + with mock.patch.object( + tf.image, + 'sample_distorted_bounding_box') as mock_sample_distorted_bounding_box: + mock_sample_distorted_bounding_box.return_value = (tf.constant( + [6, 143, 0], dtype=tf.int32), tf.constant( + [190, 237, -1], dtype=tf.int32), tf.constant( + [[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32)) + + distorted_tensor_dict = preprocessor.preprocess(tensor_dict, + preprocessing_options) + + distorted_boxes = distorted_tensor_dict[ + fields.InputDataFields.groundtruth_boxes] + distorted_labels = distorted_tensor_dict[ + fields.InputDataFields.groundtruth_classes] + distorted_weights = distorted_tensor_dict[ + fields.InputDataFields.groundtruth_weights] + expected_boxes = tf.constant([[0.178947, 0.07173, 0.75789469, 0.66244733], + [0.28421, 0.0, 0.38947365, 0.57805908]], + dtype=tf.float32) + expected_labels = tf.constant([7, 11], dtype=tf.int32) + expected_weights = tf.constant([0.5, 0.6], dtype=tf.float32) + + with self.test_session() as sess: + (distorted_boxes_, 
distorted_labels_, distorted_weights_, + expected_boxes_, expected_labels_, expected_weights_) = sess.run( + [distorted_boxes, distorted_labels, distorted_weights, + expected_boxes, expected_labels, expected_weights]) + self.assertAllClose(distorted_boxes_, expected_boxes_) + self.assertAllEqual(distorted_labels_, expected_labels_) + self.assertAllEqual(distorted_weights_, expected_weights_) + + def testRandomCropWithoutClipBoxes(self): + preprocessing_options = [(preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + })] + + images = self.createColorfulTestImage() + boxes = tf.constant([[0.1, 0.1, 0.8, 0.3], + [0.2, 0.4, 0.75, 0.75], + [0.3, 0.1, 0.4, 0.7]], dtype=tf.float32) + keypoints = tf.constant([ + [[0.1, 0.1], [0.8, 0.3]], + [[0.2, 0.4], [0.75, 0.75]], + [[0.3, 0.1], [0.4, 0.7]], + ], dtype=tf.float32) + labels = tf.constant([1, 7, 11], dtype=tf.int32) + weights = tf.constant([1.0, 0.5, 0.6], dtype=tf.float32) + + tensor_dict = { + fields.InputDataFields.image: images, + fields.InputDataFields.groundtruth_boxes: boxes, + fields.InputDataFields.groundtruth_keypoints: keypoints, + fields.InputDataFields.groundtruth_classes: labels, + fields.InputDataFields.groundtruth_weights: weights, + } + tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) + + preprocessing_options = [(preprocessor.random_crop_image, { + 'clip_boxes': False, + })] + with mock.patch.object( + tf.image, + 'sample_distorted_bounding_box') as mock_sample_distorted_bounding_box: + mock_sample_distorted_bounding_box.return_value = (tf.constant( + [6, 143, 0], dtype=tf.int32), tf.constant( + [190, 237, -1], dtype=tf.int32), tf.constant( + [[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32)) + + preprocessor_arg_map = preprocessor.get_default_func_arg_map( + include_keypoints=True) + distorted_tensor_dict = preprocessor.preprocess( + tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) + + distorted_boxes = distorted_tensor_dict[ + fields.InputDataFields.groundtruth_boxes] + distorted_keypoints = distorted_tensor_dict[ + fields.InputDataFields.groundtruth_keypoints] + distorted_labels = distorted_tensor_dict[ + fields.InputDataFields.groundtruth_classes] + distorted_weights = distorted_tensor_dict[ + fields.InputDataFields.groundtruth_weights] + expected_boxes = tf.constant( + [[0.178947, 0.07173, 0.75789469, 0.66244733], + [0.28421, -0.434599, 0.38947365, 0.57805908]], + dtype=tf.float32) + expected_keypoints = tf.constant( + [[[0.178947, 0.07173], [0.75789469, 0.66244733]], + [[0.28421, -0.434599], [0.38947365, 0.57805908]]], + dtype=tf.float32) + expected_labels = tf.constant([7, 11], dtype=tf.int32) + expected_weights = tf.constant([0.5, 0.6], dtype=tf.float32) + + with self.test_session() as sess: + (distorted_boxes_, distorted_keypoints_, distorted_labels_, + distorted_weights_, expected_boxes_, expected_keypoints_, + expected_labels_, expected_weights_) = sess.run( + [distorted_boxes, distorted_keypoints, distorted_labels, + distorted_weights, expected_boxes, expected_keypoints, + expected_labels, expected_weights]) + self.assertAllClose(distorted_boxes_, expected_boxes_) + self.assertAllClose(distorted_keypoints_, expected_keypoints_) + self.assertAllEqual(distorted_labels_, expected_labels_) + self.assertAllEqual(distorted_weights_, expected_weights_) + + def testRandomCropImageWithMultiClassScores(self): + preprocessing_options = [] + preprocessing_options.append((preprocessor.normalize_image, { + 
'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + })) + preprocessing_options.append((preprocessor.random_crop_image, {})) + images = self.createTestImages() + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + multiclass_scores = self.createTestMultiClassScores() + + tensor_dict = { + fields.InputDataFields.image: images, + fields.InputDataFields.groundtruth_boxes: boxes, + fields.InputDataFields.groundtruth_classes: labels, + fields.InputDataFields.groundtruth_weights: weights, + fields.InputDataFields.multiclass_scores: multiclass_scores + } + distorted_tensor_dict = preprocessor.preprocess(tensor_dict, + preprocessing_options) + distorted_images = distorted_tensor_dict[fields.InputDataFields.image] + distorted_boxes = distorted_tensor_dict[ + fields.InputDataFields.groundtruth_boxes] + distorted_multiclass_scores = distorted_tensor_dict[ + fields.InputDataFields.multiclass_scores] + boxes_rank = tf.rank(boxes) + distorted_boxes_rank = tf.rank(distorted_boxes) + images_rank = tf.rank(images) + distorted_images_rank = tf.rank(distorted_images) + multiclass_scores_rank = tf.rank(multiclass_scores) + distorted_multiclass_scores_rank = tf.rank(distorted_multiclass_scores) + + with self.test_session() as sess: + (boxes_rank_, distorted_boxes_, distorted_boxes_rank_, images_rank_, + distorted_images_rank_, multiclass_scores_rank_, + distorted_multiclass_scores_rank_, + distorted_multiclass_scores_) = sess.run([ + boxes_rank, distorted_boxes, distorted_boxes_rank, images_rank, + distorted_images_rank, multiclass_scores_rank, + distorted_multiclass_scores_rank, distorted_multiclass_scores + ]) + self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) + self.assertAllEqual(images_rank_, distorted_images_rank_) + self.assertAllEqual(multiclass_scores_rank_, + distorted_multiclass_scores_rank_) + self.assertAllEqual(distorted_boxes_.shape[0], + distorted_multiclass_scores_.shape[0]) + + def testStrictRandomCropImageWithGroundtruthWeights(self): + image = self.createColorfulTestImage()[0] + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + with mock.patch.object( + tf.image, + 'sample_distorted_bounding_box' + ) as mock_sample_distorted_bounding_box: + mock_sample_distorted_bounding_box.return_value = ( + tf.constant([6, 143, 0], dtype=tf.int32), + tf.constant([190, 237, -1], dtype=tf.int32), + tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32)) + new_image, new_boxes, new_labels, new_groundtruth_weights = ( + preprocessor._strict_random_crop_image( + image, boxes, labels, weights)) + with self.test_session() as sess: + new_image, new_boxes, new_labels, new_groundtruth_weights = ( + sess.run( + [new_image, new_boxes, new_labels, new_groundtruth_weights]) + ) + + expected_boxes = np.array( + [[0.0, 0.0, 0.75789469, 1.0], + [0.23157893, 0.24050637, 0.75789469, 1.0]], dtype=np.float32) + self.assertAllEqual(new_image.shape, [190, 237, 3]) + self.assertAllEqual(new_groundtruth_weights, [1.0, 0.5]) + self.assertAllClose( + new_boxes.flatten(), expected_boxes.flatten()) + + def testStrictRandomCropImageWithMasks(self): + image = self.createColorfulTestImage()[0] + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + masks = tf.random_uniform([2, 200, 400], dtype=tf.float32) + with mock.patch.object( + tf.image, + 'sample_distorted_bounding_box' + ) as 
mock_sample_distorted_bounding_box: + mock_sample_distorted_bounding_box.return_value = ( + tf.constant([6, 143, 0], dtype=tf.int32), + tf.constant([190, 237, -1], dtype=tf.int32), + tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32)) + new_image, new_boxes, new_labels, new_weights, new_masks = ( + preprocessor._strict_random_crop_image( + image, boxes, labels, weights, masks=masks)) + with self.test_session() as sess: + new_image, new_boxes, new_labels, new_weights, new_masks = sess.run( + [new_image, new_boxes, new_labels, new_weights, new_masks]) + expected_boxes = np.array( + [[0.0, 0.0, 0.75789469, 1.0], + [0.23157893, 0.24050637, 0.75789469, 1.0]], dtype=np.float32) + self.assertAllEqual(new_image.shape, [190, 237, 3]) + self.assertAllEqual(new_masks.shape, [2, 190, 237]) + self.assertAllClose( + new_boxes.flatten(), expected_boxes.flatten()) + + def testStrictRandomCropImageWithKeypoints(self): + image = self.createColorfulTestImage()[0] + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + keypoints = self.createTestKeypoints() + with mock.patch.object( + tf.image, + 'sample_distorted_bounding_box' + ) as mock_sample_distorted_bounding_box: + mock_sample_distorted_bounding_box.return_value = ( + tf.constant([6, 143, 0], dtype=tf.int32), + tf.constant([190, 237, -1], dtype=tf.int32), + tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32)) + new_image, new_boxes, new_labels, new_weights, new_keypoints = ( + preprocessor._strict_random_crop_image( + image, boxes, labels, weights, keypoints=keypoints)) + with self.test_session() as sess: + new_image, new_boxes, new_labels, new_weights, new_keypoints = sess.run( + [new_image, new_boxes, new_labels, new_weights, new_keypoints]) + + expected_boxes = np.array([ + [0.0, 0.0, 0.75789469, 1.0], + [0.23157893, 0.24050637, 0.75789469, 1.0],], dtype=np.float32) + expected_keypoints = np.array([ + [[np.nan, np.nan], + [np.nan, np.nan], + [np.nan, np.nan]], + [[0.38947368, 0.07173], + [0.49473682, 0.24050637], + [0.60000002, 0.40928277]] + ], dtype=np.float32) + self.assertAllEqual(new_image.shape, [190, 237, 3]) + self.assertAllClose( + new_boxes.flatten(), expected_boxes.flatten()) + self.assertAllClose( + new_keypoints.flatten(), expected_keypoints.flatten()) + + def testRunRandomCropImageWithMasks(self): + image = self.createColorfulTestImage() + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + masks = tf.random_uniform([2, 200, 400], dtype=tf.float32) + + tensor_dict = { + fields.InputDataFields.image: image, + fields.InputDataFields.groundtruth_boxes: boxes, + fields.InputDataFields.groundtruth_classes: labels, + fields.InputDataFields.groundtruth_weights: weights, + fields.InputDataFields.groundtruth_instance_masks: masks, + } + + preprocessor_arg_map = preprocessor.get_default_func_arg_map( + include_instance_masks=True) + + preprocessing_options = [(preprocessor.random_crop_image, {})] + + with mock.patch.object( + tf.image, + 'sample_distorted_bounding_box' + ) as mock_sample_distorted_bounding_box: + mock_sample_distorted_bounding_box.return_value = ( + tf.constant([6, 143, 0], dtype=tf.int32), + tf.constant([190, 237, -1], dtype=tf.int32), + tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32)) + distorted_tensor_dict = preprocessor.preprocess( + tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) + distorted_image = 
+
+  def testRunRandomCropImageWithMasks(self):
+    image = self.createColorfulTestImage()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    weights = self.createTestGroundtruthWeights()
+    masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
+
+    tensor_dict = {
+        fields.InputDataFields.image: image,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_weights: weights,
+        fields.InputDataFields.groundtruth_instance_masks: masks,
+    }
+
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_instance_masks=True)
+
+    preprocessing_options = [(preprocessor.random_crop_image, {})]
+
+    with mock.patch.object(
+        tf.image,
+        'sample_distorted_bounding_box'
+    ) as mock_sample_distorted_bounding_box:
+      mock_sample_distorted_bounding_box.return_value = (
+          tf.constant([6, 143, 0], dtype=tf.int32),
+          tf.constant([190, 237, -1], dtype=tf.int32),
+          tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+      distorted_tensor_dict = preprocessor.preprocess(
+          tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+      distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+      distorted_boxes = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_boxes]
+      distorted_labels = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_classes]
+      distorted_masks = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_instance_masks]
+      with self.test_session() as sess:
+        (distorted_image_, distorted_boxes_, distorted_labels_,
+         distorted_masks_) = sess.run(
+             [distorted_image, distorted_boxes, distorted_labels,
+              distorted_masks])
+
+        expected_boxes = np.array([
+            [0.0, 0.0, 0.75789469, 1.0],
+            [0.23157893, 0.24050637, 0.75789469, 1.0],
+        ], dtype=np.float32)
+        self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
+        self.assertAllEqual(distorted_masks_.shape, [2, 190, 237])
+        self.assertAllEqual(distorted_labels_, [1, 2])
+        self.assertAllClose(
+            distorted_boxes_.flatten(), expected_boxes.flatten())
+
+  def testRunRandomCropImageWithKeypointsInsideCrop(self):
+    image = self.createColorfulTestImage()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    weights = self.createTestGroundtruthWeights()
+    keypoints = self.createTestKeypointsInsideCrop()
+
+    tensor_dict = {
+        fields.InputDataFields.image: image,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_keypoints: keypoints,
+        fields.InputDataFields.groundtruth_weights: weights
+    }
+
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_keypoints=True)
+
+    preprocessing_options = [(preprocessor.random_crop_image, {})]
+
+    with mock.patch.object(
+        tf.image,
+        'sample_distorted_bounding_box'
+    ) as mock_sample_distorted_bounding_box:
+      mock_sample_distorted_bounding_box.return_value = (
+          tf.constant([6, 143, 0], dtype=tf.int32),
+          tf.constant([190, 237, -1], dtype=tf.int32),
+          tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+      distorted_tensor_dict = preprocessor.preprocess(
+          tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+      distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+      distorted_boxes = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_boxes]
+      distorted_labels = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_classes]
+      distorted_keypoints = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_keypoints]
+      with self.test_session() as sess:
+        (distorted_image_, distorted_boxes_, distorted_labels_,
+         distorted_keypoints_) = sess.run(
+             [distorted_image, distorted_boxes, distorted_labels,
+              distorted_keypoints])
+
+        expected_boxes = np.array([
+            [0.0, 0.0, 0.75789469, 1.0],
+            [0.23157893, 0.24050637, 0.75789469, 1.0],
+        ], dtype=np.float32)
+        expected_keypoints = np.array([
+            [[0.38947368, 0.07173],
+             [0.49473682, 0.24050637],
+             [0.60000002, 0.40928277]],
+            [[0.38947368, 0.07173],
+             [0.49473682, 0.24050637],
+             [0.60000002, 0.40928277]]
+        ])
+        self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
+        self.assertAllEqual(distorted_labels_, [1, 2])
+        self.assertAllClose(
+            distorted_boxes_.flatten(), expected_boxes.flatten())
+        self.assertAllClose(
+            distorted_keypoints_.flatten(), expected_keypoints.flatten())
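+
+  # Illustrative sketch (not part of the original tests): the pattern the
+  # tests above repeat is (1) build a tensor_dict keyed by
+  # fields.InputDataFields, (2) list (function, kwargs) preprocessing options,
+  # (3) pass a func_arg_map when extra fields such as keypoints or masks must
+  # flow through. A minimal image-only invocation, using an op whose default
+  # argument map needs only the image, looks like this:
+  def testPreprocessMinimalInvocationSketch(self):
+    images = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: tf.cast(images, tf.float32)}
+    preprocessing_options = [(preprocessor.random_adjust_brightness, {})]
+    processed = preprocessor.preprocess(tensor_dict, preprocessing_options)
+    images_shape = tf.shape(tensor_dict[fields.InputDataFields.image])
+    processed_shape = tf.shape(processed[fields.InputDataFields.image])
+    with self.test_session() as sess:
+      images_shape_, processed_shape_ = sess.run(
+          [images_shape, processed_shape])
+    # A pure photometric op never changes the image shape.
+    self.assertAllEqual(images_shape_, processed_shape_)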
+
+  def testRunRandomCropImageWithKeypointsOutsideCrop(self):
+    image = self.createColorfulTestImage()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    weights = self.createTestGroundtruthWeights()
+    keypoints = self.createTestKeypointsOutsideCrop()
+
+    tensor_dict = {
+        fields.InputDataFields.image: image,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_weights: weights,
+        fields.InputDataFields.groundtruth_keypoints: keypoints
+    }
+
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_keypoints=True)
+
+    preprocessing_options = [(preprocessor.random_crop_image, {})]
+
+    with mock.patch.object(
+        tf.image,
+        'sample_distorted_bounding_box'
+    ) as mock_sample_distorted_bounding_box:
+      mock_sample_distorted_bounding_box.return_value = (
+          tf.constant([6, 143, 0], dtype=tf.int32),
+          tf.constant([190, 237, -1], dtype=tf.int32),
+          tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+      distorted_tensor_dict = preprocessor.preprocess(
+          tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+      distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+      distorted_boxes = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_boxes]
+      distorted_labels = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_classes]
+      distorted_keypoints = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_keypoints]
+      with self.test_session() as sess:
+        (distorted_image_, distorted_boxes_, distorted_labels_,
+         distorted_keypoints_) = sess.run(
+             [distorted_image, distorted_boxes, distorted_labels,
+              distorted_keypoints])
+
+        expected_boxes = np.array([
+            [0.0, 0.0, 0.75789469, 1.0],
+            [0.23157893, 0.24050637, 0.75789469, 1.0],
+        ], dtype=np.float32)
+        expected_keypoints = np.array([
+            [[np.nan, np.nan],
+             [np.nan, np.nan],
+             [np.nan, np.nan]],
+            [[np.nan, np.nan],
+             [np.nan, np.nan],
+             [np.nan, np.nan]],
+        ])
+        self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
+        self.assertAllEqual(distorted_labels_, [1, 2])
+        self.assertAllClose(
+            distorted_boxes_.flatten(), expected_boxes.flatten())
+        self.assertAllClose(
+            distorted_keypoints_.flatten(), expected_keypoints.flatten())
+
+  def testRunRetainBoxesAboveThreshold(self):
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    weights = self.createTestGroundtruthWeights()
+
+    tensor_dict = {
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_weights: weights,
+    }
+
+    preprocessing_options = [
+        (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
+    ]
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map()
+    retained_tensor_dict = preprocessor.preprocess(
+        tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+    retained_boxes = retained_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    retained_labels = retained_tensor_dict[
+        fields.InputDataFields.groundtruth_classes]
+    retained_weights = retained_tensor_dict[
+        fields.InputDataFields.groundtruth_weights]
+
+    with self.test_session() as sess:
+      (retained_boxes_, retained_labels_,
+       retained_weights_, expected_retained_boxes_,
+       expected_retained_labels_, expected_retained_weights_) = sess.run(
+           [retained_boxes, retained_labels, retained_weights,
+            self.expectedBoxesAfterThresholding(),
+            self.expectedLabelsAfterThresholding(),
+            self.expectedLabelScoresAfterThresholding()])
+
+      self.assertAllClose(retained_boxes_, expected_retained_boxes_)
+      self.assertAllClose(retained_labels_, expected_retained_labels_)
+      self.assertAllClose(
+          retained_weights_, expected_retained_weights_)
+
+  def testRunRetainBoxesAboveThresholdWithMasks(self):
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    weights = self.createTestGroundtruthWeights()
+    masks = self.createTestMasks()
+
+    tensor_dict = {
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_weights: weights,
+        fields.InputDataFields.groundtruth_instance_masks: masks
+    }
+
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_label_weights=True,
+        include_instance_masks=True)
+
+    preprocessing_options = [
+        (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
+    ]
+
+    retained_tensor_dict = preprocessor.preprocess(
+        tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+    retained_masks = retained_tensor_dict[
+        fields.InputDataFields.groundtruth_instance_masks]
+
+    with self.test_session() as sess:
+      (retained_masks_, expected_masks_) = sess.run(
+          [retained_masks,
+           self.expectedMasksAfterThresholding()])
+      self.assertAllClose(retained_masks_, expected_masks_)
+
+  def testRunRetainBoxesAboveThresholdWithKeypoints(self):
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    weights = self.createTestGroundtruthWeights()
+    keypoints = self.createTestKeypoints()
+
+    tensor_dict = {
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_weights: weights,
+        fields.InputDataFields.groundtruth_keypoints: keypoints
+    }
+
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_keypoints=True)
+
+    preprocessing_options = [
+        (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
+    ]
+
+    retained_tensor_dict = preprocessor.preprocess(
+        tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+    retained_keypoints = retained_tensor_dict[
+        fields.InputDataFields.groundtruth_keypoints]
+
+    with self.test_session() as sess:
+      (retained_keypoints_, expected_keypoints_) = sess.run(
+          [retained_keypoints,
+           self.expectedKeypointsAfterThresholding()])
+      self.assertAllClose(retained_keypoints_, expected_keypoints_)
+
+  def testRandomCropToAspectRatioWithCache(self):
+    preprocess_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
+    self._testPreprocessorCache(preprocess_options,
+                                test_boxes=True,
+                                test_masks=False,
+                                test_keypoints=False)
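+
+  # Illustrative sketch (not part of the original tests): the *WithCache tests
+  # rely on passing a PreprocessorCache so that repeated preprocess() calls
+  # draw identical random values. Assuming preprocessor_cache is imported at
+  # the top of this file (the _testPreprocessorCache helper needs it), the
+  # core idea is:
+  def testPreprocessorCacheDeterminismSketch(self):
+    cache = preprocessor_cache.PreprocessorCache()
+    images = tf.cast(self.createTestImages(), tf.float32)
+    tensor_dict = {fields.InputDataFields.image: images}
+    options = [(preprocessor.random_adjust_brightness, {})]
+    out1 = preprocessor.preprocess(
+        tensor_dict, options, preprocess_vars_cache=cache)
+    out2 = preprocessor.preprocess(
+        tensor_dict, options, preprocess_vars_cache=cache)
+    with self.test_session() as sess:
+      images1_, images2_ = sess.run([out1[fields.InputDataFields.image],
+                                     out2[fields.InputDataFields.image]])
+    # With a shared cache both runs draw the same brightness delta.
+    self.assertAllClose(images1_, images2_)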
+
+  def testRunRandomCropToAspectRatioWithMasks(self):
+    image = self.createColorfulTestImage()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    weights = self.createTestGroundtruthWeights()
+    masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
+
+    tensor_dict = {
+        fields.InputDataFields.image: image,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_weights: weights,
+        fields.InputDataFields.groundtruth_instance_masks: masks
+    }
+
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_instance_masks=True)
+
+    preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
+
+    with mock.patch.object(preprocessor,
+                           '_random_integer') as mock_random_integer:
+      mock_random_integer.return_value = tf.constant(0, dtype=tf.int32)
+      distorted_tensor_dict = preprocessor.preprocess(
+          tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+      distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+      distorted_boxes = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_boxes]
+      distorted_labels = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_classes]
+      distorted_masks = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_instance_masks]
+      with self.test_session() as sess:
+        (distorted_image_, distorted_boxes_, distorted_labels_,
+         distorted_masks_) = sess.run([
+             distorted_image, distorted_boxes, distorted_labels, distorted_masks
+         ])
+
+        expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32)
+        self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3])
+        self.assertAllEqual(distorted_labels_, [1])
+        self.assertAllClose(distorted_boxes_.flatten(),
+                            expected_boxes.flatten())
+        self.assertAllEqual(distorted_masks_.shape, [1, 200, 200])
+
+  def testRunRandomCropToAspectRatioWithKeypoints(self):
+    image = self.createColorfulTestImage()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    weights = self.createTestGroundtruthWeights()
+    keypoints = self.createTestKeypoints()
+
+    tensor_dict = {
+        fields.InputDataFields.image: image,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_weights: weights,
+        fields.InputDataFields.groundtruth_keypoints: keypoints
+    }
+
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_keypoints=True)
+
+    preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
+
+    with mock.patch.object(preprocessor,
+                           '_random_integer') as mock_random_integer:
+      mock_random_integer.return_value = tf.constant(0, dtype=tf.int32)
+      distorted_tensor_dict = preprocessor.preprocess(
+          tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+      distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+      distorted_boxes = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_boxes]
+      distorted_labels = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_classes]
+      distorted_keypoints = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_keypoints]
+      with self.test_session() as sess:
+        (distorted_image_, distorted_boxes_, distorted_labels_,
+         distorted_keypoints_) = sess.run([
+             distorted_image, distorted_boxes, distorted_labels,
+             distorted_keypoints
+         ])
+
+        expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32)
+        expected_keypoints = np.array(
+            [[0.1, 0.2], [0.2, 0.4], [0.3, 0.6]], dtype=np.float32)
+        self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3])
+        self.assertAllEqual(distorted_labels_, [1])
+        self.assertAllClose(distorted_boxes_.flatten(),
+                            expected_boxes.flatten())
+        self.assertAllClose(distorted_keypoints_.flatten(),
+                            expected_keypoints.flatten())
+
+  def testRandomPadToAspectRatioWithCache(self):
+    preprocess_options = [(preprocessor.random_pad_to_aspect_ratio, {})]
+    self._testPreprocessorCache(preprocess_options,
+                                test_boxes=True,
+                                test_masks=True,
+                                test_keypoints=True)
+
+  def testRunRandomPadToAspectRatioWithMinMaxPaddedSizeRatios(self):
+    image = self.createColorfulTestImage()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+
+    tensor_dict = {
+        fields.InputDataFields.image: image,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels
+    }
+
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map()
+    preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio,
+                              {'min_padded_size_ratio': (4.0, 4.0),
+                               'max_padded_size_ratio': (4.0, 4.0)})]
+
+    distorted_tensor_dict = preprocessor.preprocess(
+        tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+    distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+    distorted_boxes = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    distorted_labels = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_classes]
+    with self.test_session() as sess:
+      distorted_image_, distorted_boxes_, distorted_labels_ = sess.run([
+          distorted_image, distorted_boxes, distorted_labels])
+
+      expected_boxes = np.array(
+          [[0.0, 0.125, 0.1875, 0.5], [0.0625, 0.25, 0.1875, 0.5]],
+          dtype=np.float32)
+      self.assertAllEqual(distorted_image_.shape, [1, 800, 800, 3])
+      self.assertAllEqual(distorted_labels_, [1, 2])
+      self.assertAllClose(distorted_boxes_.flatten(),
+                          expected_boxes.flatten())
+
+  def testRunRandomPadToAspectRatioWithMasks(self):
+    image = self.createColorfulTestImage()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
+
+    tensor_dict = {
+        fields.InputDataFields.image: image,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_instance_masks: masks
+    }
+
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_instance_masks=True)
+
+    preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio, {})]
+
+    distorted_tensor_dict = preprocessor.preprocess(
+        tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+    distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+    distorted_boxes = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    distorted_labels = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_classes]
+    distorted_masks = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_instance_masks]
+    with self.test_session() as sess:
+      (distorted_image_, distorted_boxes_, distorted_labels_,
+       distorted_masks_) = sess.run([
+           distorted_image, distorted_boxes, distorted_labels, distorted_masks
+       ])
+
+      expected_boxes = np.array(
+          [[0.0, 0.25, 0.375, 1.0], [0.125, 0.5, 0.375, 1.0]], dtype=np.float32)
+      self.assertAllEqual(distorted_image_.shape, [1, 400, 400, 3])
+      self.assertAllEqual(distorted_labels_, [1, 2])
+      self.assertAllClose(distorted_boxes_.flatten(),
+                          expected_boxes.flatten())
+      self.assertAllEqual(distorted_masks_.shape, [2, 400, 400])
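+
+  # Illustrative sketch (not part of the original tests): padding a 200x400
+  # image to a square 400x400 canvas rescales normalized box coordinates by
+  # old_size / new_size along each axis, which is where the expected_boxes
+  # above come from. In plain numpy (box values assume createTestBoxes()):
+  def testPadToAspectRatioBoxArithmeticSketch(self):
+    boxes = np.array([[0.0, 0.25, 0.75, 1.0],
+                      [0.25, 0.5, 0.75, 1.0]], dtype=np.float32)
+    # Height grows from 200 to 400 (padding below); width stays 400.
+    scale = np.array([200.0 / 400.0, 400.0 / 400.0] * 2, dtype=np.float32)
+    self.assertAllClose(
+        boxes * scale,
+        [[0.0, 0.25, 0.375, 1.0], [0.125, 0.5, 0.375, 1.0]])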
+
+  def testRunRandomPadToAspectRatioWithKeypoints(self):
+    image = self.createColorfulTestImage()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    keypoints = self.createTestKeypoints()
+
+    tensor_dict = {
+        fields.InputDataFields.image: image,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_keypoints: keypoints
+    }
+
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_keypoints=True)
+
+    preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio, {})]
+
+    distorted_tensor_dict = preprocessor.preprocess(
+        tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+    distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+    distorted_boxes = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    distorted_labels = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_classes]
+    distorted_keypoints = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_keypoints]
+    with self.test_session() as sess:
+      (distorted_image_, distorted_boxes_, distorted_labels_,
+       distorted_keypoints_) = sess.run([
+           distorted_image, distorted_boxes, distorted_labels,
+           distorted_keypoints
+       ])
+
+      expected_boxes = np.array(
+          [[0.0, 0.25, 0.375, 1.0], [0.125, 0.5, 0.375, 1.0]], dtype=np.float32)
+      expected_keypoints = np.array([
+          [[0.05, 0.1], [0.1, 0.2], [0.15, 0.3]],
+          [[0.2, 0.4], [0.25, 0.5], [0.3, 0.6]],
+      ], dtype=np.float32)
+      self.assertAllEqual(distorted_image_.shape, [1, 400, 400, 3])
+      self.assertAllEqual(distorted_labels_, [1, 2])
+      self.assertAllClose(distorted_boxes_.flatten(),
+                          expected_boxes.flatten())
+      self.assertAllClose(distorted_keypoints_.flatten(),
+                          expected_keypoints.flatten())
+
+  def testRandomPadImageWithCache(self):
+    preprocess_options = [(preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1,
+    }), (preprocessor.random_pad_image, {})]
+    self._testPreprocessorCache(preprocess_options,
+                                test_boxes=True,
+                                test_masks=True,
+                                test_keypoints=True)
+
+  def testRandomPadImage(self):
+    preprocessing_options = [(preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    })]
+
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    tensor_dict = {
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+    }
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+    images = tensor_dict[fields.InputDataFields.image]
+
+    preprocessing_options = [(preprocessor.random_pad_image, {})]
+    padded_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                 preprocessing_options)
+
+    padded_images = padded_tensor_dict[fields.InputDataFields.image]
+    padded_boxes = padded_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    boxes_shape = tf.shape(boxes)
+    padded_boxes_shape = tf.shape(padded_boxes)
+    images_shape = tf.shape(images)
+    padded_images_shape = tf.shape(padded_images)
+
+    with self.test_session() as sess:
+      (boxes_shape_, padded_boxes_shape_, images_shape_,
+       padded_images_shape_, boxes_, padded_boxes_) = sess.run(
+           [boxes_shape, padded_boxes_shape, images_shape,
+            padded_images_shape, boxes, padded_boxes])
+      self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
+      self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all())
+      self.assertTrue((images_shape_[2] >= padded_images_shape_[2] * 0.5).all())
+      self.assertTrue((images_shape_[1] <= padded_images_shape_[1]).all())
+      self.assertTrue((images_shape_[2] <= padded_images_shape_[2]).all())
+      self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
+          padded_boxes_[:, 2] - padded_boxes_[:, 0])))
+      self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
+          padded_boxes_[:, 3] - padded_boxes_[:, 1])))
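+
+  # Illustrative sketch (not part of the original tests): random_pad_image
+  # (with default parameters) places the image on a larger canvas of at most
+  # twice each dimension, so normalized box extents can only shrink. For a
+  # pad from width W to 2W with the image kept at offset 0:
+  def testRandomPadImageBoxShrinkSketch(self):
+    boxes = np.array([[0.0, 0.25, 0.75, 1.0]], dtype=np.float32)
+    padded = boxes * np.array([1.0, 0.5, 1.0, 0.5], dtype=np.float32)
+    # The padded box spans a smaller normalized extent along the padded axis.
+    self.assertTrue(np.all((boxes[:, 3] - boxes[:, 1]) >=
+                           (padded[:, 3] - padded[:, 1])))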
+
+  def testRandomPadImageWithKeypoints(self):
+    preprocessing_options = [(preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    })]
+
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    keypoints = self.createTestKeypoints()
+    tensor_dict = {
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_keypoints: keypoints,
+    }
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+    images = tensor_dict[fields.InputDataFields.image]
+
+    preprocessing_options = [(preprocessor.random_pad_image, {})]
+    padded_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                 preprocessing_options)
+
+    padded_images = padded_tensor_dict[fields.InputDataFields.image]
+    padded_boxes = padded_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    padded_keypoints = padded_tensor_dict[
+        fields.InputDataFields.groundtruth_keypoints]
+    boxes_shape = tf.shape(boxes)
+    padded_boxes_shape = tf.shape(padded_boxes)
+    keypoints_shape = tf.shape(keypoints)
+    padded_keypoints_shape = tf.shape(padded_keypoints)
+    images_shape = tf.shape(images)
+    padded_images_shape = tf.shape(padded_images)
+
+    with self.test_session() as sess:
+      (boxes_shape_, padded_boxes_shape_, keypoints_shape_,
+       padded_keypoints_shape_, images_shape_, padded_images_shape_, boxes_,
+       padded_boxes_, keypoints_, padded_keypoints_) = sess.run(
+           [boxes_shape, padded_boxes_shape, keypoints_shape,
+            padded_keypoints_shape, images_shape, padded_images_shape, boxes,
+            padded_boxes, keypoints, padded_keypoints])
+      self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
+      self.assertAllEqual(keypoints_shape_, padded_keypoints_shape_)
+      self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all())
+      self.assertTrue((images_shape_[2] >= padded_images_shape_[2] * 0.5).all())
+      self.assertTrue((images_shape_[1] <= padded_images_shape_[1]).all())
+      self.assertTrue((images_shape_[2] <= padded_images_shape_[2]).all())
+      self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
+          padded_boxes_[:, 2] - padded_boxes_[:, 0])))
+      self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
+          padded_boxes_[:, 3] - padded_boxes_[:, 1])))
+      self.assertTrue(np.all((keypoints_[1, :, 0] - keypoints_[0, :, 0]) >= (
+          padded_keypoints_[1, :, 0] - padded_keypoints_[0, :, 0])))
+      self.assertTrue(np.all((keypoints_[1, :, 1] - keypoints_[0, :, 1]) >= (
+          padded_keypoints_[1, :, 1] - padded_keypoints_[0, :, 1])))
+
+  def testRandomAbsolutePadImage(self):
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    tensor_dict = {
+        fields.InputDataFields.image: tf.cast(images, dtype=tf.float32),
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+    }
+
+    height_padding = 10
+    width_padding = 20
+    preprocessing_options = [(preprocessor.random_absolute_pad_image, {
+        'max_height_padding': height_padding,
+        'max_width_padding': width_padding})]
+    padded_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                 preprocessing_options)
+
+    original_shape = tf.shape(images)
+    final_shape = tf.shape(padded_tensor_dict[fields.InputDataFields.image])
+
+    with self.test_session() as sess:
+      _, height, width, _ = sess.run(original_shape)
+      for _ in range(100):
+        output_shape = sess.run(final_shape)
+
+        self.assertTrue(output_shape[1] >= height)
+        self.assertTrue(output_shape[1] < height + height_padding)
+        self.assertTrue(output_shape[2] >= width)
+        self.assertTrue(output_shape[2] < width + width_padding)
+
+  def testRandomCropPadImageWithCache(self):
+    preprocess_options = [(preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1,
+    }), (preprocessor.random_crop_pad_image, {})]
+    self._testPreprocessorCache(preprocess_options,
+                                test_boxes=True,
+                                test_masks=True,
+                                test_keypoints=True)
+
+  def testRandomCropPadImageWithRandomCoefOne(self):
+    preprocessing_options = [(preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    })]
+
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    weights = self.createTestGroundtruthWeights()
+    tensor_dict = {
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_weights: weights,
+    }
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+    images = tensor_dict[fields.InputDataFields.image]
+
+    preprocessing_options = [(preprocessor.random_crop_pad_image, {
+        'random_coef': 1.0
+    })]
+    padded_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                 preprocessing_options)
+
+    padded_images = padded_tensor_dict[fields.InputDataFields.image]
+    padded_boxes = padded_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    boxes_shape = tf.shape(boxes)
+    padded_boxes_shape = tf.shape(padded_boxes)
+    images_shape = tf.shape(images)
+    padded_images_shape = tf.shape(padded_images)
+
+    with self.test_session() as sess:
+      (boxes_shape_, padded_boxes_shape_, images_shape_,
+       padded_images_shape_, boxes_, padded_boxes_) = sess.run(
+           [boxes_shape, padded_boxes_shape, images_shape,
+            padded_images_shape, boxes, padded_boxes])
+      self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
+      self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all())
+      self.assertTrue((images_shape_[2] >= padded_images_shape_[2] * 0.5).all())
+      self.assertTrue((images_shape_[1] <= padded_images_shape_[1]).all())
+      self.assertTrue((images_shape_[2] <= padded_images_shape_[2]).all())
+      self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
+          padded_boxes_[:, 2] - padded_boxes_[:, 0])))
+      self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
+          padded_boxes_[:, 3] - padded_boxes_[:, 1])))
+
+  def testRandomCropToAspectRatio(self):
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    weights = self.createTestGroundtruthWeights()
+    tensor_dict = {
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_weights: weights,
+    }
+    tensor_dict = preprocessor.preprocess(tensor_dict, [])
+    images = tensor_dict[fields.InputDataFields.image]
+
+    preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {
+        'aspect_ratio': 2.0
+    })]
+    cropped_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                  preprocessing_options)
+
+    cropped_images = cropped_tensor_dict[fields.InputDataFields.image]
+    cropped_boxes = cropped_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    boxes_shape = tf.shape(boxes)
+    cropped_boxes_shape = tf.shape(cropped_boxes)
+    images_shape = tf.shape(images)
+    cropped_images_shape = tf.shape(cropped_images)
+
+    with self.test_session() as sess:
+      (boxes_shape_, cropped_boxes_shape_, images_shape_,
+       cropped_images_shape_) = sess.run([
+           boxes_shape, cropped_boxes_shape, images_shape, cropped_images_shape
+       ])
+      self.assertAllEqual(boxes_shape_, cropped_boxes_shape_)
+      self.assertEqual(images_shape_[1], cropped_images_shape_[1] * 2)
+      self.assertEqual(images_shape_[2], cropped_images_shape_[2])
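+
+  # Illustrative sketch (not part of the original tests): cropping an image to
+  # a wider aspect ratio keeps the width and shrinks the height to
+  # width / aspect_ratio, which is what the shape assertions above encode for
+  # an aspect_ratio of 2.0:
+  def testCropToAspectRatioShapeArithmeticSketch(self):
+    height, width, ratio = 200, 400, 2.0
+    new_width = width                  # width already satisfies the ratio
+    new_height = int(new_width / ratio)
+    self.assertEqual((new_height, new_width), (100, 400))
+    self.assertEqual(height, new_height * 2)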
+
+  def testRandomPadToAspectRatio(self):
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    tensor_dict = {
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+    }
+    tensor_dict = preprocessor.preprocess(tensor_dict, [])
+    images = tensor_dict[fields.InputDataFields.image]
+
+    preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio, {
+        'aspect_ratio': 2.0
+    })]
+    padded_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                 preprocessing_options)
+
+    padded_images = padded_tensor_dict[fields.InputDataFields.image]
+    padded_boxes = padded_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    boxes_shape = tf.shape(boxes)
+    padded_boxes_shape = tf.shape(padded_boxes)
+    images_shape = tf.shape(images)
+    padded_images_shape = tf.shape(padded_images)
+
+    with self.test_session() as sess:
+      (boxes_shape_, padded_boxes_shape_, images_shape_,
+       padded_images_shape_) = sess.run([
+           boxes_shape, padded_boxes_shape, images_shape, padded_images_shape
+       ])
+      self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
+      self.assertEqual(images_shape_[1], padded_images_shape_[1])
+      self.assertEqual(2 * images_shape_[2], padded_images_shape_[2])
+
+  def testRandomBlackPatchesWithCache(self):
+    preprocess_options = []
+    preprocess_options.append((preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    }))
+    preprocess_options.append((preprocessor.random_black_patches, {
+        'size_to_image_ratio': 0.5
+    }))
+    self._testPreprocessorCache(preprocess_options,
+                                test_boxes=True,
+                                test_masks=True,
+                                test_keypoints=True)
+
+  def testRandomBlackPatches(self):
+    preprocessing_options = []
+    preprocessing_options.append((preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    }))
+    preprocessing_options.append((preprocessor.random_black_patches, {
+        'size_to_image_ratio': 0.5
+    }))
+    images = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images}
+    blacked_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                  preprocessing_options)
+    blacked_images = blacked_tensor_dict[fields.InputDataFields.image]
+    images_shape = tf.shape(images)
+    blacked_images_shape = tf.shape(blacked_images)
+
+    with self.test_session() as sess:
+      (images_shape_, blacked_images_shape_) = sess.run(
+          [images_shape, blacked_images_shape])
+      self.assertAllEqual(images_shape_, blacked_images_shape_)
+
+  def testRandomJpegQuality(self):
+    preprocessing_options = [(preprocessor.random_jpeg_quality, {
+        'min_jpeg_quality': 0,
+        'max_jpeg_quality': 100
+    })]
+    images = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images}
+    processed_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                    preprocessing_options)
+    encoded_images = processed_tensor_dict[fields.InputDataFields.image]
+    images_shape = tf.shape(images)
+    encoded_images_shape = tf.shape(encoded_images)
+
+    with self.test_session() as sess:
+      images_shape_out, encoded_images_shape_out = sess.run(
+          [images_shape, encoded_images_shape])
+      self.assertAllEqual(images_shape_out, encoded_images_shape_out)
+
+  def testRandomJpegQualityKeepsStaticChannelShape(self):
+    # Set at least three weeks past the forward compatibility horizon for
+    # tf 1.14 of 2019/11/01.
+    # https://github.com/tensorflow/tensorflow/blob/v1.14.0/tensorflow/python/compat/compat.py#L30
+    if not tf.compat.forward_compatible(year=2019, month=12, day=1):
+      self.skipTest('Skipping test for future functionality.')
+
+    preprocessing_options = [(preprocessor.random_jpeg_quality, {
+        'min_jpeg_quality': 0,
+        'max_jpeg_quality': 100
+    })]
+    images = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images}
+    processed_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                    preprocessing_options)
+    encoded_images = processed_tensor_dict[fields.InputDataFields.image]
+    images_static_channels = images.shape[-1]
+    encoded_images_static_channels = encoded_images.shape[-1]
+    self.assertEqual(images_static_channels, encoded_images_static_channels)
+
+  def testRandomJpegQualityWithCache(self):
+    preprocessing_options = [(preprocessor.random_jpeg_quality, {
+        'min_jpeg_quality': 0,
+        'max_jpeg_quality': 100
+    })]
+    self._testPreprocessorCache(preprocessing_options)
+
+  def testRandomJpegQualityWithRandomCoefOne(self):
+    preprocessing_options = [(preprocessor.random_jpeg_quality, {
+        'random_coef': 1.0
+    })]
+    images = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images}
+    processed_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                    preprocessing_options)
+    encoded_images = processed_tensor_dict[fields.InputDataFields.image]
+    images_shape = tf.shape(images)
+    encoded_images_shape = tf.shape(encoded_images)
+
+    with self.test_session() as sess:
+      (images_out, encoded_images_out, images_shape_out,
+       encoded_images_shape_out) = sess.run(
+           [images, encoded_images, images_shape, encoded_images_shape])
+      self.assertAllEqual(images_shape_out, encoded_images_shape_out)
+      self.assertAllEqual(images_out, encoded_images_out)
+
+  def testRandomDownscaleToTargetPixels(self):
+    preprocessing_options = [(preprocessor.random_downscale_to_target_pixels, {
+        'min_target_pixels': 100,
+        'max_target_pixels': 101
+    })]
+    images = tf.random_uniform([1, 25, 100, 3])
+    tensor_dict = {fields.InputDataFields.image: images}
+    processed_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                    preprocessing_options)
+    downscaled_images = processed_tensor_dict[fields.InputDataFields.image]
+    downscaled_shape = tf.shape(downscaled_images)
+    expected_shape = [1, 5, 20, 3]
+    with self.test_session() as sess:
+      downscaled_shape_out = sess.run(downscaled_shape)
+      self.assertAllEqual(downscaled_shape_out, expected_shape)
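+
+  # Illustrative sketch (not part of the original tests): the [1, 5, 20, 3]
+  # expectation above follows from downscaling a 25x100 image (2500 pixels)
+  # toward ~100 target pixels: scale = sqrt(target / current) = 0.2, giving
+  # a 5x20 result. In plain Python:
+  def testDownscaleTargetPixelsArithmeticSketch(self):
+    height, width, target_pixels = 25, 100, 100
+    scale = np.sqrt(float(target_pixels) / (height * width))  # 0.2
+    self.assertAllEqual(
+        [int(round(height * scale)), int(round(width * scale))], [5, 20])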
+
+  def testRandomDownscaleToTargetPixelsWithMasks(self):
+    preprocessing_options = [(preprocessor.random_downscale_to_target_pixels, {
+        'min_target_pixels': 100,
+        'max_target_pixels': 101
+    })]
+    images = tf.random_uniform([1, 25, 100, 3])
+    masks = tf.random_uniform([10, 25, 100])
+    tensor_dict = {
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_instance_masks: masks
+    }
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_instance_masks=True)
+    processed_tensor_dict = preprocessor.preprocess(
+        tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+    downscaled_images = processed_tensor_dict[fields.InputDataFields.image]
+    downscaled_masks = processed_tensor_dict[
+        fields.InputDataFields.groundtruth_instance_masks]
+    downscaled_images_shape = tf.shape(downscaled_images)
+    downscaled_masks_shape = tf.shape(downscaled_masks)
+    expected_images_shape = [1, 5, 20, 3]
+    expected_masks_shape = [10, 5, 20]
+    with self.test_session() as sess:
+      downscaled_images_shape_out, downscaled_masks_shape_out = sess.run(
+          [downscaled_images_shape, downscaled_masks_shape])
+      self.assertAllEqual(downscaled_images_shape_out, expected_images_shape)
+      self.assertAllEqual(downscaled_masks_shape_out, expected_masks_shape)
+
+  @parameterized.parameters(
+      {'test_masks': False},
+      {'test_masks': True}
+  )
+  def testRandomDownscaleToTargetPixelsWithCache(self, test_masks):
+    preprocessing_options = [(preprocessor.random_downscale_to_target_pixels, {
+        'min_target_pixels': 100,
+        'max_target_pixels': 999
+    })]
+    self._testPreprocessorCache(preprocessing_options, test_masks=test_masks)
+
+  def testRandomDownscaleToTargetPixelsWithRandomCoefOne(self):
+    preprocessing_options = [(preprocessor.random_downscale_to_target_pixels, {
+        'random_coef': 1.0,
+        'min_target_pixels': 10,
+        'max_target_pixels': 20,
+    })]
+    images = tf.random_uniform([1, 25, 100, 3])
+    tensor_dict = {fields.InputDataFields.image: images}
+    processed_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                    preprocessing_options)
+    downscaled_images = processed_tensor_dict[fields.InputDataFields.image]
+    images_shape = tf.shape(images)
+    downscaled_images_shape = tf.shape(downscaled_images)
+
+    with self.test_session() as sess:
+      (images_out, downscaled_images_out, images_shape_out,
+       downscaled_images_shape_out) = sess.run(
+           [images, downscaled_images, images_shape, downscaled_images_shape])
+      self.assertAllEqual(images_shape_out, downscaled_images_shape_out)
+      self.assertAllEqual(images_out, downscaled_images_out)
+
+  def testRandomDownscaleToTargetPixelsIgnoresSmallImages(self):
+    preprocessing_options = [(preprocessor.random_downscale_to_target_pixels, {
+        'min_target_pixels': 1000,
+        'max_target_pixels': 1001
+    })]
+    images = tf.random_uniform([1, 10, 10, 3])
+    tensor_dict = {fields.InputDataFields.image: images}
+    processed_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                    preprocessing_options)
+    downscaled_images = processed_tensor_dict[fields.InputDataFields.image]
+    images_shape = tf.shape(images)
+    downscaled_images_shape = tf.shape(downscaled_images)
+    with self.test_session() as sess:
+      (images_out, downscaled_images_out, images_shape_out,
+       downscaled_images_shape_out) = sess.run(
+           [images, downscaled_images, images_shape, downscaled_images_shape])
+      self.assertAllEqual(images_shape_out, downscaled_images_shape_out)
+      self.assertAllEqual(images_out, downscaled_images_out)
+
+  def testRandomPatchGaussianShape(self):
+    preprocessing_options = [(preprocessor.random_patch_gaussian, {
+        'min_patch_size': 1,
+        'max_patch_size': 200,
+        'min_gaussian_stddev': 0.0,
+        'max_gaussian_stddev': 2.0
+    })]
+    images = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images}
+    processed_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                    preprocessing_options)
+    patched_images = processed_tensor_dict[fields.InputDataFields.image]
+    images_shape = tf.shape(images)
+    patched_images_shape = tf.shape(patched_images)
+    self.assertAllEqual(images_shape, patched_images_shape)
+
+  def testRandomPatchGaussianClippedToLowerBound(self):
+    preprocessing_options = [(preprocessor.random_patch_gaussian, {
+        'min_patch_size': 20,
+        'max_patch_size': 40,
+        'min_gaussian_stddev': 50,
+        'max_gaussian_stddev': 100
+    })]
+    images = tf.zeros([1, 5, 4, 3])
+    tensor_dict = {fields.InputDataFields.image: images}
+    processed_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                    preprocessing_options)
+    patched_images = processed_tensor_dict[fields.InputDataFields.image]
+    self.assertAllGreaterEqual(patched_images, 0.0)
+
+  def testRandomPatchGaussianClippedToUpperBound(self):
+    preprocessing_options = [(preprocessor.random_patch_gaussian, {
+        'min_patch_size': 20,
+        'max_patch_size': 40,
+        'min_gaussian_stddev': 50,
+        'max_gaussian_stddev': 100
+    })]
+    images = tf.constant(255.0, shape=[1, 5, 4, 3])
+    tensor_dict = {fields.InputDataFields.image: images}
+    processed_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                    preprocessing_options)
+    patched_images = processed_tensor_dict[fields.InputDataFields.image]
+    self.assertAllLessEqual(patched_images, 255.0)
+
+  def testRandomPatchGaussianWithCache(self):
+    preprocessing_options = [(preprocessor.random_patch_gaussian, {
+        'min_patch_size': 1,
+        'max_patch_size': 200,
+        'min_gaussian_stddev': 0.0,
+        'max_gaussian_stddev': 2.0
+    })]
+    self._testPreprocessorCache(preprocessing_options)
+
+  def testRandomPatchGaussianWithRandomCoefOne(self):
+    preprocessing_options = [(preprocessor.random_patch_gaussian, {
+        'random_coef': 1.0
+    })]
+    images = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images}
+    processed_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                    preprocessing_options)
+    patched_images = processed_tensor_dict[fields.InputDataFields.image]
+    images_shape = tf.shape(images)
+    patched_images_shape = tf.shape(patched_images)
+
+    self.assertAllEqual(images_shape, patched_images_shape)
+    self.assertAllEqual(images, patched_images)
+
+  def testAutoAugmentImage(self):
+    preprocessing_options = []
+    preprocessing_options.append((preprocessor.autoaugment_image, {
+        'policy_name': 'v1'
+    }))
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    tensor_dict = {fields.InputDataFields.image: images,
+                   fields.InputDataFields.groundtruth_boxes: boxes}
+    autoaugment_tensor_dict = preprocessor.preprocess(
+        tensor_dict, preprocessing_options)
+    augmented_images = autoaugment_tensor_dict[fields.InputDataFields.image]
+    augmented_boxes = autoaugment_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    images_shape = tf.shape(images)
+    boxes_shape = tf.shape(boxes)
+    augmented_images_shape = tf.shape(augmented_images)
+    augmented_boxes_shape = tf.shape(augmented_boxes)
+
+    with self.test_session() as sess:
+      (images_shape_, boxes_shape_,
+       augmented_images_shape_, augmented_boxes_shape_) = sess.run(
+           [images_shape, boxes_shape,
+            augmented_images_shape, augmented_boxes_shape])
+      self.assertAllEqual(images_shape_, augmented_images_shape_)
+      self.assertAllEqual(boxes_shape_, augmented_boxes_shape_)
+
+  def testRandomResizeMethodWithCache(self):
+    preprocess_options = []
+    preprocess_options.append((preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    }))
+    preprocess_options.append((preprocessor.random_resize_method, {
+        'target_size': (75, 150)
+    }))
+    self._testPreprocessorCache(preprocess_options,
+                                test_boxes=True,
+                                test_masks=True,
+                                test_keypoints=True)
+
+  def testRandomResizeMethod(self):
+    preprocessing_options = []
+    preprocessing_options.append((preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    }))
+    preprocessing_options.append((preprocessor.random_resize_method, {
+        'target_size': (75, 150)
+    }))
+    images = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images}
+    resized_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                  preprocessing_options)
+    resized_images = resized_tensor_dict[fields.InputDataFields.image]
+    resized_images_shape = tf.shape(resized_images)
+    expected_images_shape = tf.constant([1, 75, 150, 3], dtype=tf.int32)
+
+    with self.test_session() as sess:
+      (expected_images_shape_, resized_images_shape_) = sess.run(
+          [expected_images_shape, resized_images_shape])
+      self.assertAllEqual(expected_images_shape_,
+                          resized_images_shape_)
+
+  def testResizeImageWithMasks(self):
+    """Tests image resizing, checking output sizes."""
+    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+    in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
+    height = 50
+    width = 100
+    expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
+    expected_masks_shape_list = [[15, 50, 100], [10, 50, 100]]
+
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      in_image = tf.random_uniform(in_image_shape)
+      in_masks = tf.random_uniform(in_masks_shape)
+      out_image, out_masks, _ = preprocessor.resize_image(
+          in_image, in_masks, new_height=height, new_width=width)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+
+      with self.test_session() as sess:
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape])
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+  def testResizeImageWithMasksTensorInputHeightAndWidth(self):
+    """Tests image resizing, checking output sizes."""
+    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+    in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
+    height = tf.constant(50, dtype=tf.int32)
+    width = tf.constant(100, dtype=tf.int32)
+    expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
+    expected_masks_shape_list = [[15, 50, 100], [10, 50, 100]]
+
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      in_image = tf.random_uniform(in_image_shape)
+      in_masks = tf.random_uniform(in_masks_shape)
+      out_image, out_masks, _ = preprocessor.resize_image(
+          in_image, in_masks, new_height=height, new_width=width)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+
+      with self.test_session() as sess:
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape])
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
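+
+  # Illustrative sketch (not part of the original tests): resize_image returns
+  # the resized image, the resized masks (when masks are given), and a shape
+  # tensor; the tests above discard that last element. Assuming the
+  # three-element return, the shape tensor can be checked directly:
+  def testResizeImageReturnsShapeTensorSketch(self):
+    in_image = tf.random_uniform([60, 40, 3])
+    in_masks = tf.random_uniform([5, 60, 40])
+    _, _, true_shape = preprocessor.resize_image(
+        in_image, in_masks, new_height=50, new_width=100)
+    with self.test_session() as sess:
+      true_shape_ = sess.run(true_shape)
+    self.assertAllEqual(true_shape_, [50, 100, 3])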
+
+  def testResizeImageWithNoInstanceMask(self):
+    """Tests image resizing, checking output sizes."""
+    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+    in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
+    height = 50
+    width = 100
+    expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
+    expected_masks_shape_list = [[0, 50, 100], [0, 50, 100]]
+
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      in_image = tf.random_uniform(in_image_shape)
+      in_masks = tf.random_uniform(in_masks_shape)
+      out_image, out_masks, _ = preprocessor.resize_image(
+          in_image, in_masks, new_height=height, new_width=width)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+
+      with self.test_session() as sess:
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape])
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+  def testResizeToRangePreservesStaticSpatialShape(self):
+    """Tests image resizing, checking output sizes."""
+    in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
+    min_dim = 50
+    max_dim = 100
+    expected_shape_list = [[75, 50, 3], [50, 100, 3], [30, 100, 3]]
+
+    for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
+      in_image = tf.random_uniform(in_shape)
+      out_image, _ = preprocessor.resize_to_range(
+          in_image, min_dimension=min_dim, max_dimension=max_dim)
+      self.assertAllEqual(out_image.get_shape().as_list(), expected_shape)
+
+  def testResizeToRangeWithDynamicSpatialShape(self):
+    """Tests image resizing, checking output sizes."""
+    in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
+    min_dim = 50
+    max_dim = 100
+    expected_shape_list = [[75, 50, 3], [50, 100, 3], [30, 100, 3]]
+
+    for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
+      in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
+      out_image, _ = preprocessor.resize_to_range(
+          in_image, min_dimension=min_dim, max_dimension=max_dim)
+      out_image_shape = tf.shape(out_image)
+      with self.test_session() as sess:
+        out_image_shape = sess.run(out_image_shape,
+                                   feed_dict={in_image:
+                                              np.random.randn(*in_shape)})
+        self.assertAllEqual(out_image_shape, expected_shape)
+
+  def testResizeToRangeWithPadToMaxDimensionReturnsCorrectShapes(self):
+    in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
+    min_dim = 50
+    max_dim = 100
+    expected_shape_list = [[100, 100, 3], [100, 100, 3], [100, 100, 3]]
+
+    for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
+      in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
+      out_image, _ = preprocessor.resize_to_range(
+          in_image,
+          min_dimension=min_dim,
+          max_dimension=max_dim,
+          pad_to_max_dimension=True)
+      self.assertAllEqual(out_image.shape.as_list(), expected_shape)
+      out_image_shape = tf.shape(out_image)
+      with self.test_session() as sess:
+        out_image_shape = sess.run(
+            out_image_shape, feed_dict={in_image: np.random.randn(*in_shape)})
+        self.assertAllEqual(out_image_shape, expected_shape)
+
+  def testResizeToRangeWithPadToMaxDimensionReturnsCorrectTensor(self):
+    in_image_np = np.array([[[0, 1, 2]]], np.float32)
+    ex_image_np = np.array(
+        [[[0, 1, 2], [123.68, 116.779, 103.939]],
+         [[123.68, 116.779, 103.939], [123.68, 116.779, 103.939]]], np.float32)
+    min_dim = 1
+    max_dim = 2
+
+    in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
+    out_image, _ = preprocessor.resize_to_range(
+        in_image,
+        min_dimension=min_dim,
+        max_dimension=max_dim,
+        pad_to_max_dimension=True,
+        per_channel_pad_value=(123.68, 116.779, 103.939))
+
+    with self.test_session() as sess:
+      out_image_np = sess.run(out_image, feed_dict={in_image: in_image_np})
+      self.assertAllClose(ex_image_np, out_image_np)
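+
+  # Illustrative sketch (not part of the original tests): resize_to_range
+  # scales so the short side reaches min_dimension unless that would push the
+  # long side past max_dimension, in which case the long side is pinned to
+  # max_dimension. The expected shapes above follow from that rule:
+  def testResizeToRangeScaleRuleSketch(self):
+    def target_shape(height, width, min_dim, max_dim):
+      scale = min(float(min_dim) / min(height, width),
+                  float(max_dim) / max(height, width))
+      return [int(round(height * scale)), int(round(width * scale))]
+    self.assertAllEqual(target_shape(60, 40, 50, 100), [75, 50])
+    self.assertAllEqual(target_shape(15, 30, 50, 100), [50, 100])
+    self.assertAllEqual(target_shape(15, 50, 50, 100), [30, 100])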
+
+  def testResizeToRangeWithMasksPreservesStaticSpatialShape(self):
+    """Tests image resizing, checking output sizes."""
+    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+    in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
+    min_dim = 50
+    max_dim = 100
+    expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
+    expected_masks_shape_list = [[15, 75, 50], [10, 50, 100]]
+
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      in_image = tf.random_uniform(in_image_shape)
+      in_masks = tf.random_uniform(in_masks_shape)
+      out_image, out_masks, _ = preprocessor.resize_to_range(
+          in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
+      self.assertAllEqual(out_masks.get_shape().as_list(), expected_mask_shape)
+      self.assertAllEqual(out_image.get_shape().as_list(), expected_image_shape)
+
+  def testResizeToRangeWithMasksAndPadToMaxDimension(self):
+    """Tests image resizing, checking output sizes."""
+    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+    in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
+    min_dim = 50
+    max_dim = 100
+    expected_image_shape_list = [[100, 100, 3], [100, 100, 3]]
+    expected_masks_shape_list = [[15, 100, 100], [10, 100, 100]]
+
+    for (in_image_shape,
+         expected_image_shape, in_masks_shape, expected_mask_shape) in zip(
+             in_image_shape_list, expected_image_shape_list,
+             in_masks_shape_list, expected_masks_shape_list):
+      in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
+      in_masks = tf.placeholder(tf.float32, shape=(None, None, None))
+      out_image, out_masks, _ = preprocessor.resize_to_range(
+          in_image,
+          in_masks,
+          min_dimension=min_dim,
+          max_dimension=max_dim,
+          pad_to_max_dimension=True)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+
+      with self.test_session() as sess:
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape],
+            feed_dict={
+                in_image: np.random.randn(*in_image_shape),
+                in_masks: np.random.randn(*in_masks_shape)
+            })
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+  def testResizeToRangeWithMasksAndDynamicSpatialShape(self):
+    """Tests image resizing, checking output sizes."""
+    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+    in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
+    min_dim = 50
+    max_dim = 100
+    expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
+    expected_masks_shape_list = [[15, 75, 50], [10, 50, 100]]
+
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
+      in_masks = tf.placeholder(tf.float32, shape=(None, None, None))
+      out_image, out_masks, _ = preprocessor.resize_to_range(
+          in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+
+      with self.test_session() as sess:
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape],
+            feed_dict={
+                in_image: np.random.randn(*in_image_shape),
+                in_masks: np.random.randn(*in_masks_shape)
+            })
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+  def testResizeToRangeWithInstanceMasksTensorOfSizeZero(self):
+    """Tests image resizing, checking output sizes."""
+    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+    in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
+    min_dim = 50
+    max_dim = 100
+    expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
+    expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]]
+
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      in_image = tf.random_uniform(in_image_shape)
+      in_masks = tf.random_uniform(in_masks_shape)
+      out_image, out_masks, _ = preprocessor.resize_to_range(
+          in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+
+      with self.test_session() as sess:
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape])
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+  def testResizeToRange4DImageTensor(self):
+    image = tf.random_uniform([1, 200, 300, 3])
+    with self.assertRaises(ValueError):
+      preprocessor.resize_to_range(image, 500, 600)
+
+  def testResizeToRangeSameMinMax(self):
+    """Tests image resizing, checking output sizes."""
+    in_shape_list = [[312, 312, 3], [299, 299, 3]]
+    min_dim = 320
+    max_dim = 320
+    expected_shape_list = [[320, 320, 3], [320, 320, 3]]
+
+    for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
+      in_image = tf.random_uniform(in_shape)
+      out_image, _ = preprocessor.resize_to_range(
+          in_image, min_dimension=min_dim, max_dimension=max_dim)
+      out_image_shape = tf.shape(out_image)
+
+      with self.test_session() as sess:
+        out_image_shape = sess.run(out_image_shape)
+        self.assertAllEqual(out_image_shape, expected_shape)
+
+  def testResizeToMaxDimensionTensorShapes(self):
+    """Tests both cases where image should and shouldn't be resized."""
+    in_image_shape_list = [[100, 50, 3], [15, 30, 3]]
+    in_masks_shape_list = [[15, 100, 50], [10, 15, 30]]
+    max_dim = 50
+    expected_image_shape_list = [[50, 25, 3], [15, 30, 3]]
+    expected_masks_shape_list = [[15, 50, 25], [10, 15, 30]]
+
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
+      in_masks = tf.placeholder(tf.float32, shape=(None, None, None))
+      out_image, out_masks, _ = preprocessor.resize_to_max_dimension(
+          in_image, in_masks, max_dimension=max_dim)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+
+      with self.test_session() as sess:
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape],
+            feed_dict={
+                in_image: np.random.randn(*in_image_shape),
+                in_masks: np.random.randn(*in_masks_shape)
+            })
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
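+
+  # Illustrative sketch (not part of the original tests):
+  # resize_to_max_dimension only shrinks; images whose longest side already
+  # fits under max_dimension pass through unchanged, matching the two cases
+  # in the test above:
+  def testResizeToMaxDimensionRuleSketch(self):
+    def target_shape(height, width, max_dim):
+      scale = min(1.0, float(max_dim) / max(height, width))
+      return [int(round(height * scale)), int(round(width * scale))]
+    self.assertAllEqual(target_shape(100, 50, 50), [50, 25])  # shrunk 2x
+    self.assertAllEqual(target_shape(15, 30, 50), [15, 30])   # unchanged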
+
+  def testResizeToMaxDimensionWithInstanceMasksTensorOfSizeZero(self):
+    """Tests both cases where image should and shouldn't be resized."""
+    in_image_shape_list = [[100, 50, 3], [15, 30, 3]]
+    in_masks_shape_list = [[0, 100, 50], [0, 15, 30]]
+    max_dim = 50
+    expected_image_shape_list = [[50, 25, 3], [15, 30, 3]]
+    expected_masks_shape_list = [[0, 50, 25], [0, 15, 30]]
+
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      in_image = tf.random_uniform(in_image_shape)
+      in_masks = tf.random_uniform(in_masks_shape)
+      out_image, out_masks, _ = preprocessor.resize_to_max_dimension(
+          in_image, in_masks, max_dimension=max_dim)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+
+      with self.test_session() as sess:
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape])
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+  def testResizeToMaxDimensionRaisesErrorOn4DImage(self):
+    image = tf.random_uniform([1, 200, 300, 3])
+    with self.assertRaises(ValueError):
+      preprocessor.resize_to_max_dimension(image, 500)
+
+  def testResizeToMinDimensionTensorShapes(self):
+    in_image_shape_list = [[60, 55, 3], [15, 30, 3]]
+    in_masks_shape_list = [[15, 60, 55], [10, 15, 30]]
+    min_dim = 50
+    expected_image_shape_list = [[60, 55, 3], [50, 100, 3]]
+    expected_masks_shape_list = [[15, 60, 55], [10, 50, 100]]
+
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
+      in_masks = tf.placeholder(tf.float32, shape=(None, None, None))
+      out_image, out_masks, _ = preprocessor.resize_to_min_dimension(
+          in_image, in_masks, min_dimension=min_dim)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+
+      with self.test_session() as sess:
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape],
+            feed_dict={
+                in_image: np.random.randn(*in_image_shape),
+                in_masks: np.random.randn(*in_masks_shape)
+            })
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+  def testResizeToMinDimensionWithInstanceMasksTensorOfSizeZero(self):
+    """Tests image resizing, checking output sizes."""
+    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+    in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
+    min_dim = 50
+    expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
+    expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]]
+
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      in_image = tf.random_uniform(in_image_shape)
+      in_masks = tf.random_uniform(in_masks_shape)
+      out_image, out_masks, _ = preprocessor.resize_to_min_dimension(
+          in_image, in_masks, min_dimension=min_dim)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+
+      with self.test_session() as sess:
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape])
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+  def testResizeToMinDimensionRaisesErrorOn4DImage(self):
+    image = tf.random_uniform([1, 200, 300, 3])
+    with self.assertRaises(ValueError):
+      preprocessor.resize_to_min_dimension(image, 500)
+
+  def testScaleBoxesToPixelCoordinates(self):
+    """Tests box scaling, checking scaled values."""
+    in_shape = [60, 40, 3]
+    in_boxes = [[0.1, 0.2, 0.4, 0.6],
+                [0.5, 0.3, 0.9, 0.7]]
+
+    expected_boxes = [[6., 8., 24., 24.],
+                      [30., 12., 54., 28.]]
+
+    in_image = tf.random_uniform(in_shape)
+    in_boxes = tf.constant(in_boxes)
+    _, out_boxes = preprocessor.scale_boxes_to_pixel_coordinates(
+        in_image, boxes=in_boxes)
+    with self.test_session() as sess:
+      out_boxes = sess.run(out_boxes)
+      self.assertAllClose(out_boxes, expected_boxes)
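+
+  # Illustrative sketch (not part of the original tests): scaling to pixel
+  # coordinates multiplies each normalized [ymin, xmin, ymax, xmax] box by
+  # [height, width, height, width], which reproduces expected_boxes above:
+  def testScaleBoxesToPixelCoordinatesArithmeticSketch(self):
+    boxes = np.array([[0.1, 0.2, 0.4, 0.6],
+                      [0.5, 0.3, 0.9, 0.7]], dtype=np.float32)
+    scale = np.array([60.0, 40.0, 60.0, 40.0], dtype=np.float32)
+    self.assertAllClose(boxes * scale,
+                        [[6., 8., 24., 24.], [30., 12., 54., 28.]])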
self.createTestKeypoints() + + expected_boxes = [[0., 10., 45., 40.], + [15., 20., 45., 40.]] + expected_keypoints = [ + [[6., 4.], [12., 8.], [18., 12.]], + [[24., 16.], [30., 20.], [36., 24.]], + ] + + in_image = tf.random_uniform(in_shape) + _, out_boxes, out_keypoints = preprocessor.scale_boxes_to_pixel_coordinates( + in_image, boxes=in_boxes, keypoints=in_keypoints) + with self.test_session() as sess: + out_boxes_, out_keypoints_ = sess.run([out_boxes, out_keypoints]) + self.assertAllClose(out_boxes_, expected_boxes) + self.assertAllClose(out_keypoints_, expected_keypoints) + + def testSubtractChannelMean(self): + """Tests whether channel means have been subtracted.""" + with self.test_session(): + image = tf.zeros((240, 320, 3)) + means = [1, 2, 3] + actual = preprocessor.subtract_channel_mean(image, means=means) + actual = actual.eval() + + self.assertTrue((actual[:, :, 0] == -1).all()) + self.assertTrue((actual[:, :, 1] == -2).all()) + self.assertTrue((actual[:, :, 2] == -3).all()) + + def testOneHotEncoding(self): + """Tests one hot encoding of multiclass labels.""" + with self.test_session(): + labels = tf.constant([1, 4, 2], dtype=tf.int32) + one_hot = preprocessor.one_hot_encoding(labels, num_classes=5) + one_hot = one_hot.eval() + + self.assertAllEqual([0, 1, 1, 0, 1], one_hot) + + def testRandomSelfConcatImage(self): + tf.set_random_seed(24601) + + images = self.createTestImages() + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + confidences = weights + scores = self.createTestMultiClassScores() + + tensor_dict = { + fields.InputDataFields.image: tf.cast(images, dtype=tf.float32), + fields.InputDataFields.groundtruth_boxes: boxes, + fields.InputDataFields.groundtruth_classes: labels, + fields.InputDataFields.groundtruth_weights: weights, + fields.InputDataFields.groundtruth_confidences: confidences, + fields.InputDataFields.multiclass_scores: scores, + } + + preprocessing_options = [(preprocessor.random_self_concat_image, { + 'concat_vertical_probability': 0.5, + 'concat_horizontal_probability': 0.5, + 'seed': 24601, + })] + func_arg_map = preprocessor.get_default_func_arg_map( + True, True, True) + output_tensor_dict = preprocessor.preprocess( + tensor_dict, preprocessing_options, func_arg_map=func_arg_map) + + final_shape = tf.shape(output_tensor_dict[fields.InputDataFields.image])[ + 1:3] + + with self.test_session() as sess: + outputs = [] + + augment_height_only = False + augment_width_only = False + + for _ in range(50): + original_boxes = sess.run(boxes) + shape, new_boxes, new_labels, new_confidences, new_scores = sess.run( + [final_shape, + output_tensor_dict[fields.InputDataFields.groundtruth_boxes], + output_tensor_dict[fields.InputDataFields.groundtruth_classes], + output_tensor_dict[fields.InputDataFields.groundtruth_confidences], + output_tensor_dict[fields.InputDataFields.multiclass_scores], + ]) + shape = np.array(shape) + outputs.append(shape) + + if np.array_equal(shape, [8, 4]): + augment_height_only = True + self.assertEqual( + new_boxes.shape[0], 2 * boxes.shape[0]) + + self.assertAllClose(new_boxes[:2, :] * [2.0, 1.0, 2.0, 1.0], + original_boxes) + self.assertAllClose( + (new_boxes[2:, :] - [0.5, 0.0, 0.5, 0.0]) * [ + 2.0, 1.0, 2.0, 1.0], + original_boxes) + elif np.array_equal(shape, [4, 8]): + augment_width_only = True + self.assertEqual( + new_boxes.shape[0], 2 * boxes.shape[0]) + + self.assertAllClose(new_boxes[:2, :] * [1.0, 2.0, 1.0, 2.0], + original_boxes) + self.assertAllClose( + 
(new_boxes[2:, :] - [0.0, 0.5, 0.0, 0.5]) * [ + 1.0, 2.0, 1.0, 2.0], + original_boxes) + + augmentation_factor = new_boxes.shape[0] / boxes.shape[0].value + self.assertEqual(new_labels.shape[0], + labels.shape[0].value * augmentation_factor) + self.assertEqual(new_confidences.shape[0], + confidences.shape[0].value * augmentation_factor) + self.assertEqual(new_scores.shape[0], + scores.shape[0].value * augmentation_factor) + + max_height = max(x[0] for x in outputs) + max_width = max(x[1] for x in outputs) + + self.assertEqual(max_height, 8) + self.assertEqual(max_width, 8) + self.assertEqual(augment_height_only, True) + self.assertEqual(augment_width_only, True) + + def testSSDRandomCropWithCache(self): + preprocess_options = [ + (preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + }), + (preprocessor.ssd_random_crop, {})] + self._testPreprocessorCache(preprocess_options, + test_boxes=True, + test_masks=False, + test_keypoints=False) + + def testSSDRandomCrop(self): + preprocessing_options = [ + (preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + }), + (preprocessor.ssd_random_crop, {})] + images = self.createTestImages() + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + tensor_dict = { + fields.InputDataFields.image: images, + fields.InputDataFields.groundtruth_boxes: boxes, + fields.InputDataFields.groundtruth_classes: labels, + fields.InputDataFields.groundtruth_weights: weights, + } + distorted_tensor_dict = preprocessor.preprocess(tensor_dict, + preprocessing_options) + distorted_images = distorted_tensor_dict[fields.InputDataFields.image] + distorted_boxes = distorted_tensor_dict[ + fields.InputDataFields.groundtruth_boxes] + + images_rank = tf.rank(images) + distorted_images_rank = tf.rank(distorted_images) + boxes_rank = tf.rank(boxes) + distorted_boxes_rank = tf.rank(distorted_boxes) + + with self.test_session() as sess: + (boxes_rank_, distorted_boxes_rank_, images_rank_, + distorted_images_rank_) = sess.run( + [boxes_rank, distorted_boxes_rank, images_rank, + distorted_images_rank]) + self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) + self.assertAllEqual(images_rank_, distorted_images_rank_) + + def testSSDRandomCropWithMultiClassScores(self): + preprocessing_options = [(preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + }), (preprocessor.ssd_random_crop, {})] + images = self.createTestImages() + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + multiclass_scores = self.createTestMultiClassScores() + + tensor_dict = { + fields.InputDataFields.image: images, + fields.InputDataFields.groundtruth_boxes: boxes, + fields.InputDataFields.groundtruth_classes: labels, + fields.InputDataFields.multiclass_scores: multiclass_scores, + fields.InputDataFields.groundtruth_weights: weights, + } + preprocessor_arg_map = preprocessor.get_default_func_arg_map( + include_multiclass_scores=True) + distorted_tensor_dict = preprocessor.preprocess( + tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) + distorted_images = distorted_tensor_dict[fields.InputDataFields.image] + distorted_boxes = distorted_tensor_dict[ + fields.InputDataFields.groundtruth_boxes] + distorted_multiclass_scores = distorted_tensor_dict[ + 
fields.InputDataFields.multiclass_scores] + + images_rank = tf.rank(images) + distorted_images_rank = tf.rank(distorted_images) + boxes_rank = tf.rank(boxes) + distorted_boxes_rank = tf.rank(distorted_boxes) + multiclass_scores_rank = tf.rank(multiclass_scores) + distorted_multiclass_scores_rank = tf.rank(distorted_multiclass_scores) + + with self.test_session() as sess: + (boxes_rank_, distorted_boxes_, distorted_boxes_rank_, images_rank_, + distorted_images_rank_, multiclass_scores_rank_, + distorted_multiclass_scores_, + distorted_multiclass_scores_rank_) = sess.run([ + boxes_rank, distorted_boxes, distorted_boxes_rank, images_rank, + distorted_images_rank, multiclass_scores_rank, + distorted_multiclass_scores, distorted_multiclass_scores_rank + ]) + self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) + self.assertAllEqual(images_rank_, distorted_images_rank_) + self.assertAllEqual(multiclass_scores_rank_, + distorted_multiclass_scores_rank_) + self.assertAllEqual(distorted_boxes_.shape[0], + distorted_multiclass_scores_.shape[0]) + + def testSSDRandomCropPad(self): + images = self.createTestImages() + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + preprocessing_options = [ + (preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + }), + (preprocessor.ssd_random_crop_pad, {})] + tensor_dict = { + fields.InputDataFields.image: images, + fields.InputDataFields.groundtruth_boxes: boxes, + fields.InputDataFields.groundtruth_classes: labels, + fields.InputDataFields.groundtruth_weights: weights, + } + distorted_tensor_dict = preprocessor.preprocess(tensor_dict, + preprocessing_options) + distorted_images = distorted_tensor_dict[fields.InputDataFields.image] + distorted_boxes = distorted_tensor_dict[ + fields.InputDataFields.groundtruth_boxes] + + images_rank = tf.rank(images) + distorted_images_rank = tf.rank(distorted_images) + boxes_rank = tf.rank(boxes) + distorted_boxes_rank = tf.rank(distorted_boxes) + + with self.test_session() as sess: + (boxes_rank_, distorted_boxes_rank_, images_rank_, + distorted_images_rank_) = sess.run([ + boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank + ]) + self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) + self.assertAllEqual(images_rank_, distorted_images_rank_) + + def testSSDRandomCropFixedAspectRatioWithCache(self): + preprocess_options = [ + (preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + }), + (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})] + self._testPreprocessorCache(preprocess_options, + test_boxes=True, + test_masks=False, + test_keypoints=False) + + def _testSSDRandomCropFixedAspectRatio(self, + include_multiclass_scores, + include_instance_masks, + include_keypoints): + images = self.createTestImages() + boxes = self.createTestBoxes() + labels = self.createTestLabels() + weights = self.createTestGroundtruthWeights() + preprocessing_options = [(preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + }), (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})] + tensor_dict = { + fields.InputDataFields.image: images, + fields.InputDataFields.groundtruth_boxes: boxes, + fields.InputDataFields.groundtruth_classes: labels, + fields.InputDataFields.groundtruth_weights: weights + } + if include_multiclass_scores: + 
multiclass_scores = self.createTestMultiClassScores() + tensor_dict[fields.InputDataFields.multiclass_scores] = ( + multiclass_scores) + if include_instance_masks: + masks = self.createTestMasks() + tensor_dict[fields.InputDataFields.groundtruth_instance_masks] = masks + if include_keypoints: + keypoints = self.createTestKeypoints() + tensor_dict[fields.InputDataFields.groundtruth_keypoints] = keypoints + + preprocessor_arg_map = preprocessor.get_default_func_arg_map( + include_multiclass_scores=include_multiclass_scores, + include_instance_masks=include_instance_masks, + include_keypoints=include_keypoints) + distorted_tensor_dict = preprocessor.preprocess( + tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) + distorted_images = distorted_tensor_dict[fields.InputDataFields.image] + distorted_boxes = distorted_tensor_dict[ + fields.InputDataFields.groundtruth_boxes] + images_rank = tf.rank(images) + distorted_images_rank = tf.rank(distorted_images) + boxes_rank = tf.rank(boxes) + distorted_boxes_rank = tf.rank(distorted_boxes) + + with self.test_session() as sess: + (boxes_rank_, distorted_boxes_rank_, images_rank_, + distorted_images_rank_) = sess.run( + [boxes_rank, distorted_boxes_rank, images_rank, + distorted_images_rank]) + self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) + self.assertAllEqual(images_rank_, distorted_images_rank_) + + def testSSDRandomCropFixedAspectRatio(self): + self._testSSDRandomCropFixedAspectRatio(include_multiclass_scores=False, + include_instance_masks=False, + include_keypoints=False) + + def testSSDRandomCropFixedAspectRatioWithMultiClassScores(self): + self._testSSDRandomCropFixedAspectRatio(include_multiclass_scores=True, + include_instance_masks=False, + include_keypoints=False) + + def testSSDRandomCropFixedAspectRatioWithMasksAndKeypoints(self): + self._testSSDRandomCropFixedAspectRatio(include_multiclass_scores=False, + include_instance_masks=True, + include_keypoints=True) + + def testSSDRandomCropFixedAspectRatioWithLabelScoresMasksAndKeypoints(self): + self._testSSDRandomCropFixedAspectRatio(include_multiclass_scores=False, + include_instance_masks=True, + include_keypoints=True) + + def testConvertClassLogitsToSoftmax(self): + multiclass_scores = tf.constant( + [[1.0, 0.0], [0.5, 0.5], [1000, 1]], dtype=tf.float32) + temperature = 2.0 + + converted_multiclass_scores = ( + preprocessor.convert_class_logits_to_softmax( + multiclass_scores=multiclass_scores, temperature=temperature)) + + expected_converted_multiclass_scores = [[[0.62245935, 0.37754068], + [0.5, 0.5], [1, 0]]] + + with self.test_session() as sess: + (converted_multiclass_scores_) = sess.run([converted_multiclass_scores]) + + self.assertAllClose(converted_multiclass_scores_, + expected_converted_multiclass_scores) + + +if __name__ == '__main__': + tf.test.main() diff --git a/core/region_similarity_calculator.py b/core/region_similarity_calculator.py new file mode 100644 index 0000000..7b6e148 --- /dev/null +++ b/core/region_similarity_calculator.py @@ -0,0 +1,159 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Region Similarity Calculators for BoxLists. + +Region Similarity Calculators compare a pairwise measure of similarity +between the boxes in two BoxLists. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from abc import ABCMeta +from abc import abstractmethod + +import six +import tensorflow as tf + +from object_detection.core import box_list_ops +from object_detection.core import standard_fields as fields + + +class RegionSimilarityCalculator(six.with_metaclass(ABCMeta, object)): + """Abstract base class for region similarity calculator.""" + + def compare(self, boxlist1, boxlist2, scope=None): + """Computes matrix of pairwise similarity between BoxLists. + + This op (to be overridden) computes a measure of pairwise similarity between + the boxes in the given BoxLists. Higher values indicate more similarity. + + Note that this method simply measures similarity and does not explicitly + perform a matching. + + Args: + boxlist1: BoxList holding N boxes. + boxlist2: BoxList holding M boxes. + scope: Op scope name. Defaults to 'Compare' if None. + + Returns: + a (float32) tensor of shape [N, M] with pairwise similarity score. + """ + with tf.name_scope(scope, 'Compare', [boxlist1, boxlist2]) as scope: + return self._compare(boxlist1, boxlist2) + + @abstractmethod + def _compare(self, boxlist1, boxlist2): + pass + + +class IouSimilarity(RegionSimilarityCalculator): + """Class to compute similarity based on Intersection over Union (IOU) metric. + + This class computes pairwise similarity between two BoxLists based on IOU. + """ + + def _compare(self, boxlist1, boxlist2): + """Compute pairwise IOU similarity between the two BoxLists. + + Args: + boxlist1: BoxList holding N boxes. + boxlist2: BoxList holding M boxes. + + Returns: + A tensor with shape [N, M] representing pairwise iou scores. + """ + return box_list_ops.iou(boxlist1, boxlist2) + + +class NegSqDistSimilarity(RegionSimilarityCalculator): + """Class to compute similarity based on the squared distance metric. + + This class computes pairwise similarity between two BoxLists based on the + negative squared distance metric. + """ + + def _compare(self, boxlist1, boxlist2): + """Compute matrix of (negated) sq distances. + + Args: + boxlist1: BoxList holding N boxes. + boxlist2: BoxList holding M boxes. + + Returns: + A tensor with shape [N, M] representing negated pairwise squared distance. + """ + return -1 * box_list_ops.sq_dist(boxlist1, boxlist2) + + +class IoaSimilarity(RegionSimilarityCalculator): + """Class to compute similarity based on Intersection over Area (IOA) metric. + + This class computes pairwise similarity between two BoxLists based on their + pairwise intersections divided by the areas of second BoxLists. + """ + + def _compare(self, boxlist1, boxlist2): + """Compute pairwise IOA similarity between the two BoxLists. + + Args: + boxlist1: BoxList holding N boxes. + boxlist2: BoxList holding M boxes. 
+ + Returns: + A tensor with shape [N, M] representing pairwise IOA scores. + """ + return box_list_ops.ioa(boxlist1, boxlist2) + + +class ThresholdedIouSimilarity(RegionSimilarityCalculator): + """Class to compute similarity based on thresholded IOU and score. + + This class computes pairwise similarity between two BoxLists based on IOU and + a 'score' present in boxlist1. If IOU > threshold, then the entry in the + output pairwise tensor will contain `score`, otherwise 0. + """ + + def __init__(self, iou_threshold=0): + """Initialize the ThresholdedIouSimilarity. + + Args: + iou_threshold: For a given pair of boxes, if the IOU is > iou_threshold, + then the comparison result will be the foreground probability of + the first box, otherwise it will be zero. + """ + super(ThresholdedIouSimilarity, self).__init__() + self._iou_threshold = iou_threshold + + def _compare(self, boxlist1, boxlist2): + """Compute pairwise IOU similarity between the two BoxLists and score. + + Args: + boxlist1: BoxList holding N boxes. Must have a score field. + boxlist2: BoxList holding M boxes. + + Returns: + A tensor with shape [N, M] representing scores thresholded by pairwise + IOU scores. + """ + ious = box_list_ops.iou(boxlist1, boxlist2) + scores = boxlist1.get_field(fields.BoxListFields.scores) + scores = tf.expand_dims(scores, axis=1) + row_replicated_scores = tf.tile(scores, [1, tf.shape(ious)[-1]]) + thresholded_ious = tf.where(ious > self._iou_threshold, + row_replicated_scores, tf.zeros_like(ious)) + + return thresholded_ious diff --git a/core/region_similarity_calculator_test.py b/core/region_similarity_calculator_test.py new file mode 100644 index 0000000..1d0c26b --- /dev/null +++ b/core/region_similarity_calculator_test.py @@ -0,0 +1,95 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for region_similarity_calculator.""" +import tensorflow as tf + +from object_detection.core import box_list +from object_detection.core import region_similarity_calculator +from object_detection.core import standard_fields as fields + + +class RegionSimilarityCalculatorTest(tf.test.TestCase): + + def test_get_correct_pairwise_similarity_based_on_iou(self): + corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) + corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], + [0.0, 0.0, 20.0, 20.0]]) + exp_output = [[2.0 / 16.0, 0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]] + boxes1 = box_list.BoxList(corners1) + boxes2 = box_list.BoxList(corners2) + iou_similarity_calculator = region_similarity_calculator.IouSimilarity() + iou_similarity = iou_similarity_calculator.compare(boxes1, boxes2) + with self.test_session() as sess: + iou_output = sess.run(iou_similarity) + self.assertAllClose(iou_output, exp_output) + + def test_get_correct_pairwise_similarity_based_on_squared_distances(self): + corners1 = tf.constant([[0.0, 0.0, 0.0, 0.0], + [1.0, 1.0, 0.0, 2.0]]) + corners2 = tf.constant([[3.0, 4.0, 1.0, 0.0], + [-4.0, 0.0, 0.0, 3.0], + [0.0, 0.0, 0.0, 0.0]]) + exp_output = [[-26, -25, 0], [-18, -27, -6]] + boxes1 = box_list.BoxList(corners1) + boxes2 = box_list.BoxList(corners2) + dist_similarity_calc = region_similarity_calculator.NegSqDistSimilarity() + dist_similarity = dist_similarity_calc.compare(boxes1, boxes2) + with self.test_session() as sess: + dist_output = sess.run(dist_similarity) + self.assertAllClose(dist_output, exp_output) + + def test_get_correct_pairwise_similarity_based_on_ioa(self): + corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) + corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], + [0.0, 0.0, 20.0, 20.0]]) + exp_output_1 = [[2.0 / 12.0, 0, 6.0 / 400.0], + [1.0 / 12.0, 0.0, 5.0 / 400.0]] + exp_output_2 = [[2.0 / 6.0, 1.0 / 5.0], + [0, 0], + [6.0 / 6.0, 5.0 / 5.0]] + boxes1 = box_list.BoxList(corners1) + boxes2 = box_list.BoxList(corners2) + ioa_similarity_calculator = region_similarity_calculator.IoaSimilarity() + ioa_similarity_1 = ioa_similarity_calculator.compare(boxes1, boxes2) + ioa_similarity_2 = ioa_similarity_calculator.compare(boxes2, boxes1) + with self.test_session() as sess: + iou_output_1, iou_output_2 = sess.run( + [ioa_similarity_1, ioa_similarity_2]) + self.assertAllClose(iou_output_1, exp_output_1) + self.assertAllClose(iou_output_2, exp_output_2) + + def test_get_correct_pairwise_similarity_based_on_thresholded_iou(self): + corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) + corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], + [0.0, 0.0, 20.0, 20.0]]) + scores = tf.constant([.3, .6]) + iou_threshold = .013 + + exp_output = tf.constant([[0.3, 0., 0.3], [0.6, 0., 0.]]) + boxes1 = box_list.BoxList(corners1) + boxes1.add_field(fields.BoxListFields.scores, scores) + boxes2 = box_list.BoxList(corners2) + iou_similarity_calculator = ( + region_similarity_calculator.ThresholdedIouSimilarity( + iou_threshold=iou_threshold)) + iou_similarity = iou_similarity_calculator.compare(boxes1, boxes2) + with self.test_session() as sess: + iou_output = sess.run(iou_similarity) + self.assertAllClose(iou_output, exp_output) + + +if __name__ == '__main__': + tf.test.main() diff --git a/core/standard_fields.py b/core/standard_fields.py new file mode 100644 index 
0000000..628902e --- /dev/null +++ b/core/standard_fields.py @@ -0,0 +1,263 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Contains classes specifying naming conventions used for object detection. + + +Specifies: + InputDataFields: standard fields used by reader/preprocessor/batcher. + DetectionResultFields: standard fields returned by object detector. + BoxListFields: standard fields used by BoxList. + TfExampleFields: standard fields for tf-example data format (go/tf-example). +""" + + +class InputDataFields(object): + """Names for the input tensors. + + Holds the standard data field names to use for identifying input tensors. These + should be used by the decoder to identify keys for the returned tensor_dict + containing input tensors, and by the model to identify the tensors it needs. + + Attributes: + image: image. + image_additional_channels: additional channels. + original_image: image in the original input size. + original_image_spatial_shape: spatial shape of the original image. + key: unique key corresponding to image. + source_id: source of the original image. + filename: original filename of the dataset (without common path). + groundtruth_image_classes: image-level class labels. + groundtruth_image_confidences: image-level class confidences. + groundtruth_boxes: coordinates of the ground truth boxes in the image. + groundtruth_classes: box-level class labels. + groundtruth_confidences: box-level class confidences. The shape should be + the same as the shape of groundtruth_classes. + groundtruth_label_types: box-level label types (e.g. explicit negative). + groundtruth_is_crowd: [DEPRECATED, use groundtruth_group_of instead] + is the groundtruth a single object or a crowd. + groundtruth_area: area of a groundtruth segment. + groundtruth_difficult: is a `difficult` object. + groundtruth_group_of: is a `group_of` objects, e.g. multiple objects of the + same class, forming a connected group, where instances are heavily + occluding each other. + proposal_boxes: coordinates of object proposal boxes. + proposal_objectness: objectness score of each proposal. + groundtruth_instance_masks: ground truth instance masks. + groundtruth_instance_boundaries: ground truth instance boundaries. + groundtruth_instance_classes: instance mask-level class labels. + groundtruth_keypoints: ground truth keypoints. + groundtruth_keypoint_visibilities: ground truth keypoint visibilities. + groundtruth_label_weights: groundtruth label weights. + groundtruth_weights: groundtruth weight factor for bounding boxes. + num_groundtruth_boxes: number of groundtruth boxes. + is_annotated: whether an image has been labeled or not. + true_image_shape: true shape of the image within the resized image, as + resized images can be padded with zeros. + multiclass_scores: the label score per class for each box. 
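+ + Example (a minimal illustrative sketch of how these names are used as + tensor_dict keys; image_tensor, boxes_tensor and classes_tensor are assumed + to be produced by a data decoder): + + tensor_dict = { + InputDataFields.image: image_tensor, + InputDataFields.groundtruth_boxes: boxes_tensor, + InputDataFields.groundtruth_classes: classes_tensor, + } 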
+ """ + image = 'image' + image_additional_channels = 'image_additional_channels' + original_image = 'original_image' + original_image_spatial_shape = 'original_image_spatial_shape' + key = 'key' + source_id = 'source_id' + filename = 'filename' + groundtruth_image_classes = 'groundtruth_image_classes' + groundtruth_image_confidences = 'groundtruth_image_confidences' + groundtruth_boxes = 'groundtruth_boxes' + groundtruth_classes = 'groundtruth_classes' + groundtruth_confidences = 'groundtruth_confidences' + groundtruth_label_types = 'groundtruth_label_types' + groundtruth_is_crowd = 'groundtruth_is_crowd' + groundtruth_area = 'groundtruth_area' + groundtruth_difficult = 'groundtruth_difficult' + groundtruth_group_of = 'groundtruth_group_of' + proposal_boxes = 'proposal_boxes' + proposal_objectness = 'proposal_objectness' + groundtruth_instance_masks = 'groundtruth_instance_masks' + groundtruth_instance_boundaries = 'groundtruth_instance_boundaries' + groundtruth_instance_classes = 'groundtruth_instance_classes' + groundtruth_keypoints = 'groundtruth_keypoints' + groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities' + groundtruth_label_weights = 'groundtruth_label_weights' + groundtruth_weights = 'groundtruth_weights' + num_groundtruth_boxes = 'num_groundtruth_boxes' + is_annotated = 'is_annotated' + true_image_shape = 'true_image_shape' + multiclass_scores = 'multiclass_scores' + + +class DetectionResultFields(object): + """Naming conventions for storing the output of the detector. + + Attributes: + source_id: source of the original image. + key: unique key corresponding to image. + detection_boxes: coordinates of the detection boxes in the image. + detection_scores: detection scores for the detection boxes in the image. + detection_multiclass_scores: class score distribution (including background) + for detection boxes in the image including background class. + detection_classes: detection-level class labels. + detection_masks: contains a segmentation mask for each detection box. + detection_boundaries: contains an object boundary for each detection box. + detection_keypoints: contains detection keypoints for each detection box. + num_detections: number of detections in the batch. + raw_detection_boxes: contains decoded detection boxes without Non-Max + suppression. + raw_detection_scores: contains class score logits for raw detection boxes. + detection_anchor_indices: The anchor indices of the detections after NMS. + detection_features: contains extracted features for each detected box + after NMS. + """ + + source_id = 'source_id' + key = 'key' + detection_boxes = 'detection_boxes' + detection_scores = 'detection_scores' + detection_multiclass_scores = 'detection_multiclass_scores' + detection_features = 'detection_features' + detection_classes = 'detection_classes' + detection_masks = 'detection_masks' + detection_boundaries = 'detection_boundaries' + detection_keypoints = 'detection_keypoints' + num_detections = 'num_detections' + raw_detection_boxes = 'raw_detection_boxes' + raw_detection_scores = 'raw_detection_scores' + detection_anchor_indices = 'detection_anchor_indices' + + +class BoxListFields(object): + """Naming conventions for BoxLists. + + Attributes: + boxes: bounding box coordinates. + classes: classes per bounding box. + scores: scores per bounding box. + weights: sample weights per bounding box. + objectness: objectness score per bounding box. + masks: masks per bounding box. + boundaries: boundaries per bounding box. 
+ keypoints: keypoints per bounding box. + keypoint_heatmaps: keypoint heatmaps per bounding box. + is_crowd: is_crowd annotation per bounding box. + """ + boxes = 'boxes' + classes = 'classes' + scores = 'scores' + weights = 'weights' + confidences = 'confidences' + objectness = 'objectness' + masks = 'masks' + boundaries = 'boundaries' + keypoints = 'keypoints' + keypoint_heatmaps = 'keypoint_heatmaps' + is_crowd = 'is_crowd' + + +class PredictionFields(object): + """Naming conventions for standardized prediction outputs. + + Attributes: + feature_maps: List of feature maps for prediction. + anchors: Generated anchors. + raw_detection_boxes: Decoded detection boxes without NMS. + raw_detection_feature_map_indices: Feature map indices from which each raw + detection box was produced. + """ + feature_maps = 'feature_maps' + anchors = 'anchors' + raw_detection_boxes = 'raw_detection_boxes' + raw_detection_feature_map_indices = 'raw_detection_feature_map_indices' + + +class TfExampleFields(object): + """TF-example proto feature names for object detection. + + Holds the standard feature names to load from an Example proto for object + detection. + + Attributes: + image_encoded: JPEG encoded string + image_format: image format, e.g. "JPEG" + filename: filename + channels: number of channels of image + colorspace: colorspace, e.g. "RGB" + height: height of image in pixels, e.g. 462 + width: width of image in pixels, e.g. 581 + source_id: original source of the image + image_class_text: image-level label in text format + image_class_label: image-level label in numerical format + object_class_text: labels in text format, e.g. ["person", "cat"] + object_class_label: labels in numbers, e.g. [16, 8] + object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30 + object_bbox_xmax: xmax coordinates of groundtruth box, e.g. 50, 40 + object_bbox_ymin: ymin coordinates of groundtruth box, e.g. 40, 50 + object_bbox_ymax: ymax coordinates of groundtruth box, e.g. 80, 70 + object_view: viewpoint of object, e.g. ["frontal", "left"] + object_truncated: is object truncated, e.g. [true, false] + object_occluded: is object occluded, e.g. [true, false] + object_difficult: is object difficult, e.g. [true, false] + object_group_of: is object a single object or a group of objects + object_depiction: is object a depiction + object_is_crowd: [DEPRECATED, use object_group_of instead] + is the object a single object or a crowd + object_segment_area: the area of the segment. + object_weight: a weight factor for the object's bounding box. + instance_masks: instance segmentation masks. + instance_boundaries: instance boundaries. + instance_classes: Classes for each instance segmentation mask. + detection_class_label: class label in numbers. + detection_bbox_ymin: ymin coordinates of a detection box. + detection_bbox_xmin: xmin coordinates of a detection box. + detection_bbox_ymax: ymax coordinates of a detection box. + detection_bbox_xmax: xmax coordinates of a detection box. + detection_score: detection score for the class label and box. 
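+ + Example (a minimal sketch of populating two of these fields; encoded_jpeg + is assumed to hold the JPEG-encoded bytes of an image): + + example = tf.train.Example(features=tf.train.Features(feature={ + TfExampleFields.image_encoded: tf.train.Feature( + bytes_list=tf.train.BytesList(value=[encoded_jpeg])), + TfExampleFields.object_bbox_ymin: tf.train.Feature( + float_list=tf.train.FloatList(value=[0.1, 0.4])), + })) 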
+ """ + image_encoded = 'image/encoded' + image_format = 'image/format' # format is reserved keyword + filename = 'image/filename' + channels = 'image/channels' + colorspace = 'image/colorspace' + height = 'image/height' + width = 'image/width' + source_id = 'image/source_id' + image_class_text = 'image/class/text' + image_class_label = 'image/class/label' + object_class_text = 'image/object/class/text' + object_class_label = 'image/object/class/label' + object_bbox_ymin = 'image/object/bbox/ymin' + object_bbox_xmin = 'image/object/bbox/xmin' + object_bbox_ymax = 'image/object/bbox/ymax' + object_bbox_xmax = 'image/object/bbox/xmax' + object_view = 'image/object/view' + object_truncated = 'image/object/truncated' + object_occluded = 'image/object/occluded' + object_difficult = 'image/object/difficult' + object_group_of = 'image/object/group_of' + object_depiction = 'image/object/depiction' + object_is_crowd = 'image/object/is_crowd' + object_segment_area = 'image/object/segment/area' + object_weight = 'image/object/weight' + instance_masks = 'image/segmentation/object' + instance_boundaries = 'image/boundaries/object' + instance_classes = 'image/segmentation/object/class' + detection_class_label = 'image/detection/label' + detection_bbox_ymin = 'image/detection/bbox/ymin' + detection_bbox_xmin = 'image/detection/bbox/xmin' + detection_bbox_ymax = 'image/detection/bbox/ymax' + detection_bbox_xmax = 'image/detection/bbox/xmax' + detection_score = 'image/detection/score' diff --git a/core/target_assigner.py b/core/target_assigner.py new file mode 100644 index 0000000..3e3ba1a --- /dev/null +++ b/core/target_assigner.py @@ -0,0 +1,707 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Base target assigner module. + +The job of a TargetAssigner is, for a given set of anchors (bounding boxes) and +groundtruth detections (bounding boxes), to assign classification and regression +targets to each anchor as well as weights to each anchor (specifying, e.g., +which anchors should not contribute to training loss). + +It assigns classification/regression targets by performing the following steps: +1) Computing pairwise similarity between anchors and groundtruth boxes using a + provided RegionSimilarity Calculator +2) Computing a matching based on the similarity matrix using a provided Matcher +3) Assigning regression targets based on the matching and a provided BoxCoder +4) Assigning classification targets based on the matching and groundtruth labels + +Note that TargetAssigners only operate on detections from a single +image at a time, so any logic for applying a TargetAssigner to multiple +images must be handled externally. 
+""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from six.moves import range +from six.moves import zip +import tensorflow as tf + +from object_detection.box_coders import faster_rcnn_box_coder +from object_detection.box_coders import mean_stddev_box_coder +from object_detection.core import box_coder +from object_detection.core import box_list +from object_detection.core import box_list_ops +from object_detection.core import matcher as mat +from object_detection.core import region_similarity_calculator as sim_calc +from object_detection.core import standard_fields as fields +from object_detection.matchers import argmax_matcher +from object_detection.matchers import bipartite_matcher +from object_detection.utils import shape_utils + + +class TargetAssigner(object): + """Target assigner to compute classification and regression targets.""" + + def __init__(self, + similarity_calc, + matcher, + box_coder_instance, + negative_class_weight=1.0): + """Construct Object Detection Target Assigner. + + Args: + similarity_calc: a RegionSimilarityCalculator + matcher: an object_detection.core.Matcher used to match groundtruth to + anchors. + box_coder_instance: an object_detection.core.BoxCoder used to encode + matching groundtruth boxes with respect to anchors. + negative_class_weight: classification weight to be associated to negative + anchors (default: 1.0). The weight must be in [0., 1.]. + + Raises: + ValueError: if similarity_calc is not a RegionSimilarityCalculator or + if matcher is not a Matcher or if box_coder is not a BoxCoder + """ + if not isinstance(similarity_calc, sim_calc.RegionSimilarityCalculator): + raise ValueError('similarity_calc must be a RegionSimilarityCalculator') + if not isinstance(matcher, mat.Matcher): + raise ValueError('matcher must be a Matcher') + if not isinstance(box_coder_instance, box_coder.BoxCoder): + raise ValueError('box_coder must be a BoxCoder') + self._similarity_calc = similarity_calc + self._matcher = matcher + self._box_coder = box_coder_instance + self._negative_class_weight = negative_class_weight + + @property + def box_coder(self): + return self._box_coder + + # TODO(rathodv): move labels, scores, and weights to groundtruth_boxes fields. + def assign(self, + anchors, + groundtruth_boxes, + groundtruth_labels=None, + unmatched_class_label=None, + groundtruth_weights=None): + """Assign classification and regression targets to each anchor. + + For a given set of anchors and groundtruth detections, match anchors + to groundtruth_boxes and assign classification and regression targets to + each anchor as well as weights based on the resulting match (specifying, + e.g., which anchors should not contribute to training loss). + + Anchors that are not matched to anything are given a classification target + of self._unmatched_cls_target which can be specified via the constructor. + + Args: + anchors: a BoxList representing N anchors + groundtruth_boxes: a BoxList representing M groundtruth boxes + groundtruth_labels: a tensor of shape [M, d_1, ... d_k] + with labels for each of the ground_truth boxes. The subshape + [d_1, ... d_k] can be empty (corresponding to scalar inputs). When set + to None, groundtruth_labels assumes a binary problem where all + ground_truth boxes get a positive label (of 1). + unmatched_class_label: a float32 tensor with shape [d_1, d_2, ..., d_k] + which is consistent with the classification target for each + anchor (and can be empty for scalar targets). 
This shape must thus be + compatible with the groundtruth labels that are passed to the "assign" + function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]). + If set to None, unmatched_class_label is set to be [0] for each anchor. + groundtruth_weights: a float tensor of shape [M] indicating the weight to + assign to all anchors matched to a particular groundtruth box. The weights + must be in [0., 1.]. If None, all weights are set to 1. Generally, + groundtruth boxes with zero weight do not match to any anchors, as matchers + are aware of groundtruth weights. Additionally, `cls_weights` and + `reg_weights` are calculated using groundtruth weights as an added + safety. + + Returns: + cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], + where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels + which has shape [num_gt_boxes, d_1, d_2, ... d_k]. + cls_weights: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], + representing weights for each element in cls_targets. + reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension] + reg_weights: a float32 tensor with shape [num_anchors] + match: an int32 tensor of shape [num_anchors] containing result of anchor + groundtruth matching. Each position in the tensor indicates an anchor + and holds the following meaning: + (1) if match[i] >= 0, anchor i is matched with groundtruth match[i]. + (2) if match[i]=-1, anchor i is marked to be background. + (3) if match[i]=-2, anchor i is ignored since it is not background and + does not have sufficient overlap to call it a foreground. + + Raises: + ValueError: if anchors or groundtruth_boxes are not of type + box_list.BoxList + """ + if not isinstance(anchors, box_list.BoxList): + raise ValueError('anchors must be a BoxList') + if not isinstance(groundtruth_boxes, box_list.BoxList): + raise ValueError('groundtruth_boxes must be a BoxList') + + if unmatched_class_label is None: + unmatched_class_label = tf.constant([0], tf.float32) + + if groundtruth_labels is None: + groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(), + 0)) + groundtruth_labels = tf.expand_dims(groundtruth_labels, -1) + + unmatched_shape_assert = shape_utils.assert_shape_equal( + shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[1:], + shape_utils.combined_static_and_dynamic_shape(unmatched_class_label)) + labels_and_box_shapes_assert = shape_utils.assert_shape_equal( + shape_utils.combined_static_and_dynamic_shape( + groundtruth_labels)[:1], + shape_utils.combined_static_and_dynamic_shape( + groundtruth_boxes.get())[:1]) + + if groundtruth_weights is None: + num_gt_boxes = groundtruth_boxes.num_boxes_static() + if not num_gt_boxes: + num_gt_boxes = groundtruth_boxes.num_boxes() + groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32) + + # set scores on the gt boxes + scores = 1 - groundtruth_labels[:, 0] + groundtruth_boxes.add_field(fields.BoxListFields.scores, scores) + + with tf.control_dependencies( + [unmatched_shape_assert, labels_and_box_shapes_assert]): + match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes, + anchors) + match = self._matcher.match(match_quality_matrix, + valid_rows=tf.greater(groundtruth_weights, 0)) + reg_targets = self._create_regression_targets(anchors, + groundtruth_boxes, + match) + cls_targets = self._create_classification_targets(groundtruth_labels, + unmatched_class_label, + match) + reg_weights = self._create_regression_weights(match, groundtruth_weights) + + cls_weights = 
self._create_classification_weights(match, + groundtruth_weights) + # convert cls_weights from per-anchor to per-class. + class_label_shape = tf.shape(cls_targets)[1:] + weights_shape = tf.shape(cls_weights) + weights_multiple = tf.concat( + [tf.ones_like(weights_shape), class_label_shape], + axis=0) + for _ in range(len(cls_targets.get_shape()[1:])): + cls_weights = tf.expand_dims(cls_weights, -1) + cls_weights = tf.tile(cls_weights, weights_multiple) + + num_anchors = anchors.num_boxes_static() + if num_anchors is not None: + reg_targets = self._reset_target_shape(reg_targets, num_anchors) + cls_targets = self._reset_target_shape(cls_targets, num_anchors) + reg_weights = self._reset_target_shape(reg_weights, num_anchors) + cls_weights = self._reset_target_shape(cls_weights, num_anchors) + + return (cls_targets, cls_weights, reg_targets, reg_weights, + match.match_results) + + def _reset_target_shape(self, target, num_anchors): + """Sets the static shape of the target. + + Args: + target: the target tensor. Its first dimension will be overwritten. + num_anchors: the number of anchors, which is used to override the target's + first dimension. + + Returns: + A tensor with the shape info filled in. + """ + target_shape = target.get_shape().as_list() + target_shape[0] = num_anchors + target.set_shape(target_shape) + return target + + def _create_regression_targets(self, anchors, groundtruth_boxes, match): + """Returns a regression target for each anchor. + + Args: + anchors: a BoxList representing N anchors + groundtruth_boxes: a BoxList representing M groundtruth_boxes + match: a matcher.Match object + + Returns: + reg_targets: a float32 tensor with shape [N, box_code_dimension] + """ + matched_gt_boxes = match.gather_based_on_match( + groundtruth_boxes.get(), + unmatched_value=tf.zeros(4), + ignored_value=tf.zeros(4)) + matched_gt_boxlist = box_list.BoxList(matched_gt_boxes) + if groundtruth_boxes.has_field(fields.BoxListFields.keypoints): + groundtruth_keypoints = groundtruth_boxes.get_field( + fields.BoxListFields.keypoints) + matched_keypoints = match.gather_based_on_match( + groundtruth_keypoints, + unmatched_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]), + ignored_value=tf.zeros(groundtruth_keypoints.get_shape()[1:])) + matched_gt_boxlist.add_field(fields.BoxListFields.keypoints, + matched_keypoints) + matched_reg_targets = self._box_coder.encode(matched_gt_boxlist, anchors) + match_results_shape = shape_utils.combined_static_and_dynamic_shape( + match.match_results) + + # Zero out the unmatched and ignored regression targets. + unmatched_ignored_reg_targets = tf.tile( + self._default_regression_target(), [match_results_shape[0], 1]) + matched_anchors_mask = match.matched_column_indicator() + reg_targets = tf.where(matched_anchors_mask, + matched_reg_targets, + unmatched_ignored_reg_targets) + return reg_targets + + def _default_regression_target(self): + """Returns the default target for anchors to regress to. + + Default regression targets are set to zero (though in + this implementation what these targets are set to should + not matter as the regression weight of any box set to + regress to the default target is zero). + + Returns: + default_target: a float32 tensor with shape [1, box_code_dimension] + """ + return tf.constant([self._box_coder.code_size*[0]], tf.float32) + + def _create_classification_targets(self, groundtruth_labels, + unmatched_class_label, match): + """Create classification targets for each anchor. 
+ + Assign a classification target for each anchor to the matching + groundtruth label that is provided by match. Anchors that are not matched + to anything are given the target unmatched_class_label. + + Args: + groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k] + with labels for each of the ground_truth boxes. The subshape + [d_1, ... d_k] can be empty (corresponding to scalar labels). + unmatched_class_label: a float32 tensor with shape [d_1, d_2, ..., d_k] + which is consistent with the classification target for each + anchor (and can be empty for scalar targets). This shape must thus be + compatible with the groundtruth labels that are passed to the "assign" + function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]). + match: a matcher.Match object that provides a matching between anchors + and groundtruth boxes. + + Returns: + a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], where the + subshape [d_1, ..., d_k] is compatible with groundtruth_labels which has + shape [num_gt_boxes, d_1, d_2, ... d_k]. + """ + return match.gather_based_on_match( + groundtruth_labels, + unmatched_value=unmatched_class_label, + ignored_value=unmatched_class_label) + + def _create_regression_weights(self, match, groundtruth_weights): + """Set regression weight for each anchor. + + Only positive anchors are set to contribute to the regression loss, so this + method returns the matched groundtruth weight for every positive anchor and + a weight of 0 for every anchor that is unmatched or ignored. + + Args: + match: a matcher.Match object that provides a matching between anchors + and groundtruth boxes. + groundtruth_weights: a float tensor of shape [M] indicating the weight to + assign to all anchors matched to a particular groundtruth box. + + Returns: + a float32 tensor with shape [num_anchors] representing regression weights. + """ + return match.gather_based_on_match( + groundtruth_weights, ignored_value=0., unmatched_value=0.) + + def _create_classification_weights(self, + match, + groundtruth_weights): + """Create classification weights for each anchor. + + Positive (matched) anchors are associated with the weight of the + groundtruth box they match, and negative (unmatched) anchors are associated + with a weight of negative_class_weight. When anchors are ignored, weights + are set to zero. By default, negative_class_weight is set to 1.0, but it + can be adjusted to handle class imbalance (which is almost always the case + in object detection). + + Args: + match: a matcher.Match object that provides a matching between anchors + and groundtruth boxes. + groundtruth_weights: a float tensor of shape [M] indicating the weight to + assign to all anchors matched to a particular groundtruth box. + + Returns: + a float32 tensor with shape [num_anchors] representing classification + weights. + """ + return match.gather_based_on_match( + groundtruth_weights, + ignored_value=0., + unmatched_value=self._negative_class_weight) + + def get_box_coder(self): + """Get BoxCoder of this TargetAssigner. + + Returns: + BoxCoder object. + """ + return self._box_coder + + +# TODO(rathodv): This method pulls in all the implementation dependencies into +# core. Therefore it's best to have this factory method outside of core. +def create_target_assigner(reference, stage=None, + negative_class_weight=1.0, use_matmul_gather=False): + """Factory function for creating standard target assigners. + + Args: + reference: string referencing the type of TargetAssigner. + stage: string denoting stage: {proposal, detection}. 
+ negative_class_weight: classification weight to be associated with negative + anchors (default: 1.0) + use_matmul_gather: whether to use matrix multiplication based gather, which + is better suited for TPUs. + + Returns: + TargetAssigner: desired target assigner. + + Raises: + ValueError: if combination reference+stage is invalid. + """ + if reference == 'Multibox' and stage == 'proposal': + similarity_calc = sim_calc.NegSqDistSimilarity() + matcher = bipartite_matcher.GreedyBipartiteMatcher() + box_coder_instance = mean_stddev_box_coder.MeanStddevBoxCoder() + + elif reference == 'FasterRCNN' and stage == 'proposal': + similarity_calc = sim_calc.IouSimilarity() + matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.7, + unmatched_threshold=0.3, + force_match_for_each_row=True, + use_matmul_gather=use_matmul_gather) + box_coder_instance = faster_rcnn_box_coder.FasterRcnnBoxCoder( + scale_factors=[10.0, 10.0, 5.0, 5.0]) + + elif reference == 'FasterRCNN' and stage == 'detection': + similarity_calc = sim_calc.IouSimilarity() + # Uses all proposals with IOU < 0.5 as candidate negatives. + matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, + negatives_lower_than_unmatched=True, + use_matmul_gather=use_matmul_gather) + box_coder_instance = faster_rcnn_box_coder.FasterRcnnBoxCoder( + scale_factors=[10.0, 10.0, 5.0, 5.0]) + + elif reference == 'FastRCNN': + similarity_calc = sim_calc.IouSimilarity() + matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, + unmatched_threshold=0.1, + force_match_for_each_row=False, + negatives_lower_than_unmatched=False, + use_matmul_gather=use_matmul_gather) + box_coder_instance = faster_rcnn_box_coder.FasterRcnnBoxCoder() + + else: + raise ValueError('No valid combination of reference and stage.') + + return TargetAssigner(similarity_calc, matcher, box_coder_instance, + negative_class_weight=negative_class_weight) + + +def batch_assign(target_assigner, + anchors_batch, + gt_box_batch, + gt_class_targets_batch, + unmatched_class_label=None, + gt_weights_batch=None): + """Batched assignment of classification and regression targets. + + Args: + target_assigner: a target assigner. + anchors_batch: BoxList representing N box anchors or list of BoxList objects + with length batch_size representing anchor sets. + gt_box_batch: a list of BoxList objects with length batch_size + representing groundtruth boxes for each image in the batch. + gt_class_targets_batch: a list of tensors with length batch_size, where + each tensor has shape [num_gt_boxes_i, classification_target_size] and + num_gt_boxes_i is the number of boxes in the ith boxlist of + gt_box_batch. + unmatched_class_label: a float32 tensor with shape [d_1, d_2, ..., d_k] + which is consistent with the classification target for each + anchor (and can be empty for scalar targets). This shape must thus be + compatible with the groundtruth labels that are passed to the "assign" + function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]). + gt_weights_batch: A list of 1-D tf.float32 tensors of shape + [num_boxes] containing weights for groundtruth boxes. 
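+ + Example (a minimal sketch; the assigner, anchors, and per-image BoxLists + and class-target tensors are assumed to be built as in the module + docstring): + + (batch_cls_targets, batch_cls_weights, batch_reg_targets, + batch_reg_weights, batch_match) = batch_assign( + assigner, anchors, [gt_boxes_1, gt_boxes_2], + [gt_classes_1, gt_classes_2]) 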
+ + Returns: + batch_cls_targets: a tensor with shape [batch_size, num_anchors, + num_classes], + batch_cls_weights: a tensor with shape [batch_size, num_anchors, + num_classes], + batch_reg_targets: a tensor with shape [batch_size, num_anchors, + box_code_dimension] + batch_reg_weights: a tensor with shape [batch_size, num_anchors], + match: an int32 tensor of shape [batch_size, num_anchors] containing result + of anchor groundtruth matching. Each position in the tensor indicates an + anchor and holds the following meaning: + (1) if match[x, i] >= 0, anchor i is matched with groundtruth match[x, i]. + (2) if match[x, i]=-1, anchor i is marked to be background. + (3) if match[x, i]=-2, anchor i is ignored since it is not background and + does not have sufficient overlap to call it a foreground. + + Raises: + ValueError: if input list lengths are inconsistent, i.e., + batch_size == len(gt_box_batch) == len(gt_class_targets_batch) + and batch_size == len(anchors_batch) unless anchors_batch is a single + BoxList. + """ + if not isinstance(anchors_batch, list): + anchors_batch = len(gt_box_batch) * [anchors_batch] + if not all( + isinstance(anchors, box_list.BoxList) for anchors in anchors_batch): + raise ValueError('anchors_batch must be a BoxList or list of BoxLists.') + if not (len(anchors_batch) + == len(gt_box_batch) + == len(gt_class_targets_batch)): + raise ValueError('batch size incompatible with lengths of anchors_batch, ' + 'gt_box_batch and gt_class_targets_batch.') + cls_targets_list = [] + cls_weights_list = [] + reg_targets_list = [] + reg_weights_list = [] + match_list = [] + if gt_weights_batch is None: + gt_weights_batch = [None] * len(gt_class_targets_batch) + for anchors, gt_boxes, gt_class_targets, gt_weights in zip( + anchors_batch, gt_box_batch, gt_class_targets_batch, gt_weights_batch): + (cls_targets, cls_weights, + reg_targets, reg_weights, match) = target_assigner.assign( + anchors, gt_boxes, gt_class_targets, unmatched_class_label, gt_weights) + cls_targets_list.append(cls_targets) + cls_weights_list.append(cls_weights) + reg_targets_list.append(reg_targets) + reg_weights_list.append(reg_weights) + match_list.append(match) + batch_cls_targets = tf.stack(cls_targets_list) + batch_cls_weights = tf.stack(cls_weights_list) + batch_reg_targets = tf.stack(reg_targets_list) + batch_reg_weights = tf.stack(reg_weights_list) + batch_match = tf.stack(match_list) + return (batch_cls_targets, batch_cls_weights, batch_reg_targets, + batch_reg_weights, batch_match) + + +# Assign an alias to avoid large refactor of existing users. +batch_assign_targets = batch_assign + + +def batch_get_targets(batch_match, groundtruth_tensor_list, + groundtruth_weights_list, unmatched_value, + unmatched_weight): + """Returns targets based on anchor-groundtruth box matching results. + + Args: + batch_match: An int32 tensor of shape [batch, num_anchors] containing the + result of target assignment returned by TargetAssigner.assign(..). + groundtruth_tensor_list: A list of groundtruth tensors of shape + [num_groundtruth, d_1, d_2, ..., d_k]. The tensors can be of any type. + groundtruth_weights_list: A list of weights, one per groundtruth tensor, of + shape [num_groundtruth]. + unmatched_value: A tensor of shape [d_1, d_2, ..., d_k] of the same type as + groundtruth tensor containing target value for anchors that remain + unmatched. + unmatched_weight: Scalar weight to assign to anchors that remain unmatched. 
+
+
+def batch_get_targets(batch_match, groundtruth_tensor_list,
+                      groundtruth_weights_list, unmatched_value,
+                      unmatched_weight):
+  """Returns targets based on anchor-groundtruth box matching results.
+
+  Args:
+    batch_match: An int32 tensor of shape [batch, num_anchors] containing the
+      result of target assignment returned by TargetAssigner.assign(..).
+    groundtruth_tensor_list: A list of groundtruth tensors of shape
+      [num_groundtruth, d_1, d_2, ..., d_k]. The tensors can be of any type.
+    groundtruth_weights_list: A list of weights, one per groundtruth tensor, of
+      shape [num_groundtruth].
+    unmatched_value: A tensor of shape [d_1, d_2, ..., d_k] of the same type as
+      the groundtruth tensor, containing the target value for anchors that
+      remain unmatched.
+    unmatched_weight: Scalar weight to assign to anchors that remain unmatched.
+
+  Returns:
+    targets: A tensor of shape [batch, num_anchors, d_1, d_2, ..., d_k]
+      containing targets for anchors.
+    weights: A float tensor of shape [batch, num_anchors] containing the
+      weights to assign to each target.
+  """
+  match_list = tf.unstack(batch_match)
+  targets_list = []
+  weights_list = []
+  for match_tensor, groundtruth_tensor, groundtruth_weight in zip(
+      match_list, groundtruth_tensor_list, groundtruth_weights_list):
+    match_object = mat.Match(match_tensor)
+    targets = match_object.gather_based_on_match(
+        groundtruth_tensor,
+        unmatched_value=unmatched_value,
+        ignored_value=unmatched_value)
+    targets_list.append(targets)
+    weights = match_object.gather_based_on_match(
+        groundtruth_weight,
+        unmatched_value=unmatched_weight,
+        ignored_value=tf.zeros_like(unmatched_weight))
+    weights_list.append(weights)
+  return tf.stack(targets_list), tf.stack(weights_list)
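+
+
+# Illustrative sketch (not part of the original module): recover per-anchor
+# targets from a stored match. Background (-1) anchors receive
+# `unmatched_value` with `unmatched_weight`; ignored (-2) anchors receive
+# `unmatched_value` with zero weight. `batch_match`, `gt_tensors` and
+# `gt_weights` are placeholders.
+#
+#   targets, weights = batch_get_targets(
+#       batch_match, gt_tensors, gt_weights,
+#       unmatched_value=tf.constant([0, 0], tf.int32),
+#       unmatched_weight=tf.constant(0.0, tf.float32))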
+
+
+def batch_assign_confidences(target_assigner,
+                             anchors_batch,
+                             gt_box_batch,
+                             gt_class_confidences_batch,
+                             gt_weights_batch=None,
+                             unmatched_class_label=None,
+                             include_background_class=True,
+                             implicit_class_weight=1.0):
+  """Batched assignment of classification and regression targets.
+
+  The differences between batch_assign_confidences and batch_assign_targets
+  are:
+  - 'batch_assign_targets' supports scalar (agnostic), vector (multiclass) and
+    tensor (high-dimensional) targets. 'batch_assign_confidences' only
+    supports scalar (agnostic) and vector (multiclass) targets.
+  - 'batch_assign_targets' assumes the input class tensor uses a binary
+    one/K-hot encoding. 'batch_assign_confidences' takes class confidence
+    scores as input, where 1 means a positive class, 0 means an implicit
+    negative class, and -1 means an explicit negative class.
+  - 'batch_assign_confidences' assigns targets in a similar way to
+    'batch_assign_targets', except that it gives different weights to implicit
+    and explicit classes. This allows the user to control how strongly
+    negative gradients are pushed for implicit versus explicit examples during
+    training.
+
+  Args:
+    target_assigner: a target assigner.
+    anchors_batch: BoxList representing N box anchors, or a list of BoxList
+      objects with length batch_size representing anchor sets.
+    gt_box_batch: a list of BoxList objects with length batch_size
+      representing groundtruth boxes for each image in the batch.
+    gt_class_confidences_batch: a list of tensors with length batch_size, where
+      each tensor has shape [num_gt_boxes_i, classification_target_size] and
+      num_gt_boxes_i is the number of boxes in the ith boxlist of
+      gt_box_batch. Note that in this tensor, 1 means an explicit positive
+      class, -1 means an explicit negative class, and 0 means an implicit
+      negative class.
+    gt_weights_batch: A list of 1-D tf.float32 tensors of shape
+      [num_gt_boxes_i] containing weights for groundtruth boxes.
+    unmatched_class_label: a float32 tensor with shape [d_1, d_2, ..., d_k]
+      which is consistent with the classification target for each
+      anchor (and can be empty for scalar targets). This shape must thus be
+      compatible with the groundtruth labels that are passed to the "assign"
+      function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
+    include_background_class: whether or not gt_class_confidences_batch
+      includes the background class.
+    implicit_class_weight: the weight assigned to implicit examples.
+
+  Returns:
+    batch_cls_targets: a tensor with shape [batch_size, num_anchors,
+      num_classes],
+    batch_cls_weights: a tensor with shape [batch_size, num_anchors,
+      num_classes],
+    batch_reg_targets: a tensor with shape [batch_size, num_anchors,
+      box_code_dimension],
+    batch_reg_weights: a tensor with shape [batch_size, num_anchors],
+    match: an int32 tensor of shape [batch_size, num_anchors] containing the
+      result of anchor-groundtruth matching. Each position in the tensor
+      indicates an anchor, with the following meaning:
+      (1) if match[x, i] >= 0, anchor i is matched with groundtruth
+          match[x, i].
+      (2) if match[x, i] == -1, anchor i is marked as background.
+      (3) if match[x, i] == -2, anchor i is ignored: it is not background,
+          but does not have sufficient overlap to call it a foreground.
+
+  Raises:
+    ValueError: if the input list lengths are inconsistent, i.e. unless
+      batch_size == len(gt_box_batch) == len(gt_class_confidences_batch), and
+      batch_size == len(anchors_batch) when anchors_batch is not a single
+      BoxList; or if any element in gt_class_confidences_batch has rank > 2.
+  """
+  if not isinstance(anchors_batch, list):
+    anchors_batch = len(gt_box_batch) * [anchors_batch]
+  if not all(
+      isinstance(anchors, box_list.BoxList) for anchors in anchors_batch):
+    raise ValueError('anchors_batch must be a BoxList or list of BoxLists.')
+  if not (len(anchors_batch)
+          == len(gt_box_batch)
+          == len(gt_class_confidences_batch)):
+    raise ValueError('batch size incompatible with lengths of anchors_batch, '
+                     'gt_box_batch and gt_class_confidences_batch.')
+
+  cls_targets_list = []
+  cls_weights_list = []
+  reg_targets_list = []
+  reg_weights_list = []
+  match_list = []
+  if gt_weights_batch is None:
+    gt_weights_batch = [None] * len(gt_class_confidences_batch)
+  for anchors, gt_boxes, gt_class_confidences, gt_weights in zip(
+      anchors_batch, gt_box_batch, gt_class_confidences_batch,
+      gt_weights_batch):
+
+    if (gt_class_confidences is not None and
+        len(gt_class_confidences.get_shape().as_list()) > 2):
+      raise ValueError('The shape of the class target is not supported: %s' %
+                       gt_class_confidences.get_shape())
+
+    cls_targets, _, reg_targets, _, match = target_assigner.assign(
+        anchors, gt_boxes, gt_class_confidences, unmatched_class_label,
+        groundtruth_weights=gt_weights)
+
+    if include_background_class:
+      cls_targets_without_background = tf.slice(
+          cls_targets, [0, 1], [-1, -1])
+    else:
+      cls_targets_without_background = cls_targets
+
+    positive_mask = tf.greater(cls_targets_without_background, 0.0)
+    negative_mask = tf.less(cls_targets_without_background, 0.0)
+    explicit_example_mask = tf.logical_or(positive_mask, negative_mask)
+    positive_anchors = tf.reduce_any(positive_mask, axis=-1)
+
+    regression_weights = tf.cast(positive_anchors, dtype=tf.float32)
+    regression_targets = (
+        reg_targets * tf.expand_dims(regression_weights, axis=-1))
+    regression_weights_expanded = tf.expand_dims(regression_weights, axis=-1)
+
+    cls_targets_without_background = (
+        cls_targets_without_background *
+        (1 - tf.cast(negative_mask, dtype=tf.float32)))
+    cls_weights_without_background = ((1 - implicit_class_weight) * tf.cast(
+        explicit_example_mask, dtype=tf.float32) + implicit_class_weight)
+
+    if include_background_class:
+      cls_weights_background = (
+          (1 - implicit_class_weight) * regression_weights_expanded
+          + implicit_class_weight)
+      classification_weights = tf.concat(
+          [cls_weights_background, cls_weights_without_background], axis=-1)
+      cls_targets_background = 1 - regression_weights_expanded
+      classification_targets = tf.concat(
+          [cls_targets_background, cls_targets_without_background], axis=-1)
+    else:
+      classification_targets = cls_targets_without_background
+      classification_weights = cls_weights_without_background
+
+    cls_targets_list.append(classification_targets)
+    cls_weights_list.append(classification_weights)
+    reg_targets_list.append(regression_targets)
+    reg_weights_list.append(regression_weights)
+    match_list.append(match)
+  batch_cls_targets = tf.stack(cls_targets_list)
+  batch_cls_weights = tf.stack(cls_weights_list)
+  batch_reg_targets = tf.stack(reg_targets_list)
+  batch_reg_weights = tf.stack(reg_weights_list)
+  batch_match = tf.stack(match_list)
+  return (batch_cls_targets, batch_cls_weights, batch_reg_targets,
+          batch_reg_weights, batch_match)
+
+
diff --git a/core/target_assigner_test.py b/core/target_assigner_test.py
new file mode 100644
index 0000000..1ac67f2
--- /dev/null
+++ b/core/target_assigner_test.py
@@ -0,0 +1,1232 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.target_assigner."""
+import numpy as np
+import tensorflow as tf
+
+from object_detection.box_coders import keypoint_box_coder
+from object_detection.box_coders import mean_stddev_box_coder
+from object_detection.core import box_list
+from object_detection.core import region_similarity_calculator
+from object_detection.core import standard_fields as fields
+from object_detection.core import target_assigner as targetassigner
+from object_detection.matchers import argmax_matcher
+from object_detection.matchers import bipartite_matcher
+from object_detection.utils import test_case
+
+
+class TargetAssignerTest(test_case.TestCase):
+
+  def test_assign_agnostic(self):
+    def graph_fn(anchor_means, groundtruth_box_corners):
+      similarity_calc = region_similarity_calculator.IouSimilarity()
+      matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
+                                             unmatched_threshold=0.5)
+      box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
+      target_assigner = targetassigner.TargetAssigner(
+          similarity_calc, matcher, box_coder)
+      anchors_boxlist = box_list.BoxList(anchor_means)
+      groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
+      result = target_assigner.assign(
+          anchors_boxlist, groundtruth_boxlist, unmatched_class_label=None)
+      (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
+      return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+    anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
+                             [0.5, 0.5, 1.0, 0.8],
+                             [0, 0.5, .5, 1.0]], dtype=np.float32)
+    groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
+                                        [0.5, 0.5, 0.9, 0.9]],
+                                       dtype=np.float32)
+    exp_cls_targets = [[1], [1], [0]]
+    exp_cls_weights = [[1], [1], [1]]
+    exp_reg_targets = [[0, 0, 0, 0],
+                       [0, 0, -1, 1],
+                       [0, 0, 0, 0]]
+    exp_reg_weights = [1, 1, 0]
+
+    (cls_targets_out,
+     cls_weights_out, reg_targets_out, reg_weights_out) = self.execute(
+         graph_fn, [anchor_means, groundtruth_box_corners])
+    self.assertAllClose(cls_targets_out, exp_cls_targets)
+    self.assertAllClose(cls_weights_out, exp_cls_weights)
+    self.assertAllClose(reg_targets_out, exp_reg_targets)
+    self.assertAllClose(reg_weights_out, exp_reg_weights)
+    self.assertEquals(cls_targets_out.dtype, np.float32)
+    self.assertEquals(cls_weights_out.dtype, np.float32)
+    self.assertEquals(reg_targets_out.dtype, np.float32)
+    self.assertEquals(reg_weights_out.dtype, np.float32)
+
+  def test_assign_class_agnostic_with_ignored_matches(self):
+    # Note: this test is very similar to the one above. The third anchor
+    # matches with an IOU of 0.35, which falls between the unmatched (0.3) and
+    # matched (0.5) thresholds. As above, the expected classification targets
+    # are [1, 1, 0]; unlike above, the third anchor is ignored, so the
+    # expected classification weights are [1, 1, 0].
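+    # In Match terms, the ignored third anchor receives match value -2 (vs. -1
+    # for plain background), which zeroes its classification weight.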
+ def graph_fn(anchor_means, groundtruth_box_corners): + similarity_calc = region_similarity_calculator.IouSimilarity() + matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, + unmatched_threshold=0.3) + box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) + target_assigner = targetassigner.TargetAssigner( + similarity_calc, matcher, box_coder) + anchors_boxlist = box_list.BoxList(anchor_means) + groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) + result = target_assigner.assign( + anchors_boxlist, groundtruth_boxlist, unmatched_class_label=None) + (cls_targets, cls_weights, reg_targets, reg_weights, _) = result + return (cls_targets, cls_weights, reg_targets, reg_weights) + + anchor_means = np.array([[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 1.0, 0.8], + [0.0, 0.5, .9, 1.0]], dtype=np.float32) + groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 0.9, 0.9]], dtype=np.float32) + exp_cls_targets = [[1], [1], [0]] + exp_cls_weights = [[1], [1], [0]] + exp_reg_targets = [[0, 0, 0, 0], + [0, 0, -1, 1], + [0, 0, 0, 0]] + exp_reg_weights = [1, 1, 0] + (cls_targets_out, + cls_weights_out, reg_targets_out, reg_weights_out) = self.execute( + graph_fn, [anchor_means, groundtruth_box_corners]) + self.assertAllClose(cls_targets_out, exp_cls_targets) + self.assertAllClose(cls_weights_out, exp_cls_weights) + self.assertAllClose(reg_targets_out, exp_reg_targets) + self.assertAllClose(reg_weights_out, exp_reg_weights) + self.assertEquals(cls_targets_out.dtype, np.float32) + self.assertEquals(cls_weights_out.dtype, np.float32) + self.assertEquals(reg_targets_out.dtype, np.float32) + self.assertEquals(reg_weights_out.dtype, np.float32) + + def test_assign_agnostic_with_keypoints(self): + def graph_fn(anchor_means, groundtruth_box_corners, + groundtruth_keypoints): + similarity_calc = region_similarity_calculator.IouSimilarity() + matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, + unmatched_threshold=0.5) + box_coder = keypoint_box_coder.KeypointBoxCoder( + num_keypoints=6, scale_factors=[10.0, 10.0, 5.0, 5.0]) + target_assigner = targetassigner.TargetAssigner( + similarity_calc, matcher, box_coder) + anchors_boxlist = box_list.BoxList(anchor_means) + groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) + groundtruth_boxlist.add_field(fields.BoxListFields.keypoints, + groundtruth_keypoints) + result = target_assigner.assign( + anchors_boxlist, groundtruth_boxlist, unmatched_class_label=None) + (cls_targets, cls_weights, reg_targets, reg_weights, _) = result + return (cls_targets, cls_weights, reg_targets, reg_weights) + + anchor_means = np.array([[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 1.0, 1.0], + [0.0, 0.5, .9, 1.0]], dtype=np.float32) + groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5], + [0.45, 0.45, 0.95, 0.95]], + dtype=np.float32) + groundtruth_keypoints = np.array( + [[[0.1, 0.2], [0.1, 0.3], [0.2, 0.2], [0.2, 0.2], [0.1, 0.1], [0.9, 0]], + [[0, 0.3], [0.2, 0.4], [0.5, 0.6], [0, 0.6], [0.8, 0.2], [0.2, 0.4]]], + dtype=np.float32) + exp_cls_targets = [[1], [1], [0]] + exp_cls_weights = [[1], [1], [1]] + exp_reg_targets = [[0, 0, 0, 0, -3, -1, -3, 1, -1, -1, -1, -1, -3, -3, 13, + -5], + [-1, -1, 0, 0, -15, -9, -11, -7, -5, -3, -15, -3, 1, -11, + -11, -7], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]] + exp_reg_weights = [1, 1, 0] + (cls_targets_out, cls_weights_out, reg_targets_out, + reg_weights_out) = self.execute(graph_fn, [anchor_means, + groundtruth_box_corners, + groundtruth_keypoints]) + 
+    self.assertAllClose(cls_targets_out, exp_cls_targets)
+    self.assertAllClose(cls_weights_out, exp_cls_weights)
+    self.assertAllClose(reg_targets_out, exp_reg_targets)
+    self.assertAllClose(reg_weights_out, exp_reg_weights)
+    self.assertEquals(cls_targets_out.dtype, np.float32)
+    self.assertEquals(cls_weights_out.dtype, np.float32)
+    self.assertEquals(reg_targets_out.dtype, np.float32)
+    self.assertEquals(reg_weights_out.dtype, np.float32)
+
+  def test_assign_class_agnostic_with_keypoints_and_ignored_matches(self):
+    # Note: this test mirrors the keypoints test above, but here
+    # matched_threshold == unmatched_threshold == 0.5, so no anchor falls into
+    # the ignored band: the expected classification targets are [1, 1, 0] and
+    # the expected classification weights are [1, 1, 1].
+    def graph_fn(anchor_means, groundtruth_box_corners,
+                 groundtruth_keypoints):
+      similarity_calc = region_similarity_calculator.IouSimilarity()
+      matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
+                                             unmatched_threshold=0.5)
+      box_coder = keypoint_box_coder.KeypointBoxCoder(
+          num_keypoints=6, scale_factors=[10.0, 10.0, 5.0, 5.0])
+      target_assigner = targetassigner.TargetAssigner(
+          similarity_calc, matcher, box_coder)
+      anchors_boxlist = box_list.BoxList(anchor_means)
+      groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
+      groundtruth_boxlist.add_field(fields.BoxListFields.keypoints,
+                                    groundtruth_keypoints)
+      result = target_assigner.assign(
+          anchors_boxlist, groundtruth_boxlist, unmatched_class_label=None)
+      (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
+      return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+    anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
+                             [0.5, 0.5, 1.0, 1.0],
+                             [0.0, 0.5, .9, 1.0]], dtype=np.float32)
+    groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
+                                        [0.45, 0.45, 0.95, 0.95]],
+                                       dtype=np.float32)
+    groundtruth_keypoints = np.array(
+        [[[0.1, 0.2], [0.1, 0.3], [0.2, 0.2], [0.2, 0.2], [0.1, 0.1], [0.9, 0]],
+         [[0, 0.3], [0.2, 0.4], [0.5, 0.6], [0, 0.6], [0.8, 0.2], [0.2, 0.4]]],
+        dtype=np.float32)
+    exp_cls_targets = [[1], [1], [0]]
+    exp_cls_weights = [[1], [1], [1]]
+    exp_reg_targets = [[0, 0, 0, 0, -3, -1, -3, 1, -1, -1, -1, -1, -3, -3, 13,
+                        -5],
+                       [-1, -1, 0, 0, -15, -9, -11, -7, -5, -3, -15, -3, 1, -11,
+                        -11, -7],
+                       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
+    exp_reg_weights = [1, 1, 0]
+    (cls_targets_out, cls_weights_out, reg_targets_out,
+     reg_weights_out) = self.execute(graph_fn, [anchor_means,
+                                                groundtruth_box_corners,
+                                                groundtruth_keypoints])
+    self.assertAllClose(cls_targets_out, exp_cls_targets)
+    self.assertAllClose(cls_weights_out, exp_cls_weights)
+    self.assertAllClose(reg_targets_out, exp_reg_targets)
+    self.assertAllClose(reg_weights_out, exp_reg_weights)
+    self.assertEquals(cls_targets_out.dtype, np.float32)
+    self.assertEquals(cls_weights_out.dtype, np.float32)
+    self.assertEquals(reg_targets_out.dtype, np.float32)
+    self.assertEquals(reg_weights_out.dtype, np.float32)
+
+  def test_assign_multiclass(self):
+
+    def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_labels):
+      similarity_calc = region_similarity_calculator.IouSimilarity()
+      matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
+                                             unmatched_threshold=0.5)
+      box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
+      unmatched_class_label = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
+      target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder) + + anchors_boxlist = box_list.BoxList(anchor_means) + groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) + result = target_assigner.assign( + anchors_boxlist, + groundtruth_boxlist, + groundtruth_labels, + unmatched_class_label=unmatched_class_label) + (cls_targets, cls_weights, reg_targets, reg_weights, _) = result + return (cls_targets, cls_weights, reg_targets, reg_weights) + + anchor_means = np.array([[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 1.0, 0.8], + [0, 0.5, .5, 1.0], + [.75, 0, 1.0, .25]], dtype=np.float32) + groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 0.9, 0.9], + [.75, 0, .95, .27]], dtype=np.float32) + groundtruth_labels = np.array([[0, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 1, 0, 0, 0]], dtype=np.float32) + + exp_cls_targets = [[0, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0], + [1, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0]] + exp_cls_weights = [[1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1]] + exp_reg_targets = [[0, 0, 0, 0], + [0, 0, -1, 1], + [0, 0, 0, 0], + [0, 0, -.5, .2]] + exp_reg_weights = [1, 1, 0, 1] + + (cls_targets_out, + cls_weights_out, reg_targets_out, reg_weights_out) = self.execute( + graph_fn, [anchor_means, groundtruth_box_corners, groundtruth_labels]) + self.assertAllClose(cls_targets_out, exp_cls_targets) + self.assertAllClose(cls_weights_out, exp_cls_weights) + self.assertAllClose(reg_targets_out, exp_reg_targets) + self.assertAllClose(reg_weights_out, exp_reg_weights) + self.assertEquals(cls_targets_out.dtype, np.float32) + self.assertEquals(cls_weights_out.dtype, np.float32) + self.assertEquals(reg_targets_out.dtype, np.float32) + self.assertEquals(reg_weights_out.dtype, np.float32) + + def test_assign_multiclass_with_groundtruth_weights(self): + + def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_labels, + groundtruth_weights): + similarity_calc = region_similarity_calculator.IouSimilarity() + matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, + unmatched_threshold=0.5) + box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) + unmatched_class_label = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32) + target_assigner = targetassigner.TargetAssigner( + similarity_calc, matcher, box_coder) + + anchors_boxlist = box_list.BoxList(anchor_means) + groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) + result = target_assigner.assign( + anchors_boxlist, + groundtruth_boxlist, + groundtruth_labels, + unmatched_class_label=unmatched_class_label, + groundtruth_weights=groundtruth_weights) + (_, cls_weights, _, reg_weights, _) = result + return (cls_weights, reg_weights) + + anchor_means = np.array([[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 1.0, 0.8], + [0, 0.5, .5, 1.0], + [.75, 0, 1.0, .25]], dtype=np.float32) + groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 0.9, 0.9], + [.75, 0, .95, .27]], dtype=np.float32) + groundtruth_labels = np.array([[0, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 1, 0, 0, 0]], dtype=np.float32) + groundtruth_weights = np.array([0.3, 0., 0.5], dtype=np.float32) + + # background class gets weight of 1. + exp_cls_weights = [[0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3], + [0, 0, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 1, 1, 1], + [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]] + exp_reg_weights = [0.3, 0., 0., 0.5] # background class gets weight of 0. 
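+    # Anchors 0, 1 and 3 match groundtruth boxes 0, 1 and 2, so they inherit
+    # the groundtruth weights 0.3, 0. and 0.5; unmatched anchor 2 falls back
+    # to the default background weight of 1.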
+ + (cls_weights_out, reg_weights_out) = self.execute(graph_fn, [ + anchor_means, groundtruth_box_corners, groundtruth_labels, + groundtruth_weights + ]) + self.assertAllClose(cls_weights_out, exp_cls_weights) + self.assertAllClose(reg_weights_out, exp_reg_weights) + + def test_assign_multidimensional_class_targets(self): + + def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_labels): + similarity_calc = region_similarity_calculator.IouSimilarity() + matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, + unmatched_threshold=0.5) + box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) + + unmatched_class_label = tf.constant([[0, 0], [0, 0]], tf.float32) + target_assigner = targetassigner.TargetAssigner( + similarity_calc, matcher, box_coder) + + anchors_boxlist = box_list.BoxList(anchor_means) + groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) + result = target_assigner.assign( + anchors_boxlist, + groundtruth_boxlist, + groundtruth_labels, + unmatched_class_label=unmatched_class_label) + (cls_targets, cls_weights, reg_targets, reg_weights, _) = result + return (cls_targets, cls_weights, reg_targets, reg_weights) + + anchor_means = np.array([[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 1.0, 0.8], + [0, 0.5, .5, 1.0], + [.75, 0, 1.0, .25]], dtype=np.float32) + groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 0.9, 0.9], + [.75, 0, .95, .27]], dtype=np.float32) + + groundtruth_labels = np.array([[[0, 1], [1, 0]], + [[1, 0], [0, 1]], + [[0, 1], [1, .5]]], np.float32) + + exp_cls_targets = [[[0, 1], [1, 0]], + [[1, 0], [0, 1]], + [[0, 0], [0, 0]], + [[0, 1], [1, .5]]] + exp_cls_weights = [[[1, 1], [1, 1]], + [[1, 1], [1, 1]], + [[1, 1], [1, 1]], + [[1, 1], [1, 1]]] + exp_reg_targets = [[0, 0, 0, 0], + [0, 0, -1, 1], + [0, 0, 0, 0], + [0, 0, -.5, .2]] + exp_reg_weights = [1, 1, 0, 1] + (cls_targets_out, + cls_weights_out, reg_targets_out, reg_weights_out) = self.execute( + graph_fn, [anchor_means, groundtruth_box_corners, groundtruth_labels]) + self.assertAllClose(cls_targets_out, exp_cls_targets) + self.assertAllClose(cls_weights_out, exp_cls_weights) + self.assertAllClose(reg_targets_out, exp_reg_targets) + self.assertAllClose(reg_weights_out, exp_reg_weights) + self.assertEquals(cls_targets_out.dtype, np.float32) + self.assertEquals(cls_weights_out.dtype, np.float32) + self.assertEquals(reg_targets_out.dtype, np.float32) + self.assertEquals(reg_weights_out.dtype, np.float32) + + def test_assign_empty_groundtruth(self): + + def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_labels): + similarity_calc = region_similarity_calculator.IouSimilarity() + matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, + unmatched_threshold=0.5) + box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) + unmatched_class_label = tf.constant([0, 0, 0], tf.float32) + anchors_boxlist = box_list.BoxList(anchor_means) + groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) + target_assigner = targetassigner.TargetAssigner( + similarity_calc, matcher, box_coder) + result = target_assigner.assign( + anchors_boxlist, + groundtruth_boxlist, + groundtruth_labels, + unmatched_class_label=unmatched_class_label) + (cls_targets, cls_weights, reg_targets, reg_weights, _) = result + return (cls_targets, cls_weights, reg_targets, reg_weights) + + groundtruth_box_corners = np.zeros((0, 4), dtype=np.float32) + groundtruth_labels = np.zeros((0, 3), dtype=np.float32) + anchor_means = np.array([[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 1.0, 
0.8], + [0, 0.5, .5, 1.0], + [.75, 0, 1.0, .25]], + dtype=np.float32) + exp_cls_targets = [[0, 0, 0], + [0, 0, 0], + [0, 0, 0], + [0, 0, 0]] + exp_cls_weights = [[1, 1, 1], + [1, 1, 1], + [1, 1, 1], + [1, 1, 1]] + exp_reg_targets = [[0, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0]] + exp_reg_weights = [0, 0, 0, 0] + (cls_targets_out, + cls_weights_out, reg_targets_out, reg_weights_out) = self.execute( + graph_fn, [anchor_means, groundtruth_box_corners, groundtruth_labels]) + self.assertAllClose(cls_targets_out, exp_cls_targets) + self.assertAllClose(cls_weights_out, exp_cls_weights) + self.assertAllClose(reg_targets_out, exp_reg_targets) + self.assertAllClose(reg_weights_out, exp_reg_weights) + self.assertEquals(cls_targets_out.dtype, np.float32) + self.assertEquals(cls_weights_out.dtype, np.float32) + self.assertEquals(reg_targets_out.dtype, np.float32) + self.assertEquals(reg_weights_out.dtype, np.float32) + + def test_raises_error_on_incompatible_groundtruth_boxes_and_labels(self): + similarity_calc = region_similarity_calculator.NegSqDistSimilarity() + matcher = bipartite_matcher.GreedyBipartiteMatcher() + box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() + unmatched_class_label = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32) + target_assigner = targetassigner.TargetAssigner( + similarity_calc, matcher, box_coder) + + prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 1.0, 0.8], + [0, 0.5, .5, 1.0], + [.75, 0, 1.0, .25]]) + priors = box_list.BoxList(prior_means) + + box_corners = [[0.0, 0.0, 0.5, 0.5], + [0.0, 0.0, 0.5, 0.8], + [0.5, 0.5, 0.9, 0.9], + [.75, 0, .95, .27]] + boxes = box_list.BoxList(tf.constant(box_corners)) + + groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 1, 0, 0, 0]], tf.float32) + with self.assertRaisesRegexp(ValueError, 'Unequal shapes'): + target_assigner.assign( + priors, + boxes, + groundtruth_labels, + unmatched_class_label=unmatched_class_label) + + def test_raises_error_on_invalid_groundtruth_labels(self): + similarity_calc = region_similarity_calculator.NegSqDistSimilarity() + matcher = bipartite_matcher.GreedyBipartiteMatcher() + box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=1.0) + unmatched_class_label = tf.constant([[0, 0], [0, 0], [0, 0]], tf.float32) + target_assigner = targetassigner.TargetAssigner( + similarity_calc, matcher, box_coder) + + prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5]]) + priors = box_list.BoxList(prior_means) + + box_corners = [[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 0.9, 0.9], + [.75, 0, .95, .27]] + boxes = box_list.BoxList(tf.constant(box_corners)) + groundtruth_labels = tf.constant([[[0, 1], [1, 0]]], tf.float32) + + with self.assertRaises(ValueError): + target_assigner.assign( + priors, + boxes, + groundtruth_labels, + unmatched_class_label=unmatched_class_label) + + +class BatchTargetAssignerTest(test_case.TestCase): + + def _get_target_assigner(self): + similarity_calc = region_similarity_calculator.IouSimilarity() + matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, + unmatched_threshold=0.5) + box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) + return targetassigner.TargetAssigner(similarity_calc, matcher, box_coder) + + def test_batch_assign_targets(self): + + def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2): + box_list1 = box_list.BoxList(groundtruth_boxlist1) + box_list2 = box_list.BoxList(groundtruth_boxlist2) + gt_box_batch = [box_list1, box_list2] + gt_class_targets = [None, None] + 
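+      # Passing None class targets makes the batch assignment class-agnostic:
+      # each anchor gets a scalar [1] (matched) or [0] (unmatched) target.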
anchors_boxlist = box_list.BoxList(anchor_means) + agnostic_target_assigner = self._get_target_assigner() + (cls_targets, cls_weights, reg_targets, reg_weights, + _) = targetassigner.batch_assign_targets( + agnostic_target_assigner, anchors_boxlist, gt_box_batch, + gt_class_targets) + return (cls_targets, cls_weights, reg_targets, reg_weights) + + groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32) + groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1], + [0.015789, 0.0985, 0.55789, 0.3842]], + dtype=np.float32) + anchor_means = np.array([[0, 0, .25, .25], + [0, .25, 1, 1], + [0, .1, .5, .5], + [.75, .75, 1, 1]], dtype=np.float32) + + exp_cls_targets = [[[1], [0], [0], [0]], + [[0], [1], [1], [0]]] + exp_cls_weights = [[[1], [1], [1], [1]], + [[1], [1], [1], [1]]] + exp_reg_targets = [[[0, 0, -0.5, -0.5], + [0, 0, 0, 0], + [0, 0, 0, 0,], + [0, 0, 0, 0,],], + [[0, 0, 0, 0,], + [0, 0.01231521, 0, 0], + [0.15789001, -0.01500003, 0.57889998, -1.15799987], + [0, 0, 0, 0]]] + exp_reg_weights = [[1, 0, 0, 0], + [0, 1, 1, 0]] + + (cls_targets_out, + cls_weights_out, reg_targets_out, reg_weights_out) = self.execute( + graph_fn, [anchor_means, groundtruth_boxlist1, groundtruth_boxlist2]) + self.assertAllClose(cls_targets_out, exp_cls_targets) + self.assertAllClose(cls_weights_out, exp_cls_weights) + self.assertAllClose(reg_targets_out, exp_reg_targets) + self.assertAllClose(reg_weights_out, exp_reg_weights) + + def test_batch_assign_multiclass_targets(self): + + def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2, + class_targets1, class_targets2): + box_list1 = box_list.BoxList(groundtruth_boxlist1) + box_list2 = box_list.BoxList(groundtruth_boxlist2) + gt_box_batch = [box_list1, box_list2] + gt_class_targets = [class_targets1, class_targets2] + anchors_boxlist = box_list.BoxList(anchor_means) + multiclass_target_assigner = self._get_target_assigner() + num_classes = 3 + unmatched_class_label = tf.constant([1] + num_classes * [0], tf.float32) + (cls_targets, cls_weights, reg_targets, reg_weights, + _) = targetassigner.batch_assign_targets( + multiclass_target_assigner, anchors_boxlist, gt_box_batch, + gt_class_targets, unmatched_class_label) + return (cls_targets, cls_weights, reg_targets, reg_weights) + + groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32) + groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1], + [0.015789, 0.0985, 0.55789, 0.3842]], + dtype=np.float32) + class_targets1 = np.array([[0, 1, 0, 0]], dtype=np.float32) + class_targets2 = np.array([[0, 0, 0, 1], + [0, 0, 1, 0]], dtype=np.float32) + + anchor_means = np.array([[0, 0, .25, .25], + [0, .25, 1, 1], + [0, .1, .5, .5], + [.75, .75, 1, 1]], dtype=np.float32) + exp_cls_targets = [[[0, 1, 0, 0], + [1, 0, 0, 0], + [1, 0, 0, 0], + [1, 0, 0, 0]], + [[1, 0, 0, 0], + [0, 0, 0, 1], + [0, 0, 1, 0], + [1, 0, 0, 0]]] + exp_cls_weights = [[[1, 1, 1, 1], + [1, 1, 1, 1], + [1, 1, 1, 1], + [1, 1, 1, 1]], + [[1, 1, 1, 1], + [1, 1, 1, 1], + [1, 1, 1, 1], + [1, 1, 1, 1]]] + exp_reg_targets = [[[0, 0, -0.5, -0.5], + [0, 0, 0, 0], + [0, 0, 0, 0,], + [0, 0, 0, 0,],], + [[0, 0, 0, 0,], + [0, 0.01231521, 0, 0], + [0.15789001, -0.01500003, 0.57889998, -1.15799987], + [0, 0, 0, 0]]] + exp_reg_weights = [[1, 0, 0, 0], + [0, 1, 1, 0]] + + (cls_targets_out, cls_weights_out, reg_targets_out, + reg_weights_out) = self.execute(graph_fn, [ + anchor_means, groundtruth_boxlist1, groundtruth_boxlist2, + class_targets1, class_targets2 + ]) + self.assertAllClose(cls_targets_out, exp_cls_targets) + 
self.assertAllClose(cls_weights_out, exp_cls_weights) + self.assertAllClose(reg_targets_out, exp_reg_targets) + self.assertAllClose(reg_weights_out, exp_reg_weights) + + def test_batch_assign_multiclass_targets_with_padded_groundtruth(self): + + def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2, + class_targets1, class_targets2, groundtruth_weights1, + groundtruth_weights2): + box_list1 = box_list.BoxList(groundtruth_boxlist1) + box_list2 = box_list.BoxList(groundtruth_boxlist2) + gt_box_batch = [box_list1, box_list2] + gt_class_targets = [class_targets1, class_targets2] + gt_weights = [groundtruth_weights1, groundtruth_weights2] + anchors_boxlist = box_list.BoxList(anchor_means) + multiclass_target_assigner = self._get_target_assigner() + num_classes = 3 + unmatched_class_label = tf.constant([1] + num_classes * [0], tf.float32) + (cls_targets, cls_weights, reg_targets, reg_weights, + _) = targetassigner.batch_assign_targets( + multiclass_target_assigner, anchors_boxlist, gt_box_batch, + gt_class_targets, unmatched_class_label, gt_weights) + return (cls_targets, cls_weights, reg_targets, reg_weights) + + groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2], + [0., 0., 0., 0.]], dtype=np.float32) + groundtruth_weights1 = np.array([1, 0], dtype=np.float32) + groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1], + [0.015789, 0.0985, 0.55789, 0.3842], + [0, 0, 0, 0]], + dtype=np.float32) + groundtruth_weights2 = np.array([1, 1, 0], dtype=np.float32) + class_targets1 = np.array([[0, 1, 0, 0], [0, 0, 0, 0]], dtype=np.float32) + class_targets2 = np.array([[0, 0, 0, 1], + [0, 0, 1, 0], + [0, 0, 0, 0]], dtype=np.float32) + + anchor_means = np.array([[0, 0, .25, .25], + [0, .25, 1, 1], + [0, .1, .5, .5], + [.75, .75, 1, 1]], dtype=np.float32) + + exp_cls_targets = [[[0, 1, 0, 0], + [1, 0, 0, 0], + [1, 0, 0, 0], + [1, 0, 0, 0]], + [[1, 0, 0, 0], + [0, 0, 0, 1], + [0, 0, 1, 0], + [1, 0, 0, 0]]] + exp_cls_weights = [[[1, 1, 1, 1], + [1, 1, 1, 1], + [1, 1, 1, 1], + [1, 1, 1, 1]], + [[1, 1, 1, 1], + [1, 1, 1, 1], + [1, 1, 1, 1], + [1, 1, 1, 1]]] + exp_reg_targets = [[[0, 0, -0.5, -0.5], + [0, 0, 0, 0], + [0, 0, 0, 0,], + [0, 0, 0, 0,],], + [[0, 0, 0, 0,], + [0, 0.01231521, 0, 0], + [0.15789001, -0.01500003, 0.57889998, -1.15799987], + [0, 0, 0, 0]]] + exp_reg_weights = [[1, 0, 0, 0], + [0, 1, 1, 0]] + + (cls_targets_out, cls_weights_out, reg_targets_out, + reg_weights_out) = self.execute(graph_fn, [ + anchor_means, groundtruth_boxlist1, groundtruth_boxlist2, + class_targets1, class_targets2, groundtruth_weights1, + groundtruth_weights2 + ]) + self.assertAllClose(cls_targets_out, exp_cls_targets) + self.assertAllClose(cls_weights_out, exp_cls_weights) + self.assertAllClose(reg_targets_out, exp_reg_targets) + self.assertAllClose(reg_weights_out, exp_reg_weights) + + def test_batch_assign_multidimensional_targets(self): + + def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2, + class_targets1, class_targets2): + box_list1 = box_list.BoxList(groundtruth_boxlist1) + box_list2 = box_list.BoxList(groundtruth_boxlist2) + gt_box_batch = [box_list1, box_list2] + gt_class_targets = [class_targets1, class_targets2] + anchors_boxlist = box_list.BoxList(anchor_means) + multiclass_target_assigner = self._get_target_assigner() + target_dimensions = (2, 3) + unmatched_class_label = tf.constant(np.zeros(target_dimensions), + tf.float32) + (cls_targets, cls_weights, reg_targets, reg_weights, + _) = targetassigner.batch_assign_targets( + multiclass_target_assigner, anchors_boxlist, 
gt_box_batch, + gt_class_targets, unmatched_class_label) + return (cls_targets, cls_weights, reg_targets, reg_weights) + + groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32) + groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1], + [0.015789, 0.0985, 0.55789, 0.3842]], + dtype=np.float32) + class_targets1 = np.array([[[0, 1, 1], + [1, 1, 0]]], dtype=np.float32) + class_targets2 = np.array([[[0, 1, 1], + [1, 1, 0]], + [[0, 0, 1], + [0, 0, 1]]], dtype=np.float32) + + anchor_means = np.array([[0, 0, .25, .25], + [0, .25, 1, 1], + [0, .1, .5, .5], + [.75, .75, 1, 1]], dtype=np.float32) + + exp_cls_targets = [[[[0., 1., 1.], + [1., 1., 0.]], + [[0., 0., 0.], + [0., 0., 0.]], + [[0., 0., 0.], + [0., 0., 0.]], + [[0., 0., 0.], + [0., 0., 0.]]], + [[[0., 0., 0.], + [0., 0., 0.]], + [[0., 1., 1.], + [1., 1., 0.]], + [[0., 0., 1.], + [0., 0., 1.]], + [[0., 0., 0.], + [0., 0., 0.]]]] + exp_cls_weights = [[[[1., 1., 1.], + [1., 1., 1.]], + [[1., 1., 1.], + [1., 1., 1.]], + [[1., 1., 1.], + [1., 1., 1.]], + [[1., 1., 1.], + [1., 1., 1.]]], + [[[1., 1., 1.], + [1., 1., 1.]], + [[1., 1., 1.], + [1., 1., 1.]], + [[1., 1., 1.], + [1., 1., 1.]], + [[1., 1., 1.], + [1., 1., 1.]]]] + exp_reg_targets = [[[0, 0, -0.5, -0.5], + [0, 0, 0, 0], + [0, 0, 0, 0,], + [0, 0, 0, 0,],], + [[0, 0, 0, 0,], + [0, 0.01231521, 0, 0], + [0.15789001, -0.01500003, 0.57889998, -1.15799987], + [0, 0, 0, 0]]] + exp_reg_weights = [[1, 0, 0, 0], + [0, 1, 1, 0]] + + (cls_targets_out, cls_weights_out, reg_targets_out, + reg_weights_out) = self.execute(graph_fn, [ + anchor_means, groundtruth_boxlist1, groundtruth_boxlist2, + class_targets1, class_targets2 + ]) + self.assertAllClose(cls_targets_out, exp_cls_targets) + self.assertAllClose(cls_weights_out, exp_cls_weights) + self.assertAllClose(reg_targets_out, exp_reg_targets) + self.assertAllClose(reg_weights_out, exp_reg_weights) + + def test_batch_assign_empty_groundtruth(self): + + def graph_fn(anchor_means, groundtruth_box_corners, gt_class_targets): + groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) + gt_box_batch = [groundtruth_boxlist] + gt_class_targets_batch = [gt_class_targets] + anchors_boxlist = box_list.BoxList(anchor_means) + + multiclass_target_assigner = self._get_target_assigner() + num_classes = 3 + unmatched_class_label = tf.constant([1] + num_classes * [0], tf.float32) + (cls_targets, cls_weights, reg_targets, reg_weights, + _) = targetassigner.batch_assign_targets( + multiclass_target_assigner, anchors_boxlist, + gt_box_batch, gt_class_targets_batch, unmatched_class_label) + return (cls_targets, cls_weights, reg_targets, reg_weights) + + groundtruth_box_corners = np.zeros((0, 4), dtype=np.float32) + anchor_means = np.array([[0, 0, .25, .25], + [0, .25, 1, 1]], dtype=np.float32) + exp_cls_targets = [[[1, 0, 0, 0], + [1, 0, 0, 0]]] + exp_cls_weights = [[[1, 1, 1, 1], + [1, 1, 1, 1]]] + exp_reg_targets = [[[0, 0, 0, 0], + [0, 0, 0, 0]]] + exp_reg_weights = [[0, 0]] + num_classes = 3 + pad = 1 + gt_class_targets = np.zeros((0, num_classes + pad), dtype=np.float32) + + (cls_targets_out, + cls_weights_out, reg_targets_out, reg_weights_out) = self.execute( + graph_fn, [anchor_means, groundtruth_box_corners, gt_class_targets]) + self.assertAllClose(cls_targets_out, exp_cls_targets) + self.assertAllClose(cls_weights_out, exp_cls_weights) + self.assertAllClose(reg_targets_out, exp_reg_targets) + self.assertAllClose(reg_weights_out, exp_reg_weights) + + +class BatchGetTargetsTest(test_case.TestCase): + + def test_scalar_targets(self): + 
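+    # batch_match uses the convention from TargetAssigner matching: values
+    # >= 0 index a groundtruth box, -1 marks background and -2 marks ignored
+    # anchors; both of the latter receive unmatched_value (99) below.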
batch_match = np.array([[1, 0, 1], + [-2, -1, 1]], dtype=np.int32) + groundtruth_tensors_list = np.array([[11, 12], [13, 14]], dtype=np.int32) + groundtruth_weights_list = np.array([[1.0, 1.0], [1.0, 0.5]], + dtype=np.float32) + unmatched_value = np.array(99, dtype=np.int32) + unmatched_weight = np.array(0.0, dtype=np.float32) + + def graph_fn(batch_match, groundtruth_tensors_list, + groundtruth_weights_list, unmatched_value, unmatched_weight): + targets, weights = targetassigner.batch_get_targets( + batch_match, tf.unstack(groundtruth_tensors_list), + tf.unstack(groundtruth_weights_list), + unmatched_value, unmatched_weight) + return (targets, weights) + + (targets_np, weights_np) = self.execute(graph_fn, [ + batch_match, groundtruth_tensors_list, groundtruth_weights_list, + unmatched_value, unmatched_weight + ]) + self.assertAllEqual([[12, 11, 12], + [99, 99, 14]], targets_np) + self.assertAllClose([[1.0, 1.0, 1.0], + [0.0, 0.0, 0.5]], weights_np) + + def test_1d_targets(self): + batch_match = np.array([[1, 0, 1], + [-2, -1, 1]], dtype=np.int32) + groundtruth_tensors_list = np.array([[[11, 12], [12, 13]], + [[13, 14], [14, 15]]], + dtype=np.float32) + groundtruth_weights_list = np.array([[1.0, 1.0], [1.0, 0.5]], + dtype=np.float32) + unmatched_value = np.array([99, 99], dtype=np.float32) + unmatched_weight = np.array(0.0, dtype=np.float32) + + def graph_fn(batch_match, groundtruth_tensors_list, + groundtruth_weights_list, unmatched_value, unmatched_weight): + targets, weights = targetassigner.batch_get_targets( + batch_match, tf.unstack(groundtruth_tensors_list), + tf.unstack(groundtruth_weights_list), + unmatched_value, unmatched_weight) + return (targets, weights) + + (targets_np, weights_np) = self.execute(graph_fn, [ + batch_match, groundtruth_tensors_list, groundtruth_weights_list, + unmatched_value, unmatched_weight + ]) + self.assertAllClose([[[12, 13], [11, 12], [12, 13]], + [[99, 99], [99, 99], [14, 15]]], targets_np) + self.assertAllClose([[1.0, 1.0, 1.0], + [0.0, 0.0, 0.5]], weights_np) + + +class BatchTargetAssignConfidencesTest(test_case.TestCase): + + def _get_target_assigner(self): + similarity_calc = region_similarity_calculator.IouSimilarity() + matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, + unmatched_threshold=0.5) + box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) + return targetassigner.TargetAssigner(similarity_calc, matcher, box_coder) + + def test_batch_assign_empty_groundtruth(self): + + def graph_fn(anchor_means, groundtruth_box_corners, gt_class_confidences): + groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) + gt_box_batch = [groundtruth_boxlist] + gt_class_confidences_batch = [gt_class_confidences] + anchors_boxlist = box_list.BoxList(anchor_means) + + num_classes = 3 + implicit_class_weight = 0.5 + unmatched_class_label = tf.constant([1] + num_classes * [0], tf.float32) + multiclass_target_assigner = self._get_target_assigner() + (cls_targets, cls_weights, reg_targets, reg_weights, + _) = targetassigner.batch_assign_confidences( + multiclass_target_assigner, + anchors_boxlist, + gt_box_batch, + gt_class_confidences_batch, + unmatched_class_label=unmatched_class_label, + include_background_class=True, + implicit_class_weight=implicit_class_weight) + return (cls_targets, cls_weights, reg_targets, reg_weights) + + groundtruth_box_corners = np.zeros((0, 4), dtype=np.float32) + anchor_means = np.array([[0, 0, .25, .25], + [0, .25, 1, 1]], dtype=np.float32) + num_classes = 3 + pad = 1 + gt_class_confidences = 
np.zeros((0, num_classes + pad), dtype=np.float32) + + exp_cls_targets = [[[1, 0, 0, 0], + [1, 0, 0, 0]]] + exp_cls_weights = [[[0.5, 0.5, 0.5, 0.5], + [0.5, 0.5, 0.5, 0.5]]] + exp_reg_targets = [[[0, 0, 0, 0], + [0, 0, 0, 0]]] + exp_reg_weights = [[0, 0]] + + (cls_targets_out, + cls_weights_out, reg_targets_out, reg_weights_out) = self.execute( + graph_fn, + [anchor_means, groundtruth_box_corners, gt_class_confidences]) + self.assertAllClose(cls_targets_out, exp_cls_targets) + self.assertAllClose(cls_weights_out, exp_cls_weights) + self.assertAllClose(reg_targets_out, exp_reg_targets) + self.assertAllClose(reg_weights_out, exp_reg_weights) + + def test_batch_assign_confidences_agnostic(self): + + def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2): + box_list1 = box_list.BoxList(groundtruth_boxlist1) + box_list2 = box_list.BoxList(groundtruth_boxlist2) + gt_box_batch = [box_list1, box_list2] + gt_class_confidences_batch = [None, None] + anchors_boxlist = box_list.BoxList(anchor_means) + agnostic_target_assigner = self._get_target_assigner() + implicit_class_weight = 0.5 + (cls_targets, cls_weights, reg_targets, reg_weights, + _) = targetassigner.batch_assign_confidences( + agnostic_target_assigner, + anchors_boxlist, + gt_box_batch, + gt_class_confidences_batch, + include_background_class=False, + implicit_class_weight=implicit_class_weight) + return (cls_targets, cls_weights, reg_targets, reg_weights) + + groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32) + groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1], + [0.015789, 0.0985, 0.55789, 0.3842]], + dtype=np.float32) + anchor_means = np.array([[0, 0, .25, .25], + [0, .25, 1, 1], + [0, .1, .5, .5], + [.75, .75, 1, 1]], dtype=np.float32) + + exp_cls_targets = [[[1], [0], [0], [0]], + [[0], [1], [1], [0]]] + exp_cls_weights = [[[1], [0.5], [0.5], [0.5]], + [[0.5], [1], [1], [0.5]]] + exp_reg_targets = [[[0, 0, -0.5, -0.5], + [0, 0, 0, 0], + [0, 0, 0, 0,], + [0, 0, 0, 0,],], + [[0, 0, 0, 0,], + [0, 0.01231521, 0, 0], + [0.15789001, -0.01500003, 0.57889998, -1.15799987], + [0, 0, 0, 0]]] + exp_reg_weights = [[1, 0, 0, 0], + [0, 1, 1, 0]] + + (cls_targets_out, + cls_weights_out, reg_targets_out, reg_weights_out) = self.execute( + graph_fn, [anchor_means, groundtruth_boxlist1, groundtruth_boxlist2]) + self.assertAllClose(cls_targets_out, exp_cls_targets) + self.assertAllClose(cls_weights_out, exp_cls_weights) + self.assertAllClose(reg_targets_out, exp_reg_targets) + self.assertAllClose(reg_weights_out, exp_reg_weights) + + def test_batch_assign_confidences_multiclass(self): + + def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2, + class_targets1, class_targets2): + box_list1 = box_list.BoxList(groundtruth_boxlist1) + box_list2 = box_list.BoxList(groundtruth_boxlist2) + gt_box_batch = [box_list1, box_list2] + gt_class_confidences_batch = [class_targets1, class_targets2] + anchors_boxlist = box_list.BoxList(anchor_means) + multiclass_target_assigner = self._get_target_assigner() + num_classes = 3 + implicit_class_weight = 0.5 + unmatched_class_label = tf.constant([1] + num_classes * [0], tf.float32) + (cls_targets, cls_weights, reg_targets, reg_weights, + _) = targetassigner.batch_assign_confidences( + multiclass_target_assigner, + anchors_boxlist, + gt_box_batch, + gt_class_confidences_batch, + unmatched_class_label=unmatched_class_label, + include_background_class=True, + implicit_class_weight=implicit_class_weight) + return (cls_targets, cls_weights, reg_targets, reg_weights) + + 
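+    # class_targets2 below marks one class of the second box as an explicit
+    # negative (-1): its matched anchor is pushed to background (no regression
+    # target), and only that explicitly negative column keeps full weight 1
+    # rather than the implicit 0.5.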
groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32) + groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1], + [0.015789, 0.0985, 0.55789, 0.3842]], + dtype=np.float32) + class_targets1 = np.array([[0, 1, 0, 0]], dtype=np.float32) + class_targets2 = np.array([[0, 0, 0, 1], + [0, 0, -1, 0]], dtype=np.float32) + + anchor_means = np.array([[0, 0, .25, .25], + [0, .25, 1, 1], + [0, .1, .5, .5], + [.75, .75, 1, 1]], dtype=np.float32) + exp_cls_targets = [[[0, 1, 0, 0], + [1, 0, 0, 0], + [1, 0, 0, 0], + [1, 0, 0, 0]], + [[1, 0, 0, 0], + [0, 0, 0, 1], + [1, 0, 0, 0], + [1, 0, 0, 0]]] + exp_cls_weights = [[[1, 1, 0.5, 0.5], + [0.5, 0.5, 0.5, 0.5], + [0.5, 0.5, 0.5, 0.5], + [0.5, 0.5, 0.5, 0.5]], + [[0.5, 0.5, 0.5, 0.5], + [1, 0.5, 0.5, 1], + [0.5, 0.5, 1, 0.5], + [0.5, 0.5, 0.5, 0.5]]] + exp_reg_targets = [[[0, 0, -0.5, -0.5], + [0, 0, 0, 0], + [0, 0, 0, 0,], + [0, 0, 0, 0,],], + [[0, 0, 0, 0,], + [0, 0.01231521, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0]]] + exp_reg_weights = [[1, 0, 0, 0], + [0, 1, 0, 0]] + + (cls_targets_out, cls_weights_out, reg_targets_out, + reg_weights_out) = self.execute(graph_fn, [ + anchor_means, groundtruth_boxlist1, groundtruth_boxlist2, + class_targets1, class_targets2 + ]) + self.assertAllClose(cls_targets_out, exp_cls_targets) + self.assertAllClose(cls_weights_out, exp_cls_weights) + self.assertAllClose(reg_targets_out, exp_reg_targets) + self.assertAllClose(reg_weights_out, exp_reg_weights) + + def test_batch_assign_confidences_multiclass_with_padded_groundtruth(self): + + def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2, + class_targets1, class_targets2, groundtruth_weights1, + groundtruth_weights2): + box_list1 = box_list.BoxList(groundtruth_boxlist1) + box_list2 = box_list.BoxList(groundtruth_boxlist2) + gt_box_batch = [box_list1, box_list2] + gt_class_confidences_batch = [class_targets1, class_targets2] + gt_weights = [groundtruth_weights1, groundtruth_weights2] + anchors_boxlist = box_list.BoxList(anchor_means) + multiclass_target_assigner = self._get_target_assigner() + num_classes = 3 + unmatched_class_label = tf.constant([1] + num_classes * [0], tf.float32) + implicit_class_weight = 0.5 + (cls_targets, cls_weights, reg_targets, reg_weights, + _) = targetassigner.batch_assign_confidences( + multiclass_target_assigner, + anchors_boxlist, + gt_box_batch, + gt_class_confidences_batch, + gt_weights, + unmatched_class_label=unmatched_class_label, + include_background_class=True, + implicit_class_weight=implicit_class_weight) + + return (cls_targets, cls_weights, reg_targets, reg_weights) + + groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2], + [0., 0., 0., 0.]], dtype=np.float32) + groundtruth_weights1 = np.array([1, 0], dtype=np.float32) + groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1], + [0.015789, 0.0985, 0.55789, 0.3842], + [0, 0, 0, 0]], + dtype=np.float32) + groundtruth_weights2 = np.array([1, 1, 0], dtype=np.float32) + class_targets1 = np.array([[0, 1, 0, 0], [0, 0, 0, 0]], dtype=np.float32) + class_targets2 = np.array([[0, 0, 0, 1], + [0, 0, -1, 0], + [0, 0, 0, 0]], dtype=np.float32) + anchor_means = np.array([[0, 0, .25, .25], + [0, .25, 1, 1], + [0, .1, .5, .5], + [.75, .75, 1, 1]], dtype=np.float32) + + exp_cls_targets = [[[0, 1, 0, 0], + [1, 0, 0, 0], + [1, 0, 0, 0], + [1, 0, 0, 0]], + [[1, 0, 0, 0], + [0, 0, 0, 1], + [1, 0, 0, 0], + [1, 0, 0, 0]]] + exp_cls_weights = [[[1, 1, 0.5, 0.5], + [0.5, 0.5, 0.5, 0.5], + [0.5, 0.5, 0.5, 0.5], + [0.5, 0.5, 0.5, 0.5]], + [[0.5, 0.5, 0.5, 0.5], + [1, 0.5, 0.5, 1], + 
[0.5, 0.5, 1, 0.5], + [0.5, 0.5, 0.5, 0.5]]] + exp_reg_targets = [[[0, 0, -0.5, -0.5], + [0, 0, 0, 0], + [0, 0, 0, 0,], + [0, 0, 0, 0,],], + [[0, 0, 0, 0,], + [0, 0.01231521, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0]]] + exp_reg_weights = [[1, 0, 0, 0], + [0, 1, 0, 0]] + + (cls_targets_out, cls_weights_out, reg_targets_out, + reg_weights_out) = self.execute(graph_fn, [ + anchor_means, groundtruth_boxlist1, groundtruth_boxlist2, + class_targets1, class_targets2, groundtruth_weights1, + groundtruth_weights2 + ]) + self.assertAllClose(cls_targets_out, exp_cls_targets) + self.assertAllClose(cls_weights_out, exp_cls_weights) + self.assertAllClose(reg_targets_out, exp_reg_targets) + self.assertAllClose(reg_weights_out, exp_reg_weights) + + def test_batch_assign_confidences_multidimensional(self): + + def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2, + class_targets1, class_targets2): + box_list1 = box_list.BoxList(groundtruth_boxlist1) + box_list2 = box_list.BoxList(groundtruth_boxlist2) + gt_box_batch = [box_list1, box_list2] + gt_class_confidences_batch = [class_targets1, class_targets2] + anchors_boxlist = box_list.BoxList(anchor_means) + multiclass_target_assigner = self._get_target_assigner() + target_dimensions = (2, 3) + unmatched_class_label = tf.constant(np.zeros(target_dimensions), + tf.float32) + implicit_class_weight = 0.5 + (cls_targets, cls_weights, reg_targets, reg_weights, + _) = targetassigner.batch_assign_confidences( + multiclass_target_assigner, + anchors_boxlist, + gt_box_batch, + gt_class_confidences_batch, + unmatched_class_label=unmatched_class_label, + include_background_class=True, + implicit_class_weight=implicit_class_weight) + return (cls_targets, cls_weights, reg_targets, reg_weights) + + groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32) + groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1], + [0.015789, 0.0985, 0.55789, 0.3842]], + dtype=np.float32) + class_targets1 = np.array([[0, 1, 0, 0]], dtype=np.float32) + class_targets2 = np.array([[0, 0, 0, 1], + [0, 0, 1, 0]], dtype=np.float32) + class_targets1 = np.array([[[0, 1, 1], + [1, 1, 0]]], dtype=np.float32) + class_targets2 = np.array([[[0, 1, 1], + [1, 1, 0]], + [[0, 0, 1], + [0, 0, 1]]], dtype=np.float32) + + anchor_means = np.array([[0, 0, .25, .25], + [0, .25, 1, 1], + [0, .1, .5, .5], + [.75, .75, 1, 1]], dtype=np.float32) + + with self.assertRaises(ValueError): + _, _, _, _ = self.execute(graph_fn, [ + anchor_means, groundtruth_boxlist1, groundtruth_boxlist2, + class_targets1, class_targets2 + ]) + + +class CreateTargetAssignerTest(tf.test.TestCase): + + def test_create_target_assigner(self): + """Tests that named constructor gives working target assigners. + + TODO(rathodv): Make this test more general. + """ + corners = [[0.0, 0.0, 1.0, 1.0]] + groundtruth = box_list.BoxList(tf.constant(corners)) + + priors = box_list.BoxList(tf.constant(corners)) + multibox_ta = (targetassigner + .create_target_assigner('Multibox', stage='proposal')) + multibox_ta.assign(priors, groundtruth) + # No tests on output, as that may vary arbitrarily as new target assigners + # are added. As long as it is constructed correctly and runs without errors, + # tests on the individual assigners cover correctness of the assignments. 
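+    # For reference, the factory wires: Multibox/proposal -> greedy bipartite
+    # matching with a mean-stddev coder; FasterRCNN/proposal -> argmax matching
+    # at 0.7/0.3; FasterRCNN/detection -> argmax at 0.5; FastRCNN -> argmax at
+    # 0.5/0.1. Each remaining combination is exercised below.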
+
+    anchors = box_list.BoxList(tf.constant(corners))
+    faster_rcnn_proposals_ta = (targetassigner
+                                .create_target_assigner('FasterRCNN',
+                                                        stage='proposal'))
+    faster_rcnn_proposals_ta.assign(anchors, groundtruth)
+
+    fast_rcnn_ta = (targetassigner
+                    .create_target_assigner('FastRCNN'))
+    fast_rcnn_ta.assign(anchors, groundtruth)
+
+    faster_rcnn_detection_ta = (targetassigner
+                                .create_target_assigner('FasterRCNN',
+                                                        stage='detection'))
+    faster_rcnn_detection_ta.assign(anchors, groundtruth)
+
+    with self.assertRaises(ValueError):
+      targetassigner.create_target_assigner('InvalidDetector',
+                                            stage='invalid_stage')
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/data/coco/coco-2017/readme.txt b/data/coco/coco-2017/readme.txt
new file mode 100644
index 0000000..d258195
--- /dev/null
+++ b/data/coco/coco-2017/readme.txt
@@ -0,0 +1 @@
+[COCO-2017 dataset](http://cocodataset.org/#download)
diff --git a/data/coco/coco-ovic/annotations/readme.txt b/data/coco/coco-ovic/annotations/readme.txt
new file mode 100644
index 0000000..0e8511b
--- /dev/null
+++ b/data/coco/coco-ovic/annotations/readme.txt
@@ -0,0 +1 @@
+annotations
diff --git a/data/coco/coco-ovic/coco.py b/data/coco/coco-ovic/coco.py
new file mode 100644
index 0000000..eac4fc1
--- /dev/null
+++ b/data/coco/coco-ovic/coco.py
@@ -0,0 +1,345 @@
+"""COCO Dataset Classes
+
+Original author: Francisco Massa
+https://github.com/fmassa/vision/blob/voc_dataset/torchvision/datasets/voc.py
+
+Updated by: Ellis Brown, Max deGroot
+"""
+
+import os
+import pickle
+import os.path
+import sys
+#import torch
+#import torch.utils.data as data
+#import torchvision.transforms as transforms
+import cv2
+import numpy as np
+import json
+import uuid
+
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+from pycocotools import mask as COCOmask
+
+
+class COCODetection(object):
+
+    """COCO Detection Dataset Object
+
+    input is image, target is annotation
+
+    Arguments:
+        root (string): filepath to the COCO data folder.
+        image_set (string): imageset to use (eg. 'train', 'val', 'test')
+        transform (callable, optional): transformation to perform on the
+            input image
+        target_transform (callable, optional): transformation to perform on
+            the target `annotation`
+            (eg: take in caption string, return tensor of word indices)
+        dataset_name (string, optional): which dataset to load
+            (default: 'COCO')
+    """
+
+    def __init__(self, root, image_sets, preproc=None, target_transform=None,
+                 dataset_name='COCO'):
+        self.root = root
+        self.cache_path = os.path.join(self.root, 'cache')
+        self.image_set = image_sets
+        self.preproc = preproc
+        self.target_transform = target_transform
+        self.name = dataset_name
+        self.ids = list()
+        self.annotations = list()
+
+        for i_set in image_sets:
+            if "_minminval_L" in i_set:
+                annofile = os.path.join(root, "annotations",
+                                        "instances_minminval_L.json")
+            elif "_minval_L" in i_set:
+                annofile = os.path.join(root, "annotations",
+                                        "instances_minval_L.json")
+            # if("L" in i_set):
+            #     annofile = os.path.join(root, "annotations", "instances_minval_L.json")
+            # if("L_1" in i_set):
+            #     annofile = os.path.join(root, "annotations", "instances_minval_L_1.json")
+            coco_name = "2017_L"
+            _COCO = COCO(annofile)
+            self._COCO = _COCO
+            self.coco_name = coco_name
+            cats = _COCO.loadCats(_COCO.getCatIds())
+            self._classes = tuple([c['name'] for c in cats])
+            self.num_classes = len(self._classes)
+            self._class_to_ind = dict(zip(self._classes,
+                                          range(self.num_classes)))
+            self._class_to_coco_cat_id = dict(zip([c['name'] for c in cats],
+                                                  _COCO.getCatIds()))
+            indexes = _COCO.getImgIds()
+            self.image_indexes = indexes
+            print('Number of images: {}'.format(len(indexes)))
+            self.ids.extend([self.image_path_from_index(index)
+                             for index in indexes])
+
+            #self.annotations.extend(self._load_coco_annotations(coco_name, indexes, _COCO))
+            #self.annotations = np.array(self.annotations)
+            #self.annGreater80 = np.where(self.annotations[:, -1] > 80)
+            #print(">80", self.annotations.shape, type(self.annotations))
+
+    def image_path_from_index(self, index):
+        """
+        Construct an image path from the image's "index" identifier.
+        """
+        # Example image path for index=119993 (COCO-2017 naming):
+        # val_images/000000119993.jpg
+        file_name = (str(index).zfill(12) + '.jpg')
+        #train_image_path = os.path.join(self.root, 'train_images', file_name)
+        val_image_path = os.path.join(self.root, 'val_images', file_name)
+        if os.path.exists(val_image_path):
+            return val_image_path
+        else:
+            print("Image not found!", val_image_path)
+        #else:
+        #    return train_image_path
+
+    def _get_ann_file(self, name):
+        prefix = 'instances' if name.find('test') == -1 \
+            else 'image_info'
+        return os.path.join(self.root, 'annotations',
+                            prefix + '_' + name + '.json')
+
+    def _load_coco_annotations(self, coco_name, indexes, _COCO):
+        cache_file = os.path.join(self.cache_path, coco_name + '_gt_roidb.pkl')
+        if os.path.exists(cache_file):
+            with open(cache_file, 'rb') as fid:
+                roidb = pickle.load(fid)
+            print('{} gt roidb loaded from {}'.format(coco_name, cache_file))
+            return roidb
+
+        gt_roidb = [self._annotation_from_index(index, _COCO)
+                    for index in indexes]
+        with open(cache_file, 'wb') as fid:
+            pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL)
+        print('wrote gt roidb to {}'.format(cache_file))
+        return gt_roidb
+
+    def _annotation_from_index(self, index, _COCO):
+        """
+        Loads COCO bounding-box instance annotations. Crowd instances are
+        handled by marking their overlaps (with all categories) to -1. This
+        overlap value means that crowd "instances" are excluded from training.
+ """ + im_ann = _COCO.loadImgs(index)[0] + width = im_ann['width'] + height = im_ann['height'] + + annIds = _COCO.getAnnIds(imgIds=index, iscrowd=None) + objs = _COCO.loadAnns(annIds) + # Sanitize bboxes -- some are invalid + valid_objs = [] + for obj in objs: + x1 = np.max((0, obj['bbox'][0])) + y1 = np.max((0, obj['bbox'][1])) + x2 = np.min((width - 1, x1 + np.max((0, obj['bbox'][2] - 1)))) + y2 = np.min((height - 1, y1 + np.max((0, obj['bbox'][3] - 1)))) + if obj['area'] > 0 and x2 >= x1 and y2 >= y1: + obj['clean_bbox'] = [x1, y1, x2, y2] + valid_objs.append(obj) + objs = valid_objs + num_objs = len(objs) + + res = np.zeros((num_objs, 5)) + + # Lookup table to map from COCO category ids to our internal class + # indices + coco_cat_id_to_class_ind = dict([(self._class_to_coco_cat_id[cls], + self._class_to_ind[cls]) + for cls in self._classes]) + #print("cls is ",coco_cat_id_to_class_ind) + + for ix, obj in enumerate(objs): + cls = coco_cat_id_to_class_ind[obj['category_id']] + #clsobj['category_id'] + res[ix, 0:4] = obj['clean_bbox'] + res[ix, 4] = cls + + return res + + + + def __getitem__(self, index): + img_id = self.ids[index] + target = self.annotations[index] + img = cv2.imread(img_id, cv2.IMREAD_COLOR) + height, width, _ = img.shape + + if self.target_transform is not None: + target = self.target_transform(target) + + + if self.preproc is not None: + img, target = self.preproc(img, target) + + # target = self.target_transform(target, width, height) + #print(target.shape) + + return img, target + + def __len__(self): + return len(self.ids) + + def pull_image(self, index): + '''Returns the original image object at index in PIL form + + Note: not using self.__getitem__(), as any transformations passed in + could mess up this functionality. + + Argument: + index (int): index of img to show + Return: + PIL img + ''' + img_id = self.ids[index] + return cv2.imread(img_id, cv2.IMREAD_COLOR) + + + def pull_tensor(self, index): + '''Returns the original image at an index in tensor form + + Note: not using self.__getitem__(), as any transformations passed in + could mess up this functionality. 
+
+        Argument:
+            index (int): index of img to show
+        Return:
+            tensorized version of img, squeezed
+        '''
+        import torch  # deferred import; see the note in the docstring above
+        return torch.Tensor(self.pull_image(index)).unsqueeze_(0)
+
+    def _print_detection_eval_metrics(self, coco_eval, csv_path):
+        IoU_lo_thresh = 0.5
+        IoU_hi_thresh = 0.95
+
+        def _get_thr_ind(coco_eval, thr):
+            ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) &
+                           (coco_eval.params.iouThrs < thr + 1e-5))[0][0]
+            iou_thr = coco_eval.params.iouThrs[ind]
+            assert np.isclose(iou_thr, thr)
+            return ind
+
+        ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)
+        ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)
+        # precision has dims (iou, recall, cls, area range, max dets)
+        # area range index 0: all area ranges
+        # max dets index 2: 100 per image
+        precision = \
+            coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
+        ap_default = np.mean(precision[precision > -1])
+        print('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] '
+              '~~~~'.format(IoU_lo_thresh, IoU_hi_thresh))
+        f_csv = open(csv_path, "w")
+        print('{:.1f}'.format(100 * ap_default))
+        print('{:.1f}'.format(100 * ap_default), file=f_csv)
+        for cls_ind, cls in enumerate(self._classes):
+            if cls == '__background__':
+                continue
+            # self._classes comes straight from the COCO categories and has
+            # no '__background__' entry, so cls_ind indexes the precision
+            # array's class dimension directly.
+            precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, cls_ind, 0, 2]
+            ap = np.mean(precision[precision > -1])
+            print('{:.1f}'.format(100 * ap))
+            print('{:.1f}'.format(100 * ap), file=f_csv)
+        f_csv.close()
+
+        print('~~~~ Summary metrics ~~~~')
+        coco_eval.summarize()
+
+    def _do_detection_eval(self, res_file, output_dir):
+        ann_type = 'bbox'
+        coco_dt = self._COCO.loadRes(res_file)
+        coco_eval = COCOeval(self._COCO, coco_dt, ann_type)
+        coco_eval.evaluate()
+        coco_eval.accumulate()
+        eval_file = os.path.join(output_dir, 'result.csv')
+        self._print_detection_eval_metrics(coco_eval, eval_file)
+        print('Wrote COCO eval results to: {}'.format(eval_file))
+
+    def _coco_results_one_category(self, boxes, cat_id):
+        results = []
+        for im_ind, index in enumerate(self.image_indexes):
+            dets = boxes[im_ind].astype(np.float64)
+            if len(dets) == 0:
+                continue
+            scores = dets[:, -1]
+            xs = dets[:, 0]
+            ys = dets[:, 1]
+            ws = dets[:, 2] - xs + 1
+            hs = dets[:, 3] - ys + 1
+            results.extend(
+                [{'image_id': index,
+                  'category_id': cat_id,
+                  'bbox': [xs[k], ys[k], ws[k], hs[k]],
+                  'score': scores[k]} for k in range(dets.shape[0])])
+        return results
+
+    def _write_coco_results_file(self, all_boxes, res_file):
+        # [{"image_id": 42,
+        #   "category_id": 18,
+        #   "bbox": [258.15,41.29,348.26,243.78],
+        #   "score": 0.236}, ...]
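+        # COCO "bbox" entries are [x, y, width, height] in absolute pixels;
+        # _coco_results_one_category converts the stored [x1, y1, x2, y2]
+        # corners back to widths/heights using the same inclusive "+1"
+        # pixel convention as _annotation_from_index.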
+        results = []
+        # print(self._class_to_coco_cat_id)
+
+        for cls_ind, cls in enumerate(self._classes):
+            if cls == '__background__':
+                continue
+            print('Collecting {} results ({:d}/{:d})'.format(cls, cls_ind,
+                                                             self.num_classes))
+            coco_cat_id = self._class_to_coco_cat_id[cls]
+            results.extend(self._coco_results_one_category(all_boxes[cls_ind],
+                                                           coco_cat_id))
+            # Chunked writing (one json per block of classes) can be added
+            # here if the full results list does not fit in memory.
+        print('Writing results json to {}'.format(res_file))
+        with open(res_file, 'w') as fid:
+            json.dump(results, fid)
+
+    def evaluate_detections(self, all_boxes, output_dir):
+        res_file = os.path.join(output_dir, ('detections_' +
+                                             self.coco_name +
+                                             '_results'))
+        res_file += '.json'
+        self._write_coco_results_file(all_boxes, res_file)
+        # Only do evaluation on non-test sets
+        if self.coco_name.find('test') == -1:
+            self._do_detection_eval(res_file, output_dir)
+        # Optionally cleanup results json file
+
+    def evaluate_detections_json(self, output_dir):
+        res_file = os.path.join(output_dir, ('detections_' +
+                                             self.coco_name +
+                                             '_results'))
+        res_file += '.json'
+        self._do_detection_eval(res_file, output_dir)
diff --git a/data/coco/coco-ovic/genMinval8000.py b/data/coco/coco-ovic/genMinval8000.py
new file mode 100644
index 0000000..34201e4
--- /dev/null
+++ b/data/coco/coco-ovic/genMinval8000.py
@@ -0,0 +1,99 @@
+"""Split the COCO train2017/val2017 annotations into an OVIC minval set
+(the images listed in ovic_val_2017_list.txt) and a training set that
+contains every other image."""
+
+import os
+import json
+
+from pycocotools.coco import COCO
+
+# ann_root = "/nfs/project/OVIC/coco_2014_2017/"
+# out_root = "/nfs/project/OVIC/coco_ovic/"
+ann_root = "./../coco-2017/"
+out_root = "./"
+train_json = os.path.join(ann_root, "annotations/instances_train2017.json")
+val_json = os.path.join(ann_root, "annotations/instances_val2017.json")
+out_minval_json = os.path.join(out_root, "annotations/instances_minval_L.json")
+out_train_json = os.path.join(out_root, "annotations/instances_train_L.json")
+
+out_minval_json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []}
+out_train_json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []}
+
+# A set gives O(1) membership tests; the loops below check every image in
+# val2017 and train2017 against these ~8000 file names.
+image_ids = set()
+with open("ovic_val_2017_list.txt", "r") as minval_txt_f:
+    for line in minval_txt_f:
+        _, fname = os.path.split(line.strip())
+        image_ids.add(fname)
+
+c = 0
+
+with open(val_json, "r") as val_f:
+    val_dict = json.load(val_f)
+_COCO = COCO(val_json)
+for i, item in enumerate(val_dict["images"]):
+    img_id = item["file_name"]
+    index_img = item["id"]
+    if(img_id in image_ids):
+        c += 1
+        out_minval_json_dict["images"].append(item)
+        annIds = _COCO.getAnnIds(imgIds=index_img, iscrowd=None)
+        objs = _COCO.loadAnns(annIds)
+        out_minval_json_dict['annotations'].extend(objs)
+    else:
+        out_train_json_dict["images"].append(item)
+        annIds = _COCO.getAnnIds(imgIds=index_img, iscrowd=None)
+        objs = _COCO.loadAnns(annIds)
+        out_train_json_dict['annotations'].extend(objs)
+
+print(c)
+
+
+with open(train_json, "r") as train_f:
+    train_dict = json.load(train_f)
+_COCO = COCO(train_json)
+for i, item in enumerate(train_dict["images"]):
+    img_id = item["file_name"]
+
index_img = item["id"] + if(img_id in image_ids): + c += 1 + out_minval_json_dict["images"].append(item) + annIds = _COCO.getAnnIds(imgIds=index_img, iscrowd=None) + objs = _COCO.loadAnns(annIds) + out_minval_json_dict['annotations'].extend(objs) + else: + out_train_json_dict["images"].append(item) + annIds = _COCO.getAnnIds(imgIds=index_img, iscrowd=None) + objs = _COCO.loadAnns(annIds) + out_train_json_dict['annotations'].extend(objs) + if(c%1000==0): + print(c) + + +out_minval_json_dict["categories"] = val_dict["categories"] +out_train_json_dict["categories"] = val_dict["categories"] + +#print(c,out_minval_json_dict["annotations"][-4:-1]) + +with open(out_minval_json,"w") as f: + json.dump(out_minval_json_dict,f) + print("write minval end!") + +with open(out_train_json,"w") as f: + json.dump(out_train_json_dict,f) + print("write train end!") + diff --git a/data/coco/coco-ovic/ovic_val_2017_list.txt b/data/coco/coco-ovic/ovic_val_2017_list.txt new file mode 100644 index 0000000..936a7f8 --- /dev/null +++ b/data/coco/coco-ovic/ovic_val_2017_list.txt @@ -0,0 +1,7991 @@ +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000113233.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000397857.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000225093.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000063516.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000314616.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000368671.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000321385.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000232538.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000429246.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000118594.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000560598.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000083656.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000088897.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000029370.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000335851.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000503841.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000010256.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000084498.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000027371.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000217521.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000370165.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000442929.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000236461.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000110435.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000330845.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000376209.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000487266.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000519542.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000056983.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000515779.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000421757.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000183387.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000151820.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000404568.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000130401.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000337160.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000445567.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000233119.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000011570.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000365851.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000253630.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000312051.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000136533.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000249276.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000207860.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000208380.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000120860.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000295614.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000115924.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000215170.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000422200.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000307611.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000488075.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000043266.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000503008.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000510230.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000473171.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000494634.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000133078.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000564820.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000350789.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000274835.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000150834.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000379210.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000011156.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000235302.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000266370.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000054761.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000319687.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000148280.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000278134.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000533129.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000288592.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000204935.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000370279.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000259755.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000220224.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000025165.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000194184.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000302555.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000221307.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000358658.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000480985.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000107468.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000579760.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000526767.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000571564.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000342273.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000163628.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000357782.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000325598.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000418106.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000222781.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000057345.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000200882.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000047687.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000259099.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000276151.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000369594.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000382671.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000430359.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000051223.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000301595.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000386193.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000376307.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000391891.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000554046.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000211063.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000479909.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000145378.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000292123.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000523637.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000212646.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000187585.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000516867.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000365277.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000042893.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000017152.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000558539.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000500825.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000300322.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000271230.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000065357.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000193565.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000132992.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000357943.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000327352.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000295270.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000002302.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000570586.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000033835.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000162841.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000262609.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000049559.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000413260.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000554377.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000547041.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000061515.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000530061.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000399666.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372037.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000330348.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000559442.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000302318.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000479912.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000520508.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000461262.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000089999.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000217622.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000359864.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000131497.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000283261.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000095677.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000286327.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000016382.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000246069.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000486079.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000292804.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000122208.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000196484.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000278742.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000525050.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000162867.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000519275.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000158798.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000378347.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000569272.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000475413.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000197111.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000501762.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000014940.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000364125.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000545958.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000547106.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000116244.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000192871.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000415408.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315790.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000311922.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000039852.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000509786.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000069392.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000518701.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000508899.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000576743.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000255649.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000173908.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000338375.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000191724.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000299970.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000149912.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000389453.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000013465.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000358247.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000579872.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000219135.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000086650.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000495557.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000417876.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000008688.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000539251.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000544522.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000067587.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000202923.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000535563.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000208808.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000326781.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000147653.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000457147.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000328368.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000445008.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000166426.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000123017.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000354278.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000340573.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000240147.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000210980.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000499365.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000224222.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000426532.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000165353.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000404071.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000210276.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000070048.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000147577.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000079084.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000169606.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000106896.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000382542.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000504101.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000104893.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372362.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000212462.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000003793.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000487741.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000208363.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000019441.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000015802.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000217440.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000437298.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000366484.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000172946.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000243737.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000031333.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000483249.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000217923.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000336276.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000104790.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000083172.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000186428.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000431023.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000306281.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000231125.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000476444.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000043218.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000102837.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000516224.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000169800.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000152214.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000472376.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000437594.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000058057.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000303409.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000225738.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000351590.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000105432.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000532876.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000449485.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000250440.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000138124.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000143998.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000129420.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000167067.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000114119.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000388298.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000144862.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000172556.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000545385.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000348474.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000024223.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000559364.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000160875.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000325992.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000336113.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000564332.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000107427.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000187349.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000484457.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000397522.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000579415.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000202387.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000379144.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000312870.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000384101.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000069293.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000023991.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000180953.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000303971.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000074209.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000407570.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000356836.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000551608.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000158770.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000046144.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000123070.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000026159.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000047882.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000576875.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000390475.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000074967.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000332153.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000234779.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000152211.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000250083.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000161202.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000383419.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000167920.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000283038.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000190689.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000455555.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000042757.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000038323.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000337403.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000047263.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000219283.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000255112.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000365685.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000224010.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000234366.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000125870.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000410231.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000381925.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000395378.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000494141.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000473720.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000524665.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000292082.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000164788.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000499349.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000329041.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000570693.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000056091.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000007899.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000194940.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000417696.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000007818.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000203667.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000334466.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000138599.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000151956.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000146997.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000318442.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000285418.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000499802.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000062692.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000445267.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000132210.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000028864.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000457877.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000172599.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000046889.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000462929.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000329946.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000575815.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000227033.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000027221.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000118606.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372894.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000434897.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000362092.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000153217.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000085556.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000523137.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000480379.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000268484.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000498687.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000134178.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000381587.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000118625.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000045966.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000463884.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000459500.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000318080.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000379314.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000506416.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000489475.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000198841.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000479829.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000279784.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000322369.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000353148.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000560662.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000290248.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000425762.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000565331.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000188948.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000344325.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000170550.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000355453.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000159240.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000001103.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000432017.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000524535.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000425672.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000102461.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000511333.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000085381.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000447842.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000224523.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000203081.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000080671.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000206784.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000525211.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000243199.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000442661.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000033626.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000111179.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000543660.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000367228.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000099927.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000468972.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000344397.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000167647.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000435324.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000306139.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000178941.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000098322.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000234965.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000539975.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000515424.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000077615.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000473427.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000243148.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000176007.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000242972.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000183287.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000406404.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000513793.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000356380.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000191846.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000107257.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000272728.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000386187.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000472648.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000238199.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000037705.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000153568.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000276706.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000347253.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000575755.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000318284.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000087998.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000054533.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000553165.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000164366.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000207913.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000434410.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000149005.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000031745.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000558671.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000172006.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000188850.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000534019.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000000395.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000487534.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000457580.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000247014.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000269606.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000148392.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000526342.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000204360.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000324891.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000524282.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000385428.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000321811.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000408534.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000248224.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000484166.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315991.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000313340.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000512966.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000301837.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000552775.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000494077.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000295420.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000441496.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000201111.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000488377.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000023587.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000379800.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000490869.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000255691.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000269949.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000383018.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000399452.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000132037.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000554002.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000324715.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000068878.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000388707.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000180515.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000139605.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000471483.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000177338.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000236837.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000478586.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000262672.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000169562.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000349525.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000062020.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000230220.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000480944.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000297146.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000366615.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000040083.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000243034.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000163666.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000408112.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000336049.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000346517.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000407646.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000390585.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000109679.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000275270.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000112818.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000560908.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000051928.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000552061.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000313922.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000510642.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000398567.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000131597.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000545475.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372580.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000227736.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000045710.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000538775.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000029430.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000434089.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000030383.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000213649.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000345961.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000003983.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000280363.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000364993.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000578591.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000384848.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000116517.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000075095.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000226161.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000349344.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000000772.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000466416.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000022103.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000036492.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000523517.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000070426.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000187857.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000000502.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000281414.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000213599.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000530619.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000316323.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000298994.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000226571.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000307573.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000104137.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000549220.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000283412.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000403500.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000203539.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000049942.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000202981.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000390083.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000259819.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000509497.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000190776.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000329307.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000071360.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000430762.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000540564.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000000359.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000332096.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000246077.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000044677.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000039430.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000331692.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000025014.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000054759.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000484500.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000342814.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000305464.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000482574.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000326938.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000469235.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000542699.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000223777.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000521587.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000434511.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000310200.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000007260.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000139843.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000385005.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000055155.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000160308.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000522406.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000558640.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000539302.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000417980.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000345229.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000425925.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000341933.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000142890.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000225537.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000472864.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000386803.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000527464.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000493610.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000082052.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000046919.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000303101.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000425361.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000021465.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000515660.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000474519.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000340247.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000004980.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000558608.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000370953.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000104185.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000357578.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000186927.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000317018.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000418533.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000109537.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000008495.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000066485.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000169872.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000145591.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000060409.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000282804.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000053838.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000062566.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000511654.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000349480.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000232894.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000029341.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000327623.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000451915.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000519815.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000453390.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000397354.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000508580.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000371042.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000196336.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000036311.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000485613.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000119414.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000147471.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000014007.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000404229.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000298627.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000106712.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000496575.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000153040.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000432300.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000099736.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000542511.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000293026.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000441442.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000557510.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000470288.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000221105.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000466965.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000280733.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000082375.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000332455.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000286010.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000013081.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000123842.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000235017.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000287235.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000274438.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000301574.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000099054.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000283210.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000244124.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000291380.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000132578.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000418680.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000163314.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000339019.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000097988.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000403862.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000002839.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000073201.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000456968.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000541613.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000363654.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000287874.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000299261.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000014439.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000112440.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000226592.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000006861.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000326798.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000263463.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000182348.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000032577.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000561411.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000340946.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000062363.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000429679.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000207178.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000500543.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000504516.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000023272.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000260883.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361439.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000100343.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000137954.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000347529.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000214574.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000385598.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000206431.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000202154.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000509262.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000392722.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000326970.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000221089.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000321437.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000051500.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000312192.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000012326.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000321107.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000308278.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000278401.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000356387.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000101414.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000294119.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000245227.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000164255.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000175825.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000158412.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000000775.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000052952.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000527002.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000260363.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000560367.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000087493.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000212553.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000237600.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000215394.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000574946.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000129800.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000505576.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000127074.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000010211.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000196313.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000062937.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000391539.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000293034.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000093405.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000467484.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000004795.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000011169.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000462643.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000022596.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000317035.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000092173.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000039211.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000415961.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000384335.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000331138.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000014756.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000268378.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000547341.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000552612.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000286460.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000347648.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000460229.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000121089.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000025293.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000554347.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000235832.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000474680.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000360216.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000251910.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000554980.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000279774.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000231568.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000297426.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000497878.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000222075.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000205352.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000304741.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000333020.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000369081.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000118965.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000457230.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000145227.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000420081.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000031993.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000365997.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000014629.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000423576.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000554805.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000396404.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000022396.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000210299.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000527631.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000230190.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000051961.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000183757.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000277622.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000113139.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000387515.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000055429.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000077544.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000507171.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000122954.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000573843.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000576539.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000161820.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000510152.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000114348.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000513585.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000290741.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000116881.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000217285.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000075748.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000064822.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000427041.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000265584.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000561647.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000524002.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000239843.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000045574.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000027969.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000350639.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000352587.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000224595.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000102805.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000573605.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000356494.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000427311.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000120473.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000308156.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000527906.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000044952.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000578849.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000244111.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000322824.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000026547.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000442567.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000253223.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000038064.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000383397.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000141172.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000143129.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000336046.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000190160.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000034193.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000062703.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000555387.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000225558.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000573823.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000064152.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000469075.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361964.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000091318.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000407083.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000531126.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000288150.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000577959.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361587.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000522702.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000181421.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000020207.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000335578.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000249809.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000521359.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000574988.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000414750.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000437910.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000230862.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000473850.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000011390.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000102446.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000203661.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000456931.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000110231.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000312298.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000284605.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000290839.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000229848.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000112664.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000520237.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000493761.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000165157.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000421370.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000145436.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000260818.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000174284.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000348571.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000200401.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000524373.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000099024.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000339676.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000575776.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000190494.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000067327.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000078176.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000147030.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000000196.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000458992.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000277235.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000575357.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000124927.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000103134.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000406152.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000490620.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000045626.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000415076.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000528046.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000283761.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000212853.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000163253.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000439525.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000099961.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000089861.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000061471.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000443887.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000188906.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000221737.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000185821.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000536743.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000107226.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000060677.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000379533.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000527486.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000303713.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000084170.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000353398.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000000985.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000027656.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000516318.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000476569.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000440617.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000458221.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000213586.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000127451.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000087864.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000193875.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000530466.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000135976.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000520324.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000413684.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000518551.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000453283.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000107108.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000023448.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000131282.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000483941.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000033116.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000183803.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000574804.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000006220.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000023019.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000210439.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000262993.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000018896.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000195754.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000414156.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361472.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000427997.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000568276.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000168890.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000258089.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000505213.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000271772.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000356002.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000386514.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000099734.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000032712.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000473219.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000400489.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000497139.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000334884.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000222711.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000321663.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000042144.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000425074.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000208688.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000229478.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000150508.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000431848.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000189744.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000036484.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000261487.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000313762.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000153506.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000534533.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000191342.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000370596.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000447548.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000094795.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000488736.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000419309.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000252020.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000006437.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000560626.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000487220.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000323399.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000482187.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000289712.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000288591.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000104782.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000030273.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000329319.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000017655.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000558073.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000310479.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000205854.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000031717.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000389168.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000148766.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000321579.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000271143.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000086582.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000043376.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000020796.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000327323.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000144784.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000202799.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000029833.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000043417.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000077172.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000375606.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000277479.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000390072.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000157052.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000255338.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000169166.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000322149.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000447501.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000227468.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000569001.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000469996.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000493117.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000461334.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000127761.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000574509.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000464366.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000215622.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000509855.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000332869.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000007320.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000447791.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000178299.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000144938.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000275999.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000293858.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000443204.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000348519.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000021994.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000152004.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000391722.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000404479.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000072052.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000210098.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000026448.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000437129.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000333436.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000030932.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000393869.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000553522.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000004495.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000507421.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000483389.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000524536.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000325237.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000215872.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000406647.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000022168.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000392476.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000316505.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000063154.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000441473.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000553129.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000195042.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000498716.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000185036.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000485081.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000464689.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000565575.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000047916.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000446497.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000316351.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000296524.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000022090.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000238980.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000541664.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000132703.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000286018.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000535842.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000221183.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000036049.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000322119.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000412767.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000376891.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000469356.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000009007.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000125070.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000577864.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000564851.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000219841.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000129706.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000199688.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000003690.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000474934.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000434247.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000120340.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000408289.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000469294.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000334782.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000057904.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000572886.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000064909.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000385856.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000031981.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000521540.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000322353.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000020175.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000242208.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000084533.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000038439.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000486026.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000194208.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000295491.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000138078.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000497096.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000419624.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000198645.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000370701.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000569273.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000389197.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000506595.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000206800.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000183155.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000047121.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000097513.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000518109.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000036420.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000513096.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000353096.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000413124.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000157601.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000203849.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000452013.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000274800.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000088462.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000305253.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000261061.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000494328.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000454252.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000413551.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000177452.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000545730.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000252559.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000063650.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000495613.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000292741.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000337453.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000136464.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000217133.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000513775.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000179898.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000034445.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000375573.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000147576.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000309237.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000413805.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000131453.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000520150.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000436273.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000472540.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000326898.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000460049.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000576085.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000142487.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000036333.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000314779.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000223326.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000447733.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000347228.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000281809.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000357529.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000027946.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000273791.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000353299.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000365724.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000178254.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000344013.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000358817.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000188044.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000113935.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000010766.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000356347.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000154846.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000100306.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000305677.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000507211.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000077689.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000356432.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000263346.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000015032.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000009727.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000526257.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000487897.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000002153.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000505826.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000132375.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000387773.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000111076.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000271364.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000250108.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000339512.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000246105.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000506628.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000415958.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000061782.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000184830.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000489304.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000076087.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315672.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000519573.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000350648.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000396729.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000043555.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000418854.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000561729.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000066427.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000092050.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000368528.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000126226.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000019667.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000213932.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000513971.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000011034.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000202040.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000193389.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000135486.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000231580.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000507436.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000536444.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000245112.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000370170.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000256903.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000085747.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000503097.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000140435.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000452218.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000451014.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000522713.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000487583.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000327130.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000376441.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000400547.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000298784.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000288983.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000384808.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000222266.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000529041.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000291990.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000086471.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000394474.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000135436.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000304744.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000463611.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000237277.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000208166.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000133002.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000140661.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000465591.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000248382.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000293044.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000520109.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000255158.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000561619.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000283963.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000256713.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000541917.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000457683.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000032964.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000519039.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000323851.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000498733.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000304596.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000554579.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000221190.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000499571.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000042182.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000439710.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000060700.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000029094.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000236016.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000067765.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000343062.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000396200.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000214124.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000263687.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000571970.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000386718.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000559102.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000384596.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000126638.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000405762.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000090432.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000280498.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000112568.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000044604.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000578385.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000146099.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000504342.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000412399.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000153013.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000270554.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000310052.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000051948.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000131627.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000578347.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000453040.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000529917.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315162.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000057917.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000386638.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000229221.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000543600.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000546133.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000227460.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000261706.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000398637.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000264279.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000182314.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000016497.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000095569.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000207041.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000465835.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000454444.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000217554.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000414578.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000157577.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000354212.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000445602.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000197424.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000053956.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000044964.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000073753.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000041842.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000426795.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000074457.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000110482.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000523807.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000269436.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000292278.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000463588.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000169045.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000559776.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000304008.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000528972.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000424392.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000379055.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000234291.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000534601.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000495252.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000261758.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000478862.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000192944.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000349804.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000039504.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000148737.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000217997.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000364190.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000379767.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000427135.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000262800.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000519132.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000402720.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000127092.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000480140.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000395865.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000406000.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000259408.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000013232.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000359110.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000375426.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000291028.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000343937.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000459575.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000087141.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000316000.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000066499.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000010386.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000163682.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000441323.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000298773.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000302380.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000399516.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000448492.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000210408.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000246649.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000526737.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000456521.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000503101.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000039499.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000422025.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000247639.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000365611.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000406403.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000074963.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000376491.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000367818.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000089738.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000183991.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000104965.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000535934.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000252844.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000329138.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000151480.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000328430.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000496090.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000125539.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000182403.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000382850.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000217303.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000344881.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000292463.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000262391.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000200492.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000396387.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000434043.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000012448.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000424258.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000432529.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000053802.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000314613.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000188509.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000080053.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000045809.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000376193.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000533721.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000308702.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000229324.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000269450.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000170975.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000555050.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000349822.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000493074.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000402570.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000089614.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000540681.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000302260.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000067537.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000064510.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000557884.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000063648.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000352978.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000170041.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000542510.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000505132.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000420069.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000119995.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000370980.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000310797.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000049236.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000143103.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000213827.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000048554.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000375278.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000491098.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000201220.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000484587.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000095899.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000445041.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000148128.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000459117.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000581226.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000567308.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000297343.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000489014.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000343603.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000522566.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000147165.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000483722.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000391199.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000428140.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000579326.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000213765.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000511236.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000308974.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000280734.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000323515.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000043286.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000062025.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000518586.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000451714.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000211097.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000200302.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000168898.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000079920.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000422778.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000314876.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000205239.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000272097.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000257941.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000296492.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000394418.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000418907.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000043133.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000401539.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000523978.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000260266.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000326248.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000091844.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000561889.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000073702.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000281353.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000450357.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000081922.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000572331.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000487181.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000286353.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000056187.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000279864.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000233139.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000016249.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000380343.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000449990.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000475879.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000148911.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000096006.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000080725.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000338291.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000546622.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000138185.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000035436.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000430380.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000178761.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000189244.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000432150.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000048636.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000112702.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000341957.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000309492.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000160855.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000291572.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000126925.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000094261.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000571437.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000356662.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000274613.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000488539.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000283438.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000005154.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000310493.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000498555.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000125245.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000559101.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000545007.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000476770.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000010693.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000293841.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000257169.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000119469.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000256983.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000348973.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000311950.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000173375.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000031217.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000040471.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000125404.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000277080.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000181962.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000129054.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000000520.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000350988.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000232160.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000073119.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000557564.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000231079.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000581415.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000251627.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000378655.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361693.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000069356.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000377097.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000534275.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000557459.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000119802.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000070187.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000478922.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000338044.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000223300.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000042147.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000212427.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000373440.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000477906.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000209480.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000333697.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000074058.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000317479.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000000873.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000300023.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000010040.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000285583.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000045746.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000040886.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000078381.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000574856.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000322654.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000046331.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000308117.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000453162.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000467457.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000042563.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000349485.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000003244.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000264660.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000439630.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000092939.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000229350.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000571834.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000070134.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000278323.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000527248.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000055780.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000517082.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000556537.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000348083.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000121572.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000487461.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000476872.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000341856.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000356400.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000137475.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000541562.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000091756.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000538243.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000105734.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000363036.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000082815.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000010764.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000504958.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000168662.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000056872.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000022646.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000186709.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000391463.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000236457.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000104345.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000224542.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000398803.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000389315.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000142182.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000267315.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000419774.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000465129.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000059325.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372024.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000129285.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000478522.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000479950.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000448256.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000566756.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000519691.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000492077.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000001532.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000270544.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000462211.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000016257.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000510735.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000241269.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000548882.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000026967.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000116845.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000306099.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000284991.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000107234.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000129739.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000565031.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000169111.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000459382.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000012020.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000356299.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000192403.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000243075.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000066800.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000092963.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000495626.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000574110.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000140270.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000431691.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000122040.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000158277.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000465836.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000155873.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000422516.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000425702.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000061960.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000065008.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000201918.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000500298.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000536795.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000073199.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000425727.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000540264.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000424793.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000194724.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000110560.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000457217.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000341681.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000094823.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000440354.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000183786.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000525732.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000395665.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000298331.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000166995.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000499631.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000466239.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000229234.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000517623.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000343059.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000227781.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000074920.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000575643.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000296724.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000066650.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000256199.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000201072.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000523097.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000158795.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000491902.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000376342.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000235156.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000381504.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000278168.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000337666.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000193480.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000558661.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000240387.jpg 
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000001584.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000323155.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000208318.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000272880.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000542549.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000251797.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000490708.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000002255.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000508917.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000182366.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000148999.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000404249.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000232315.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000114744.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000334760.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000329573.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000065415.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000490008.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000546067.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000348708.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000135972.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000371497.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000489235.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000138151.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000549119.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000066297.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000128833.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000239274.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000105996.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000166255.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000213605.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000120592.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000020333.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000434513.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000143040.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000206831.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000082835.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000566717.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000506561.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000065035.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000223020.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000408138.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000160456.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000114870.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000455414.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000008583.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000151432.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000045176.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000437290.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000445594.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000373848.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000379585.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000286711.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000220025.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000124262.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000480793.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000280320.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361860.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000347142.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000412204.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000536416.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000199310.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000166205.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000102654.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000311567.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000413235.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000522198.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000092749.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000151338.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000082259.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000220670.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000206025.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000581781.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000467755.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000087489.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000382999.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000332007.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000159440.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000401092.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000468577.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000479556.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000158167.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000095063.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000322029.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000184227.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000205866.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000550797.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000533462.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000418959.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000135411.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000109516.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000187495.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000479805.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000248980.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000053635.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000444382.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000560675.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000410428.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000098871.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000317797.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000140696.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000422893.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000430681.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000031667.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000454414.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000524694.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000243946.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000255036.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000391684.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000243190.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000547597.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000532493.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000208053.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000141328.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000463290.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000245651.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000375461.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000538005.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000054747.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000457817.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000004079.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000005352.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000138556.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000507951.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000374061.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000286182.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000305540.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000573072.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000277812.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000380756.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000199927.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315702.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000383413.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000172658.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000553678.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000059463.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000421834.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000158497.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000144122.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000141887.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000183181.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000165643.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000465074.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000230245.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000346752.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000155317.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000209898.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000537864.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000554156.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000509656.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000325519.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000401614.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000225850.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000530652.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000377111.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000507037.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000138175.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000373988.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000511299.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000084168.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000255708.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000197278.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000446428.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000241867.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000232054.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000113019.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000283904.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000010580.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000219216.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000233236.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000199610.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000022461.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000517795.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000458727.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000204162.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000170038.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000190515.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000425644.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000556901.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000414340.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000307892.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000318777.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000450596.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000093791.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000163290.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000157465.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000465963.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000137315.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000017328.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000455847.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000246076.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000216218.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000577584.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000323598.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000169972.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000127596.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000221605.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000028499.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000129855.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000544629.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000552504.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000036661.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000041635.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000554433.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000229713.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000358901.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000252829.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000240918.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000380609.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361521.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000152866.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000104406.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000239194.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000305833.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000139306.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000032300.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000375786.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000245460.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000040937.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000534377.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000213829.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000013333.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000513260.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000206362.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000199127.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000575395.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000194780.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000222500.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000248129.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000028377.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000287335.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000514248.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315621.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000459598.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000078023.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000065208.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000174989.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000470095.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000548844.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000378139.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000073118.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000380088.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000118405.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000136600.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000105945.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000035148.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000048050.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000187194.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000539422.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000098872.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000465795.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000297249.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000353953.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000427820.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000130118.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000226472.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000119345.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000579560.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000157416.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000191314.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000025685.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000256035.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000567644.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000193021.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000414795.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000531585.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000577522.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000263696.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000415048.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000494438.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000316254.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000455548.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000125643.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000191360.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000152598.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000526570.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000146570.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000236954.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000522065.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000227370.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000183217.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000383033.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000415413.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000413900.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000478575.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000015660.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000371427.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000350122.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000088349.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000475893.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000031019.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000245659.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000199602.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000198247.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000292169.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000021310.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000047255.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000510606.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000248252.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000375376.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000454143.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000283881.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000318146.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000391825.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000301317.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000313182.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000177149.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000322844.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000529798.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000328745.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000257328.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000491515.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000463272.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000094501.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000043997.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000301093.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000022623.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000416337.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000296907.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000270758.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000562592.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000565543.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000310325.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000526044.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000219737.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000478250.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000082551.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000251317.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000384512.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000032258.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000130875.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000073738.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000461953.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000336793.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000137260.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000491784.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000514901.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000282062.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000563381.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000325153.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000420840.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000444804.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000458721.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000487159.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000108761.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000377155.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000096884.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000450247.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000127279.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000486964.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000025143.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000547440.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000148055.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000562101.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000126833.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000262235.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000506137.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000045248.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000174671.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000022667.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000388374.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000577995.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000305800.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000241187.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000143440.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000556230.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000214703.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000412443.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000131743.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000161567.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000352445.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000361180.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000152252.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000062210.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000224281.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000432414.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000047471.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000143329.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000164464.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000248441.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000097315.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000352538.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361046.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000200162.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000233660.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000320857.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000460781.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000371289.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000014088.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000546011.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000575441.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000577826.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000349841.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000260257.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000050778.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000118106.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000308645.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000448410.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000052644.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000207728.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000463226.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000292118.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000483553.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000001180.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000035891.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000277659.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372220.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000428562.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000150301.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000160933.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000013948.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000222430.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000577893.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000402499.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000136908.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000185633.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000007276.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000287331.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000234914.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000298628.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000138057.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000190760.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000009769.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000247806.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000467675.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000214363.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000269006.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000432820.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000073209.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000015260.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000549746.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000013659.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000238071.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000318200.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000410056.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000259452.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000485129.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000192714.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000028508.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000487418.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000332582.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000368153.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000563731.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000009275.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000082680.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000064974.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000404461.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000377738.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000359039.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000411480.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000485071.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000162799.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000157321.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000378048.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000524186.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000322714.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000515785.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000197458.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000278306.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000176275.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000447152.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000132682.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000253282.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000577735.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000201550.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000286778.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000390471.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000384230.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000369470.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000057672.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000119785.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000100016.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000151231.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000382214.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000000872.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000293757.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000174072.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000419116.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000012443.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000283216.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000292526.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000151911.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000167598.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000128372.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000403817.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000422416.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000032947.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000114907.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000111535.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000377911.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000563604.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000277048.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000395550.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000179200.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000539557.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315824.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000520752.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000282942.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000041990.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000322831.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000060992.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000050811.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000019624.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000497021.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000069887.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000040018.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000071239.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000279149.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000462632.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000199989.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000419386.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000396338.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000191226.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000441522.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000570736.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000556101.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000047571.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000526837.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000513458.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000130732.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000431799.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000373783.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000050576.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000270239.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000500446.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000196665.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000393274.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000436676.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000475439.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000284296.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000176328.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000266853.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000211674.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000408501.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000196116.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000313420.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000129659.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000277289.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000289949.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000403315.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000179850.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000061171.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000242869.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000275751.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000254976.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000314493.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000573258.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000244539.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000504142.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000151051.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000052596.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000295574.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000150317.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000024040.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000165476.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000344025.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000008119.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000050054.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000495332.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000262605.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000005758.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000554142.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000142257.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000044858.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000281102.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000431431.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000429717.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000294853.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000464515.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000376545.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000267972.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000127050.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000440060.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000341678.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000291983.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000286523.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000383676.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000489605.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000222340.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000141910.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000232574.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000189772.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000425676.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000024921.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000311744.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000485740.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000150320.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000206391.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000492816.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000120747.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000260150.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000019129.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000572630.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000449424.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000561160.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000547612.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000208830.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000368349.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000159500.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000257382.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000276381.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000364853.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000427956.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000518719.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000067959.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000468917.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000134811.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000009041.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000472732.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000472349.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000408120.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000558213.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000072507.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000521906.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000162256.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000093314.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000423498.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000054513.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000235319.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000575032.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000364705.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000113533.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000029074.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000517472.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000021338.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000562229.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000373075.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000492846.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000035672.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000329931.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000471625.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000021470.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000126107.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000092648.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000075118.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000007522.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000401037.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000453634.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000138856.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000386291.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000564739.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000176031.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000484614.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000401653.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000135846.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000188631.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000552395.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000355256.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000286021.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000164780.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000509531.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000160761.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000295576.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000268742.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000069536.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000341118.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000156282.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000190395.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000564337.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000478393.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000080013.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000279879.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000493563.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000205670.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000018833.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000296684.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000321557.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000105552.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000498228.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000289309.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000140691.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000067065.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000408101.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000167668.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000114158.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000257458.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000244999.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000349754.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000143927.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000060179.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000121897.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000082668.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000296825.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000412285.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000462677.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000247474.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000287649.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000331727.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000508582.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000378859.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000224364.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000204206.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000427518.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000107558.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000105689.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000130043.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000509867.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000162393.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000041271.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000226938.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000348730.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000575823.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000154090.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000103855.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000097579.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000266371.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000436694.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000463522.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000285349.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000375087.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000231163.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000579893.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000581040.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000370448.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000030925.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000046869.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000312591.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000566470.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000529083.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000006723.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000503207.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000166731.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000378134.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000431557.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000225133.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000268641.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000024880.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000487688.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000493932.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372252.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000096427.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000537721.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000284589.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000152623.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000086442.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000243442.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000199403.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000042055.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000464522.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000544120.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000516244.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000576886.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000144655.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000256192.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000123946.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000533553.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000563951.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000344878.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000473118.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000493321.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000198352.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000174503.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000487607.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000424548.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000401240.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000163057.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000166913.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000216428.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000357824.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000011727.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000188958.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000125527.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000553558.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000439007.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000538690.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000499785.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000484893.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000450169.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000530905.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000161609.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000102746.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000360328.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000103462.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000547502.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000306486.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372495.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000442205.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000051605.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000116358.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000226761.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315868.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000268469.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000026584.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000396727.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000453317.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000296871.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000087456.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000257470.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000314812.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000278579.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000423919.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000166340.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000147025.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000270515.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000410168.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000350107.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000563712.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000029151.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000119132.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000033871.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000080274.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000438373.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000506906.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000066360.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000399567.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000541550.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000029504.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000242724.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000020913.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000503490.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000153697.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000571635.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000079816.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000035677.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000128756.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000222235.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000007088.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000221547.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000052774.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000488476.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000468337.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000499940.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000399510.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000522478.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000123932.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000141524.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000086988.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000203690.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000334083.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000180925.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000370123.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000021001.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000381393.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000009857.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000313872.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000511153.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000179948.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000283060.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000252133.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000035438.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000562419.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000336309.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000173081.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000514913.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000371869.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000031164.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000395324.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000089078.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000020992.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000502084.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000568082.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000577712.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000060332.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000125635.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000569758.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000109532.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000181677.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000072105.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000111806.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000250901.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000202658.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000443347.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000081079.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000580698.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000420490.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000574145.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000346863.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000161840.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000213843.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000376310.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000438560.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000347506.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000027642.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000371250.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000254834.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000173383.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000499950.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000273416.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000525705.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000005961.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000326919.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000240210.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000546569.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000563267.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000378334.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000433136.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000347203.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000121349.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000511307.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000175954.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000281032.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000413497.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372807.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000287828.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000146509.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000136021.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000519673.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000252074.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000013127.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000393971.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000487013.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000246074.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000062915.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000231828.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000127788.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000375415.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000134042.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000352936.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000220808.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000409241.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000178236.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000438995.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000126123.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000137763.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000168248.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000307547.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000333440.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000148588.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000191053.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000176288.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000414201.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000183204.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000363736.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000037734.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000229968.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000143143.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000242619.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000122203.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000226984.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000126634.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000477474.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000347848.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000207027.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000528786.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000386089.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000244487.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000235864.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000015778.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000442724.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000567787.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000557252.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000490683.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000159215.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000101913.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000039871.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000445233.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000473783.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000173302.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000353321.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000005802.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000143767.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000050514.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000521943.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000159203.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000089894.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000226883.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000162332.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000574241.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000158222.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000213429.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000364341.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000502818.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000331180.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000170605.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000266021.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000188579.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000110501.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000400161.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361259.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000201887.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000399791.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372316.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000165229.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000351133.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000093261.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000375143.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000184771.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000483432.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000009707.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000316708.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000267164.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000438481.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000391584.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000476514.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000062985.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000574411.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000384338.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000479350.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000196878.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000348584.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000368684.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000520982.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000378126.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000252269.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000086467.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000264238.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000412813.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000311075.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000185950.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000026624.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000265153.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000160239.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000001228.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000110961.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000100604.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000220270.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000329939.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000198724.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000245411.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000546687.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000205401.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000533596.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000325768.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000015085.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000493815.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000443498.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000165675.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000146127.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000507833.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000174276.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000550645.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000187990.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000163296.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000085764.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000514402.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000296353.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000436174.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000539738.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000560677.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000290521.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000337055.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000397133.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000152582.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000278853.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000448779.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000235597.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000322816.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000340894.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000082740.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000503939.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000239376.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000257336.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000133051.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000254004.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000446359.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000213351.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000082293.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000511111.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000020598.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000308715.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000201004.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372861.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000210915.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000336356.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000332292.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000554273.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000404652.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000442968.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000326128.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000085183.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000168032.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000398604.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000186991.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000302498.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000547601.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000395849.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000134483.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000258793.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000097946.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000004754.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000157067.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000225867.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000377867.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000195165.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000442952.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000130528.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000337502.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000538319.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000497441.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000243831.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000261389.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000303685.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000038825.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000248242.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000221932.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000234347.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000127494.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000460460.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000520964.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000388528.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000193328.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000421139.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000343606.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000460494.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000090062.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000084592.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000420791.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000495388.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000553931.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000386370.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000232597.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000068198.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000052853.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000528600.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000019817.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000066524.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000470882.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000351034.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000035195.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000309317.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000543525.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000574208.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000026430.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000045644.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000157756.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000357255.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000578092.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000300066.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000140556.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000037403.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000102906.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000300375.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000199578.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000228300.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000200252.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000213356.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000304828.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000366096.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000047131.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000540006.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000044081.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000578902.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000481398.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000514550.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000402330.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000525682.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000300233.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000498280.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000261346.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000425120.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000569849.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000531134.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000324139.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000104589.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000391648.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000290558.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000143320.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000321725.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000488755.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000460390.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000038678.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000560893.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000221272.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000181499.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000057827.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000248334.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000305060.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000022861.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000514173.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000038875.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000491623.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000197915.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000378311.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000300972.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000099874.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000061418.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000418070.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000290768.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000472164.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000296056.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000405174.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000548337.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000407291.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000131965.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000224991.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000276585.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000089884.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000421865.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000058384.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000270609.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000497867.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000261535.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000272673.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000088250.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000336101.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000522020.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000047559.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000513037.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000577149.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000231549.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000313990.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000462345.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000357386.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000155142.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000233560.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000505822.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000231895.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000360673.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000352900.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000561681.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000084693.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000459428.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000234463.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000272615.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000399893.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000301326.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000177258.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000298252.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000187888.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000052925.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000207803.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000151651.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000276149.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000216785.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000136715.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000474471.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000109889.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000452696.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000522452.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000030067.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000332916.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000185930.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000189868.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000021327.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000337247.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000555894.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000188522.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000119617.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000122161.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000224200.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000030290.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000152886.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000560756.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000331230.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000238806.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000132776.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000319283.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000316396.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000542302.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000308276.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000343561.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000228464.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000549136.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000308420.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000552065.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000016356.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000100547.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000086946.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000080273.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000301107.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000397587.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000453819.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000328777.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000346707.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000362219.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000214386.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000299207.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000515792.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000387173.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000235839.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000576820.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000408789.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000485887.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000537574.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000507235.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000326230.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000388157.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000000042.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000002235.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000551596.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000408873.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000171539.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000169361.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000456261.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000196453.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000055077.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000253386.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000073450.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000445365.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000036844.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000101959.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000280909.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000548159.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000105960.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000087567.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000328397.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000525058.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000014494.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000112841.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000045099.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000577310.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000281111.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000063973.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000344831.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000058079.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000040361.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000435384.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000084866.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000043961.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000340922.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000364075.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000052759.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000126822.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000499402.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000013980.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000530494.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000336695.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000049994.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000200541.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000512657.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000345401.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000474479.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000554523.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000001404.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000167347.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000026731.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000128224.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000417957.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000147624.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000485142.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000070258.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000325666.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000259983.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000321064.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000343149.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000005569.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000096075.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000262323.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000419884.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000307292.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000147140.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000079415.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000173826.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000432724.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000394681.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000434192.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000308263.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000040924.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000455210.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000291330.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000107641.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000272048.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000151358.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000488250.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000338007.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372009.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000421231.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000391300.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000399269.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000480657.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000107661.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000458567.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000131516.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000177207.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000213693.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000458175.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000424712.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000500110.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000129765.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000089188.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000296098.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000366782.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000203705.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000295134.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000368820.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000536831.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000421109.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000136624.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000133836.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000159970.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000196574.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000375909.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000212239.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000331236.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000163242.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000271138.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000576327.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000403758.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000411068.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000388284.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000264540.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000112591.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000181524.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000212203.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000278449.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000546556.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000403145.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000555763.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000565353.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000063950.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000072536.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000124297.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000543581.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000189752.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000291394.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000302110.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000234555.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000321497.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000112388.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000570947.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000370929.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000167486.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000044171.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000084442.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000348371.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000540101.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000386677.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000532164.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000093493.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000544122.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000132702.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000140908.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000482071.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000173704.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000466665.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000519491.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000549766.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000199125.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000404839.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000400886.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315249.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000229599.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000424293.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000099828.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000074458.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000260106.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000514118.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000284375.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000051403.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000415461.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000330974.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000532390.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000255827.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000349794.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000371326.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000399865.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000189365.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000365305.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000417804.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000278032.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000103904.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000390201.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000300341.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000511324.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000027326.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000048133.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000247428.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000091885.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000462527.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000087574.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000331475.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000255573.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000149962.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000091619.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000530383.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000114861.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000568623.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000309610.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000536605.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000117836.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000150361.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000267643.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000387824.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000307438.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000440045.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000575577.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000299000.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000043944.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000240119.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000029437.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000157365.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000285114.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000144251.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000465463.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000355869.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000442456.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000513968.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000573784.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000216398.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000513115.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000381332.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000572453.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000411277.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000112129.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000192247.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000498537.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000162257.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000099081.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000351130.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000076351.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000480917.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000423048.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000572229.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000335233.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000060989.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000383445.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000468993.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000402795.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000404922.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000167356.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000380754.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000074583.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000119233.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000245426.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000255633.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000241326.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000182874.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000567565.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000566498.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000188414.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000377723.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000001818.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000364596.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000177758.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000401860.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000158279.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000025057.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000366111.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000248468.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000370103.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000581180.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000573406.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000502979.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000146614.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000013445.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000554607.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000291412.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000242060.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000115088.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000100083.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000447553.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000175611.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000025096.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000442536.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000495808.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000228541.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000408480.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000013923.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000317033.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000304924.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000270908.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000241010.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000289640.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000011347.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000245006.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000053058.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000045550.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000512394.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000452178.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000316365.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000464526.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000219647.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000556073.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000301506.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000347216.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000532552.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000032597.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000535183.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000178078.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000346934.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000158660.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000528084.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000351554.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000332067.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000578878.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000532732.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000163640.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000488360.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000327777.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000400347.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000122388.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000477087.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000325991.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000313214.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000103307.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000254164.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000291655.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000153300.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000007333.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000448861.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000475798.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000499903.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000094336.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000017482.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000475906.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000347766.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000055244.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000325157.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000305480.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000227599.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000201207.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000439188.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000292685.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000197245.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000388130.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000497928.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000474253.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000412247.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000131280.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000094025.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000543531.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000425475.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000123137.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000083452.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000568131.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000419144.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000502852.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000495680.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000558642.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000032610.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000517029.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000130966.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000265063.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000318022.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000066508.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000165638.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000360983.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000074656.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000489861.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000090659.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000094858.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361244.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000142722.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000309692.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000086001.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000496571.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000473370.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000381968.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000293909.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000015497.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000252216.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000263866.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000579759.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000570001.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000143582.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000178683.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000511463.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000090616.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000485483.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000357109.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000360350.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000567220.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000294264.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000346687.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000035795.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000369221.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000557896.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000089296.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000578344.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000388487.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000023121.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000272188.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000420230.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000283190.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000210855.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000199158.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000230362.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000092339.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000036693.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361172.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000307989.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000270785.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000476652.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000425690.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000504498.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000137362.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000196053.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000475593.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000003837.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000336044.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000453731.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000419080.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000153832.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000267127.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000513056.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000098283.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000391735.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000033055.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000185768.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000240800.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000031980.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000357322.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000422536.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000555597.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000337042.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000483965.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000527054.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000503518.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000094666.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000504167.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000146193.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000526940.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000506085.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000227326.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000486968.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000492911.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000240213.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000095908.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000007888.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000465986.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000341804.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000020371.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000497907.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000223747.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000252048.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000480643.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000580975.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000230268.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000262360.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000531852.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000381870.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000480345.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372067.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000006871.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000362951.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000081691.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000313627.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000502854.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000492817.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000497158.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000145512.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000187791.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000347982.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000487282.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000104486.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000536195.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000179653.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000084674.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000059352.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000156416.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000451463.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000485219.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000169757.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000548246.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000078032.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000336404.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000130030.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000353136.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000098674.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000321798.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000153520.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000403389.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000501116.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000376321.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000488785.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000264497.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000416911.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000341635.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000296521.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000259879.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000161060.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000050304.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000299468.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000230432.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000149304.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000048034.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000173640.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000268944.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000111910.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000370325.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000018158.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000085247.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000229274.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000094185.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000165795.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000573536.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000119166.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000484627.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000401820.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000396568.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000004338.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000042779.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000005513.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000423773.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000346589.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000390401.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000155199.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000568981.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000383039.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000395644.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000373289.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000142742.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000356733.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000346275.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000259571.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000081860.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000435751.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000316666.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000475660.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000048653.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000127474.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000136681.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000531710.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000086439.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000015663.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000288933.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000257529.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000566920.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000008708.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000185903.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000423506.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000465545.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000298722.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000347836.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000448786.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000347276.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000398905.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000367205.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000285957.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000498535.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000339468.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000017708.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000061233.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000005124.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000289610.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000203845.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000006180.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000201418.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000330986.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000079927.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000355106.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000576607.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000570394.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000064693.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000261072.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000211042.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000257053.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000293789.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000351530.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000089395.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000132791.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000113720.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000510254.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000441873.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000518725.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000051476.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000168096.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000387328.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000560879.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000223764.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000496722.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000182423.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000558927.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000399921.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000244019.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000480015.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000561806.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000281878.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000517135.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000222054.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000518899.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000231236.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000216576.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000441411.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000570834.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000348483.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000212122.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000267343.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000552221.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000108244.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000490624.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000285421.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000035526.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000040602.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000020470.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000375409.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000212605.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000325919.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000360393.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000369128.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000304390.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000462640.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000425404.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000249815.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000143824.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000139072.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000532381.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000182362.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000174103.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000547421.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000130812.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315338.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000093771.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000541478.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000112378.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000110313.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000178807.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000044478.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000150726.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000281447.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000211317.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000522315.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000316237.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000504711.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000236338.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000430640.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000384465.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000058483.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000177842.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000397639.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000552545.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000330432.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000312416.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000405811.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000082715.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000552672.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000343248.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000383726.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000394050.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000135872.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000478614.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000319617.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000078522.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000379841.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000476939.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000211120.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000439426.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000267690.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000347664.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000119402.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000564940.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000353807.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000190841.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000202926.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000167952.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000454907.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000048320.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000419601.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000310302.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000570810.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000407574.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000419228.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000197499.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000223123.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000012179.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000025508.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000225670.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000038714.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000137028.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000284365.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000412338.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000359277.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000240565.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000103257.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000576566.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000293071.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000302520.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000364587.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000570138.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000313999.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000351967.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000177811.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000249534.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000399349.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000149446.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000028532.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000445135.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000074037.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000168260.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000375554.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000557517.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000117525.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000551957.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000102848.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361687.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000445662.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000565374.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000464006.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000402788.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000108130.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000055840.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000064902.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000145620.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000199247.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000432993.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000522637.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000191301.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000163255.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000190783.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000139169.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000300124.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000054277.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000216581.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000347335.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000561214.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000006593.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000355325.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000235200.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000462728.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000463825.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000548766.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000549687.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000150487.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000220214.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000363902.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000453622.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000097156.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000138040.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000472610.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000194297.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000489764.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000185240.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000367313.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000029558.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000397403.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000487438.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000247839.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000175804.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000475389.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000068628.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000413126.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000353231.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000574297.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000540581.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361029.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000524245.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000565650.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000233834.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000081594.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000209868.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000269926.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000134066.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000276434.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361400.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000578131.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000230450.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000458313.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000476664.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000321718.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000184700.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000385912.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000174511.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000097589.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000171483.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000096505.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000545039.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000161169.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000250368.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000405223.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000459733.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000374052.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000159774.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000017089.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000349199.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000135076.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000114389.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000339120.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000573924.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000186797.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000064146.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000022892.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000264771.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000311190.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000364243.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000097230.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000068120.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000066144.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000352533.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000526800.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000080429.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000174420.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000018014.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000477025.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000038726.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000483476.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000113736.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000540174.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000397327.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000287366.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000126766.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000370478.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000204757.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000078929.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000421902.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000485489.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000303549.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000193720.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000525373.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000561187.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000057812.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000224907.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000039267.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000445722.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000575970.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000374041.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000195406.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000245431.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000160726.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000008676.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000099913.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000479630.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000518034.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000208107.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000504125.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000204137.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000564003.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000237984.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000575012.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000296994.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000260547.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000416104.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000468351.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000527025.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000213361.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000183973.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000089273.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000308235.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000505447.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000480757.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000207886.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000051938.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000404297.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000325907.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000437609.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000553965.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000493612.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000013296.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000242034.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000233771.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000064116.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000152281.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000026017.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000017272.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000230150.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000496081.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000038465.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000437618.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000007274.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000441824.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000289941.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000519874.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000321314.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000567383.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000287527.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000057124.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000572525.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000314108.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000374115.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000197175.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000489924.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000227048.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000145665.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000308160.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000467315.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000109797.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000310564.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000308889.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000170914.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000461802.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000020820.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000000715.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000185890.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000026386.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000097249.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000560470.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000159898.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000410583.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000185922.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000259049.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000022420.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000338191.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000150098.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000550365.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000168243.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000087617.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000388854.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000045835.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000209062.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000256518.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000183949.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000359540.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000580029.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000352111.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000421822.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000220819.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000398554.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000442695.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000034222.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000120099.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000449708.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000040064.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000553361.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000524627.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000513064.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000559454.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000517515.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000275755.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000115618.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000327306.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000105655.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000412876.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000085665.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000094021.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000074030.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000205300.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000404546.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000130522.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000004936.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000005586.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000250917.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000416059.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000289960.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000293751.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000375530.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000070493.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000529668.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000146149.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000117674.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000503106.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000512911.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000235124.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372764.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000360400.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000545550.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000234889.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000268000.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000503278.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000506552.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000469719.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000563912.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000142240.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000293474.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000194532.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000087483.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000458388.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000168683.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000146465.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000197125.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000351335.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000188078.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000378795.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000496954.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000124614.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000410855.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000173091.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000435671.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000022775.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000423613.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000234251.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000080395.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000125700.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000116861.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000556608.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000340934.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000360700.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000408621.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000139549.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000176017.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000303726.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000004700.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000525381.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000238602.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000134074.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000264628.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000080646.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000420339.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000101172.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000543347.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000077681.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000311846.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000393557.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000346482.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000258382.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000451102.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000248678.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000508538.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000536933.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000208135.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000318171.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000325664.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000156754.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000011760.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000322845.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000120478.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000424219.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000531707.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000446958.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000209664.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000334769.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000070190.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000043816.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000186427.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000578993.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000538153.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000572802.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000550444.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000210002.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000150649.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000436696.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000323496.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000377635.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000025595.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000114972.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000481064.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000069773.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000431827.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000508303.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000255279.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000530766.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000411082.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000415828.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000426815.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000466988.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000539355.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000272253.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000083065.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000231097.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000333430.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000288765.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000053345.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000482784.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000487151.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000048739.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000190218.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000137395.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000347254.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000295768.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000175438.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000577182.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000160057.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000055223.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000550173.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000205570.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000405047.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000505156.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000045392.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000377949.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000034874.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000240646.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000552837.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000124876.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000050159.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000232849.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000246590.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000515540.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000232148.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000452881.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000156076.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000070254.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000259216.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000234349.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000285388.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000182923.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000050025.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000082696.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000509451.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000530261.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000073445.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000082252.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000370210.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000076765.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000483328.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000532747.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000539636.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000360877.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000295340.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000407861.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000426182.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000203488.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000249757.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000353658.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000236904.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000020247.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000063240.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000028881.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000514787.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000294908.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000344816.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000293124.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000407067.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000247264.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000473415.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000572574.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000190097.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000051157.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000270354.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000052869.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000566414.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000314016.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000455772.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000032673.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000561491.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000398309.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000028246.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000016848.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000394659.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000252127.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000086220.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000523292.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000434915.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000303069.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000518889.jpg 
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000209829.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000432441.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000348793.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000101656.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000121904.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000187362.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000542938.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000320972.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000296988.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000305632.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000403432.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000463751.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000475510.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000357870.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000371699.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000036077.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000062398.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000040517.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000167501.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000419158.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000230175.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000050434.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000333167.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000554291.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000058157.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000496252.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000059044.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000048956.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000157960.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000007570.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000170519.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000093887.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000158160.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000573778.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000547000.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000451084.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000072811.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000003661.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000364703.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000535536.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000055299.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000161517.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000402926.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000498738.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000156747.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000486521.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000479149.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000560744.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000451087.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000417055.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000136271.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000181745.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000173166.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000228214.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000457882.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000028713.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000209685.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000422375.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000406489.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000127153.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000412916.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000481670.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000354929.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000479617.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000117645.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000230903.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000342060.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000134198.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000178691.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000285751.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000345353.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000503431.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000490711.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000223569.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000443688.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000517315.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000208236.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000567444.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000189919.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000300303.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000396779.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000001083.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000037846.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000561014.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000210404.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000193498.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000140006.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000528980.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000506279.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000324527.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000445334.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000514914.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000223198.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000551650.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000166837.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000563849.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000125072.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000035974.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000551315.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000479280.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000171050.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000081827.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000245201.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000124647.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000135023.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000477389.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000193349.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000185781.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000429281.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000172995.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000386277.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000460675.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000343692.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000057244.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000393226.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000055295.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000172004.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000020748.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000383443.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000036990.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000338419.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000435136.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000503972.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000212766.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000557117.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000081100.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000009395.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000358254.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000252813.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000515350.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000234526.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000278359.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000320246.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000441854.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000548464.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000355430.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000107542.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000330050.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000005107.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000006854.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000546292.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000190463.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000163798.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000135763.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000113579.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000096183.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000074478.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000284286.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000128034.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000084735.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000408930.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000101985.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000006771.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000127585.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000286858.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000511403.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000492041.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000092894.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000353320.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000220176.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000557114.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000130645.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315744.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000087327.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000089225.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000450790.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000100850.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000011198.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000186929.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000375341.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000028290.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000299288.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000188173.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000019763.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000066821.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000265050.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000543364.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000037655.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000455044.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000258905.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000036765.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000330023.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000515444.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000030012.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000367128.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000046736.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000006005.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000360660.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000464390.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000265648.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000012120.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000546325.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000149196.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000133470.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000159260.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000453328.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000142249.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000133690.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000294373.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000343803.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000541486.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000517819.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372983.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000015278.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000299640.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000268435.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000280736.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000108193.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000359128.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000467296.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000318566.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000244737.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000344094.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000210374.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000434900.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000559051.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000489209.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000502558.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000233727.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000563683.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000449075.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000048419.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000533003.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000425609.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000023807.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000433483.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000579471.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000333556.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000145073.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000488979.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000134096.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000383435.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000096241.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000511407.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000420827.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000130921.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000579158.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000267321.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000518027.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000120874.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000192153.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000379022.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000312868.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000240754.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000316189.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000092683.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000458663.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000132719.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372433.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000293468.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000274240.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000176857.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000326082.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000036561.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000247234.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000338802.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000209018.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000520996.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000117543.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000503522.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000111032.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000172513.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000314935.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000016977.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000011320.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000370090.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000335579.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000250594.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000163219.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000296775.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361106.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000326256.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000429718.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000444955.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000150703.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000157124.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000563403.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000125167.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000413736.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000280688.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000304834.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000554934.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000061564.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000119476.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000058141.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000017923.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000126145.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000467157.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000403975.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000285212.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000198992.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000037740.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000260048.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000508167.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000399830.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000428985.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000035887.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000393284.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000266117.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000545129.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000536429.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000406976.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000413538.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000460379.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000364073.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000515176.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000324322.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000261712.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000310407.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000017182.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000456756.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000400887.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000215482.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000364884.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000077396.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000060090.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000441974.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000259513.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000384503.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000002613.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000490878.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000407521.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000210777.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000332351.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000036450.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000092248.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000177861.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000476810.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000119911.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000022797.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000359727.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000047149.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000034535.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000036124.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000462327.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000233825.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000489909.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000121875.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000542101.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000408818.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000035594.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000451435.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000025386.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000102928.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000042834.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000536028.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000536369.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000086215.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000392555.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000131386.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000383289.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000459645.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000353989.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000145408.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000012014.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000397117.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000060823.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000516813.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000392260.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000456483.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000092644.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000513098.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000029577.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000150365.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000444152.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000397069.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000425905.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000557150.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000287574.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000090956.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000518729.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000343976.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000495348.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000297980.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000110138.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000198043.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000245049.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000556083.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000025603.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000011081.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000102411.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000188752.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000157099.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000102497.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000015994.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000531973.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000548930.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000360851.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000461715.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000569839.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000512468.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000134688.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000198271.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000206705.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000349098.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000158466.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000108353.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000498453.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000210981.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000362482.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000270721.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000134167.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000222559.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000391392.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000441543.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000453756.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000281829.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000077821.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000381134.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000164548.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000215633.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000453438.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000495376.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000534210.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000356949.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000383464.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000294839.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000168692.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000349489.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000290981.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000106206.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000166165.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000157944.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000220704.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000449828.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000267725.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000231362.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000446322.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000072155.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000569070.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000492349.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000120129.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000064332.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000271429.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000251404.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000213421.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000219329.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000343774.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000279305.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000337551.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000080974.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000128291.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000217989.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000301908.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000383389.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000395364.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000134112.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000468612.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000127702.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000267408.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000491223.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000146272.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000225791.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000035062.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000214604.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000116017.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000097656.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000153510.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000218996.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000049667.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000291589.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372265.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000120850.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000269873.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000097693.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000371957.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000084540.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315384.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000457718.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000406616.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000441008.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000003817.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000445211.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000018903.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000485509.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000359219.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000242911.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000569926.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000320899.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000520812.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000290896.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000261180.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000522015.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000195353.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000189427.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000251140.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000251824.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000257359.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000261940.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000404263.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000387216.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000093120.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000567053.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000303197.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000539883.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000199236.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000047740.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000468483.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000079261.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000320743.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000230881.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000233315.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000468604.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000340697.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000422095.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000381615.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000294883.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000487774.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000526394.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000183348.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000567708.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000280918.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000471175.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000511145.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000379554.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000160556.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000092478.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000111801.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000180524.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000007155.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000432588.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000400554.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000372203.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000495090.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000405279.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000128669.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000006579.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000509819.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000496324.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000238090.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000347210.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000292663.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000308388.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000258346.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000080340.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000173500.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000227985.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000480317.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000088848.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000047873.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000493753.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000572555.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000333000.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000116574.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000502599.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000512276.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000088748.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000568587.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000017198.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000577926.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000425388.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000560700.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000494427.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000040896.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372147.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000243626.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000473215.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000061025.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000386164.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000424333.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000549230.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000225177.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000083476.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000160393.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000304698.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000078188.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000301799.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000421875.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000157053.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000071850.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000563311.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000208516.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000121154.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000364079.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000011262.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000438535.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000175597.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000178028.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000117869.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000173400.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000379667.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000063525.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000003125.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000235902.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000222635.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000483994.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000075372.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000420826.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000074059.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000125458.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000217269.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000465267.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000147556.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000377012.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000536006.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000273556.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000398188.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000034121.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000278601.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000465552.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000444913.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000506587.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000161308.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000070987.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000131207.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000355228.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000225617.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000492196.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000352005.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000417217.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000492885.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000356145.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000357526.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000169808.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000176841.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000534983.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000522371.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000264382.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000518197.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000529515.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000064621.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000434930.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000075905.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000410301.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000154883.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000283286.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000074492.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000063563.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000409627.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000076431.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000327388.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000175785.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000178746.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000569249.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000271266.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000266727.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000061316.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000123415.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000167159.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000425973.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000462244.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000534041.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000124759.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000265547.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000497626.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000493751.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315281.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000070626.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000213592.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000288824.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000565273.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361321.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000052949.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000401850.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000434187.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000371472.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000038449.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000571893.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000527960.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000019324.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000321024.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000429996.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000374677.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000438432.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000358090.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000119404.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000476215.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000563882.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000015978.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000262394.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000058146.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000263083.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000410337.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000371958.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000267411.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000193494.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000024601.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000076034.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000318138.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000498627.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000363845.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000184978.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000262425.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000343571.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000140983.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000517570.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372043.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000433963.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000191981.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000108129.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000329456.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000150686.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000343187.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000028978.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000234910.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000108315.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000570349.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000414487.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000579548.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000256301.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000011122.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000209441.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000323186.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000199346.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000507167.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000439907.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000428754.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000354527.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000006949.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000491168.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000093686.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000044294.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000302155.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000395997.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000526732.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000514600.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000521141.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000059973.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000175082.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000208311.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000421209.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000303738.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000064088.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000489382.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000081004.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000309005.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000004108.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000400238.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000560463.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000417520.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000289263.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000010217.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000515266.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000252968.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000460682.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000112022.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000268854.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000449490.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000151000.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000449454.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000434510.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000112093.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000423740.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000105623.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000323766.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000281950.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000422545.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000188534.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000068933.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000243950.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000200143.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000117704.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000300701.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000571857.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000338159.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000455355.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000128059.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000367082.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000202622.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000292988.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000269815.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000186822.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000188192.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000544611.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000422280.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000459721.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000392251.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000111207.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000066523.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000284749.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000032735.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000210988.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000078701.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000396350.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000528578.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000513946.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000418226.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000125778.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000239975.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000154798.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000412419.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000054679.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000093483.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000578418.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000018224.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000394801.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000117535.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000516998.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000170147.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000411070.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000555669.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000538819.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000221350.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000438567.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000013031.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000575287.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000517596.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000091280.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000333683.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000545583.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000537604.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000344498.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000106338.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000352302.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000098328.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000113413.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000366714.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000569997.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000528062.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000352000.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000478077.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000049810.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000537007.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000133343.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000109916.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000343662.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000038238.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000440539.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000214199.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000026432.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000327047.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000399991.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000285651.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000451263.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000183407.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000269089.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000388014.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000440124.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000400528.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372300.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000310757.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000335507.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315467.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000018480.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000174888.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000538364.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000339368.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000052947.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000436511.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000103579.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000215906.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000136740.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000433845.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000056267.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000565395.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000405432.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000444236.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000540669.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000131969.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000051674.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000237399.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000262587.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000390689.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000508302.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000433924.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000380011.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000097767.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000417281.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000080357.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000051054.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000160166.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000371694.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000457097.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000110999.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000239157.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000035474.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000265950.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000215303.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000422706.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000453104.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000433277.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000400048.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000449749.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000516846.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000023023.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000278095.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000154419.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000014941.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000380639.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000047419.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000445812.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000457322.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000148077.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000426987.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000455966.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000138937.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000113801.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000034904.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000465489.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000198625.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000009426.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000352491.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000249884.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000104691.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000546219.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000550400.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000035705.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000348216.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000186296.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000516641.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000154053.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000165937.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000415393.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000120411.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000067569.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000179808.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000114090.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000391774.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000196046.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000479659.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000097899.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000332461.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000450282.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000572095.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000146190.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000476417.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000427160.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000197022.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000524656.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000474078.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000516786.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000323682.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000314515.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000103306.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000290659.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000291528.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000427612.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000342958.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000287967.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000038685.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000290477.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000150292.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000032364.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000234990.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000062858.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000298738.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000107375.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000532963.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000543897.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000138784.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000279696.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000070865.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000326542.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000230619.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000399212.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000192834.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000551713.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000557780.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000462953.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000022874.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000076873.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000197745.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000280515.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000480583.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000194440.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000530630.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000262895.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000295440.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000382374.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000019446.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000328372.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000416145.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000035894.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000038118.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000421437.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000153344.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000479057.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000533032.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000114414.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000158440.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000351057.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000306383.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000420882.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000502798.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000451951.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000455624.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000214853.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000382309.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000155087.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000037751.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000332087.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000160031.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000483723.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000133412.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000232453.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000397719.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000122962.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000463913.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000083097.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000046433.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000443303.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000275429.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000233370.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000567801.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000254609.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000028820.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000439651.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000466585.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000301928.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000558809.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000428079.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000566298.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000256852.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000329542.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000293505.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000009647.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000035313.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000462982.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000185904.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000485155.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000423830.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000442149.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000331648.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000218224.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000536375.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000108503.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000367619.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000218294.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000318813.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000422240.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000473057.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000524011.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000153973.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000529762.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000349414.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000429074.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000348488.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000238065.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000026640.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000228411.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000513283.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000252716.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000417854.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000146123.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000295713.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000056437.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000297425.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000552186.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000399988.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000506444.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000413321.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000170194.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000478184.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000198943.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000287436.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000559884.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000529597.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000231582.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000519460.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000192007.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000355240.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000477288.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000226408.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000168817.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000022176.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000346932.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000482585.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000331937.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000134760.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000005535.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000219820.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000163041.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000198959.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000189997.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000132554.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000026348.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000049633.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000218476.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000350084.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000437331.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000097278.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000320490.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000229304.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000224675.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000534876.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000113403.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000402922.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000029573.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000254277.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000119113.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000240049.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000307074.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000256775.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000116279.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315350.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000398440.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000025807.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000527845.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000159662.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000074309.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000183657.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000309424.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000298316.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000558587.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000031650.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000306553.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000301155.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000263011.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000280347.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000497555.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000444565.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000023774.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000191296.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000451683.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000103603.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000409125.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000540782.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000046323.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000134738.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000000589.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000326420.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000072621.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000407868.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000124215.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000331386.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000049761.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000169712.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000220310.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000402723.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000427193.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000486114.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000062298.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000413043.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000021900.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000468933.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000480664.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000053626.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000209290.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000194595.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000232511.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000058737.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000078093.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000012991.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000018444.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000391154.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000348400.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000091654.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000364256.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000003794.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000147721.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000320796.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000412531.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000300867.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000495020.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000458750.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000100811.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000154515.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000112212.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000502318.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000138656.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000440562.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000442286.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000445055.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000453722.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000294030.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000459153.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000188421.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000478742.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000248019.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000521613.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000063602.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000066027.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000138246.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000289716.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000076334.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000024629.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000379659.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000426275.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000130839.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000316534.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000281508.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000248051.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000452721.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000344059.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000296797.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000388858.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000253518.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000279818.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000357279.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000214247.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000345139.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000088815.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000267175.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000352684.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000502671.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000274651.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000271148.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000548296.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000212077.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000527220.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000143560.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000057429.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000503660.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000183409.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000559417.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000424135.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000037181.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000012959.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000515056.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000337044.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000559665.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000280891.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000053102.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000486788.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000018699.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000546500.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000441500.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000298172.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000362090.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000037456.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000417265.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000120157.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000424960.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000167529.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000446218.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000473214.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000221820.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000301634.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000537280.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000572767.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000245320.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000236604.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000456708.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000489763.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000577033.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000047718.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000416596.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000526723.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000511546.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000441695.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000401897.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000053183.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000079380.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000405214.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000309366.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000080652.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000400317.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000378096.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000034056.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000163155.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000269311.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000107244.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000563665.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000096288.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000215998.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000206684.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000509577.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000214421.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000097434.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000532531.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000478338.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000014549.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000456053.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000406215.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000538230.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000544121.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000289586.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000383678.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000112437.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000241503.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000073560.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000472930.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000420916.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000384995.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000096988.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000084000.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000053420.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000444386.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000492785.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000458861.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000339738.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000398726.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000423058.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000110906.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000145734.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000032203.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000483374.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000190690.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000312216.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000184499.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000198921.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000005476.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000102665.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000547227.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000502163.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000365983.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000180154.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000459757.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000165141.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000119088.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000073665.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000016502.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000154804.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000125228.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000389812.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000095959.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000306611.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000078565.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000128262.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000203928.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000173545.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361111.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000377326.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000471273.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000210190.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000250875.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000173464.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000116461.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000115241.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000380117.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000163033.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000443139.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000104002.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000093400.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000004975.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000009450.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000129571.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000309633.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000052233.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000109612.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000219771.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000393266.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000105711.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000181948.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000293200.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000416271.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000427133.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000525247.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000282768.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000423519.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000234595.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000089258.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000049473.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000308856.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000405970.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000484066.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000399177.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000427189.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000289497.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000422670.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000308504.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000391656.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000273551.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000052507.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000303024.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000321008.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000133503.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000396768.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000191874.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000377080.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000328452.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000026609.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000430670.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000229311.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000345211.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000321217.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372494.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000163127.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000126299.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000021240.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000517249.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000497674.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000016599.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000521550.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000546976.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000412621.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000376608.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000115667.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000065604.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000053629.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000171067.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000514292.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000289659.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000016238.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000239466.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000060760.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000507684.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000347747.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000546095.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000246057.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000020395.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000561121.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000530913.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000407286.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000196371.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000172716.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000537611.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000280710.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000058194.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000444869.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000345869.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000145214.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000565391.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000295303.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000428241.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000502895.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000045603.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000073588.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000284860.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000193401.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000431439.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000134503.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000194425.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000256197.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000255654.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000517617.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000323760.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000562121.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000218587.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000144114.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000510955.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000575964.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000524140.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000577364.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000354515.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000217919.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000141271.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000451679.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000408906.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000013892.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000199516.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000440840.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000157271.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000433554.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000514586.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000242513.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000083836.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000572462.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000546934.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000243491.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000127263.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000151657.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000401642.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000432732.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000317822.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000350386.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000015148.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000367336.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000097958.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000415015.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000033345.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000123511.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000207431.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000124135.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315501.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000398992.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000037624.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000572733.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000515465.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000066561.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000385454.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000556025.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000452966.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000365766.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000020965.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000501315.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000297180.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000118542.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000120776.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000383341.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000144534.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000520840.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000239575.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000030080.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000241273.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000421833.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000258628.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000249720.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000122747.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000390094.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000024721.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000447043.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000434580.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000284292.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000509675.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000140043.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000406591.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000323231.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000209733.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000283290.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000310862.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000400803.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000107963.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000449798.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000331569.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000123639.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000093476.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000500478.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000312878.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000540172.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000388267.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000221914.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000139248.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000275025.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000133698.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000160394.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000121417.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000566823.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000004972.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000071090.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000114420.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000404668.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000154000.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000116479.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000016451.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000317188.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000134856.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000414821.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000334069.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000336464.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000375205.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000443784.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000102056.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000362520.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000513219.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000488915.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315448.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000237318.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000069138.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000137004.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000508996.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000420852.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000334609.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000246701.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000568041.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000346645.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000190788.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000242757.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000494855.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000317024.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000232865.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000008016.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000532686.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000386592.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000242139.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000511724.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000076513.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000195648.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000526778.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000167906.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000141278.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000195594.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000155736.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000461751.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000502090.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000499597.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000481314.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000448114.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000103375.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000387906.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000323048.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000356660.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000053892.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000435260.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000013632.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000322429.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000387223.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000451104.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000049115.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000263014.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000045844.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000519853.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000293022.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000544334.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000555685.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000578752.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000350614.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000125556.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000027075.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000198650.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000192212.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000437303.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000086426.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000134193.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000525344.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000239861.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000574227.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000004688.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000209835.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000052703.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000451859.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000555357.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000251537.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000112517.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000243222.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000306303.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000424641.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000567205.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000192782.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000165298.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000283055.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000352703.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000246454.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000193407.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000122300.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000448078.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000040894.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000480122.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000135708.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000293785.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000350023.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000459141.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000111878.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000227227.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000054728.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000492814.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000452663.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000299409.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000545903.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000140295.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000206433.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000399665.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000216449.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000034455.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000367599.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000383494.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000132328.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000237350.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000301282.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000477867.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000373132.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000248911.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000431355.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000005713.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000409921.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000447080.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000482799.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000138316.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000307658.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000152000.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000249964.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000504813.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000219437.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000574376.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000111909.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000532003.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000494140.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000206757.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000343038.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000156832.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000370260.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000201622.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000034818.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000359105.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000061925.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000074132.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000558864.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000264948.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000450458.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000556205.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000223122.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000324614.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000170629.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000445766.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000322567.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000220915.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000386777.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000188460.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000000626.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000048504.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000028452.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000515025.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000093773.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000313162.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000272273.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000046463.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000405529.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000013783.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000067994.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000091336.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000212895.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000065889.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000338108.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000226496.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000183884.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000052257.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000054088.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000533625.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000076844.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000347612.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000146710.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000122418.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000430871.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000060093.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000091080.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000139436.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000171613.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000460972.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000349438.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000373396.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000283268.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000528033.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000226008.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000150788.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000433460.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000185472.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000572181.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000044699.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000141086.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000384015.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000242615.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000194634.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000328962.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000126974.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000261178.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000049413.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000096440.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000038662.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000569705.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000142484.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000191300.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000042008.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000472143.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000570515.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000084447.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000089378.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000560358.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000395903.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000200870.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000537128.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000146112.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000273930.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000101189.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000521405.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000390350.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000359715.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000169076.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000306619.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000110672.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000540372.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000053123.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000273684.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000237735.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000319345.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000455448.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000494810.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000052016.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000091063.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000322822.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000187249.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000500657.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000020760.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000239773.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000239943.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000567440.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000384486.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000190680.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000047603.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000489125.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000558976.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000226020.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000490012.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000536370.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000357944.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000285599.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000140929.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000337563.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000362812.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000113037.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000554537.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000182025.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000553747.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000229653.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000445675.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000216083.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000025005.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000065773.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000343506.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000435785.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000343769.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000031202.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000498942.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000347018.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000447187.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000566931.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000015140.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000086884.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000198176.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000481159.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000291582.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000237566.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000061720.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000512630.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000016761.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000252625.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000555648.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000556478.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000311471.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000089432.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000502397.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000276852.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000055868.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000195538.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000361103.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000510861.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000400945.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000271215.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000463781.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000185697.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000152933.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000019090.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000038355.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000535326.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000534081.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000374100.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000095022.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000476787.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000378701.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000488573.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000552824.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000395291.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000415179.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000290750.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000118507.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000032524.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000122953.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000300659.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000510078.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000481214.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000528731.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000357903.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000533628.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000180563.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000254535.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000206587.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000157102.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000421762.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000274917.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000578792.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000164759.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000505573.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000076962.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000046378.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000252629.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000517946.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000438226.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000127785.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000287846.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000272311.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000398666.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000467848.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000402858.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000128791.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000431199.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000496800.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000501531.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000323128.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000334006.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000307262.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000356424.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000492805.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000078490.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000196007.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000384661.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000371948.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000083110.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000580908.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000557172.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000331372.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000512194.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000410328.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000263588.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000416088.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000229740.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000203455.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000259097.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000007913.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000234794.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000542922.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000305329.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000027787.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000135399.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000190015.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000265085.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000095078.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000299082.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000348941.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000083217.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000099182.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000536653.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000355777.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000121839.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000331317.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000098721.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000572408.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000031281.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000369294.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000335325.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000542782.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000182469.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000281315.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000491793.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000233950.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000227019.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000376442.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000263973.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000409009.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000561814.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000449198.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000140513.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000354061.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000498350.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000329789.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000063478.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000443860.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000218208.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000533227.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000249482.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000477112.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000033759.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000397303.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000009400.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000178317.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000541353.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000553731.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000354369.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000163112.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000088455.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000261982.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000491058.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000245915.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000464858.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000183704.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000579457.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000144610.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000362682.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000148531.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000222548.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000044989.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000334509.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000548848.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000398031.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000358024.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000182227.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000398866.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000155270.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000309341.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000518908.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000428234.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000539075.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000408757.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000544782.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000374974.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000407386.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000029587.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000340451.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000203214.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000367095.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000075591.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000228749.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000320290.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000112085.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000087399.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000260516.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000408568.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000160278.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000287829.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000168619.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000214369.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000023893.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000492395.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000349868.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000425462.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000040757.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000545564.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000489313.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000094651.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000126065.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000239306.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000245447.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000213276.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000450959.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000201859.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000305268.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000142999.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000291675.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000470832.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000563040.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000533368.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000514254.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000556562.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000131919.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000551692.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000229270.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000453485.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000188795.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000126512.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000182334.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000092660.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000156397.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000106344.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000063399.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000330037.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000015827.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000467854.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000015818.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000075611.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000088967.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000030667.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000192810.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000103873.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000414249.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000540466.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000313916.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000329494.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000455004.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000401926.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000561512.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000463605.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000371638.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000365652.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000356623.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000492246.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000367433.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000199339.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000091527.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000203781.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000085926.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000557190.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000188945.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000319933.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000430079.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000330410.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000216161.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000337011.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000117036.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000107656.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000231747.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000450674.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000133386.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000383073.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000541635.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000358120.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000037017.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000385633.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000296731.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000558808.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000174332.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000560948.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000551243.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000409725.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000365866.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000340155.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000228551.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000490441.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000549965.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000468499.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000504810.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000370042.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000005184.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000073634.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000472833.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000498274.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000052664.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000575198.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000176993.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000000472.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000336171.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000292616.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000181256.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000346013.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000211107.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000063729.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000378712.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000156572.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000359020.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000103931.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000063355.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000500139.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000453093.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000575110.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000392637.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000241668.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000568147.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000527750.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000235517.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000275695.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000114246.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000240418.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000181873.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000529434.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000284674.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000061684.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000093333.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000058174.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000320232.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000393258.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000114458.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000065306.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315434.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000579696.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000495786.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000218093.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000282365.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000223256.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000557501.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000432016.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000102872.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000422253.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000202998.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000484760.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000112128.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000060507.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000270222.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000222842.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000456420.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000278760.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000559474.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000028071.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000297620.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000444207.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000396051.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000181799.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000122413.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000219567.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000448211.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000540187.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000217312.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000336257.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000487131.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000056326.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000180917.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000247914.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000102589.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000263474.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000005728.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000181466.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000389699.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000363581.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000024458.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000162858.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000468997.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000003703.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000564280.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000058472.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000330449.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000447765.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000131019.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000429802.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000389295.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000283380.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000147979.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000203878.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000551358.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000539673.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000449839.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000460982.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000161875.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000009941.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000318671.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000005278.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000410019.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000138054.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000551952.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000450620.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000058462.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000199575.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000100196.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000505386.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000094852.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000564659.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000365182.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000393840.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000013525.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000080172.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000437028.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000515962.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000032001.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000087328.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000507966.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000547137.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000322194.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000335589.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000058253.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000544272.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000000730.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000514321.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000473541.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000027530.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000058851.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000504977.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000579468.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000214519.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000192529.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000509388.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000250766.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000355000.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000174672.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000022213.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000037871.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000146767.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000513125.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000400166.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000068300.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000032039.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000046269.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000205253.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000256505.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000268717.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000206273.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000157767.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000539439.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000330110.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000149115.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000563938.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000187243.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000140465.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000047516.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000228309.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000462466.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000164885.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000031481.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000313491.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000124429.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000213951.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000349459.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000470885.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000129645.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000223857.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000514915.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000168544.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000350488.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000051844.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000275749.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000046508.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000328728.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000097362.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000237463.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000034873.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000433124.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000449921.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000126131.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000178796.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000340478.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000557573.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000265200.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000504541.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000157891.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000357542.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000267664.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000499007.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000159969.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000330920.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000145015.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000072632.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000377881.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000393838.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000138589.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000456695.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000193474.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000363844.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000461172.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000343753.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000558371.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000443259.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000471226.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000248457.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000484816.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000195894.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000024391.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000406103.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000132931.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000306415.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000135604.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000497348.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000195750.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000039115.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000217323.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000222903.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000495443.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000253477.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000322586.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000114419.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000523470.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000148955.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000006712.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000288579.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000402234.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000118097.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000139512.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000448694.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000101280.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000345209.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000260802.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000106736.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000555953.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000345499.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000263860.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000351793.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000314396.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000208995.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000318618.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000542694.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000312081.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000195790.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000021023.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000441432.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000330408.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000401862.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000225225.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000535307.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000199017.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000565269.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000086133.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000192660.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000562557.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000107839.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000105367.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000053893.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000307265.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000393995.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000217223.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000177714.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000573796.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000046526.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000094926.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000546742.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000421703.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000063791.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000435783.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000293324.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000053965.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000411968.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000517612.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000317732.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000546782.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000098656.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000381463.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000308730.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000345154.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000126540.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000348881.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000522779.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000222332.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000159537.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000154263.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000072004.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000296236.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000541898.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000262262.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000053990.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000162043.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000221222.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000111907.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000125051.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000230679.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000102532.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000124636.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000229427.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000285967.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000166948.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000114164.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000014681.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000298197.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000227852.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000455956.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000224509.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000133876.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000460962.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000196118.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000190853.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000050411.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000058778.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000239980.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000146742.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000332940.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000490871.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000476298.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000527691.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000362797.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000099908.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000199764.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000165464.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000122314.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000116389.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000003867.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000160703.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000438989.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000124621.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000334305.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000368949.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000111165.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000070685.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000565469.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000211282.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000125944.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000240751.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000014278.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000187286.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000074388.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000082678.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000293804.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000431067.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000182441.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000218811.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000120527.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000046975.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000080213.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000408894.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000036761.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000257136.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000020904.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000312720.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000129989.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000047204.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000464328.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000007149.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000580543.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000225075.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000122582.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000415770.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000061108.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000453959.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000203024.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000462805.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000100974.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000543985.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000178535.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000429033.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000170638.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000389404.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000464248.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000187196.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000223714.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000502644.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000425722.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000033441.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000404273.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000442614.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000127477.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000311531.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000541197.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000288860.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000303219.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000309696.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000459884.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000125100.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000282841.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000506458.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000016030.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000201764.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000535523.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000489743.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000531178.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000075319.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000188613.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000331049.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000349622.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000073832.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000483038.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000124756.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000492025.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000084783.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000328757.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000299533.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000427091.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000449433.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000505945.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000335644.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000236747.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000146672.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000012733.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000549224.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000467130.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000345559.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000549361.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000379014.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000067975.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000132223.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000546428.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000492544.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000179273.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000340472.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000550691.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000209388.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000499266.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000382668.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000098979.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000035857.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000062230.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000011796.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000452676.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000551796.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000338256.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000490199.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000326854.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000442726.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000534161.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000265851.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000176592.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000066038.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000368581.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000020571.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000303704.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000440500.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000358883.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000075405.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000207108.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000237723.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000493522.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000010039.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000079878.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000044801.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000419653.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000032760.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000144481.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000137265.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000350457.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000102644.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000502336.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000537727.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000211112.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000320554.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000235790.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000374734.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000489207.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000490927.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000578498.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000392391.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000422700.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000471567.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000452591.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000373662.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000106849.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000064240.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000122199.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000215778.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000140686.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000462663.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000305879.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000105261.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000163306.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000017282.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000439224.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000292822.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000147787.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000499134.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000092678.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000519598.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000109937.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000321887.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000068286.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000280779.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000084163.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000245660.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000294992.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000273855.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000559252.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000153574.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000570516.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000156704.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000491556.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000049881.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000468885.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000519758.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315195.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000232460.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000061693.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000004134.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000335201.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000160531.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000563762.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000486118.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000138713.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000482774.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000530470.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000533378.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000499249.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000575372.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000122047.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000258860.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000337246.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361933.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000076292.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000160190.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000144305.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000385181.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000333018.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000147453.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000048917.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000485390.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000050924.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000181518.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000460373.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000226973.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000281601.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000580294.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000574350.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000340175.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000454161.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000008599.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000463211.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000303538.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000106335.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000277025.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000376416.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000533589.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000004157.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000133090.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000033650.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000170293.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000141651.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000268363.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000318224.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000456239.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000244909.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000115182.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000207306.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000472671.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000468778.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000231450.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000142585.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000430265.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000283498.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000009668.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000147195.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000224757.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000014869.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000381382.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000517133.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000104108.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000243213.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000120441.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000468063.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000305993.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000434358.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000162358.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000385508.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000464622.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000019455.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000019028.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000488693.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000356091.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000402711.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000353001.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000465868.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000380597.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000016995.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000119452.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000502275.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000302990.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000163528.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000113890.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000238228.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000015850.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000028994.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000001573.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000154994.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000355471.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000539326.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000421952.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000556516.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000409016.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000352618.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000342521.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000560624.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000270677.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000243189.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000441216.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000112573.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000045959.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000525024.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000296105.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000039472.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000223671.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000104453.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000097592.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000001149.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000470005.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000346841.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000344279.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000037119.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000174425.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000140860.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000525439.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000032682.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000564816.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000338595.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000005601.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000301402.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000178690.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000198972.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000084936.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000246672.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000276964.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000471335.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000137538.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000188651.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000025751.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000133928.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000223496.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000240275.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000033066.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000051484.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000319024.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000581205.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000224104.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000434583.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000445019.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000529344.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000105547.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000500330.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000103837.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000388453.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000437898.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000247599.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000081484.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000230964.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000303177.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000360951.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000168706.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000393682.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000210890.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000240655.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000520232.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000303778.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000568111.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000571728.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000204536.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000069140.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000431208.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000157206.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000562045.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000017882.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000513397.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000423617.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000178175.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000171942.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000377239.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000434576.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000171717.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000383601.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000419235.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000003779.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000090349.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000303534.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000020438.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000060610.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000208844.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000303264.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000034483.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000124452.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000015157.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000236123.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000332074.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000075173.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000520430.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000192362.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000029397.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000437117.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000212322.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000107167.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000353405.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000008401.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000444926.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000527718.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000270532.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000132686.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000222407.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000519271.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000002191.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000287328.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000580284.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000057570.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000375812.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000174390.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000323853.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000544299.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000280211.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000366141.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000355183.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000512989.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000014380.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000420389.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000226173.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000351620.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000340047.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000232845.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000423508.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000368459.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000574675.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000167477.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000546226.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000347823.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000321079.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000205344.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000248163.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000486306.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000487351.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000404618.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000241355.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000357816.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000092363.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000266409.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000360772.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000305050.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000003091.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000423484.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000396295.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000492905.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000160661.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000325410.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000302405.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000006019.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000535602.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000035770.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000215982.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000268987.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000481252.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000065358.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000258433.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000078771.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000404338.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000103509.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000466007.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000527445.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000368402.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000158588.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000169356.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000436875.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000494393.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000279259.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000246197.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000272323.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000456730.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000381195.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000369503.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000424225.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000462486.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000023696.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000249668.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000286908.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000107389.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000016838.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000463876.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000084567.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000119752.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000065238.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000542910.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000505619.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000406332.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000064760.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000279279.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000278848.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000288599.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000293378.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000153307.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000470423.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000110330.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000224724.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000177489.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000541071.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000319530.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000105812.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000409784.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000238147.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000099477.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000464018.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000568630.jpg
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000248616.jpg
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000445597.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000197177.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000443713.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000020070.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000454296.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000547504.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000367362.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000478155.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000037660.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000220307.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000027696.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000334015.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000440706.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000434006.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000377406.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000489611.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000118417.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000440646.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000204887.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000424162.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000171753.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000486172.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000218119.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000409221.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000171678.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000421316.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000532209.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000300137.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000043486.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000139883.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000076249.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000536517.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000097974.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000558457.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000529939.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000396518.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000547013.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000035435.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000569565.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000343254.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000034900.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000106563.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000297108.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000515577.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000276840.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000187565.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000008131.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000007977.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000158993.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000341752.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000492378.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000257896.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000410068.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000422729.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000154587.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000310391.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000149123.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000351626.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000406129.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000146825.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000495377.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000515050.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000147758.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000345137.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000377626.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000574010.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000456908.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000478553.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000094113.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000133596.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000096351.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000139181.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000265313.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000550452.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000040620.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000550968.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000124796.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000366873.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000391688.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000362712.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000444928.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000304099.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000194555.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000455005.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000541924.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000288290.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000234697.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000129945.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000069021.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000229111.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315899.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000416730.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000212453.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000325327.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000160927.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000535578.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000138768.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000218365.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000323615.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000051326.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000232241.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000303893.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000387434.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000201970.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000535809.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000439339.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000391067.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000226377.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315601.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000035767.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000493923.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000473929.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000091105.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000031636.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000531018.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000377005.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000394715.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000532693.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000113701.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000177468.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000170974.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000561314.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000292488.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000178516.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000007288.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000454957.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000064425.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000466939.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000039502.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000283928.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000309638.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000171970.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000238528.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000525762.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000548275.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000192520.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000157170.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000184274.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000557596.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000098599.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000388846.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000460286.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000099179.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000412914.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000530099.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000493174.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000536467.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000455675.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000027972.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000273637.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000030958.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000197461.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000038574.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000190095.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000344029.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000530384.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000484075.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000099186.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000468548.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000509811.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000431655.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000306103.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000243896.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000049891.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000109939.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000286770.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000491725.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000353295.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000439658.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000069750.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000448810.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000137106.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000342933.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000119502.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000434035.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000486008.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000444294.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000260738.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000395289.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000414522.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000573291.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000203629.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000028134.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000553097.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000412136.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000148034.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000363048.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000346140.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000253341.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000032901.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000230040.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000373375.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000555009.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000388534.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000136820.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000487482.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000430583.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000074860.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000032724.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000435359.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000512648.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000209995.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000374430.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000424271.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000348045.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000120792.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000475365.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000172087.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000293733.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000293057.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000476029.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000471394.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000260823.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000573891.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000394611.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000549932.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000426376.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000153103.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000296222.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000272630.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000556278.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000540106.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000211653.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000190018.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000129663.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000381430.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000290570.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000367582.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000546983.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000440034.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000047597.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000475466.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000392144.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000550643.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000125524.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000182369.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000148898.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000077282.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000510857.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000242543.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000539937.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000146627.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000171199.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000456046.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000499093.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000370266.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000071914.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000367489.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000451940.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000493720.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000044244.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000125909.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000180779.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000531000.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000025660.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000167593.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000245067.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000568717.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000555197.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000283520.jpg 
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000089045.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000079024.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000194755.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000112232.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000006040.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000018442.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000285894.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000344888.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000357041.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000162363.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000088335.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000061953.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000431281.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000169273.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000478159.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000122934.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000120632.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000098431.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000398534.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000244299.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000021167.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000516341.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000225715.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000249356.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000353275.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000384111.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000546444.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000236370.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000235541.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000247438.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000328464.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000518716.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000180274.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000214975.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000105622.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000431901.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000472878.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000524595.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000001561.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000096732.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000536716.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000308849.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000360564.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000246478.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000466339.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000579693.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000388677.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000383470.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000011742.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000319579.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000190734.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000347620.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000515760.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000151807.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000139127.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000163012.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000307866.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000216198.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000020777.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000051089.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000052166.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000295769.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000037907.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000277955.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000330204.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000339099.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000451412.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000272008.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000109146.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000409116.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000503978.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000235747.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000116848.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000258141.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000263834.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000054358.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000099219.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000440027.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000167452.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000085192.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000352151.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000309859.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000550001.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000245462.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000240727.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000106935.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000098115.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000315792.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000441841.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000104176.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000211206.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000225238.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000460251.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000459823.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000314965.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000405574.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000255784.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000391371.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000245997.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000106375.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000297595.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000520933.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000570688.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000182775.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000001340.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000292648.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000237420.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000467803.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000492420.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000543215.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000311206.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000279838.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000516805.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000447991.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000228746.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000030066.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000557396.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000211175.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000418623.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000175948.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000553869.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000265916.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000523660.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000469480.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000030432.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000526197.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000227855.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000022488.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000084825.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000078170.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000432429.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000152465.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361656.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000059171.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000132024.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000467138.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000240173.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000056092.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000514366.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000103757.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000428739.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000208494.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000501243.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000557552.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000356622.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000127659.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000348720.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000187329.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000166482.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000192763.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000450728.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000163339.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000132706.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000155974.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000058705.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000228506.jpg 
+/nfs/project/OVIC/coco_2014_2017/val2017_images/000000499313.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000124236.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000556703.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000458768.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000021396.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000497034.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000108021.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000181805.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000262736.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000267709.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000223554.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000499845.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000499810.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000145873.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000300245.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000283977.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000147448.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000443602.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000366611.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000110618.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000276437.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000528386.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000120070.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000103863.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000409239.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000043611.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000415218.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000130586.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000022492.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000455085.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000487530.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000017944.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000571034.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000111086.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000480173.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000408817.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000067785.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000350736.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000037038.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000026201.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000503294.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000412749.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000355040.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000150024.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000527270.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000464706.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000165500.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000245266.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000578703.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000138503.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000274491.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000316147.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000336600.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000099115.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000216837.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000568982.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000102174.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000535997.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000559550.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000207507.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000053949.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000001591.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000131815.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000105455.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000421566.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000372260.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000232115.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000431472.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000288161.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000251639.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000282912.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000363927.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000321085.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000438093.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000048475.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000462565.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000241373.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000034520.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000089697.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000010014.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000464786.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000082367.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000551959.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000099546.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000332027.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000185303.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000580255.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000062877.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000315492.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000164475.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000137227.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000458240.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000008981.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000343076.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000565012.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000181714.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000394940.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000387150.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000570624.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000112065.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000207797.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000539436.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000390569.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000104906.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000043613.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000157019.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000454577.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000394328.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000133050.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000304819.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000103705.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000010705.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000120007.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000020779.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000543315.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000336587.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000041923.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000341318.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000385877.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000086877.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000068411.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000089356.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000397166.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000405306.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000083494.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000099741.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000329030.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000010935.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000199553.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000281475.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000133567.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000452312.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000332512.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000094922.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361901.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000153834.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000256180.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000113397.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000190722.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000045830.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000163076.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000189451.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000100582.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000524247.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000156740.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000452121.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000162580.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000354202.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000374806.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000534988.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000162144.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000051277.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000535506.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000308441.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000329336.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000035498.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000343643.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000090778.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000307057.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000322968.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000337497.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000019916.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000071618.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000051250.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000449432.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000343599.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000012209.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000242422.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000115070.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000425077.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000268356.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000396304.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000158548.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000503081.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000452122.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000396754.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000075434.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372428.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000374990.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000443499.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000110048.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000340368.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000080022.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000412384.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000383046.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000416885.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000504074.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000392991.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000472509.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000457262.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000037470.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000523526.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000126216.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000324203.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000100909.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000384723.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000279085.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000099416.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000227224.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000090374.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000108953.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000479129.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000191693.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000412455.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000161011.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000026630.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000081056.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000359638.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000131581.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000537506.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000270066.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000002521.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000440830.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000051628.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000397863.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000466449.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000385535.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000151938.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000225913.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000227741.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000468297.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000222825.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000161227.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000123136.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000237917.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000392612.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000239997.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000500389.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000503569.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000201934.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000520561.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000483001.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000247720.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000125208.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000489829.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000374266.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000453626.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000136926.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000368780.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000510877.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000173737.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000252220.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000381682.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000158122.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000466945.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000183843.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000242379.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000199681.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000349689.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000418175.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000066926.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000408008.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000175382.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000350230.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000490509.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000129690.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000208476.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000064409.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000513690.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000495199.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000312889.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000147545.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000374858.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000050752.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000204337.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000227940.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000520892.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000151492.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000528458.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000067867.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000575911.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000032992.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000143503.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000015687.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000448181.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000404128.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000288493.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000509227.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000256309.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000072514.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000344548.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000243857.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000228071.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000441440.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000366493.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000007125.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000535608.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000068409.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000498374.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000453341.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000444304.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000256566.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000391011.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000229329.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000380089.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000570664.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000112493.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000326243.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000123843.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000460091.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000457986.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000424792.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000225495.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000508443.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000417284.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000540529.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000019157.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000354398.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000299609.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000461805.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000525849.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000553034.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000442478.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000071123.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000174091.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000140231.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000197263.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000458781.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000159842.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000034757.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000224093.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000071918.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000569450.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000302159.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000280678.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000263796.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000408696.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000026564.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000244344.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000279391.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000460147.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000394677.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000526222.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000485758.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000185201.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000264599.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000008179.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000217219.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000002477.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000300408.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000381991.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000370798.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000150007.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000116786.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000478430.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000534669.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000285664.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000152106.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000290700.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000522665.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000156126.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000425151.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000450559.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000460621.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000539453.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000050124.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000163025.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000293554.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000143822.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000179642.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000536891.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000100977.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000113867.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000574845.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000543551.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000183914.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000308947.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000318193.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000434417.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000286994.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000374576.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000448309.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000426053.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000482719.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000007304.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000578813.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000484986.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000338019.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000027476.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000385194.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000578330.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000520787.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000115709.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000008749.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000216228.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000363887.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000322357.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000180653.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000412604.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000381257.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000452816.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000566771.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000505243.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000306755.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000146831.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000227656.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000273002.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000412639.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000500495.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000421457.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000066320.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000303667.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000374987.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000207785.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000359546.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000534887.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000364743.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000074456.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000490991.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000470298.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000139992.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000066632.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000383066.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000100329.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000520503.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000528276.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000484450.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000524775.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000581542.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000202928.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000155546.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000254638.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000117407.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000396432.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000374574.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000272566.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000262576.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000469202.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000223314.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000084097.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000569058.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000529649.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000232790.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000437947.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000337533.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000475779.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000494608.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000146981.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000429174.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000516856.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000250051.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000322141.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000192616.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000131431.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000446455.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000578570.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000216103.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000462584.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000173057.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000133636.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000539951.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000521357.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000370233.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000473050.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000344832.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000245481.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000102355.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000001554.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000488788.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000238488.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000281074.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000546987.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000014723.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000154202.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000462241.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000447242.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000372794.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000152808.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000425412.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000218268.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000527427.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000206907.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000272599.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000502012.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000534018.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000428248.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000513887.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000401446.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000480780.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000453216.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000464339.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000247317.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000061181.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000173217.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000559113.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000357899.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000369997.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000346638.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000103485.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000501824.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000292901.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000280017.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000356878.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000376751.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000386116.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000257187.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000126671.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000329717.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000520528.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000299148.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000510548.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000328289.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000467135.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000075560.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000092795.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000226400.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000323552.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000124390.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000041924.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000381914.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000313948.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000378831.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000007115.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000337133.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000162692.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000137330.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000154877.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000002988.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000428291.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000500084.jpg 
+/nfs/project/OVIC/coco_2014_2017/train2017_images/000000124601.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000558840.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000324857.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000581317.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000497466.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000463640.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000002240.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000420281.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000189694.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000005038.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000080367.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000303089.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000536656.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000346011.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000202562.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000491947.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000431742.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000343820.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000501420.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000251816.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000404517.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000522418.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000386739.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000185327.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000404805.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000330455.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000067805.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000161129.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000457616.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000238911.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000235131.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000419344.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000066397.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000213453.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000184388.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000337180.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000027353.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000153797.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000361285.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000427783.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000577065.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000025654.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000117337.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000553561.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000531070.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000393056.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000179765.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000098287.jpg +/nfs/project/OVIC/coco_2014_2017/val2017_images/000000272212.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000054355.jpg +/nfs/project/OVIC/coco_2014_2017/train2017_images/000000021232.jpg diff --git a/data/coco/coco-ovic/readme.txt b/data/coco/coco-ovic/readme.txt new file mode 100644 
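The absolute /nfs/... prefixes in ovic_val_2017_list.txt above are specific to the machine the list was generated on; only the basenames identify images. As a minimal sketch (the helper name and the working-directory assumption are ours), this mirrors how splitImage.py below extracts filenames from the list:

    import os

    def load_ovic_val_filenames(list_path="ovic_val_2017_list.txt"):
        # Keep only the basename of each absolute path, e.g. 000000021232.jpg;
        # the /nfs/... directory prefixes do not exist outside the original host.
        filenames = []
        with open(list_path, "r") as f:
            for line in f:
                path = line.strip()
                if path:
                    filenames.append(os.path.split(path)[1])
        return filenames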
index 0000000..f4c042b
--- /dev/null
+++ b/data/coco/coco-ovic/readme.txt
@@ -0,0 +1,8 @@
+# Dataset Preparation
+1. Download the [COCO-2017 dataset](http://cocodataset.org/#download) for the Object Detection task into ./data/coco/coco-2017.
+
+2. Run genMinval8000.py in ./data/coco/coco-ovic to generate the annotation files (i.e., ./annotations/instances_train_L.json and ./annotations/instances_minval_L.json) for training and validation according to ovic_val_2017_list.txt, as required by [OVIC](https://github.com/tensorflow/models/blob/master/research/object_detection/data/mscoco_minival_ids.txt).
+
+3. Run splitImage.py in ./data/coco/coco-ovic to split the images according to ovic_val_2017_list.txt. The split images are stored in ./data/coco/coco-ovic/val_ovic and ./data/coco/coco-ovic/train_ovic.
+
+4. Run removeSmallObj.py in ./data/coco/coco-ovic to generate annotation files (e.g., ./annotations/instances_train_L_256.json) that keep only objects with area >= 256, i.e., with small objects removed.
diff --git a/data/coco/coco-ovic/removeSmallObj.py b/data/coco/coco-ovic/removeSmallObj.py
new file mode 100644
index 0000000..545b134
--- /dev/null
+++ b/data/coco/coco-ovic/removeSmallObj.py
@@ -0,0 +1,72 @@
+import os
+import numpy as np
+import json
+import sys
+
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+from pycocotools import mask as COCOmask
+
+out_root = "/data2/chengxiang/models/research/object_detection/data/coco/coco-ovic"
+minval_json = os.path.join(out_root, "annotations/instances_minval_L.json")
+train_json = os.path.join(out_root, "annotations/instances_train_L.json")
+train_json_128_f = os.path.join(out_root, "annotations/instances_train_L_128.json")
+train_json_256_f = os.path.join(out_root, "annotations/instances_train_L_256.json")
+train_json_512_f = os.path.join(out_root, "annotations/instances_train_L_512.json")
+train_json_1024_f = os.path.join(out_root, "annotations/instances_train_L_1024.json")
+
+
+train_json_128 = {"images":[], "type": "instances", "annotations": [],
+                  "categories": []}
+train_json_256 = {"images":[], "type": "instances", "annotations": [],
+                  "categories": []}
+train_json_512 = {"images":[], "type": "instances", "annotations": [],
+                  "categories": []}
+train_json_1024 = {"images":[], "type": "instances", "annotations": [],
+                   "categories": []}
+
+train_f = open(train_json,"r")
+train_dict = json.load(train_f)
+train_f.close()
+_COCO = COCO(train_json)
+
+for i,item in enumerate(train_dict["images"]):
+    if(i % 1000==0):
+        print(i)
+    index_img = item["id"]
+    annIds = _COCO.getAnnIds(imgIds = index_img, iscrowd = None)
+    objs = _COCO.loadAnns(annIds)
+
+    for obj in objs:
+        if(float(obj['area'])>=128):
+            train_json_128['annotations'].append(obj)
+        if(float(obj['area'])>=256):
+            train_json_256['annotations'].append(obj)
+        if(float(obj['area'])>=512):
+            train_json_512['annotations'].append(obj)
+        if(float(obj['area'])>=1024):
+            train_json_1024['annotations'].append(obj)
+
+train_json_128["images"] = train_dict["images"]
+train_json_128["categories"] = train_dict["categories"]
+train_json_256["images"] = train_dict["images"]
+train_json_256["categories"] = train_dict["categories"]
+train_json_512["images"] = train_dict["images"]
+train_json_512["categories"] = train_dict["categories"]
+train_json_1024["images"] = train_dict["images"]
+train_json_1024["categories"] = train_dict["categories"]
+
+with open(train_json_128_f,"w") as f:
+    json.dump(train_json_128,f)
+    print("write train_json_128 end!")
+with open(train_json_256_f,"w") as f:
+
json.dump(train_json_256,f) + print("write train_json_256 end!") +with open(train_json_512_f,"w") as f: + json.dump(train_json_512,f) + print("write train_json_512 end!") +with open(train_json_1024_f,"w") as f: + json.dump(train_json_1024,f) + print("write train_json_1024 end!") + + diff --git a/data/coco/coco-ovic/splitImage.py b/data/coco/coco-ovic/splitImage.py new file mode 100644 index 0000000..7d4ba9a --- /dev/null +++ b/data/coco/coco-ovic/splitImage.py @@ -0,0 +1,65 @@ +import os +import shutil +import numpy as np +#import cv2 +import json +import sys + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from pycocotools import mask as COCOmask + + + +""" +out_json_dict = {"images":[], "type": "instances", "annotations": [], + "categories": []} +""" +# ann_root = "/nfs/project/OVIC/coco_2014_2017/" +# images_root = "/nfs/project/OVIC/coco_ovic/" +coco_images_root = "./../coco-2017/" +ovic_images_root = "./" +now_val_image_split = os.path.join(coco_images_root, "val2017") +now_train_image_split = os.path.join(coco_images_root, "train2017") + +out_val_image_split = os.path.join(ovic_images_root, "val_ovic") +out_train_image_split = os.path.join(ovic_images_root, "train_ovic") + +minval_txt_f = open(os.path.join("ovic_val_2017_list.txt"), "r") + +image_ids = [] +for line in minval_txt_f.readlines(): + temp = line.strip() + _,fname = os.path.split(temp) + image_ids.append(fname) + +c = 0 + +for root,dirs,files in os.walk(now_train_image_split): + for file in files: + if(not file.lower().endswith(".jpg")): continue + src_image = os.path.join(root,file) + if(file in image_ids): + c += 1 + dst_image = os.path.join(out_val_image_split,file) + else: + dst_image = os.path.join(out_train_image_split,file) + if(c%100==0): + print(c) + #print(dst_image) + shutil.move(src_image,dst_image) + + + +for root,dirs,files in os.walk(now_val_image_split): + for file in files: + if(not file.lower().endswith(".jpg")): continue + src_image = os.path.join(root,file) + if(file in image_ids): + c += 1 + dst_image = os.path.join(out_val_image_split,file) + print(c) + else: + dst_image = os.path.join(out_train_image_split,file) + #print(dst_image) + shutil.move(src_image,dst_image) diff --git a/data/coco/coco-ovic/tflite_test.py b/data/coco/coco-ovic/tflite_test.py new file mode 100644 index 0000000..9d6d60f --- /dev/null +++ b/data/coco/coco-ovic/tflite_test.py @@ -0,0 +1,75 @@ +import numpy as np +import tensorflow as tf +import cv2 +import os +from coco import COCODetection +import pickle + + +COCOroot = "/data2/chengxiang/models/research/object_detection/data/coco/coco-ovic/" + + +def evalImage_fix(model_path, save_path, num_samples): + + # Load TFLite model and allocate tensors + interpreter = tf.contrib.lite.Interpreter(model_path) + interpreter.allocate_tensors() + + # Get input and output tensors + input_details = interpreter.get_input_details() + output_details = interpreter.get_output_details() + + all_boxes = [[np.array([]) for _ in range(num_samples)] for _ in range(80)] + + class_90_80_map = {1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 13: 11, 14: 12, 15: 13, 16: 14, 17: 15, 18: 16, 19: 17, 20: 18, 21: 19, 22: 20, 23: 21, 24: 22, 25: 23, 27: 24, 28: 25, 31: 26, 32: 27, 33: 28, 34: 29, 35: 30, 36: 31, 37: 32, 38: 33, 39: 34, 40: 35, 41: 36, 42: 37, 43: 38, 44: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 52: 46, 53: 47, 54: 48, 55: 49, 56: 50, 57: 51, 58: 52, 59: 53, 60: 54, 61: 55, 62: 56, 63: 57, 64: 58, 65: 59, 67: 60, 70: 61, 72: 62, 73: 
63, 74: 64, 75: 65, 76: 66, 77: 67, 78: 68, 79: 69, 80: 70, 81: 71, 82: 72, 84: 73, 85: 74, 86: 75, 87: 76, 88: 77, 89: 78, 90: 79} + + testset = COCODetection(COCOroot, ['instances_minval_L'], None) + + for i in range(num_samples): + #if(i>10): break + print(i) + image = testset.pull_image(i) + init_shape = image.shape + Image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + Image = cv2.resize(Image, (320, 320)) + input_image = np.array([Image], dtype = np.uint8) + pred = tfliteDetect(init_shape, input_image, interpreter, input_details, output_details) + + for j in range(80): + temp = [x[0:5] for x in pred if class_90_80_map[int(x[-1])+1] == j] + all_boxes[j][i] = np.array(temp) + + print('Evaluating detections') + testset.evaluate_detections(np.array(all_boxes), save_path) + +def tfliteDetect(init_shape, Image, interpreter, input_details, output_details): + + # Test model on random input data. + input_shape = input_details[0]['shape'] + interpreter.set_tensor(input_details[0]['index'], Image) + interpreter.invoke() + + # The function `get_tensor()` returns a copy of the tensor data. + # Use `tensor()` in order to get a pointer to the tensor. + re = [] + bbox = interpreter.get_tensor(output_details[0]['index'])[0] + + b_cls = interpreter.get_tensor(output_details[1]['index'])[0] + b_score = interpreter.get_tensor(output_details[2]['index'])[0] + + for i in range(100): + bbox[i][0] *= init_shape[0] + bbox[i][2] *= init_shape[0] + bbox[i][1] *= init_shape[1] + bbox[i][3] *= init_shape[1] + bbox[i] = [bbox[i][1],bbox[i][0],bbox[i][3],bbox[i][2]] + temp = list(bbox[i]) + list([b_score[i]]) + list([b_cls[i]]) + re.append(temp) + return re + +# model_path = "/data2/chengxiang/models/research/object_detection/tmp/model_trained/lpcvc4_check_19/tflite/70317/non_34_convert/model_17.tflite" +# model_path = "/data2/chengxiang/models/research/object_detection/tmp/model_trained/lpcvc4_check_20/tflite/69919/non_34_convert/model_16.tflite" +model_path = "/data2/chengxiang/models/research/object_detection/data/coco/coco-ovic/LPCVC/model_83.tflite" +save_path = "/data2/chengxiang/models/research/object_detection/data/coco/coco-ovic/test_2/" +num_samples = 7991 +evalImage_fix(model_path, save_path, num_samples) diff --git a/data/coco/coco-ovic/train-val_statistics.py b/data/coco/coco-ovic/train-val_statistics.py new file mode 100644 index 0000000..07b16d8 --- /dev/null +++ b/data/coco/coco-ovic/train-val_statistics.py @@ -0,0 +1,82 @@ +import os +import numpy as np +import json +import sys + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from pycocotools import mask as COCOmask + + +out_root = "/data2/chengxiang/models/research/object_detection/data/coco/coco-ovic" +minval_json = os.path.join(out_root, "annotations/instances_minval_L.json") +train_json = os.path.join(out_root, "annotations/instances_train_L.json") + + +# Validation Dataset +minval_f = open(minval_json,"r") +minval_dict = json.load(minval_f) +minval_f.close() +_COCO = COCO(minval_json) + +num = 0 +num_128 = 0 +num_256 = 0 +num_512 = 0 +num_1024 = 0 +for i, item in enumerate(minval_dict["images"]): # 7991 images + if(i % 1000==0): + print(i) + index_img = item["id"] + annIds = _COCO.getAnnIds(imgIds = index_img, iscrowd = None) + objs = _COCO.loadAnns(annIds) + for obj in objs: + num += 1 + if(float(obj['area'])>=128): + num_128 += 1 + if(float(obj['area'])>=256): + num_256 += 1 + if(float(obj['area'])>=512): + num_512 += 1 + if(float(obj['area'])>=1024): + num_1024 += 1 + +print("Objects larger than 128 account for 
{0}".format(num_128/num)) # 0.8919482248261609 --> 90 +print("Objects larger than 256 account for {0}".format(num_256/num)) # 0.8123412677561173 --> 80 +print("Objects larger than 512 account for {0}".format(num_512/num)) # 0.7111729984411399 --> 70 +print("Objects larger than 1024 account for {0}".format(num_1024/num)) # 0.5950992240730036 --> 60 + + +# Training Dataset +train_f = open(train_json,"r") +train_dict = json.load(train_f) +train_f.close() +_COCO = COCO(train_json) + +num = 0 +num_128 = 0 +num_256 = 0 +num_512 = 0 +num_1024 = 0 +for i, item in enumerate(train_dict["images"]): + if(i % 1000==0): + print(i) + index_img = item["id"] + annIds = _COCO.getAnnIds(imgIds = index_img, iscrowd = None) + objs = _COCO.loadAnns(annIds) + for obj in objs: + num += 1 + if(float(obj['area'])>=128): + num_128 += 1 + if(float(obj['area'])>=256): + num_256 += 1 + if(float(obj['area'])>=512): + num_512 += 1 + if(float(obj['area'])>=1024): + num_1024 += 1 + +print(num, num_128, num_256, num_512, num_1024) +print("Objects larger than 128 account for {0}".format(num_128/num)) # 0.8896722477012322 --> 90 +print("Objects larger than 256 account for {0}".format(num_256/num)) # 0.8063890321297528 --> 80 +print("Objects larger than 512 account for {0}".format(num_512/num)) # 0.7010893318835902 --> 70 +print("Objects larger than 1024 account for {0}".format(num_1024/num)) # 0.5849201311437925 --> 60 diff --git a/data_decoders/tf_example_decoder.py b/data_decoders/tf_example_decoder.py new file mode 100644 index 0000000..acd112d --- /dev/null +++ b/data_decoders/tf_example_decoder.py @@ -0,0 +1,492 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tensorflow Example proto decoder for object detection. + +A decoder to decode string tensors containing serialized tensorflow.Example +protos for object detection. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from six.moves import zip +import tensorflow as tf + +from object_detection.core import data_decoder +from object_detection.core import standard_fields as fields +from object_detection.protos import input_reader_pb2 +from object_detection.utils import label_map_util + +slim_example_decoder = tf.contrib.slim.tfexample_decoder + + +class _ClassTensorHandler(slim_example_decoder.Tensor): + """An ItemHandler to fetch class ids from class text.""" + + def __init__(self, + tensor_key, + label_map_proto_file, + shape_keys=None, + shape=None, + default_value=''): + """Initializes the LookupTensor handler. + + Simply calls a vocabulary (most often, a label mapping) lookup. + + Args: + tensor_key: the name of the `TFExample` feature to read the tensor from. + label_map_proto_file: File path to a text format LabelMapProto message + mapping class text to id. 
+ shape_keys: Optional name or list of names of the TF-Example feature in + which the tensor shape is stored. If a list, then each corresponds to + one dimension of the shape. + shape: Optional output shape of the `Tensor`. If provided, the `Tensor` is + reshaped accordingly. + default_value: The value used when the `tensor_key` is not found in a + particular `TFExample`. + + Raises: + ValueError: if both `shape_keys` and `shape` are specified. + """ + name_to_id = label_map_util.get_label_map_dict( + label_map_proto_file, use_display_name=False) + # We use a default_value of -1, but we expect all labels to be contained + # in the label map. + try: + # Dynamically try to load the tf v2 lookup, falling back to contrib + lookup = tf.compat.v2.lookup + hash_table_class = tf.compat.v2.lookup.StaticHashTable + except AttributeError: + lookup = tf.contrib.lookup + hash_table_class = tf.contrib.lookup.HashTable + name_to_id_table = hash_table_class( + initializer=lookup.KeyValueTensorInitializer( + keys=tf.constant(list(name_to_id.keys())), + values=tf.constant(list(name_to_id.values()), dtype=tf.int64)), + default_value=-1) + display_name_to_id = label_map_util.get_label_map_dict( + label_map_proto_file, use_display_name=True) + # We use a default_value of -1, but we expect all labels to be contained + # in the label map. + display_name_to_id_table = hash_table_class( + initializer=lookup.KeyValueTensorInitializer( + keys=tf.constant(list(display_name_to_id.keys())), + values=tf.constant( + list(display_name_to_id.values()), dtype=tf.int64)), + default_value=-1) + + self._name_to_id_table = name_to_id_table + self._display_name_to_id_table = display_name_to_id_table + super(_ClassTensorHandler, self).__init__(tensor_key, shape_keys, shape, + default_value) + + def tensors_to_item(self, keys_to_tensors): + unmapped_tensor = super(_ClassTensorHandler, + self).tensors_to_item(keys_to_tensors) + return tf.maximum(self._name_to_id_table.lookup(unmapped_tensor), + self._display_name_to_id_table.lookup(unmapped_tensor)) + + +class _BackupHandler(slim_example_decoder.ItemHandler): + """An ItemHandler that tries two ItemHandlers in order.""" + + def __init__(self, handler, backup): + """Initializes the BackupHandler handler. + + If the first Handler's tensors_to_item returns a Tensor with no elements, + the second Handler is used. + + Args: + handler: The primary ItemHandler. + backup: The backup ItemHandler. + + Raises: + ValueError: if either is not an ItemHandler. 
+ """ + if not isinstance(handler, slim_example_decoder.ItemHandler): + raise ValueError('Primary handler is of type %s instead of ItemHandler' % + type(handler)) + if not isinstance(backup, slim_example_decoder.ItemHandler): + raise ValueError( + 'Backup handler is of type %s instead of ItemHandler' % type(backup)) + self._handler = handler + self._backup = backup + super(_BackupHandler, self).__init__(handler.keys + backup.keys) + + def tensors_to_item(self, keys_to_tensors): + item = self._handler.tensors_to_item(keys_to_tensors) + return tf.cond( + pred=tf.equal(tf.reduce_prod(tf.shape(item)), 0), + true_fn=lambda: self._backup.tensors_to_item(keys_to_tensors), + false_fn=lambda: item) + + +class TfExampleDecoder(data_decoder.DataDecoder): + """Tensorflow Example proto decoder.""" + + def __init__(self, + load_instance_masks=False, + instance_mask_type=input_reader_pb2.NUMERICAL_MASKS, + label_map_proto_file=None, + use_display_name=False, + dct_method='', + num_keypoints=0, + num_additional_channels=0, + load_multiclass_scores=False): + """Constructor sets keys_to_features and items_to_handlers. + + Args: + load_instance_masks: whether or not to load and handle instance masks. + instance_mask_type: type of instance masks. Options are provided in + input_reader.proto. This is only used if `load_instance_masks` is True. + label_map_proto_file: a file path to a + object_detection.protos.StringIntLabelMap proto. If provided, then the + mapped IDs of 'image/object/class/text' will take precedence over the + existing 'image/object/class/label' ID. Also, if provided, it is + assumed that 'image/object/class/text' will be in the data. + use_display_name: whether or not to use the `display_name` for label + mapping (instead of `name`). Only used if label_map_proto_file is + provided. + dct_method: An optional string. Defaults to None. It only takes + effect when image format is jpeg, used to specify a hint about the + algorithm used for jpeg decompression. Currently valid values + are ['INTEGER_FAST', 'INTEGER_ACCURATE']. The hint may be ignored, for + example, the jpeg library does not have that specific option. + num_keypoints: the number of keypoints per object. + num_additional_channels: how many additional channels to use. + load_multiclass_scores: Whether to load multiclass scores associated with + boxes. + + Raises: + ValueError: If `instance_mask_type` option is not one of + input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL, or + input_reader_pb2.PNG_MASKS. + """ + # TODO(rathodv): delete unused `use_display_name` argument once we change + # other decoders to handle label maps similarly. + del use_display_name + self.keys_to_features = { + 'image/encoded': + tf.FixedLenFeature((), tf.string, default_value=''), + 'image/format': + tf.FixedLenFeature((), tf.string, default_value='jpeg'), + 'image/filename': + tf.FixedLenFeature((), tf.string, default_value=''), + 'image/key/sha256': + tf.FixedLenFeature((), tf.string, default_value=''), + 'image/source_id': + tf.FixedLenFeature((), tf.string, default_value=''), + 'image/height': + tf.FixedLenFeature((), tf.int64, default_value=1), + 'image/width': + tf.FixedLenFeature((), tf.int64, default_value=1), + # Image-level labels. + 'image/class/text': + tf.VarLenFeature(tf.string), + 'image/class/label': + tf.VarLenFeature(tf.int64), + # Object boxes and classes. 
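+        # The four bbox lists below are parallel VarLen float features, one
+        # value per box and typically normalized to [0, 1]; the BoundingBox
+        # handler registered in items_to_handlers further down zips them into
+        # a single [num_boxes, 4] tensor ordered (ymin, xmin, ymax, xmax).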
+ 'image/object/bbox/xmin': + tf.VarLenFeature(tf.float32), + 'image/object/bbox/xmax': + tf.VarLenFeature(tf.float32), + 'image/object/bbox/ymin': + tf.VarLenFeature(tf.float32), + 'image/object/bbox/ymax': + tf.VarLenFeature(tf.float32), + 'image/object/class/label': + tf.VarLenFeature(tf.int64), + 'image/object/class/text': + tf.VarLenFeature(tf.string), + 'image/object/area': + tf.VarLenFeature(tf.float32), + 'image/object/is_crowd': + tf.VarLenFeature(tf.int64), + 'image/object/difficult': + tf.VarLenFeature(tf.int64), + 'image/object/group_of': + tf.VarLenFeature(tf.int64), + 'image/object/weight': + tf.VarLenFeature(tf.float32), + + } + # We are checking `dct_method` instead of passing it directly in order to + # ensure TF version 1.6 compatibility. + if dct_method: + image = slim_example_decoder.Image( + image_key='image/encoded', + format_key='image/format', + channels=3, + dct_method=dct_method) + additional_channel_image = slim_example_decoder.Image( + image_key='image/additional_channels/encoded', + format_key='image/format', + channels=1, + repeated=True, + dct_method=dct_method) + else: + image = slim_example_decoder.Image( + image_key='image/encoded', format_key='image/format', channels=3) + additional_channel_image = slim_example_decoder.Image( + image_key='image/additional_channels/encoded', + format_key='image/format', + channels=1, + repeated=True) + self.items_to_handlers = { + fields.InputDataFields.image: + image, + fields.InputDataFields.source_id: ( + slim_example_decoder.Tensor('image/source_id')), + fields.InputDataFields.key: ( + slim_example_decoder.Tensor('image/key/sha256')), + fields.InputDataFields.filename: ( + slim_example_decoder.Tensor('image/filename')), + # Object boxes and classes. + fields.InputDataFields.groundtruth_boxes: ( + slim_example_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'], + 'image/object/bbox/')), + fields.InputDataFields.groundtruth_area: + slim_example_decoder.Tensor('image/object/area'), + fields.InputDataFields.groundtruth_is_crowd: ( + slim_example_decoder.Tensor('image/object/is_crowd')), + fields.InputDataFields.groundtruth_difficult: ( + slim_example_decoder.Tensor('image/object/difficult')), + fields.InputDataFields.groundtruth_group_of: ( + slim_example_decoder.Tensor('image/object/group_of')), + fields.InputDataFields.groundtruth_weights: ( + slim_example_decoder.Tensor('image/object/weight')), + + } + if load_multiclass_scores: + self.keys_to_features[ + 'image/object/class/multiclass_scores'] = tf.VarLenFeature(tf.float32) + self.items_to_handlers[fields.InputDataFields.multiclass_scores] = ( + slim_example_decoder.Tensor('image/object/class/multiclass_scores')) + if num_additional_channels > 0: + self.keys_to_features[ + 'image/additional_channels/encoded'] = tf.FixedLenFeature( + (num_additional_channels,), tf.string) + self.items_to_handlers[ + fields.InputDataFields. 
+          image_additional_channels] = additional_channel_image
+    self._num_keypoints = num_keypoints
+    if num_keypoints > 0:
+      self.keys_to_features['image/object/keypoint/x'] = (
+          tf.VarLenFeature(tf.float32))
+      self.keys_to_features['image/object/keypoint/y'] = (
+          tf.VarLenFeature(tf.float32))
+      self.items_to_handlers[fields.InputDataFields.groundtruth_keypoints] = (
+          slim_example_decoder.ItemHandlerCallback(
+              ['image/object/keypoint/y', 'image/object/keypoint/x'],
+              self._reshape_keypoints))
+    if load_instance_masks:
+      if instance_mask_type in (input_reader_pb2.DEFAULT,
+                                input_reader_pb2.NUMERICAL_MASKS):
+        self.keys_to_features['image/object/mask'] = (
+            tf.VarLenFeature(tf.float32))
+        self.items_to_handlers[
+            fields.InputDataFields.groundtruth_instance_masks] = (
+                slim_example_decoder.ItemHandlerCallback(
+                    ['image/object/mask', 'image/height', 'image/width'],
+                    self._reshape_instance_masks))
+      elif instance_mask_type == input_reader_pb2.PNG_MASKS:
+        self.keys_to_features['image/object/mask'] = tf.VarLenFeature(tf.string)
+        self.items_to_handlers[
+            fields.InputDataFields.groundtruth_instance_masks] = (
+                slim_example_decoder.ItemHandlerCallback(
+                    ['image/object/mask', 'image/height', 'image/width'],
+                    self._decode_png_instance_masks))
+      else:
+        raise ValueError('Did not recognize the `instance_mask_type` option.')
+    if label_map_proto_file:
+      # If the label_map_proto is provided, try to use it in conjunction with
+      # the class text, and fall back to a materialized ID.
+      label_handler = _BackupHandler(
+          _ClassTensorHandler(
+              'image/object/class/text', label_map_proto_file,
+              default_value=''),
+          slim_example_decoder.Tensor('image/object/class/label'))
+      image_label_handler = _BackupHandler(
+          _ClassTensorHandler(
+              fields.TfExampleFields.image_class_text,
+              label_map_proto_file,
+              default_value=''),
+          slim_example_decoder.Tensor(fields.TfExampleFields.image_class_label))
+    else:
+      label_handler = slim_example_decoder.Tensor('image/object/class/label')
+      image_label_handler = slim_example_decoder.Tensor(
+          fields.TfExampleFields.image_class_label)
+    self.items_to_handlers[
+        fields.InputDataFields.groundtruth_classes] = label_handler
+    self.items_to_handlers[
+        fields.InputDataFields.groundtruth_image_classes] = image_label_handler
+
+  def decode(self, tf_example_string_tensor):
+    """Decodes serialized tensorflow example and returns a tensor dictionary.
+
+    Args:
+      tf_example_string_tensor: a string tensor holding a serialized tensorflow
+        example proto.
+
+    Returns:
+      A dictionary of the following tensors.
+      fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
+        containing image.
+      fields.InputDataFields.original_image_spatial_shape - 1D int32 tensor of
+        shape [2] containing shape of the image.
+      fields.InputDataFields.source_id - string tensor containing original
+        image id.
+      fields.InputDataFields.key - string tensor with unique sha256 hash key.
+      fields.InputDataFields.filename - string tensor with original dataset
+        filename.
+      fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape
+        [None, 4] containing box corners.
+      fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
+        [None] containing classes for the boxes.
+      fields.InputDataFields.groundtruth_weights - 1D float32 tensor of
+        shape [None] indicating the weights of groundtruth boxes.
+      fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
+        [None] containing object mask area in pixels squared.
+      fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
+        [None] indicating if the boxes enclose a crowd.
+
+      Optional:
+      fields.InputDataFields.image_additional_channels - 3D uint8 tensor of
+        shape [None, None, num_additional_channels]. 1st dim is height; 2nd dim
+        is width; 3rd dim is the number of additional channels.
+      fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
+        [None] indicating if the boxes represent `difficult` instances.
+      fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
+        [None] indicating if the boxes represent `group_of` instances.
+      fields.InputDataFields.groundtruth_keypoints - 3D float32 tensor of
+        shape [None, None, 2] containing keypoints, where the coordinates of
+        the keypoints are ordered (y, x).
+      fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of
+        shape [None, None, None] containing instance masks.
+      fields.InputDataFields.groundtruth_image_classes - 1D int64 tensor of
+        shape [None] containing classes for the boxes.
+      fields.InputDataFields.multiclass_scores - 1D float32 tensor of shape
+        [None * num_classes] containing flattened multiclass scores for
+        groundtruth boxes.
+    """
+    serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
+    decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features,
+                                                    self.items_to_handlers)
+    keys = decoder.list_items()
+    tensors = decoder.decode(serialized_example, items=keys)
+    tensor_dict = dict(zip(keys, tensors))
+    is_crowd = fields.InputDataFields.groundtruth_is_crowd
+    tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
+    tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
+    tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.shape(
+        tensor_dict[fields.InputDataFields.image])[:2]
+
+    if fields.InputDataFields.image_additional_channels in tensor_dict:
+      channels = tensor_dict[fields.InputDataFields.image_additional_channels]
+      channels = tf.squeeze(channels, axis=3)
+      channels = tf.transpose(channels, perm=[1, 2, 0])
+      tensor_dict[fields.InputDataFields.image_additional_channels] = channels
+
+    def default_groundtruth_weights():
+      return tf.ones(
+          [tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]],
+          dtype=tf.float32)
+
+    tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond(
+        tf.greater(
+            tf.shape(
+                tensor_dict[fields.InputDataFields.groundtruth_weights])[0],
+            0), lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
+        default_groundtruth_weights)
+    return tensor_dict
+
+  def _reshape_keypoints(self, keys_to_tensors):
+    """Reshape keypoints.
+
+    The keypoints are reshaped to [num_instances, num_keypoints, 2].
+
+    Args:
+      keys_to_tensors: a dictionary from keys to tensors.
+
+    Returns:
+      A 3-D float tensor of shape [num_instances, num_keypoints, 2] with the
+        keypoint coordinates ordered (y, x).
+    """
+    y = keys_to_tensors['image/object/keypoint/y']
+    if isinstance(y, tf.SparseTensor):
+      y = tf.sparse_tensor_to_dense(y)
+    y = tf.expand_dims(y, 1)
+    x = keys_to_tensors['image/object/keypoint/x']
+    if isinstance(x, tf.SparseTensor):
+      x = tf.sparse_tensor_to_dense(x)
+    x = tf.expand_dims(x, 1)
+    keypoints = tf.concat([y, x], 1)
+    keypoints = tf.reshape(keypoints, [-1, self._num_keypoints, 2])
+    return keypoints
+
+  def _reshape_instance_masks(self, keys_to_tensors):
+    """Reshape instance segmentation masks.
+
+    The instance segmentation masks are reshaped to [num_instances, height,
+    width].
+ + Args: + keys_to_tensors: a dictionary from keys to tensors. + + Returns: + A 3-D float tensor of shape [num_instances, height, width] with values + in {0, 1}. + """ + height = keys_to_tensors['image/height'] + width = keys_to_tensors['image/width'] + to_shape = tf.cast(tf.stack([-1, height, width]), tf.int32) + masks = keys_to_tensors['image/object/mask'] + if isinstance(masks, tf.SparseTensor): + masks = tf.sparse_tensor_to_dense(masks) + masks = tf.reshape( + tf.cast(tf.greater(masks, 0.0), dtype=tf.float32), to_shape) + return tf.cast(masks, tf.float32) + + def _decode_png_instance_masks(self, keys_to_tensors): + """Decode PNG instance segmentation masks and stack into dense tensor. + + The instance segmentation masks are reshaped to [num_instances, height, + width]. + + Args: + keys_to_tensors: a dictionary from keys to tensors. + + Returns: + A 3-D float tensor of shape [num_instances, height, width] with values + in {0, 1}. + """ + + def decode_png_mask(image_buffer): + image = tf.squeeze( + tf.image.decode_image(image_buffer, channels=1), axis=2) + image.set_shape([None, None]) + image = tf.cast(tf.greater(image, 0), dtype=tf.float32) + return image + + png_masks = keys_to_tensors['image/object/mask'] + height = keys_to_tensors['image/height'] + width = keys_to_tensors['image/width'] + if isinstance(png_masks, tf.SparseTensor): + png_masks = tf.sparse_tensor_to_dense(png_masks, default_value='') + return tf.cond( + tf.greater(tf.size(png_masks), 0), + lambda: tf.map_fn(decode_png_mask, png_masks, dtype=tf.float32), + lambda: tf.zeros(tf.cast(tf.stack([0, height, width]), dtype=tf.int32))) diff --git a/data_decoders/tf_example_decoder_test.py b/data_decoders/tf_example_decoder_test.py new file mode 100644 index 0000000..9ed8df6 --- /dev/null +++ b/data_decoders/tf_example_decoder_test.py @@ -0,0 +1,997 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for object_detection.data_decoders.tf_example_decoder.""" + +import os +import numpy as np +import six +import tensorflow as tf + +from object_detection.core import standard_fields as fields +from object_detection.data_decoders import tf_example_decoder +from object_detection.protos import input_reader_pb2 +from object_detection.utils import dataset_util + +slim_example_decoder = tf.contrib.slim.tfexample_decoder + + +class TfExampleDecoderTest(tf.test.TestCase): + + def _EncodeImage(self, image_tensor, encoding_type='jpeg'): + with self.test_session(): + if encoding_type == 'jpeg': + image_encoded = tf.image.encode_jpeg(tf.constant(image_tensor)).eval() + elif encoding_type == 'png': + image_encoded = tf.image.encode_png(tf.constant(image_tensor)).eval() + else: + raise ValueError('Invalid encoding type.') + return image_encoded + + def _DecodeImage(self, image_encoded, encoding_type='jpeg'): + with self.test_session(): + if encoding_type == 'jpeg': + image_decoded = tf.image.decode_jpeg(tf.constant(image_encoded)).eval() + elif encoding_type == 'png': + image_decoded = tf.image.decode_png(tf.constant(image_encoded)).eval() + else: + raise ValueError('Invalid encoding type.') + return image_decoded + + def testDecodeAdditionalChannels(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + + additional_channel_tensor = np.random.randint( + 256, size=(4, 5, 1)).astype(np.uint8) + encoded_additional_channel = self._EncodeImage(additional_channel_tensor) + decoded_additional_channel = self._DecodeImage(encoded_additional_channel) + + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/additional_channels/encoded': + dataset_util.bytes_list_feature( + [encoded_additional_channel] * 2), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/source_id': + dataset_util.bytes_feature(six.b('image_id')), + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder( + num_additional_channels=2) + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + with self.test_session() as sess: + tensor_dict = sess.run(tensor_dict) + self.assertAllEqual( + np.concatenate([decoded_additional_channel] * 2, axis=2), + tensor_dict[fields.InputDataFields.image_additional_channels]) + + def testDecodeJpegImage(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + decoded_jpeg = self._DecodeImage(encoded_jpeg) + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/source_id': + dataset_util.bytes_feature(six.b('image_id')), + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder() + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + self.assertAllEqual((tensor_dict[fields.InputDataFields.image]. + get_shape().as_list()), [None, None, 3]) + self.assertAllEqual((tensor_dict[fields.InputDataFields. + original_image_spatial_shape]. 
+ get_shape().as_list()), [2]) + with self.test_session() as sess: + tensor_dict = sess.run(tensor_dict) + + self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image]) + self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields. + original_image_spatial_shape]) + self.assertEqual( + six.b('image_id'), tensor_dict[fields.InputDataFields.source_id]) + + def testDecodeImageKeyAndFilename(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': dataset_util.bytes_feature(encoded_jpeg), + 'image/key/sha256': dataset_util.bytes_feature(six.b('abc')), + 'image/filename': dataset_util.bytes_feature(six.b('filename')) + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder() + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + with self.test_session() as sess: + tensor_dict = sess.run(tensor_dict) + + self.assertEqual(six.b('abc'), tensor_dict[fields.InputDataFields.key]) + self.assertEqual( + six.b('filename'), tensor_dict[fields.InputDataFields.filename]) + + def testDecodePngImage(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_png = self._EncodeImage(image_tensor, encoding_type='png') + decoded_png = self._DecodeImage(encoded_png, encoding_type='png') + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': dataset_util.bytes_feature(encoded_png), + 'image/format': dataset_util.bytes_feature(six.b('png')), + 'image/source_id': dataset_util.bytes_feature( + six.b('image_id')) + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder() + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + self.assertAllEqual((tensor_dict[fields.InputDataFields.image]. + get_shape().as_list()), [None, None, 3]) + self.assertAllEqual((tensor_dict[fields.InputDataFields. + original_image_spatial_shape]. + get_shape().as_list()), [2]) + with self.test_session() as sess: + tensor_dict = sess.run(tensor_dict) + + self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image]) + self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields. 
+ original_image_spatial_shape]) + self.assertEqual( + six.b('image_id'), tensor_dict[fields.InputDataFields.source_id]) + + def testDecodePngInstanceMasks(self): + image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + mask_1 = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8) + mask_2 = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8) + encoded_png_1 = self._EncodeImage(mask_1, encoding_type='png') + decoded_png_1 = np.squeeze(mask_1.astype(np.float32)) + encoded_png_2 = self._EncodeImage(mask_2, encoding_type='png') + decoded_png_2 = np.squeeze(mask_2.astype(np.float32)) + encoded_masks = [encoded_png_1, encoded_png_2] + decoded_masks = np.stack([decoded_png_1, decoded_png_2]) + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/mask': + dataset_util.bytes_list_feature(encoded_masks) + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder( + load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS) + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + with self.test_session() as sess: + tensor_dict = sess.run(tensor_dict) + + self.assertAllEqual( + decoded_masks, + tensor_dict[fields.InputDataFields.groundtruth_instance_masks]) + + def testDecodeEmptyPngInstanceMasks(self): + image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + encoded_masks = [] + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/mask': + dataset_util.bytes_list_feature(encoded_masks), + 'image/height': + dataset_util.int64_feature(10), + 'image/width': + dataset_util.int64_feature(10), + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder( + load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS) + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + with self.test_session() as sess: + tensor_dict = sess.run(tensor_dict) + self.assertAllEqual( + tensor_dict[fields.InputDataFields.groundtruth_instance_masks].shape, + [0, 10, 10]) + + def testDecodeBoundingBox(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + bbox_ymins = [0.0, 4.0] + bbox_xmins = [1.0, 5.0] + bbox_ymaxs = [2.0, 6.0] + bbox_xmaxs = [3.0, 7.0] + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/bbox/ymin': + dataset_util.float_list_feature(bbox_ymins), + 'image/object/bbox/xmin': + dataset_util.float_list_feature(bbox_xmins), + 'image/object/bbox/ymax': + dataset_util.float_list_feature(bbox_ymaxs), + 'image/object/bbox/xmax': + dataset_util.float_list_feature(bbox_xmaxs), + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder() + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes] + .get_shape().as_list()), [None, 4]) + with self.test_session() as sess: + tensor_dict = sess.run(tensor_dict) + + 
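+    # The decoder stacks the four parallel coordinate lists per box, so the
+    # expected [num_boxes, 4] array is rebuilt the same way: one column per
+    # list, transposed into (ymin, xmin, ymax, xmax) rows.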
expected_boxes = np.vstack([bbox_ymins, bbox_xmins, bbox_ymaxs, + bbox_xmaxs]).transpose() + self.assertAllEqual(expected_boxes, + tensor_dict[fields.InputDataFields.groundtruth_boxes]) + + def testDecodeKeypoint(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + bbox_ymins = [0.0, 4.0] + bbox_xmins = [1.0, 5.0] + bbox_ymaxs = [2.0, 6.0] + bbox_xmaxs = [3.0, 7.0] + keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] + keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/bbox/ymin': + dataset_util.float_list_feature(bbox_ymins), + 'image/object/bbox/xmin': + dataset_util.float_list_feature(bbox_xmins), + 'image/object/bbox/ymax': + dataset_util.float_list_feature(bbox_ymaxs), + 'image/object/bbox/xmax': + dataset_util.float_list_feature(bbox_xmaxs), + 'image/object/keypoint/y': + dataset_util.float_list_feature(keypoint_ys), + 'image/object/keypoint/x': + dataset_util.float_list_feature(keypoint_xs), + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder(num_keypoints=3) + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes] + .get_shape().as_list()), [None, 4]) + self.assertAllEqual( + (tensor_dict[fields.InputDataFields.groundtruth_keypoints].get_shape() + .as_list()), [2, 3, 2]) + with self.test_session() as sess: + tensor_dict = sess.run(tensor_dict) + + expected_boxes = np.vstack([bbox_ymins, bbox_xmins, bbox_ymaxs, + bbox_xmaxs]).transpose() + self.assertAllEqual(expected_boxes, + tensor_dict[fields.InputDataFields.groundtruth_boxes]) + + expected_keypoints = ( + np.vstack([keypoint_ys, keypoint_xs]).transpose().reshape((2, 3, 2))) + self.assertAllEqual( + expected_keypoints, + tensor_dict[fields.InputDataFields.groundtruth_keypoints]) + + def testDecodeDefaultGroundtruthWeights(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + bbox_ymins = [0.0, 4.0] + bbox_xmins = [1.0, 5.0] + bbox_ymaxs = [2.0, 6.0] + bbox_xmaxs = [3.0, 7.0] + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/bbox/ymin': + dataset_util.float_list_feature(bbox_ymins), + 'image/object/bbox/xmin': + dataset_util.float_list_feature(bbox_xmins), + 'image/object/bbox/ymax': + dataset_util.float_list_feature(bbox_ymaxs), + 'image/object/bbox/xmax': + dataset_util.float_list_feature(bbox_xmaxs), + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder() + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes] + .get_shape().as_list()), [None, 4]) + + with self.test_session() as sess: + tensor_dict = sess.run(tensor_dict) + + self.assertAllClose(tensor_dict[fields.InputDataFields.groundtruth_weights], + np.ones(2, dtype=np.float32)) + + def testDecodeObjectLabel(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + bbox_classes = [0, 1] + example = tf.train.Example( + features=tf.train.Features( + 
feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/class/label': + dataset_util.int64_list_feature(bbox_classes), + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder() + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes] + .get_shape().as_list()), [2]) + + with self.test_session() as sess: + tensor_dict = sess.run(tensor_dict) + + self.assertAllEqual(bbox_classes, + tensor_dict[fields.InputDataFields.groundtruth_classes]) + + def testDecodeMultiClassScores(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + bbox_ymins = [0.0, 4.0] + bbox_xmins = [1.0, 5.0] + bbox_ymaxs = [2.0, 6.0] + bbox_xmaxs = [3.0, 7.0] + flattened_multiclass_scores = [100., 50.] + [20., 30.] + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/class/multiclass_scores': + dataset_util.float_list_feature(flattened_multiclass_scores + ), + 'image/object/bbox/ymin': + dataset_util.float_list_feature(bbox_ymins), + 'image/object/bbox/xmin': + dataset_util.float_list_feature(bbox_xmins), + 'image/object/bbox/ymax': + dataset_util.float_list_feature(bbox_ymaxs), + 'image/object/bbox/xmax': + dataset_util.float_list_feature(bbox_xmaxs), + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder( + load_multiclass_scores=True) + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + with self.test_session() as sess: + tensor_dict = sess.run(tensor_dict) + self.assertAllEqual(flattened_multiclass_scores, + tensor_dict[fields.InputDataFields.multiclass_scores]) + + def testDecodeEmptyMultiClassScores(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + bbox_ymins = [0.0, 4.0] + bbox_xmins = [1.0, 5.0] + bbox_ymaxs = [2.0, 6.0] + bbox_xmaxs = [3.0, 7.0] + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/bbox/ymin': + dataset_util.float_list_feature(bbox_ymins), + 'image/object/bbox/xmin': + dataset_util.float_list_feature(bbox_xmins), + 'image/object/bbox/ymax': + dataset_util.float_list_feature(bbox_ymaxs), + 'image/object/bbox/xmax': + dataset_util.float_list_feature(bbox_xmaxs), + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder( + load_multiclass_scores=True) + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + with self.test_session() as sess: + tensor_dict = sess.run(tensor_dict) + self.assertEqual(0, + tensor_dict[fields.InputDataFields.multiclass_scores].size) + + def testDecodeObjectLabelNoText(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + bbox_classes = [1, 2] + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/class/label': + dataset_util.int64_list_feature(bbox_classes), + 
})).SerializeToString() + label_map_string = """ + item { + id:1 + name:'cat' + } + item { + id:2 + name:'dog' + } + """ + label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') + with tf.gfile.Open(label_map_path, 'wb') as f: + f.write(label_map_string) + + example_decoder = tf_example_decoder.TfExampleDecoder( + label_map_proto_file=label_map_path) + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes] + .get_shape().as_list()), [None]) + + init = tf.tables_initializer() + with self.test_session() as sess: + sess.run(init) + tensor_dict = sess.run(tensor_dict) + + self.assertAllEqual(bbox_classes, + tensor_dict[fields.InputDataFields.groundtruth_classes]) + + def testDecodeObjectLabelWithText(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + bbox_classes_text = [six.b('cat'), six.b('dog')] + # Annotation label gets overridden by labelmap id. + annotated_bbox_classes = [3, 4] + expected_bbox_classes = [1, 2] + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/class/text': + dataset_util.bytes_list_feature(bbox_classes_text), + 'image/object/class/label': + dataset_util.int64_list_feature(annotated_bbox_classes), + })).SerializeToString() + label_map_string = """ + item { + id:1 + name:'cat' + } + item { + id:2 + name:'dog' + } + """ + label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') + with tf.gfile.Open(label_map_path, 'wb') as f: + f.write(label_map_string) + + example_decoder = tf_example_decoder.TfExampleDecoder( + label_map_proto_file=label_map_path) + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + init = tf.tables_initializer() + with self.test_session() as sess: + sess.run(init) + tensor_dict = sess.run(tensor_dict) + + self.assertAllEqual(expected_bbox_classes, + tensor_dict[fields.InputDataFields.groundtruth_classes]) + + def testDecodeObjectLabelUnrecognizedName(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + bbox_classes_text = [six.b('cat'), six.b('cheetah')] + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/class/text': + dataset_util.bytes_list_feature(bbox_classes_text), + })).SerializeToString() + + label_map_string = """ + item { + id:2 + name:'cat' + } + item { + id:1 + name:'dog' + } + """ + label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') + with tf.gfile.Open(label_map_path, 'wb') as f: + f.write(label_map_string) + example_decoder = tf_example_decoder.TfExampleDecoder( + label_map_proto_file=label_map_path) + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes] + .get_shape().as_list()), [None]) + + with self.test_session() as sess: + sess.run(tf.tables_initializer()) + tensor_dict = sess.run(tensor_dict) + + self.assertAllEqual([2, -1], + tensor_dict[fields.InputDataFields.groundtruth_classes]) + + def testDecodeObjectLabelWithMappingWithDisplayName(self): + image_tensor = np.random.randint(256, size=(4, 5, 
3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + bbox_classes_text = [six.b('cat'), six.b('dog')] + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/class/text': + dataset_util.bytes_list_feature(bbox_classes_text), + })).SerializeToString() + + label_map_string = """ + item { + id:3 + display_name:'cat' + } + item { + id:1 + display_name:'dog' + } + """ + label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') + with tf.gfile.Open(label_map_path, 'wb') as f: + f.write(label_map_string) + example_decoder = tf_example_decoder.TfExampleDecoder( + label_map_proto_file=label_map_path) + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes] + .get_shape().as_list()), [None]) + + with self.test_session() as sess: + sess.run(tf.tables_initializer()) + tensor_dict = sess.run(tensor_dict) + + self.assertAllEqual([3, 1], + tensor_dict[fields.InputDataFields.groundtruth_classes]) + + def testDecodeObjectLabelUnrecognizedNameWithMappingWithDisplayName(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + bbox_classes_text = [six.b('cat'), six.b('cheetah')] + bbox_classes_id = [5, 6] + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/class/text': + dataset_util.bytes_list_feature(bbox_classes_text), + 'image/object/class/label': + dataset_util.int64_list_feature(bbox_classes_id), + })).SerializeToString() + + label_map_string = """ + item { + name:'/m/cat' + id:3 + display_name:'cat' + } + item { + name:'/m/dog' + id:1 + display_name:'dog' + } + """ + label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') + with tf.gfile.Open(label_map_path, 'wb') as f: + f.write(label_map_string) + example_decoder = tf_example_decoder.TfExampleDecoder( + label_map_proto_file=label_map_path) + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + with self.test_session() as sess: + sess.run(tf.tables_initializer()) + tensor_dict = sess.run(tensor_dict) + + self.assertAllEqual([3, -1], + tensor_dict[fields.InputDataFields.groundtruth_classes]) + + def testDecodeObjectLabelWithMappingWithName(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + bbox_classes_text = [six.b('cat'), six.b('dog')] + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/class/text': + dataset_util.bytes_list_feature(bbox_classes_text), + })).SerializeToString() + + label_map_string = """ + item { + id:3 + name:'cat' + } + item { + id:1 + name:'dog' + } + """ + label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') + with tf.gfile.Open(label_map_path, 'wb') as f: + f.write(label_map_string) + example_decoder = tf_example_decoder.TfExampleDecoder( + label_map_proto_file=label_map_path) + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes] + 
.get_shape().as_list()), [None]) + + with self.test_session() as sess: + sess.run(tf.tables_initializer()) + tensor_dict = sess.run(tensor_dict) + + self.assertAllEqual([3, 1], + tensor_dict[fields.InputDataFields.groundtruth_classes]) + + def testDecodeObjectArea(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + object_area = [100., 174.] + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/area': + dataset_util.float_list_feature(object_area), + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder() + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area] + .get_shape().as_list()), [2]) + with self.test_session() as sess: + tensor_dict = sess.run(tensor_dict) + + self.assertAllEqual(object_area, + tensor_dict[fields.InputDataFields.groundtruth_area]) + + def testDecodeObjectIsCrowd(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + object_is_crowd = [0, 1] + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/is_crowd': + dataset_util.int64_list_feature(object_is_crowd), + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder() + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + self.assertAllEqual( + (tensor_dict[fields.InputDataFields.groundtruth_is_crowd].get_shape() + .as_list()), [2]) + with self.test_session() as sess: + tensor_dict = sess.run(tensor_dict) + + self.assertAllEqual( + [bool(item) for item in object_is_crowd], + tensor_dict[fields.InputDataFields.groundtruth_is_crowd]) + + def testDecodeObjectDifficult(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + object_difficult = [0, 1] + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/difficult': + dataset_util.int64_list_feature(object_difficult), + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder() + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + self.assertAllEqual( + (tensor_dict[fields.InputDataFields.groundtruth_difficult].get_shape() + .as_list()), [2]) + with self.test_session() as sess: + tensor_dict = sess.run(tensor_dict) + + self.assertAllEqual( + [bool(item) for item in object_difficult], + tensor_dict[fields.InputDataFields.groundtruth_difficult]) + + def testDecodeObjectGroupOf(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + object_group_of = [0, 1] + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/group_of': + dataset_util.int64_list_feature(object_group_of), + })).SerializeToString() + + example_decoder = 
tf_example_decoder.TfExampleDecoder() + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + self.assertAllEqual( + (tensor_dict[fields.InputDataFields.groundtruth_group_of].get_shape() + .as_list()), [2]) + with self.test_session() as sess: + tensor_dict = sess.run(tensor_dict) + + self.assertAllEqual( + [bool(item) for item in object_group_of], + tensor_dict[fields.InputDataFields.groundtruth_group_of]) + + def testDecodeObjectWeight(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + object_weights = [0.75, 1.0] + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/weight': + dataset_util.float_list_feature(object_weights), + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder() + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_weights] + .get_shape().as_list()), [None]) + with self.test_session() as sess: + tensor_dict = sess.run(tensor_dict) + + self.assertAllEqual(object_weights, + tensor_dict[fields.InputDataFields.groundtruth_weights]) + + def testDecodeInstanceSegmentation(self): + num_instances = 4 + image_height = 5 + image_width = 3 + + # Randomly generate image. + image_tensor = np.random.randint( + 256, size=(image_height, image_width, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + + # Randomly generate instance segmentation masks. + instance_masks = ( + np.random.randint(2, size=(num_instances, image_height, + image_width)).astype(np.float32)) + instance_masks_flattened = np.reshape(instance_masks, [-1]) + + # Randomly generate class labels for each instance. + object_classes = np.random.randint( + 100, size=(num_instances)).astype(np.int64) + + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/height': + dataset_util.int64_feature(image_height), + 'image/width': + dataset_util.int64_feature(image_width), + 'image/object/mask': + dataset_util.float_list_feature(instance_masks_flattened), + 'image/object/class/label': + dataset_util.int64_list_feature(object_classes) + })).SerializeToString() + example_decoder = tf_example_decoder.TfExampleDecoder( + load_instance_masks=True) + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + + self.assertAllEqual( + (tensor_dict[fields.InputDataFields.groundtruth_instance_masks] + .get_shape().as_list()), [4, 5, 3]) + + self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes] + .get_shape().as_list()), [4]) + + with self.test_session() as sess: + tensor_dict = sess.run(tensor_dict) + + self.assertAllEqual( + instance_masks.astype(np.float32), + tensor_dict[fields.InputDataFields.groundtruth_instance_masks]) + self.assertAllEqual(object_classes, + tensor_dict[fields.InputDataFields.groundtruth_classes]) + + def testInstancesNotAvailableByDefault(self): + num_instances = 4 + image_height = 5 + image_width = 3 + # Randomly generate image. + image_tensor = np.random.randint( + 256, size=(image_height, image_width, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + + # Randomly generate instance segmentation masks. 
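+    # The masks generated below are serialized into the example, but the
+    # decoder under test is constructed without load_instance_masks=True, so
+    # they should be absent from the decoded tensor_dict.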
+ instance_masks = ( + np.random.randint(2, size=(num_instances, image_height, + image_width)).astype(np.float32)) + instance_masks_flattened = np.reshape(instance_masks, [-1]) + + # Randomly generate class labels for each instance. + object_classes = np.random.randint( + 100, size=(num_instances)).astype(np.int64) + + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/height': + dataset_util.int64_feature(image_height), + 'image/width': + dataset_util.int64_feature(image_width), + 'image/object/mask': + dataset_util.float_list_feature(instance_masks_flattened), + 'image/object/class/label': + dataset_util.int64_list_feature(object_classes) + })).SerializeToString() + example_decoder = tf_example_decoder.TfExampleDecoder() + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + self.assertTrue( + fields.InputDataFields.groundtruth_instance_masks not in tensor_dict) + + def testDecodeImageLabels(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg = self._EncodeImage(image_tensor) + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': dataset_util.bytes_feature(encoded_jpeg), + 'image/format': dataset_util.bytes_feature(six.b('jpeg')), + 'image/class/label': dataset_util.int64_list_feature([1, 2]), + })).SerializeToString() + example_decoder = tf_example_decoder.TfExampleDecoder() + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + with self.test_session() as sess: + tensor_dict = sess.run(tensor_dict) + self.assertTrue( + fields.InputDataFields.groundtruth_image_classes in tensor_dict) + self.assertAllEqual( + tensor_dict[fields.InputDataFields.groundtruth_image_classes], + np.array([1, 2])) + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/class/text': + dataset_util.bytes_list_feature( + [six.b('dog'), six.b('cat')]), + })).SerializeToString() + label_map_string = """ + item { + id:3 + name:'cat' + } + item { + id:1 + name:'dog' + } + """ + label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') + with tf.gfile.Open(label_map_path, 'wb') as f: + f.write(label_map_string) + example_decoder = tf_example_decoder.TfExampleDecoder( + label_map_proto_file=label_map_path) + tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) + with self.test_session() as sess: + sess.run(tf.tables_initializer()) + tensor_dict = sess.run(tensor_dict) + self.assertTrue( + fields.InputDataFields.groundtruth_image_classes in tensor_dict) + self.assertAllEqual( + tensor_dict[fields.InputDataFields.groundtruth_image_classes], + np.array([1, 3])) + + +if __name__ == '__main__': + tf.test.main() diff --git a/eval_util.py b/eval_util.py new file mode 100644 index 0000000..d43bb8c --- /dev/null +++ b/eval_util.py @@ -0,0 +1,978 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Common utility functions for evaluation.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import os +import re +import time + +import numpy as np +from six.moves import range +import tensorflow as tf + +from object_detection.core import box_list +from object_detection.core import box_list_ops +from object_detection.core import keypoint_ops +from object_detection.core import standard_fields as fields +from object_detection.metrics import coco_evaluation +from object_detection.utils import label_map_util +from object_detection.utils import object_detection_evaluation +from object_detection.utils import ops +from object_detection.utils import shape_utils +from object_detection.utils import visualization_utils as vis_utils + +slim = tf.contrib.slim + +# A dictionary of metric names to classes that implement the metric. The classes +# in the dictionary must implement +# utils.object_detection_evaluation.DetectionEvaluator interface. +EVAL_METRICS_CLASS_DICT = { + 'coco_detection_metrics': + coco_evaluation.CocoDetectionEvaluator, + 'coco_mask_metrics': + coco_evaluation.CocoMaskEvaluator, + 'oid_challenge_detection_metrics': + object_detection_evaluation.OpenImagesDetectionChallengeEvaluator, + 'oid_challenge_segmentation_metrics': + object_detection_evaluation + .OpenImagesInstanceSegmentationChallengeEvaluator, + 'pascal_voc_detection_metrics': + object_detection_evaluation.PascalDetectionEvaluator, + 'weighted_pascal_voc_detection_metrics': + object_detection_evaluation.WeightedPascalDetectionEvaluator, + 'precision_at_recall_detection_metrics': + object_detection_evaluation.PrecisionAtRecallDetectionEvaluator, + 'pascal_voc_instance_segmentation_metrics': + object_detection_evaluation.PascalInstanceSegmentationEvaluator, + 'weighted_pascal_voc_instance_segmentation_metrics': + object_detection_evaluation.WeightedPascalInstanceSegmentationEvaluator, + 'oid_V2_detection_metrics': + object_detection_evaluation.OpenImagesDetectionEvaluator, +} + +EVAL_DEFAULT_METRIC = 'coco_detection_metrics' + + +def write_metrics(metrics, global_step, summary_dir): + """Write metrics to a summary directory. + + Args: + metrics: A dictionary containing metric names and values. + global_step: Global step at which the metrics are computed. + summary_dir: Directory to write tensorflow summaries to. + """ + tf.logging.info('Writing metrics to tf summary.') + summary_writer = tf.summary.FileWriterCache.get(summary_dir) + for key in sorted(metrics): + summary = tf.Summary(value=[ + tf.Summary.Value(tag=key, simple_value=metrics[key]), + ]) + summary_writer.add_summary(summary, global_step) + tf.logging.info('%s: %f', key, metrics[key]) + tf.logging.info('Metrics written to tf summary.') + + +# TODO(rathodv): Add tests. 
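+# A minimal usage sketch for write_metrics above (hypothetical metric name
+# and values; real dicts come from the evaluators in EVAL_METRICS_CLASS_DICT):
+#
+#   metrics = {'DetectionBoxes_Precision/mAP': 0.31}
+#   write_metrics(metrics, global_step=10000, summary_dir='/tmp/eval')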
+def visualize_detection_results(result_dict,
+                                tag,
+                                global_step,
+                                categories,
+                                summary_dir='',
+                                export_dir='',
+                                agnostic_mode=False,
+                                show_groundtruth=False,
+                                groundtruth_box_visualization_color='black',
+                                min_score_thresh=.5,
+                                max_num_predictions=20,
+                                skip_scores=False,
+                                skip_labels=False,
+                                keep_image_id_for_visualization_export=False):
+  """Visualizes detection results and writes visualizations to image summaries.
+
+  This function visualizes an image with its detected bounding boxes and writes
+  to image summaries which can be viewed on tensorboard. It optionally also
+  writes images to a directory. If an entry is missing from the label map, the
+  class name shown in the visualization is "N/A".
+
+  Args:
+    result_dict: a dictionary holding groundtruth and detection
+      data corresponding to each image being evaluated. The following keys
+      are required:
+        'original_image': a numpy array representing the image with shape
+          [1, height, width, 3] or [1, height, width, 1]
+        'detection_boxes': a numpy array of shape [N, 4]
+        'detection_scores': a numpy array of shape [N]
+        'detection_classes': a numpy array of shape [N]
+      The following keys are optional:
+        'groundtruth_boxes': a numpy array of shape [N, 4]
+        'groundtruth_keypoints': a numpy array of shape [N, num_keypoints, 2]
+      Detections are assumed to be provided in decreasing order of score, and
+      scores are assumed to be probabilities between 0 and 1.
+    tag: tensorboard tag (string) to associate with image.
+    global_step: global step at which the visualizations are generated.
+    categories: a list of dictionaries representing all possible categories.
+      Each dict in this list has the following keys:
+          'id': (required) an integer id uniquely identifying this category
+          'name': (required) string representing category name
+            e.g., 'cat', 'dog', 'pizza'
+          'supercategory': (optional) string representing the supercategory
+            e.g., 'animal', 'vehicle', 'food', etc
+    summary_dir: the output directory to which the image summaries are written.
+    export_dir: the output directory to which images are written. If this is
+      empty (default), then images are not exported.
+    agnostic_mode: boolean (default: False) controlling whether to evaluate in
+      class-agnostic mode or not.
+ show_groundtruth: boolean (default: False) controlling whether to show + groundtruth boxes in addition to detected boxes + groundtruth_box_visualization_color: box color for visualizing groundtruth + boxes + min_score_thresh: minimum score threshold for a box to be visualized + max_num_predictions: maximum number of detections to visualize + skip_scores: whether to skip score when drawing a single detection + skip_labels: whether to skip label when drawing a single detection + keep_image_id_for_visualization_export: whether to keep image identifier in + filename when exported to export_dir + Raises: + ValueError: if result_dict does not contain the expected keys (i.e., + 'original_image', 'detection_boxes', 'detection_scores', + 'detection_classes') + """ + detection_fields = fields.DetectionResultFields + input_fields = fields.InputDataFields + if not set([ + input_fields.original_image, + detection_fields.detection_boxes, + detection_fields.detection_scores, + detection_fields.detection_classes, + ]).issubset(set(result_dict.keys())): + raise ValueError('result_dict does not contain all expected keys.') + if show_groundtruth and input_fields.groundtruth_boxes not in result_dict: + raise ValueError('If show_groundtruth is enabled, result_dict must contain ' + 'groundtruth_boxes.') + tf.logging.info('Creating detection visualizations.') + category_index = label_map_util.create_category_index(categories) + + image = np.squeeze(result_dict[input_fields.original_image], axis=0) + if image.shape[2] == 1: # If one channel image, repeat in RGB. + image = np.tile(image, [1, 1, 3]) + detection_boxes = result_dict[detection_fields.detection_boxes] + detection_scores = result_dict[detection_fields.detection_scores] + detection_classes = np.int32((result_dict[ + detection_fields.detection_classes])) + detection_keypoints = result_dict.get(detection_fields.detection_keypoints) + detection_masks = result_dict.get(detection_fields.detection_masks) + detection_boundaries = result_dict.get(detection_fields.detection_boundaries) + + # Plot groundtruth underneath detections + if show_groundtruth: + groundtruth_boxes = result_dict[input_fields.groundtruth_boxes] + groundtruth_keypoints = result_dict.get(input_fields.groundtruth_keypoints) + vis_utils.visualize_boxes_and_labels_on_image_array( + image=image, + boxes=groundtruth_boxes, + classes=None, + scores=None, + category_index=category_index, + keypoints=groundtruth_keypoints, + use_normalized_coordinates=False, + max_boxes_to_draw=None, + groundtruth_box_visualization_color=groundtruth_box_visualization_color) + vis_utils.visualize_boxes_and_labels_on_image_array( + image, + detection_boxes, + detection_classes, + detection_scores, + category_index, + instance_masks=detection_masks, + instance_boundaries=detection_boundaries, + keypoints=detection_keypoints, + use_normalized_coordinates=False, + max_boxes_to_draw=max_num_predictions, + min_score_thresh=min_score_thresh, + agnostic_mode=agnostic_mode, + skip_scores=skip_scores, + skip_labels=skip_labels) + + if export_dir: + if keep_image_id_for_visualization_export and result_dict[fields. 
InputDataFields()
+                                                              .key]:
+      export_path = os.path.join(export_dir, 'export-{}-{}.png'.format(
+          tag, result_dict[fields.InputDataFields().key]))
+    else:
+      export_path = os.path.join(export_dir, 'export-{}.png'.format(tag))
+    vis_utils.save_image_array_as_png(image, export_path)
+
+  summary = tf.Summary(value=[
+      tf.Summary.Value(
+          tag=tag,
+          image=tf.Summary.Image(
+              encoded_image_string=vis_utils.encode_image_array_as_png_str(
+                  image)))
+  ])
+  summary_writer = tf.summary.FileWriterCache.get(summary_dir)
+  summary_writer.add_summary(summary, global_step)
+
+  tf.logging.info('Detection visualizations written to summary with tag %s.',
+                  tag)
+
+
+def _run_checkpoint_once(tensor_dict,
+                         evaluators=None,
+                         batch_processor=None,
+                         checkpoint_dirs=None,
+                         variables_to_restore=None,
+                         restore_fn=None,
+                         num_batches=1,
+                         master='',
+                         save_graph=False,
+                         save_graph_dir='',
+                         losses_dict=None,
+                         eval_export_path=None,
+                         process_metrics_fn=None):
+  """Evaluates metrics defined in evaluators and returns summaries.
+
+  This function loads the latest checkpoint in checkpoint_dirs and evaluates
+  all metrics defined in evaluators. The metrics are processed in batch by the
+  batch_processor.
+
+  Args:
+    tensor_dict: a dictionary holding tensors representing a batch of
+      detections and corresponding groundtruth annotations.
+    evaluators: a list of objects of type DetectionEvaluator to be used for
+      evaluation. Note that the metric names produced by different evaluators
+      must be unique.
+    batch_processor: a function taking four arguments:
+      1. tensor_dict: the same tensor_dict that is passed in as the first
+        argument to this function.
+      2. sess: a tensorflow session
+      3. batch_index: an integer representing the index of the batch amongst
+        all batches
+      4. counters: a dictionary with 'success' and 'skipped' counters that the
+        processor is expected to update
+      It must return a (result_dict, result_losses_dict) tuple. By default,
+      batch_processor is None, which defaults to running:
+        return sess.run(tensor_dict)
+      To skip an image, it suffices to return an empty dictionary in place of
+      result_dict.
+    checkpoint_dirs: list of directories to load into an EnsembleModel. If it
+      has only one directory, EnsembleModel will not be used -- a
+      DetectionModel will be instantiated directly. Not used if restore_fn is
+      set.
+    variables_to_restore: None, or a dictionary mapping variable names found in
+      a checkpoint to model variables. The dictionary would normally be
+      generated by creating a tf.train.ExponentialMovingAverage object and
+      calling its variables_to_restore() method. Not used if restore_fn is set.
+    restore_fn: None, or a function that takes a tf.Session object and
+      correctly restores all necessary variables from the correct checkpoint
+      file. If None, attempts to restore from the first directory in
+      checkpoint_dirs.
+    num_batches: the number of batches to use for evaluation.
+    master: the location of the Tensorflow session.
+    save_graph: whether or not the Tensorflow graph is stored as a pbtxt file.
+    save_graph_dir: where to store the Tensorflow graph on disk. If save_graph
+      is True this must be non-empty.
+    losses_dict: optional dictionary of scalar detection losses.
+    eval_export_path: Path for saving a json file that contains the detection
+      results in json format.
+    process_metrics_fn: a callback called with evaluation results after each
+      evaluation is done. It could be used e.g. to back up checkpoints with
+      best evaluation scores, or to call an external system to update
+      evaluation results in order to drive best hyper-parameter search.
+      Parameters are: int checkpoint_number,
+      Dict[str, ObjectDetectionEvalMetrics] metrics, str checkpoint_file path.
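+
+  Example:
+    An illustrative batch_processor sketch (the name is hypothetical, not part
+    of this module) that mirrors the default behaviour while updating the
+    counters:
+
+      def my_batch_processor(tensor_dict, sess, batch_index, counters,
+                             losses_dict=None):
+        result_dict, result_losses_dict = sess.run([tensor_dict,
+                                                    losses_dict or {}])
+        counters['success'] += 1
+        return result_dict, result_losses_dict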
+ + Returns: + global_step: the count of global steps. + all_evaluator_metrics: A dictionary containing metric names and values. + + Raises: + ValueError: if restore_fn is None and checkpoint_dirs doesn't have at least + one element. + ValueError: if save_graph is True and save_graph_dir is not defined. + """ + if save_graph and not save_graph_dir: + raise ValueError('`save_graph_dir` must be defined.') + sess = tf.Session(master, graph=tf.get_default_graph()) + sess.run(tf.global_variables_initializer()) + sess.run(tf.local_variables_initializer()) + sess.run(tf.tables_initializer()) + checkpoint_file = None + if restore_fn: + restore_fn(sess) + else: + if not checkpoint_dirs: + raise ValueError('`checkpoint_dirs` must have at least one entry.') + checkpoint_file = tf.train.latest_checkpoint(checkpoint_dirs[0]) + saver = tf.train.Saver(variables_to_restore) + saver.restore(sess, checkpoint_file) + + if save_graph: + tf.train.write_graph(sess.graph_def, save_graph_dir, 'eval.pbtxt') + + counters = {'skipped': 0, 'success': 0} + aggregate_result_losses_dict = collections.defaultdict(list) + with tf.contrib.slim.queues.QueueRunners(sess): + try: + for batch in range(int(num_batches)): + if (batch + 1) % 100 == 0: + tf.logging.info('Running eval ops batch %d/%d', batch + 1, + num_batches) + if not batch_processor: + try: + if not losses_dict: + losses_dict = {} + result_dict, result_losses_dict = sess.run([tensor_dict, + losses_dict]) + counters['success'] += 1 + except tf.errors.InvalidArgumentError: + tf.logging.info('Skipping image') + counters['skipped'] += 1 + result_dict = {} + else: + result_dict, result_losses_dict = batch_processor( + tensor_dict, sess, batch, counters, losses_dict=losses_dict) + if not result_dict: + continue + for key, value in iter(result_losses_dict.items()): + aggregate_result_losses_dict[key].append(value) + for evaluator in evaluators: + # TODO(b/65130867): Use image_id tensor once we fix the input data + # decoders to return correct image_id. + # TODO(akuznetsa): result_dict contains batches of images, while + # add_single_ground_truth_image_info expects a single image. Fix + if (isinstance(result_dict, dict) and + fields.InputDataFields.key in result_dict and + result_dict[fields.InputDataFields.key]): + image_id = result_dict[fields.InputDataFields.key] + else: + image_id = batch + evaluator.add_single_ground_truth_image_info( + image_id=image_id, groundtruth_dict=result_dict) + evaluator.add_single_detected_image_info( + image_id=image_id, detections_dict=result_dict) + tf.logging.info('Running eval batches done.') + except tf.errors.OutOfRangeError: + tf.logging.info('Done evaluating -- epoch limit reached') + finally: + # When done, ask the threads to stop. 
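+      # (The slim QueueRunners context manager above stops and joins the queue
+      # threads on exit; this block only logs the final counters.)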
+      tf.logging.info('# success: %d', counters['success'])
+      tf.logging.info('# skipped: %d', counters['skipped'])
+    all_evaluator_metrics = {}
+    if eval_export_path:
+      for evaluator in evaluators:
+        if (isinstance(evaluator, coco_evaluation.CocoDetectionEvaluator) or
+            isinstance(evaluator, coco_evaluation.CocoMaskEvaluator)):
+          tf.logging.info('Started dumping to json file.')
+          evaluator.dump_detections_to_json_file(
+              json_output_path=eval_export_path)
+          tf.logging.info('Finished dumping to json file.')
+    for evaluator in evaluators:
+      metrics = evaluator.evaluate()
+      evaluator.clear()
+      if any(key in all_evaluator_metrics for key in metrics):
+        raise ValueError('Metric names between evaluators must not collide.')
+      all_evaluator_metrics.update(metrics)
+    global_step = tf.train.global_step(sess, tf.train.get_global_step())
+
+    for key, value in iter(aggregate_result_losses_dict.items()):
+      all_evaluator_metrics['Losses/' + key] = np.mean(value)
+  if process_metrics_fn and checkpoint_file:
+    m = re.search(r'model\.ckpt-(\d+)$', checkpoint_file)
+    if not m:
+      tf.logging.error('Failed to parse checkpoint number from: %s',
+                       checkpoint_file)
+    else:
+      checkpoint_number = int(m.group(1))
+      process_metrics_fn(checkpoint_number, all_evaluator_metrics,
+                         checkpoint_file)
+  sess.close()
+  return (global_step, all_evaluator_metrics)
+
+
+# TODO(rathodv): Add tests.
+def repeated_checkpoint_run(tensor_dict,
+                            summary_dir,
+                            evaluators,
+                            batch_processor=None,
+                            checkpoint_dirs=None,
+                            variables_to_restore=None,
+                            restore_fn=None,
+                            num_batches=1,
+                            eval_interval_secs=120,
+                            max_number_of_evaluations=None,
+                            max_evaluation_global_step=None,
+                            master='',
+                            save_graph=False,
+                            save_graph_dir='',
+                            losses_dict=None,
+                            eval_export_path=None,
+                            process_metrics_fn=None):
+  """Periodically evaluates desired tensors using checkpoint_dirs or restore_fn.
+
+  This function repeatedly loads a checkpoint and evaluates a desired
+  set of tensors (provided by tensor_dict) and hands the resulting numpy
+  arrays to the evaluators, so the results can be further
+  processed/saved/visualized.
+
+  Args:
+    tensor_dict: a dictionary holding tensors representing a batch of
+      detections and corresponding groundtruth annotations.
+    summary_dir: a directory to write metrics summaries.
+    evaluators: a list of objects of type DetectionEvaluator to be used for
+      evaluation. Note that the metric names produced by different evaluators
+      must be unique.
+    batch_processor: a function taking four arguments: tensor_dict, sess,
+      batch_index and counters, as described for _run_checkpoint_once above.
+      By default, batch_processor is None, which defaults to running:
+        return sess.run(tensor_dict)
+    checkpoint_dirs: list of directories to load into a DetectionModel or an
+      EnsembleModel if restore_fn isn't set. Also used to determine when to run
+      next evaluation. Must have at least one element.
+    variables_to_restore: None, or a dictionary mapping variable names found in
+      a checkpoint to model variables. The dictionary would normally be
+      generated by creating a tf.train.ExponentialMovingAverage object and
+      calling its variables_to_restore() method. Not used if restore_fn is set.
+    restore_fn: a function that takes a tf.Session object and correctly
+      restores all necessary variables from the correct checkpoint file.
+    num_batches: the number of batches to use for evaluation.
+    eval_interval_secs: the number of seconds between each evaluation run.
+    max_number_of_evaluations: the max number of iterations of the evaluation.
+      If the value is left as None the evaluation continues indefinitely.
+    max_evaluation_global_step: global step when evaluation stops.
+    master: the location of the Tensorflow session.
+    save_graph: whether or not the Tensorflow graph is saved as a pbtxt file.
+    save_graph_dir: where to save on disk the Tensorflow graph. If save_graph
+      is True this must be non-empty.
+    losses_dict: optional dictionary of scalar detection losses.
+    eval_export_path: Path for saving a json file that contains the detection
+      results in json format.
+    process_metrics_fn: a callback called with evaluation results after each
+      evaluation is done. It could be used e.g. to back up checkpoints with
+      best evaluation scores, or to call an external system to update
+      evaluation results in order to drive best hyper-parameter search.
+      Parameters are: int checkpoint_number,
+      Dict[str, ObjectDetectionEvalMetrics] metrics, str checkpoint_file path.
+
+  Returns:
+    metrics: A dictionary containing metric names and values in the latest
+      evaluation.
+
+  Raises:
+    ValueError: if max_number_of_evaluations is neither None nor a positive
+      number.
+    ValueError: if max_evaluation_global_step is neither None nor a positive
+      number.
+    ValueError: if checkpoint_dirs doesn't have at least one element.
+  """
+  if max_number_of_evaluations and max_number_of_evaluations <= 0:
+    raise ValueError(
+        '`max_number_of_evaluations` must be either None or a positive number.')
+  if max_evaluation_global_step and max_evaluation_global_step <= 0:
+    raise ValueError(
+        '`max_evaluation_global_step` must be either None or positive.')
+
+  if not checkpoint_dirs:
+    raise ValueError('`checkpoint_dirs` must have at least one entry.')
+
+  last_evaluated_model_path = None
+  number_of_evaluations = 0
+  while True:
+    start = time.time()
+    tf.logging.info('Starting evaluation at ' + time.strftime(
+        '%Y-%m-%d-%H:%M:%S', time.gmtime()))
+    model_path = tf.train.latest_checkpoint(checkpoint_dirs[0])
+    if not model_path:
+      tf.logging.info('No model found in %s. Will try again in %d seconds',
+                      checkpoint_dirs[0], eval_interval_secs)
+    elif model_path == last_evaluated_model_path:
+      tf.logging.info('Found already evaluated checkpoint. 
Will try again in ' + '%d seconds', eval_interval_secs) + else: + last_evaluated_model_path = model_path + global_step, metrics = _run_checkpoint_once( + tensor_dict, + evaluators, + batch_processor, + checkpoint_dirs, + variables_to_restore, + restore_fn, + num_batches, + master, + save_graph, + save_graph_dir, + losses_dict=losses_dict, + eval_export_path=eval_export_path, + process_metrics_fn=process_metrics_fn) + write_metrics(metrics, global_step, summary_dir) + if (max_evaluation_global_step and + global_step >= max_evaluation_global_step): + tf.logging.info('Finished evaluation!') + break + number_of_evaluations += 1 + + if (max_number_of_evaluations and + number_of_evaluations >= max_number_of_evaluations): + tf.logging.info('Finished evaluation!') + break + time_to_next_eval = start + eval_interval_secs - time.time() + if time_to_next_eval > 0: + time.sleep(time_to_next_eval) + + return metrics + + +def _scale_box_to_absolute(args): + boxes, image_shape = args + return box_list_ops.to_absolute_coordinates( + box_list.BoxList(boxes), image_shape[0], image_shape[1]).get() + + +def _resize_detection_masks(args): + detection_boxes, detection_masks, image_shape = args + detection_masks_reframed = ops.reframe_box_masks_to_image_masks( + detection_masks, detection_boxes, image_shape[0], image_shape[1]) + return tf.cast(tf.greater(detection_masks_reframed, 0.5), tf.uint8) + + +def _resize_groundtruth_masks(args): + mask, image_shape = args + mask = tf.expand_dims(mask, 3) + mask = tf.image.resize_images( + mask, + image_shape, + method=tf.image.ResizeMethod.NEAREST_NEIGHBOR, + align_corners=True) + return tf.cast(tf.squeeze(mask, 3), tf.uint8) + + +def _scale_keypoint_to_absolute(args): + keypoints, image_shape = args + return keypoint_ops.scale(keypoints, image_shape[0], image_shape[1]) + + +def result_dict_for_single_example(image, + key, + detections, + groundtruth=None, + class_agnostic=False, + scale_to_absolute=False): + """Merges all detection and groundtruth information for a single example. + + Note that evaluation tools require classes that are 1-indexed, and so this + function performs the offset. If `class_agnostic` is True, all output classes + have label 1. + + Args: + image: A single 4D uint8 image tensor of shape [1, H, W, C]. + key: A single string tensor identifying the image. + detections: A dictionary of detections, returned from + DetectionModel.postprocess(). + groundtruth: (Optional) Dictionary of groundtruth items, with fields: + 'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in + normalized coordinates. + 'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes. + 'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional) + 'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional) + 'groundtruth_difficult': [num_boxes] int64 tensor. (Optional) + 'groundtruth_group_of': [num_boxes] int64 tensor. (Optional) + 'groundtruth_instance_masks': 3D int64 tensor of instance masks + (Optional). + class_agnostic: Boolean indicating whether the detections are class-agnostic + (i.e. binary). Default False. + scale_to_absolute: Boolean indicating whether boxes and keypoints should be + scaled to absolute coordinates. Note that for IoU based evaluations, it + does not matter whether boxes are expressed in absolute or relative + coordinates. Default False. + + Returns: + A dictionary with: + 'original_image': A [1, H, W, C] uint8 image tensor. + 'key': A string tensor with image identifier. 
+ 'detection_boxes': [max_detections, 4] float32 tensor of boxes, in + normalized or absolute coordinates, depending on the value of + `scale_to_absolute`. + 'detection_scores': [max_detections] float32 tensor of scores. + 'detection_classes': [max_detections] int64 tensor of 1-indexed classes. + 'detection_masks': [max_detections, H, W] float32 tensor of binarized + masks, reframed to full image masks. + 'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in + normalized or absolute coordinates, depending on the value of + `scale_to_absolute`. (Optional) + 'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes. + (Optional) + 'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional) + 'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional) + 'groundtruth_difficult': [num_boxes] int64 tensor. (Optional) + 'groundtruth_group_of': [num_boxes] int64 tensor. (Optional) + 'groundtruth_instance_masks': 3D int64 tensor of instance masks + (Optional). + + """ + + if groundtruth: + max_gt_boxes = tf.shape( + groundtruth[fields.InputDataFields.groundtruth_boxes])[0] + for gt_key in groundtruth: + # expand groundtruth dict along the batch dimension. + groundtruth[gt_key] = tf.expand_dims(groundtruth[gt_key], 0) + + for detection_key in detections: + detections[detection_key] = tf.expand_dims( + detections[detection_key][0], axis=0) + + batched_output_dict = result_dict_for_batched_example( + image, + tf.expand_dims(key, 0), + detections, + groundtruth, + class_agnostic, + scale_to_absolute, + max_gt_boxes=max_gt_boxes) + + exclude_keys = [ + fields.InputDataFields.original_image, + fields.DetectionResultFields.num_detections, + fields.InputDataFields.num_groundtruth_boxes + ] + + output_dict = { + fields.InputDataFields.original_image: + batched_output_dict[fields.InputDataFields.original_image] + } + + for key in batched_output_dict: + # remove the batch dimension. + if key not in exclude_keys: + output_dict[key] = tf.squeeze(batched_output_dict[key], 0) + return output_dict + + +def result_dict_for_batched_example(images, + keys, + detections, + groundtruth=None, + class_agnostic=False, + scale_to_absolute=False, + original_image_spatial_shapes=None, + true_image_shapes=None, + max_gt_boxes=None): + """Merges all detection and groundtruth information for a single example. + + Note that evaluation tools require classes that are 1-indexed, and so this + function performs the offset. If `class_agnostic` is True, all output classes + have label 1. + + Args: + images: A single 4D uint8 image tensor of shape [batch_size, H, W, C]. + keys: A [batch_size] string tensor with image identifier. + detections: A dictionary of detections, returned from + DetectionModel.postprocess(). + groundtruth: (Optional) Dictionary of groundtruth items, with fields: + 'groundtruth_boxes': [batch_size, max_number_of_boxes, 4] float32 tensor + of boxes, in normalized coordinates. + 'groundtruth_classes': [batch_size, max_number_of_boxes] int64 tensor of + 1-indexed classes. + 'groundtruth_area': [batch_size, max_number_of_boxes] float32 tensor of + bbox area. (Optional) + 'groundtruth_is_crowd':[batch_size, max_number_of_boxes] int64 + tensor. (Optional) + 'groundtruth_difficult': [batch_size, max_number_of_boxes] int64 + tensor. (Optional) + 'groundtruth_group_of': [batch_size, max_number_of_boxes] int64 + tensor. (Optional) + 'groundtruth_instance_masks': 4D int64 tensor of instance + masks (Optional). 
+    class_agnostic: Boolean indicating whether the detections are
+      class-agnostic (i.e. binary). Default False.
+    scale_to_absolute: Boolean indicating whether boxes and keypoints should be
+      scaled to absolute coordinates. Note that for IoU based evaluations, it
+      does not matter whether boxes are expressed in absolute or relative
+      coordinates. Default False.
+    original_image_spatial_shapes: A 2D int32 tensor of shape [batch_size, 2]
+      used to resize the image. When set to None, the image size is retained.
+    true_image_shapes: A 2D int32 tensor of shape [batch_size, 3]
+      containing the size of the unpadded original_image.
+    max_gt_boxes: [batch_size] tensor representing the maximum number of
+      groundtruth boxes to pad.
+
+  Returns:
+    A dictionary with:
+    'original_image': A [batch_size, H, W, C] uint8 image tensor.
+    'original_image_spatial_shape': A [batch_size, 2] tensor containing the
+      original image sizes.
+    'true_image_shape': A [batch_size, 3] tensor containing the size of
+      the unpadded original_image.
+    'key': A [batch_size] string tensor with image identifier.
+    'detection_boxes': [batch_size, max_detections, 4] float32 tensor of boxes,
+      in normalized or absolute coordinates, depending on the value of
+      `scale_to_absolute`.
+    'detection_scores': [batch_size, max_detections] float32 tensor of scores.
+    'detection_classes': [batch_size, max_detections] int64 tensor of 1-indexed
+      classes.
+    'detection_masks': [batch_size, max_detections, H, W] float32 tensor of
+      binarized masks, reframed to full image masks.
+    'num_detections': [batch_size] int64 tensor containing number of valid
+      detections.
+    'groundtruth_boxes': [batch_size, num_boxes, 4] float32 tensor of boxes, in
+      normalized or absolute coordinates, depending on the value of
+      `scale_to_absolute`. (Optional)
+    'groundtruth_classes': [batch_size, num_boxes] int64 tensor of 1-indexed
+      classes. (Optional)
+    'groundtruth_area': [batch_size, num_boxes] float32 tensor of bbox
+      area. (Optional)
+    'groundtruth_is_crowd': [batch_size, num_boxes] int64 tensor. (Optional)
+    'groundtruth_difficult': [batch_size, num_boxes] int64 tensor. (Optional)
+    'groundtruth_group_of': [batch_size, num_boxes] int64 tensor. (Optional)
+    'groundtruth_instance_masks': 4D int64 tensor of instance masks
+      (Optional).
+    'num_groundtruth_boxes': [batch_size] tensor containing the maximum number
+      of groundtruth boxes per image.
+
+  Raises:
+    ValueError: if original_image_spatial_shapes is not a 2D int32 tensor of
+      shape [batch_size, 2].
+    ValueError: if true_image_shapes is not a 2D int32 tensor of shape
+      [batch_size, 3].
+ """ + label_id_offset = 1 # Applying label id offset (b/63711816) + + input_data_fields = fields.InputDataFields + if original_image_spatial_shapes is None: + original_image_spatial_shapes = tf.tile( + tf.expand_dims(tf.shape(images)[1:3], axis=0), + multiples=[tf.shape(images)[0], 1]) + else: + if (len(original_image_spatial_shapes.shape) != 2 and + original_image_spatial_shapes.shape[1] != 2): + raise ValueError( + '`original_image_spatial_shape` should be a 2D tensor of shape ' + '[batch_size, 2].') + + if true_image_shapes is None: + true_image_shapes = tf.tile( + tf.expand_dims(tf.shape(images)[1:4], axis=0), + multiples=[tf.shape(images)[0], 1]) + else: + if (len(true_image_shapes.shape) != 2 + and true_image_shapes.shape[1] != 3): + raise ValueError('`true_image_shapes` should be a 2D tensor of ' + 'shape [batch_size, 3].') + + output_dict = { + input_data_fields.original_image: + images, + input_data_fields.key: + keys, + input_data_fields.original_image_spatial_shape: ( + original_image_spatial_shapes), + input_data_fields.true_image_shape: + true_image_shapes + } + + detection_fields = fields.DetectionResultFields + detection_boxes = detections[detection_fields.detection_boxes] + detection_scores = detections[detection_fields.detection_scores] + num_detections = tf.cast(detections[detection_fields.num_detections], + dtype=tf.int32) + + if class_agnostic: + detection_classes = tf.ones_like(detection_scores, dtype=tf.int64) + else: + detection_classes = ( + tf.to_int64(detections[detection_fields.detection_classes]) + + label_id_offset) + + if scale_to_absolute: + output_dict[detection_fields.detection_boxes] = ( + shape_utils.static_or_dynamic_map_fn( + _scale_box_to_absolute, + elems=[detection_boxes, original_image_spatial_shapes], + dtype=tf.float32)) + else: + output_dict[detection_fields.detection_boxes] = detection_boxes + output_dict[detection_fields.detection_classes] = detection_classes + output_dict[detection_fields.detection_scores] = detection_scores + output_dict[detection_fields.num_detections] = num_detections + + if detection_fields.detection_masks in detections: + detection_masks = detections[detection_fields.detection_masks] + # TODO(rathodv): This should be done in model's postprocess + # function ideally. 
+ output_dict[detection_fields.detection_masks] = ( + shape_utils.static_or_dynamic_map_fn( + _resize_detection_masks, + elems=[detection_boxes, detection_masks, + original_image_spatial_shapes], + dtype=tf.uint8)) + + if detection_fields.detection_keypoints in detections: + detection_keypoints = detections[detection_fields.detection_keypoints] + output_dict[detection_fields.detection_keypoints] = detection_keypoints + if scale_to_absolute: + output_dict[detection_fields.detection_keypoints] = ( + shape_utils.static_or_dynamic_map_fn( + _scale_keypoint_to_absolute, + elems=[detection_keypoints, original_image_spatial_shapes], + dtype=tf.float32)) + + if groundtruth: + if max_gt_boxes is None: + if input_data_fields.num_groundtruth_boxes in groundtruth: + max_gt_boxes = groundtruth[input_data_fields.num_groundtruth_boxes] + else: + raise ValueError( + 'max_gt_boxes must be provided when processing batched examples.') + + if input_data_fields.groundtruth_instance_masks in groundtruth: + masks = groundtruth[input_data_fields.groundtruth_instance_masks] + groundtruth[input_data_fields.groundtruth_instance_masks] = ( + shape_utils.static_or_dynamic_map_fn( + _resize_groundtruth_masks, + elems=[masks, original_image_spatial_shapes], + dtype=tf.uint8)) + + output_dict.update(groundtruth) + + image_shape = tf.cast(tf.shape(images), tf.float32) + image_height, image_width = image_shape[1], image_shape[2] + + def _scale_box_to_normalized_true_image(args): + """Scale the box coordinates to be relative to the true image shape.""" + boxes, true_image_shape = args + true_image_shape = tf.cast(true_image_shape, tf.float32) + true_height, true_width = true_image_shape[0], true_image_shape[1] + normalized_window = tf.stack([0.0, 0.0, true_height / image_height, + true_width / image_width]) + return box_list_ops.change_coordinate_frame( + box_list.BoxList(boxes), normalized_window).get() + + groundtruth_boxes = groundtruth[input_data_fields.groundtruth_boxes] + groundtruth_boxes = shape_utils.static_or_dynamic_map_fn( + _scale_box_to_normalized_true_image, + elems=[groundtruth_boxes, true_image_shapes], dtype=tf.float32) + output_dict[input_data_fields.groundtruth_boxes] = groundtruth_boxes + + if scale_to_absolute: + groundtruth_boxes = output_dict[input_data_fields.groundtruth_boxes] + output_dict[input_data_fields.groundtruth_boxes] = ( + shape_utils.static_or_dynamic_map_fn( + _scale_box_to_absolute, + elems=[groundtruth_boxes, original_image_spatial_shapes], + dtype=tf.float32)) + + # For class-agnostic models, groundtruth classes all become 1. + if class_agnostic: + groundtruth_classes = groundtruth[input_data_fields.groundtruth_classes] + groundtruth_classes = tf.ones_like(groundtruth_classes, dtype=tf.int64) + output_dict[input_data_fields.groundtruth_classes] = groundtruth_classes + + output_dict[input_data_fields.num_groundtruth_boxes] = max_gt_boxes + + return output_dict + + +def get_evaluators(eval_config, categories, evaluator_options=None): + """Returns the evaluator class according to eval_config, valid for categories. + + Args: + eval_config: An `eval_pb2.EvalConfig`. + categories: A list of dicts, each of which has the following keys - + 'id': (required) an integer id uniquely identifying this category. + 'name': (required) string representing category name e.g., 'cat', 'dog'. + evaluator_options: A dictionary of metric names (see + EVAL_METRICS_CLASS_DICT) to `DetectionEvaluator` initialization + keyword arguments. 
For example:
+      evaluator_options = {
+        'coco_detection_metrics': {'include_metrics_per_category': True}
+      }
+
+  Returns:
+    A list of instances of DetectionEvaluator.
+
+  Raises:
+    ValueError: if metric is not in the metric class dictionary.
+  """
+  evaluator_options = evaluator_options or {}
+  eval_metric_fn_keys = eval_config.metrics_set
+  if not eval_metric_fn_keys:
+    eval_metric_fn_keys = [EVAL_DEFAULT_METRIC]
+  evaluators_list = []
+  for eval_metric_fn_key in eval_metric_fn_keys:
+    if eval_metric_fn_key not in EVAL_METRICS_CLASS_DICT:
+      raise ValueError('Metric not found: {}'.format(eval_metric_fn_key))
+    kwargs_dict = (evaluator_options[eval_metric_fn_key] if eval_metric_fn_key
+                   in evaluator_options else {})
+    evaluators_list.append(EVAL_METRICS_CLASS_DICT[eval_metric_fn_key](
+        categories,
+        **kwargs_dict))
+  return evaluators_list
+
+
+def get_eval_metric_ops_for_evaluators(eval_config,
+                                       categories,
+                                       eval_dict):
+  """Returns eval metrics ops to use with `tf.estimator.EstimatorSpec`.
+
+  Args:
+    eval_config: An `eval_pb2.EvalConfig`.
+    categories: A list of dicts, each of which has the following keys -
+      'id': (required) an integer id uniquely identifying this category.
+      'name': (required) string representing category name e.g., 'cat', 'dog'.
+    eval_dict: An evaluation dictionary, returned from
+      result_dict_for_single_example().
+
+  Returns:
+    A dictionary of metric names to tuple of value_op and update_op that can be
+    used as eval metric ops in tf.EstimatorSpec.
+  """
+  eval_metric_ops = {}
+  evaluator_options = evaluator_options_from_eval_config(eval_config)
+  evaluators_list = get_evaluators(eval_config, categories, evaluator_options)
+  for evaluator in evaluators_list:
+    eval_metric_ops.update(evaluator.get_estimator_eval_metric_ops(
+        eval_dict))
+  return eval_metric_ops
+
+
+def evaluator_options_from_eval_config(eval_config):
+  """Produces a dictionary of evaluation options for each eval metric.
+
+  Args:
+    eval_config: An `eval_pb2.EvalConfig`.
+
+  Returns:
+    evaluator_options: A dictionary of metric names (see
+      EVAL_METRICS_CLASS_DICT) to `DetectionEvaluator` initialization
+      keyword arguments. For example:
+      evaluator_options = {
+        'coco_detection_metrics': {'include_metrics_per_category': True}
+      }
+  """
+  eval_metric_fn_keys = eval_config.metrics_set
+  evaluator_options = {}
+  for eval_metric_fn_key in eval_metric_fn_keys:
+    if eval_metric_fn_key in ('coco_detection_metrics', 'coco_mask_metrics'):
+      evaluator_options[eval_metric_fn_key] = {
+          'include_metrics_per_category': (
+              eval_config.include_metrics_per_category)
+      }
+    elif eval_metric_fn_key == 'precision_at_recall_detection_metrics':
+      evaluator_options[eval_metric_fn_key] = {
+          'recall_lower_bound': (eval_config.recall_lower_bound),
+          'recall_upper_bound': (eval_config.recall_upper_bound)
+      }
+  return evaluator_options
diff --git a/eval_util_test.py b/eval_util_test.py
new file mode 100644
index 0000000..833aaa9
--- /dev/null
+++ b/eval_util_test.py
@@ -0,0 +1,310 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for eval_util.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized + +import numpy as np +import six +from six.moves import range +import tensorflow as tf + +from object_detection import eval_util +from object_detection.core import standard_fields as fields +from object_detection.protos import eval_pb2 +from object_detection.utils import test_case + + +class EvalUtilTest(test_case.TestCase, parameterized.TestCase): + + def _get_categories_list(self): + return [{'id': 1, 'name': 'person'}, + {'id': 2, 'name': 'dog'}, + {'id': 3, 'name': 'cat'}] + + def _make_evaluation_dict(self, + resized_groundtruth_masks=False, + batch_size=1, + max_gt_boxes=None, + scale_to_absolute=False): + input_data_fields = fields.InputDataFields + detection_fields = fields.DetectionResultFields + + image = tf.zeros(shape=[batch_size, 20, 20, 3], dtype=tf.uint8) + if batch_size == 1: + key = tf.constant('image1') + else: + key = tf.constant([str(i) for i in range(batch_size)]) + detection_boxes = tf.tile(tf.constant([[[0., 0., 1., 1.]]]), + multiples=[batch_size, 1, 1]) + detection_scores = tf.tile(tf.constant([[0.8]]), multiples=[batch_size, 1]) + detection_classes = tf.tile(tf.constant([[0]]), multiples=[batch_size, 1]) + detection_masks = tf.tile(tf.ones(shape=[1, 1, 20, 20], dtype=tf.float32), + multiples=[batch_size, 1, 1, 1]) + num_detections = tf.ones([batch_size]) + groundtruth_boxes = tf.constant([[0., 0., 1., 1.]]) + groundtruth_classes = tf.constant([1]) + groundtruth_instance_masks = tf.ones(shape=[1, 20, 20], dtype=tf.uint8) + if resized_groundtruth_masks: + groundtruth_instance_masks = tf.ones(shape=[1, 10, 10], dtype=tf.uint8) + + if batch_size > 1: + groundtruth_boxes = tf.tile(tf.expand_dims(groundtruth_boxes, 0), + multiples=[batch_size, 1, 1]) + groundtruth_classes = tf.tile(tf.expand_dims(groundtruth_classes, 0), + multiples=[batch_size, 1]) + groundtruth_instance_masks = tf.tile( + tf.expand_dims(groundtruth_instance_masks, 0), + multiples=[batch_size, 1, 1, 1]) + + detections = { + detection_fields.detection_boxes: detection_boxes, + detection_fields.detection_scores: detection_scores, + detection_fields.detection_classes: detection_classes, + detection_fields.detection_masks: detection_masks, + detection_fields.num_detections: num_detections + } + groundtruth = { + input_data_fields.groundtruth_boxes: groundtruth_boxes, + input_data_fields.groundtruth_classes: groundtruth_classes, + input_data_fields.groundtruth_instance_masks: groundtruth_instance_masks + } + if batch_size > 1: + return eval_util.result_dict_for_batched_example( + image, key, detections, groundtruth, + scale_to_absolute=scale_to_absolute, + max_gt_boxes=max_gt_boxes) + else: + return eval_util.result_dict_for_single_example( + image, key, detections, groundtruth, + scale_to_absolute=scale_to_absolute) + + @parameterized.parameters( + {'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': True}, + {'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': True}, + {'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False}, + {'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False} + ) + def test_get_eval_metric_ops_for_coco_detections(self, batch_size=1, + max_gt_boxes=None, + scale_to_absolute=False): 
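+    # A minimal eval config requesting only COCO box metrics. The fake eval
+    # dict built below contains one detection that exactly matches the single
+    # groundtruth box, so mAP is expected to come out as 1.0.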
+ eval_config = eval_pb2.EvalConfig() + eval_config.metrics_set.extend(['coco_detection_metrics']) + categories = self._get_categories_list() + eval_dict = self._make_evaluation_dict(batch_size=batch_size, + max_gt_boxes=max_gt_boxes, + scale_to_absolute=scale_to_absolute) + metric_ops = eval_util.get_eval_metric_ops_for_evaluators( + eval_config, categories, eval_dict) + _, update_op = metric_ops['DetectionBoxes_Precision/mAP'] + + with self.test_session() as sess: + metrics = {} + for key, (value_op, _) in six.iteritems(metric_ops): + metrics[key] = value_op + sess.run(update_op) + metrics = sess.run(metrics) + self.assertAlmostEqual(1.0, metrics['DetectionBoxes_Precision/mAP']) + self.assertNotIn('DetectionMasks_Precision/mAP', metrics) + + @parameterized.parameters( + {'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': True}, + {'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': True}, + {'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False}, + {'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False} + ) + def test_get_eval_metric_ops_for_coco_detections_and_masks( + self, batch_size=1, max_gt_boxes=None, scale_to_absolute=False): + eval_config = eval_pb2.EvalConfig() + eval_config.metrics_set.extend( + ['coco_detection_metrics', 'coco_mask_metrics']) + categories = self._get_categories_list() + eval_dict = self._make_evaluation_dict(batch_size=batch_size, + max_gt_boxes=max_gt_boxes, + scale_to_absolute=scale_to_absolute) + metric_ops = eval_util.get_eval_metric_ops_for_evaluators( + eval_config, categories, eval_dict) + _, update_op_boxes = metric_ops['DetectionBoxes_Precision/mAP'] + _, update_op_masks = metric_ops['DetectionMasks_Precision/mAP'] + + with self.test_session() as sess: + metrics = {} + for key, (value_op, _) in six.iteritems(metric_ops): + metrics[key] = value_op + sess.run(update_op_boxes) + sess.run(update_op_masks) + metrics = sess.run(metrics) + self.assertAlmostEqual(1.0, metrics['DetectionBoxes_Precision/mAP']) + self.assertAlmostEqual(1.0, metrics['DetectionMasks_Precision/mAP']) + + @parameterized.parameters( + {'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': True}, + {'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': True}, + {'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False}, + {'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False} + ) + def test_get_eval_metric_ops_for_coco_detections_and_resized_masks( + self, batch_size=1, max_gt_boxes=None, scale_to_absolute=False): + eval_config = eval_pb2.EvalConfig() + eval_config.metrics_set.extend( + ['coco_detection_metrics', 'coco_mask_metrics']) + categories = self._get_categories_list() + eval_dict = self._make_evaluation_dict(batch_size=batch_size, + max_gt_boxes=max_gt_boxes, + scale_to_absolute=scale_to_absolute, + resized_groundtruth_masks=True) + metric_ops = eval_util.get_eval_metric_ops_for_evaluators( + eval_config, categories, eval_dict) + _, update_op_boxes = metric_ops['DetectionBoxes_Precision/mAP'] + _, update_op_masks = metric_ops['DetectionMasks_Precision/mAP'] + + with self.test_session() as sess: + metrics = {} + for key, (value_op, _) in six.iteritems(metric_ops): + metrics[key] = value_op + sess.run(update_op_boxes) + sess.run(update_op_masks) + metrics = sess.run(metrics) + self.assertAlmostEqual(1.0, metrics['DetectionBoxes_Precision/mAP']) + self.assertAlmostEqual(1.0, metrics['DetectionMasks_Precision/mAP']) + + def test_get_eval_metric_ops_raises_error_with_unsupported_metric(self): + eval_config = 
eval_pb2.EvalConfig() + eval_config.metrics_set.extend(['unsupported_metric']) + categories = self._get_categories_list() + eval_dict = self._make_evaluation_dict() + with self.assertRaises(ValueError): + eval_util.get_eval_metric_ops_for_evaluators( + eval_config, categories, eval_dict) + + def test_get_eval_metric_ops_for_evaluators(self): + eval_config = eval_pb2.EvalConfig() + eval_config.metrics_set.extend([ + 'coco_detection_metrics', 'coco_mask_metrics', + 'precision_at_recall_detection_metrics' + ]) + eval_config.include_metrics_per_category = True + eval_config.recall_lower_bound = 0.2 + eval_config.recall_upper_bound = 0.6 + + evaluator_options = eval_util.evaluator_options_from_eval_config( + eval_config) + self.assertTrue(evaluator_options['coco_detection_metrics'] + ['include_metrics_per_category']) + self.assertTrue( + evaluator_options['coco_mask_metrics']['include_metrics_per_category']) + self.assertAlmostEqual( + evaluator_options['precision_at_recall_detection_metrics'] + ['recall_lower_bound'], eval_config.recall_lower_bound) + self.assertAlmostEqual( + evaluator_options['precision_at_recall_detection_metrics'] + ['recall_upper_bound'], eval_config.recall_upper_bound) + + def test_get_evaluator_with_evaluator_options(self): + eval_config = eval_pb2.EvalConfig() + eval_config.metrics_set.extend( + ['coco_detection_metrics', 'precision_at_recall_detection_metrics']) + eval_config.include_metrics_per_category = True + eval_config.recall_lower_bound = 0.2 + eval_config.recall_upper_bound = 0.6 + categories = self._get_categories_list() + + evaluator_options = eval_util.evaluator_options_from_eval_config( + eval_config) + evaluator = eval_util.get_evaluators(eval_config, categories, + evaluator_options) + + self.assertTrue(evaluator[0]._include_metrics_per_category) + self.assertAlmostEqual(evaluator[1]._recall_lower_bound, + eval_config.recall_lower_bound) + self.assertAlmostEqual(evaluator[1]._recall_upper_bound, + eval_config.recall_upper_bound) + + def test_get_evaluator_with_no_evaluator_options(self): + eval_config = eval_pb2.EvalConfig() + eval_config.metrics_set.extend( + ['coco_detection_metrics', 'precision_at_recall_detection_metrics']) + eval_config.include_metrics_per_category = True + eval_config.recall_lower_bound = 0.2 + eval_config.recall_upper_bound = 0.6 + categories = self._get_categories_list() + + evaluator = eval_util.get_evaluators( + eval_config, categories, evaluator_options=None) + + # Even though we are setting eval_config.include_metrics_per_category = True + # and bounds on recall, these options are never passed into the + # DetectionEvaluator constructor (via `evaluator_options`). 
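+    # The evaluators therefore keep their constructor defaults: per-category
+    # metrics disabled and recall bounds of [0.0, 1.0].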
+    self.assertFalse(evaluator[0]._include_metrics_per_category)
+    self.assertAlmostEqual(evaluator[1]._recall_lower_bound, 0.0)
+    self.assertAlmostEqual(evaluator[1]._recall_upper_bound, 1.0)
+
+  def test_padded_image_result_dict(self):
+
+    input_data_fields = fields.InputDataFields
+    detection_fields = fields.DetectionResultFields
+    key = tf.constant([str(i) for i in range(2)])
+
+    detection_boxes = np.array([[[0., 0., 1., 1.]], [[0.0, 0.0, 0.5, 0.5]]],
+                               dtype=np.float32)
+    detections = {
+        detection_fields.detection_boxes:
+            tf.constant(detection_boxes),
+        detection_fields.detection_scores:
+            tf.constant([[1.], [1.]]),
+        detection_fields.detection_classes:
+            tf.constant([[1], [2]]),
+        detection_fields.num_detections:
+            tf.constant([1, 1])
+    }
+
+    gt_boxes = detection_boxes
+    groundtruth = {
+        input_data_fields.groundtruth_boxes:
+            tf.constant(gt_boxes),
+        input_data_fields.groundtruth_classes:
+            tf.constant([[1.], [1.]]),
+    }
+
+    image = tf.zeros((2, 100, 100, 3), dtype=tf.float32)
+
+    true_image_shapes = tf.constant([[100, 100, 3], [50, 100, 3]])
+    original_image_spatial_shapes = tf.constant([[200, 200], [150, 300]])
+
+    result = eval_util.result_dict_for_batched_example(
+        image, key, detections, groundtruth,
+        scale_to_absolute=True,
+        true_image_shapes=true_image_shapes,
+        original_image_spatial_shapes=original_image_spatial_shapes,
+        max_gt_boxes=tf.constant(1))
+
+    with self.test_session() as sess:
+      result = sess.run(result)
+      self.assertAllEqual(
+          [[[0., 0., 200., 200.]], [[0.0, 0.0, 150., 150.]]],
+          result[input_data_fields.groundtruth_boxes])
+
+      # Unlike groundtruth, detections are not re-framed by true_image_shape;
+      # they are scaled directly by original_image_spatial_shape.
+      self.assertAllEqual(
+          [[[0., 0., 200., 200.]], [[0.0, 0.0, 75., 150.]]],
+          result[detection_fields.detection_boxes])
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/export_inference_graph.py b/export_inference_graph.py
new file mode 100644
index 0000000..bc56475
--- /dev/null
+++ b/export_inference_graph.py
@@ -0,0 +1,162 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+r"""Tool to export an object detection model for inference.
+
+Prepares an object detection tensorflow graph for inference using a model
+configuration and a trained checkpoint. Outputs an inference
+graph, associated checkpoint files, a frozen inference graph and a
+SavedModel (https://tensorflow.github.io/serving/serving_basic.html).
+
+The inference graph contains one of three input nodes depending on the
+user-specified option.
+  * `image_tensor`: Accepts a uint8 4-D tensor of shape [None, None, None, 3]
+  * `encoded_image_string_tensor`: Accepts a 1-D string tensor of shape [None]
+    containing encoded PNG or JPEG images. Image resolutions are expected to be
+    the same if more than 1 image is provided.
+  * `tf_example`: Accepts a 1-D string tensor of shape [None] containing
+    serialized TFExample protos.
+    Image resolutions are expected to be the same if more than 1 image is
+    provided.
+
+and the following output nodes returned by model.postprocess(..):
+  * `num_detections`: Outputs float32 tensors of the form [batch]
+    that specify the number of valid boxes per image in the batch.
+  * `detection_boxes`: Outputs float32 tensors of the form
+    [batch, num_boxes, 4] containing detected boxes.
+  * `detection_scores`: Outputs float32 tensors of the form
+    [batch, num_boxes] containing class scores for the detections.
+  * `detection_classes`: Outputs float32 tensors of the form
+    [batch, num_boxes] containing classes for the detections.
+  * `raw_detection_boxes`: Outputs float32 tensors of the form
+    [batch, raw_num_boxes, 4] containing detection boxes without
+    post-processing.
+  * `raw_detection_scores`: Outputs float32 tensors of the form
+    [batch, raw_num_boxes, num_classes_with_background] containing class score
+    logits for raw detection boxes.
+  * `detection_masks`: (Optional) Outputs float32 tensors of the form
+    [batch, num_boxes, mask_height, mask_width] containing predicted instance
+    masks for each box, if it's present in the dictionary of postprocessed
+    tensors returned by the model.
+  * `detection_multiclass_scores`: (Optional) Outputs float32 tensor of shape
+    [batch, num_boxes, num_classes_with_background] containing the class score
+    distribution for detected boxes, including background if any.
+  * `detection_features`: (Optional) float32 tensor of shape
+    [batch, num_boxes, roi_height, roi_width, depth]
+    containing classifier features.
+
+Notes:
+  * This tool uses `use_moving_averages` from eval_config to decide which
+    weights to freeze.
+
+Example Usage:
+--------------
+python export_inference_graph.py \
+    --input_type image_tensor \
+    --pipeline_config_path path/to/ssd_inception_v2.config \
+    --trained_checkpoint_prefix path/to/model.ckpt \
+    --output_directory path/to/exported_model_directory
+
+The expected output would be in the directory
+path/to/exported_model_directory (which is created if it does not exist)
+with contents:
+  - inference_graph.pbtxt
+  - model.ckpt.data-00000-of-00001
+  - model.ckpt.index
+  - model.ckpt.meta
+  - frozen_inference_graph.pb
+  - saved_model (a directory)
+
+Config overrides (see the `config_override` flag) are text protobufs
+(also of type pipeline_pb2.TrainEvalPipelineConfig) which are used to override
+certain fields in the provided pipeline_config_path. These are useful for
+making small changes to the inference graph that differ from the training or
+eval config.
+
+Example Usage (in which we change the second stage post-processing score
+threshold to be 0.5):
+
+python export_inference_graph.py \
+    --input_type image_tensor \
+    --pipeline_config_path path/to/ssd_inception_v2.config \
+    --trained_checkpoint_prefix path/to/model.ckpt \
+    --output_directory path/to/exported_model_directory \
+    --config_override " \
+            model{ \
+              faster_rcnn { \
+                second_stage_post_processing { \
+                  batch_non_max_suppression { \
+                    score_threshold: 0.5 \
+                  } \
+                } \
+              } \
+            }"
+"""
+import tensorflow as tf
+from google.protobuf import text_format
+from object_detection import exporter
+from object_detection.protos import pipeline_pb2
+
+slim = tf.contrib.slim
+flags = tf.app.flags
+
+flags.DEFINE_string('input_type', 'image_tensor', 'Type of input node. Can be '
+                    'one of [`image_tensor`, `encoded_image_string_tensor`, '
+                    '`tf_example`]')
+flags.DEFINE_string('input_shape', None,
+                    'If input_type is `image_tensor`, this can explicitly set '
+                    'the shape of this input tensor to a fixed size. The '
+                    'dimensions are to be provided as a comma-separated list '
+                    'of integers. A value of -1 can be used for unknown '
+                    'dimensions. If not specified, for an `image_tensor`, the '
+                    'default shape will be partially specified as '
+                    '`[None, None, None, 3]`.')
+flags.DEFINE_string('pipeline_config_path', None,
+                    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
+                    'file.')
+flags.DEFINE_string('trained_checkpoint_prefix', None,
+                    'Path to trained checkpoint, typically of the form '
+                    'path/to/model.ckpt')
+flags.DEFINE_string('output_directory', None, 'Path to write outputs.')
+flags.DEFINE_string('config_override', '',
+                    'pipeline_pb2.TrainEvalPipelineConfig '
+                    'text proto to override pipeline_config_path.')
+flags.DEFINE_boolean('write_inference_graph', False,
+                     'If true, writes inference graph to disk.')
+tf.app.flags.mark_flag_as_required('pipeline_config_path')
+tf.app.flags.mark_flag_as_required('trained_checkpoint_prefix')
+tf.app.flags.mark_flag_as_required('output_directory')
+FLAGS = flags.FLAGS
+
+
+def main(_):
+  pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
+  with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
+    text_format.Merge(f.read(), pipeline_config)
+  text_format.Merge(FLAGS.config_override, pipeline_config)
+  if FLAGS.input_shape:
+    input_shape = [
+        int(dim) if dim != '-1' else None
+        for dim in FLAGS.input_shape.split(',')
+    ]
+  else:
+    input_shape = None
+  exporter.export_inference_graph(
+      FLAGS.input_type, pipeline_config, FLAGS.trained_checkpoint_prefix,
+      FLAGS.output_directory, input_shape=input_shape,
+      write_inference_graph=FLAGS.write_inference_graph)
+
+
+if __name__ == '__main__':
+  tf.app.run()
diff --git a/export_tflite_ssd_graph.py b/export_tflite_ssd_graph.py
new file mode 100644
index 0000000..2cce386
--- /dev/null
+++ b/export_tflite_ssd_graph.py
@@ -0,0 +1,143 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+r"""Exports an SSD detection model to use with tf-lite.
+
+Output file:
+* A tflite compatible frozen graph - $output_directory/tflite_graph.pb
+
+The exported graph has the following input and output nodes.
+
+Inputs:
+'normalized_input_image_tensor': a float32 tensor of shape
+[1, height, width, 3] containing the normalized input image. Note that the
+height and width must be compatible with the height and width configured in
+the fixed_shape_resizer options in the pipeline config proto.
+
+In the floating point Mobilenet model, 'normalized_input_image_tensor' has
+values between [-1, 1). This typically means mapping each pixel (linearly)
+to a value between [-1, 1). Input image
+values between 0 and 255 are scaled by (1/128.0) and then a value of
+-1 is added to them to ensure the range is [-1, 1).
+In the quantized Mobilenet model, 'normalized_input_image_tensor' has values
+between [0, 255].
+In general, see the `preprocess` function defined in the feature extractor
+class in the object_detection/models directory.
+
+Outputs:
+If add_postprocessing_op is true: the frozen graph adds a
+  TFLite_Detection_PostProcess custom op node, which has four outputs:
+  detection_boxes: a float32 tensor of shape [1, num_boxes, 4] with box
+  locations
+  detection_classes: a float32 tensor of shape [1, num_boxes]
+  with class indices
+  detection_scores: a float32 tensor of shape [1, num_boxes]
+  with class scores
+  num_boxes: a float32 tensor of size 1 containing the number of detected
+  boxes
+else:
+  the graph has two outputs:
+   'raw_outputs/box_encodings': a float32 tensor of shape [1, num_anchors, 4]
+   containing the encoded box predictions.
+   'raw_outputs/class_predictions': a float32 tensor of shape
+   [1, num_anchors, num_classes] containing the class scores for each anchor
+   after applying score conversion.
+
+Example Usage:
+--------------
+python object_detection/export_tflite_ssd_graph.py \
+    --pipeline_config_path path/to/ssd_mobilenet.config \
+    --trained_checkpoint_prefix path/to/model.ckpt \
+    --output_directory path/to/exported_model_directory
+
+The expected output would be in the directory
+path/to/exported_model_directory (which is created if it does not exist)
+with contents:
+ - tflite_graph.pbtxt
+ - tflite_graph.pb
+Config overrides (see the `config_override` flag) are text protobufs
+(also of type pipeline_pb2.TrainEvalPipelineConfig) which are used to override
+certain fields in the provided pipeline_config_path. These are useful for
+making small changes to the inference graph that differ from the training or
+eval config. Overrides are merged into the already-parsed config with protobuf
+text_format.Merge, so only the fields being changed need to appear in the
+override text.
+
+Example Usage (in which we change the NMS iou_threshold to be 0.5 and
+NMS score_threshold to be 0.0):
+python object_detection/export_tflite_ssd_graph.py \
+    --pipeline_config_path path/to/ssd_mobilenet.config \
+    --trained_checkpoint_prefix path/to/model.ckpt \
+    --output_directory path/to/exported_model_directory \
+    --config_override " \
+        model{ \
+          ssd{ \
+            post_processing { \
+              batch_non_max_suppression { \
+                score_threshold: 0.0 \
+                iou_threshold: 0.5 \
+              } \
+            } \
+          } \
+        } \
+        "
+"""
+
+import tensorflow as tf
+from google.protobuf import text_format
+from object_detection import export_tflite_ssd_graph_lib
+from object_detection.protos import pipeline_pb2
+
+flags = tf.app.flags
+flags.DEFINE_string('output_directory', None, 'Path to write outputs.')
+flags.DEFINE_string(
+    'pipeline_config_path', None,
+    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
+    'file.')
+flags.DEFINE_string('trained_checkpoint_prefix', None, 'Checkpoint prefix.')
+flags.DEFINE_integer('max_detections', 10,
+                     'Maximum number of detections (boxes) to show.')
+flags.DEFINE_integer('max_classes_per_detection', 1,
+                     'Maximum number of classes to output per detection box.')
+flags.DEFINE_integer(
+    'detections_per_class', 100,
+    'Number of anchors used per class in Regular Non-Max-Suppression.')
+flags.DEFINE_bool('add_postprocessing_op', True,
+                  'Add TFLite custom op for postprocessing to the graph.')
+flags.DEFINE_bool(
+    'use_regular_nms', False,
+    'Flag to set postprocessing op to use Regular NMS instead of Fast NMS.')
+flags.DEFINE_string(
+    'config_override', '', 'pipeline_pb2.TrainEvalPipelineConfig '
+    'text proto to override pipeline_config_path.')
+
+FLAGS = flags.FLAGS
+
+
+def main(argv):
+  del argv  # Unused.
+  flags.mark_flag_as_required('output_directory')
+  flags.mark_flag_as_required('pipeline_config_path')
+  flags.mark_flag_as_required('trained_checkpoint_prefix')
+
+  pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
+
+  with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
+    text_format.Merge(f.read(), pipeline_config)
+  text_format.Merge(FLAGS.config_override, pipeline_config)
+  export_tflite_ssd_graph_lib.export_tflite_graph(
+      pipeline_config, FLAGS.trained_checkpoint_prefix, FLAGS.output_directory,
+      FLAGS.add_postprocessing_op, FLAGS.max_detections,
+      FLAGS.max_classes_per_detection, use_regular_nms=FLAGS.use_regular_nms)
+
+
+if __name__ == '__main__':
+  tf.app.run(main)
diff --git a/export_tflite_ssd_graph_lib.py b/export_tflite_ssd_graph_lib.py
new file mode 100644
index 0000000..014ecea
--- /dev/null
+++ b/export_tflite_ssd_graph_lib.py
@@ -0,0 +1,332 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Exports an SSD detection model to use with tf-lite.
+
+See export_tflite_ssd_graph.py for usage.
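+
+The exported graph exposes the raw box encodings, the converted class scores
+and the constant anchors as named outputs; decoding and NMS either happen
+inside the TFLite_Detection_PostProcess custom op (when add_postprocessing_op
+is set) or are left to the caller.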
+"""
+import os
+import tempfile
+import numpy as np
+import tensorflow as tf
+from tensorflow.core.framework import attr_value_pb2
+from tensorflow.core.framework import types_pb2
+from tensorflow.core.protobuf import saver_pb2
+from tensorflow.tools.graph_transforms import TransformGraph
+from object_detection import exporter
+from object_detection.builders import graph_rewriter_builder
+from object_detection.builders import model_builder
+from object_detection.builders import post_processing_builder
+from object_detection.core import box_list
+
+_DEFAULT_NUM_CHANNELS = 3
+_DEFAULT_NUM_COORD_BOX = 4
+
+
+def get_const_center_size_encoded_anchors(anchors):
+  """Exports center-size encoded anchors as a constant tensor.
+
+  Args:
+    anchors: a float32 tensor of shape [num_anchors, 4] containing the anchor
+      boxes
+
+  Returns:
+    encoded_anchors: a float32 constant tensor of shape [num_anchors, 4]
+      containing the anchor boxes.
+  """
+  anchor_boxlist = box_list.BoxList(anchors)
+  y, x, h, w = anchor_boxlist.get_center_coordinates_and_sizes()
+  num_anchors = y.get_shape().as_list()  # static shape, i.e. [num_anchors]
+
+  with tf.Session() as sess:
+    y_out, x_out, h_out, w_out = sess.run([y, x, h, w])
+  encoded_anchors = tf.constant(
+      np.transpose(np.stack((y_out, x_out, h_out, w_out))),
+      dtype=tf.float32,
+      shape=[num_anchors[0], _DEFAULT_NUM_COORD_BOX],
+      name='anchors')
+  return encoded_anchors
+
+
+def append_postprocessing_op(frozen_graph_def,
+                             max_detections,
+                             max_classes_per_detection,
+                             nms_score_threshold,
+                             nms_iou_threshold,
+                             num_classes,
+                             scale_values,
+                             detections_per_class=100,
+                             use_regular_nms=False,
+                             additional_output_tensors=()):
+  """Appends postprocessing custom op.
+
+  Args:
+    frozen_graph_def: Frozen GraphDef for SSD model after freezing the
+      checkpoint
+    max_detections: Maximum number of detections (boxes) to show
+    max_classes_per_detection: Number of classes to display per detection
+    nms_score_threshold: Score threshold used in Non-maximal suppression in
+      post-processing
+    nms_iou_threshold: Intersection-over-union threshold used in Non-maximal
+      suppression in post-processing
+    num_classes: number of classes in SSD detector
+    scale_values: scale_values is a dict with the following key-value pairs
+      {y_scale: 10, x_scale: 10, h_scale: 5, w_scale: 5} that is used to
+      decode center-size boxes
+    detections_per_class: In regular NonMaxSuppression, number of anchors used
+      for NonMaxSuppression per class
+    use_regular_nms: Flag to set postprocessing op to use Regular NMS instead
+      of Fast NMS.
+    additional_output_tensors: Array of additional tensor names to output.
+      Tensors are appended after postprocessing output.
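+
+  Note: nms_score_threshold, nms_iou_threshold and each scale_values entry
+  are passed in as single-element Python sets and are consumed below via
+  set.pop().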
+ + Returns: + transformed_graph_def: Frozen GraphDef with postprocessing custom op + appended + TFLite_Detection_PostProcess custom op node has four outputs: + detection_boxes: a float32 tensor of shape [1, num_boxes, 4] with box + locations + detection_classes: a float32 tensor of shape [1, num_boxes] + with class indices + detection_scores: a float32 tensor of shape [1, num_boxes] + with class scores + num_boxes: a float32 tensor of size 1 containing the number of detected + boxes + """ + new_output = frozen_graph_def.node.add() + new_output.op = 'TFLite_Detection_PostProcess' + new_output.name = 'TFLite_Detection_PostProcess' + new_output.attr['_output_quantized'].CopyFrom( + attr_value_pb2.AttrValue(b=True)) + new_output.attr['_output_types'].list.type.extend([ + types_pb2.DT_FLOAT, types_pb2.DT_FLOAT, types_pb2.DT_FLOAT, + types_pb2.DT_FLOAT + ]) + new_output.attr['_support_output_type_float_in_quantized_op'].CopyFrom( + attr_value_pb2.AttrValue(b=True)) + new_output.attr['max_detections'].CopyFrom( + attr_value_pb2.AttrValue(i=max_detections)) + new_output.attr['max_classes_per_detection'].CopyFrom( + attr_value_pb2.AttrValue(i=max_classes_per_detection)) + new_output.attr['nms_score_threshold'].CopyFrom( + attr_value_pb2.AttrValue(f=nms_score_threshold.pop())) + new_output.attr['nms_iou_threshold'].CopyFrom( + attr_value_pb2.AttrValue(f=nms_iou_threshold.pop())) + new_output.attr['num_classes'].CopyFrom( + attr_value_pb2.AttrValue(i=num_classes)) + + new_output.attr['y_scale'].CopyFrom( + attr_value_pb2.AttrValue(f=scale_values['y_scale'].pop())) + new_output.attr['x_scale'].CopyFrom( + attr_value_pb2.AttrValue(f=scale_values['x_scale'].pop())) + new_output.attr['h_scale'].CopyFrom( + attr_value_pb2.AttrValue(f=scale_values['h_scale'].pop())) + new_output.attr['w_scale'].CopyFrom( + attr_value_pb2.AttrValue(f=scale_values['w_scale'].pop())) + new_output.attr['detections_per_class'].CopyFrom( + attr_value_pb2.AttrValue(i=detections_per_class)) + new_output.attr['use_regular_nms'].CopyFrom( + attr_value_pb2.AttrValue(b=use_regular_nms)) + + new_output.input.extend( + ['raw_outputs/box_encodings', 'raw_outputs/class_predictions', 'anchors']) + # Transform the graph to append new postprocessing op + input_names = [] + output_names = ['TFLite_Detection_PostProcess' + ] + list(additional_output_tensors) + transforms = ['strip_unused_nodes'] + transformed_graph_def = TransformGraph(frozen_graph_def, input_names, + output_names, transforms) + return transformed_graph_def + + +def export_tflite_graph(pipeline_config, + trained_checkpoint_prefix, + output_dir, + add_postprocessing_op, + max_detections, + max_classes_per_detection, + detections_per_class=100, + use_regular_nms=False, + binary_graph_name='tflite_graph.pb', + txt_graph_name='tflite_graph.pbtxt', + additional_output_tensors=()): + """Exports a tflite compatible graph and anchors for ssd detection model. + + Anchors are written to a tensor and tflite compatible graph + is written to output_dir/tflite_graph.pb. + + Args: + pipeline_config: a pipeline.proto object containing the configuration for + SSD model to export. + trained_checkpoint_prefix: a file prefix for the checkpoint containing the + trained parameters of the SSD model. + output_dir: A directory to write the tflite graph and anchor file to. 
+    add_postprocessing_op: If true, the frozen graph adds a
+      TFLite_Detection_PostProcess custom op
+    max_detections: Maximum number of detections (boxes) to show
+    max_classes_per_detection: Number of classes to display per detection
+    detections_per_class: In regular NonMaxSuppression, number of anchors used
+      for NonMaxSuppression per class
+    use_regular_nms: Flag to set postprocessing op to use Regular NMS instead
+      of Fast NMS.
+    binary_graph_name: Name of the exported graph file in binary format.
+    txt_graph_name: Name of the exported graph file in text format.
+    additional_output_tensors: Array of additional tensor names to output.
+      Additional tensors are appended to the end of output tensor list.
+
+  Raises:
+    ValueError: if the pipeline config contains a model other than ssd, or
+      uses an image resizer other than fixed_shape_resizer.
+  """
+  tf.gfile.MakeDirs(output_dir)
+  if pipeline_config.model.WhichOneof('model') != 'ssd':
+    raise ValueError('Only ssd models are supported in tflite. '
+                     'Found {} in config'.format(
+                         pipeline_config.model.WhichOneof('model')))
+
+  num_classes = pipeline_config.model.ssd.num_classes
+  nms_score_threshold = {
+      pipeline_config.model.ssd.post_processing.batch_non_max_suppression
+      .score_threshold
+  }
+  nms_iou_threshold = {
+      pipeline_config.model.ssd.post_processing.batch_non_max_suppression
+      .iou_threshold
+  }
+  scale_values = {}
+  scale_values['y_scale'] = {
+      pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale
+  }
+  scale_values['x_scale'] = {
+      pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale
+  }
+  scale_values['h_scale'] = {
+      pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale
+  }
+  scale_values['w_scale'] = {
+      pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale
+  }
+
+  image_resizer_config = pipeline_config.model.ssd.image_resizer
+  image_resizer = image_resizer_config.WhichOneof('image_resizer_oneof')
+  num_channels = _DEFAULT_NUM_CHANNELS
+  if image_resizer == 'fixed_shape_resizer':
+    height = image_resizer_config.fixed_shape_resizer.height
+    width = image_resizer_config.fixed_shape_resizer.width
+    if image_resizer_config.fixed_shape_resizer.convert_to_grayscale:
+      num_channels = 1
+    shape = [1, height, width, num_channels]
+  else:
+    raise ValueError(
+        'Only fixed_shape_resizer '
+        'is supported with tflite. Found {}'.format(
+            image_resizer_config.WhichOneof('image_resizer_oneof')))
+
+  image = tf.placeholder(
+      tf.float32, shape=shape, name='normalized_input_image_tensor')
+
+  detection_model = model_builder.build(
+      pipeline_config.model, is_training=False)
+  predicted_tensors = detection_model.predict(image, true_image_shapes=None)
+  # The score conversion occurs before the post-processing custom op
+  _, score_conversion_fn = post_processing_builder.build(
+      pipeline_config.model.ssd.post_processing)
+  class_predictions = score_conversion_fn(
+      predicted_tensors['class_predictions_with_background'])
+
+  with tf.name_scope('raw_outputs'):
+    # 'raw_outputs/box_encodings': a float32 tensor of shape
+    # [1, num_anchors, 4] containing the encoded box predictions. Note that
+    # these are raw predictions: no Non-Max suppression and no center-size
+    # box decoding have been applied to them.
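+    # For reference, the decode step (performed later, either by the caller
+    # or by the TFLite_Detection_PostProcess op) follows the faster_rcnn box
+    # coder convention:
+    #   ycenter = (ty / y_scale) * ha + ycenter_a
+    #   h = exp(th / h_scale) * ha
+    # and similarly for x / w.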
+ tf.identity(predicted_tensors['box_encodings'], name='box_encodings') + # 'raw_outputs/class_predictions': a float32 tensor of shape + # [1, num_anchors, num_classes] containing the class scores for each anchor + # after applying score conversion. + tf.identity(class_predictions, name='class_predictions') + # 'anchors': a float32 tensor of shape + # [4, num_anchors] containing the anchors as a constant node. + tf.identity( + get_const_center_size_encoded_anchors(predicted_tensors['anchors']), + name='anchors') + + # Add global step to the graph, so we know the training step number when we + # evaluate the model. + tf.train.get_or_create_global_step() + + # graph rewriter + is_quantized = pipeline_config.HasField('graph_rewriter') + if is_quantized: + graph_rewriter_config = pipeline_config.graph_rewriter + graph_rewriter_fn = graph_rewriter_builder.build( + graph_rewriter_config, is_training=False) + graph_rewriter_fn() + + if pipeline_config.model.ssd.feature_extractor.HasField('fpn'): + exporter.rewrite_nn_resize_op(is_quantized) + + # freeze the graph + saver_kwargs = {} + if pipeline_config.eval_config.use_moving_averages: + saver_kwargs['write_version'] = saver_pb2.SaverDef.V1 + moving_average_checkpoint = tempfile.NamedTemporaryFile() + exporter.replace_variable_values_with_moving_averages( + tf.get_default_graph(), trained_checkpoint_prefix, + moving_average_checkpoint.name) + checkpoint_to_use = moving_average_checkpoint.name + else: + checkpoint_to_use = trained_checkpoint_prefix + + saver = tf.train.Saver(**saver_kwargs) + input_saver_def = saver.as_saver_def() + frozen_graph_def = exporter.freeze_graph_with_def_protos( + input_graph_def=tf.get_default_graph().as_graph_def(), + input_saver_def=input_saver_def, + input_checkpoint=checkpoint_to_use, + output_node_names=','.join([ + 'raw_outputs/box_encodings', 'raw_outputs/class_predictions', + 'anchors' + ] + list(additional_output_tensors)), + restore_op_name='save/restore_all', + filename_tensor_name='save/Const:0', + clear_devices=True, + output_graph='', + initializer_nodes='') + + # Add new operation to do post processing in a custom op (TF Lite only) + if add_postprocessing_op: + transformed_graph_def = append_postprocessing_op( + frozen_graph_def, + max_detections, + max_classes_per_detection, + nms_score_threshold, + nms_iou_threshold, + num_classes, + scale_values, + detections_per_class, + use_regular_nms, + additional_output_tensors=additional_output_tensors) + else: + # Return frozen without adding post-processing custom op + transformed_graph_def = frozen_graph_def + + binary_graph = os.path.join(output_dir, binary_graph_name) + with tf.gfile.GFile(binary_graph, 'wb') as f: + f.write(transformed_graph_def.SerializeToString()) + txt_graph = os.path.join(output_dir, txt_graph_name) + with tf.gfile.GFile(txt_graph, 'w') as f: + f.write(str(transformed_graph_def)) diff --git a/export_tflite_ssd_graph_lib_bak.py b/export_tflite_ssd_graph_lib_bak.py new file mode 100644 index 0000000..cf276fd --- /dev/null +++ b/export_tflite_ssd_graph_lib_bak.py @@ -0,0 +1,330 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Exports an SSD detection model to use with tf-lite. + +See export_tflite_ssd_graph.py for usage. +""" +import os +import tempfile +import numpy as np +import tensorflow as tf +from tensorflow.core.framework import attr_value_pb2 +from tensorflow.core.framework import types_pb2 +from tensorflow.core.protobuf import saver_pb2 +from tensorflow.tools.graph_transforms import TransformGraph +from object_detection import exporter +from object_detection.builders import graph_rewriter_builder +from object_detection.builders import model_builder +from object_detection.builders import post_processing_builder +from object_detection.core import box_list + +_DEFAULT_NUM_CHANNELS = 3 +_DEFAULT_NUM_COORD_BOX = 4 + + +def get_const_center_size_encoded_anchors(anchors): + """Exports center-size encoded anchors as a constant tensor. + + Args: + anchors: a float32 tensor of shape [num_anchors, 4] containing the anchor + boxes + + Returns: + encoded_anchors: a float32 constant tensor of shape [num_anchors, 4] + containing the anchor boxes. + """ + anchor_boxlist = box_list.BoxList(anchors) + y, x, h, w = anchor_boxlist.get_center_coordinates_and_sizes() + num_anchors = y.get_shape().as_list() + + with tf.Session() as sess: + y_out, x_out, h_out, w_out = sess.run([y, x, h, w]) + encoded_anchors = tf.constant( + np.transpose(np.stack((y_out, x_out, h_out, w_out))), + dtype=tf.float32, + shape=[num_anchors[0], _DEFAULT_NUM_COORD_BOX], + name='anchors') + return encoded_anchors + + +def append_postprocessing_op(frozen_graph_def, + max_detections, + max_classes_per_detection, + nms_score_threshold, + nms_iou_threshold, + num_classes, + scale_values, + detections_per_class=100, + use_regular_nms=False, + additional_output_tensors=()): + """Appends postprocessing custom op. + + Args: + frozen_graph_def: Frozen GraphDef for SSD model after freezing the + checkpoint + max_detections: Maximum number of detections (boxes) to show + max_classes_per_detection: Number of classes to display per detection + nms_score_threshold: Score threshold used in Non-maximal suppression in + post-processing + nms_iou_threshold: Intersection-over-union threshold used in Non-maximal + suppression in post-processing + num_classes: number of classes in SSD detector + scale_values: scale values is a dict with following key-value pairs + {y_scale: 10, x_scale: 10, h_scale: 5, w_scale: 5} that are used in decode + centersize boxes + detections_per_class: In regular NonMaxSuppression, number of anchors used + for NonMaxSuppression per class + use_regular_nms: Flag to set postprocessing op to use Regular NMS instead of + Fast NMS. + additional_output_tensors: Array of additional tensor names to output. + Tensors are appended after postprocessing output. 
+ + Returns: + transformed_graph_def: Frozen GraphDef with postprocessing custom op + appended + TFLite_Detection_PostProcess custom op node has four outputs: + detection_boxes: a float32 tensor of shape [1, num_boxes, 4] with box + locations + detection_classes: a float32 tensor of shape [1, num_boxes] + with class indices + detection_scores: a float32 tensor of shape [1, num_boxes] + with class scores + num_boxes: a float32 tensor of size 1 containing the number of detected + boxes + """ + new_output = frozen_graph_def.node.add() + new_output.op = 'TFLite_Detection_PostProcess' + new_output.name = 'TFLite_Detection_PostProcess' + new_output.attr['_output_quantized'].CopyFrom( + attr_value_pb2.AttrValue(b=True)) + new_output.attr['_output_types'].list.type.extend([ + types_pb2.DT_FLOAT, types_pb2.DT_FLOAT, types_pb2.DT_FLOAT, + types_pb2.DT_FLOAT + ]) + new_output.attr['_support_output_type_float_in_quantized_op'].CopyFrom( + attr_value_pb2.AttrValue(b=True)) + new_output.attr['max_detections'].CopyFrom( + attr_value_pb2.AttrValue(i=max_detections)) + new_output.attr['max_classes_per_detection'].CopyFrom( + attr_value_pb2.AttrValue(i=max_classes_per_detection)) + new_output.attr['nms_score_threshold'].CopyFrom( + attr_value_pb2.AttrValue(f=nms_score_threshold.pop())) + new_output.attr['nms_iou_threshold'].CopyFrom( + attr_value_pb2.AttrValue(f=nms_iou_threshold.pop())) + new_output.attr['num_classes'].CopyFrom( + attr_value_pb2.AttrValue(i=num_classes)) + + new_output.attr['y_scale'].CopyFrom( + attr_value_pb2.AttrValue(f=scale_values['y_scale'].pop())) + new_output.attr['x_scale'].CopyFrom( + attr_value_pb2.AttrValue(f=scale_values['x_scale'].pop())) + new_output.attr['h_scale'].CopyFrom( + attr_value_pb2.AttrValue(f=scale_values['h_scale'].pop())) + new_output.attr['w_scale'].CopyFrom( + attr_value_pb2.AttrValue(f=scale_values['w_scale'].pop())) + new_output.attr['detections_per_class'].CopyFrom( + attr_value_pb2.AttrValue(i=detections_per_class)) + new_output.attr['use_regular_nms'].CopyFrom( + attr_value_pb2.AttrValue(b=use_regular_nms)) + + new_output.input.extend( + ['raw_outputs/box_encodings', 'raw_outputs/class_predictions', 'anchors']) + # Transform the graph to append new postprocessing op + input_names = [] + output_names = ['TFLite_Detection_PostProcess' + ] + list(additional_output_tensors) + transforms = ['strip_unused_nodes'] + transformed_graph_def = TransformGraph(frozen_graph_def, input_names, + output_names, transforms) + return transformed_graph_def + + +def export_tflite_graph(pipeline_config, + trained_checkpoint_prefix, + output_dir, + add_postprocessing_op, + max_detections, + max_classes_per_detection, + detections_per_class=100, + use_regular_nms=False, + binary_graph_name='tflite_graph.pb', + txt_graph_name='tflite_graph.pbtxt', + additional_output_tensors=()): + """Exports a tflite compatible graph and anchors for ssd detection model. + + Anchors are written to a tensor and tflite compatible graph + is written to output_dir/tflite_graph.pb. + + Args: + pipeline_config: a pipeline.proto object containing the configuration for + SSD model to export. + trained_checkpoint_prefix: a file prefix for the checkpoint containing the + trained parameters of the SSD model. + output_dir: A directory to write the tflite graph and anchor file to. 
+ add_postprocessing_op: If add_postprocessing_op is true: frozen graph adds a + TFLite_Detection_PostProcess custom op + max_detections: Maximum number of detections (boxes) to show + max_classes_per_detection: Number of classes to display per detection + detections_per_class: In regular NonMaxSuppression, number of anchors used + for NonMaxSuppression per class + use_regular_nms: Flag to set postprocessing op to use Regular NMS instead of + Fast NMS. + binary_graph_name: Name of the exported graph file in binary format. + txt_graph_name: Name of the exported graph file in text format. + additional_output_tensors: Array of additional tensor names to output. + Additional tensors are appended to the end of output tensor list. + + Raises: + ValueError: if the pipeline config contains models other than ssd or uses an + fixed_shape_resizer and provides a shape as well. + """ + tf.gfile.MakeDirs(output_dir) + if pipeline_config.model.WhichOneof('model') != 'ssd': + raise ValueError('Only ssd models are supported in tflite. ' + 'Found {} in config'.format( + pipeline_config.model.WhichOneof('model'))) + + num_classes = pipeline_config.model.ssd.num_classes + nms_score_threshold = { + pipeline_config.model.ssd.post_processing.batch_non_max_suppression + .score_threshold + } + nms_iou_threshold = { + pipeline_config.model.ssd.post_processing.batch_non_max_suppression + .iou_threshold + } + scale_values = {} + scale_values['y_scale'] = { + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale + } + scale_values['x_scale'] = { + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale + } + scale_values['h_scale'] = { + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale + } + scale_values['w_scale'] = { + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale + } + + image_resizer_config = pipeline_config.model.ssd.image_resizer + image_resizer = image_resizer_config.WhichOneof('image_resizer_oneof') + num_channels = _DEFAULT_NUM_CHANNELS + if image_resizer == 'fixed_shape_resizer': + height = image_resizer_config.fixed_shape_resizer.height + width = image_resizer_config.fixed_shape_resizer.width + if image_resizer_config.fixed_shape_resizer.convert_to_grayscale: + num_channels = 1 + shape = [1, height, width, num_channels] + else: + raise ValueError( + 'Only fixed_shape_resizer' + 'is supported with tflite. Found {}'.format( + image_resizer_config.WhichOneof('image_resizer_oneof'))) + + image = tf.placeholder( + tf.float32, shape=shape, name='normalized_input_image_tensor') + + detection_model = model_builder.build( + pipeline_config.model, is_training=False) + predicted_tensors = detection_model.predict(image, true_image_shapes=None) + # The score conversion occurs before the post-processing custom op + _, score_conversion_fn = post_processing_builder.build( + pipeline_config.model.ssd.post_processing) + class_predictions = score_conversion_fn( + predicted_tensors['class_predictions_with_background']) + + with tf.name_scope('raw_outputs'): + # 'raw_outputs/box_encodings': a float32 tensor of shape [1, num_anchors, 4] + # containing the encoded box predictions. Note that these are raw + # predictions and no Non-Max suppression is applied on them and + # no decode center size boxes is applied to them. 
+ tf.identity(predicted_tensors['box_encodings'], name='box_encodings') + # 'raw_outputs/class_predictions': a float32 tensor of shape + # [1, num_anchors, num_classes] containing the class scores for each anchor + # after applying score conversion. + tf.identity(class_predictions, name='class_predictions') + # 'anchors': a float32 tensor of shape + # [4, num_anchors] containing the anchors as a constant node. + tf.identity( + get_const_center_size_encoded_anchors(predicted_tensors['anchors']), + name='anchors') + + # Add global step to the graph, so we know the training step number when we + # evaluate the model. + tf.train.get_or_create_global_step() + + # graph rewriter + is_quantized = pipeline_config.HasField('graph_rewriter') + if is_quantized: + graph_rewriter_config = pipeline_config.graph_rewriter + graph_rewriter_fn = graph_rewriter_builder.build( + graph_rewriter_config, is_training=False) + graph_rewriter_fn() + + if pipeline_config.model.ssd.feature_extractor.HasField('fpn'): + exporter.rewrite_nn_resize_op(is_quantized) + + # freeze the graph + saver_kwargs = {} + if pipeline_config.eval_config.use_moving_averages: + saver_kwargs['write_version'] = saver_pb2.SaverDef.V1 + moving_average_checkpoint = tempfile.NamedTemporaryFile() + exporter.replace_variable_values_with_moving_averages( + tf.get_default_graph(), trained_checkpoint_prefix, + moving_average_checkpoint.name) + checkpoint_to_use = moving_average_checkpoint.name + else: + checkpoint_to_use = trained_checkpoint_prefix + + saver = tf.train.Saver(**saver_kwargs) + input_saver_def = saver.as_saver_def() + frozen_graph_def = exporter.freeze_graph_with_def_protos( + input_graph_def=tf.get_default_graph().as_graph_def(), + input_saver_def=input_saver_def, + input_checkpoint=checkpoint_to_use, + output_node_names=','.join([ + 'raw_outputs/box_encodings', 'raw_outputs/class_predictions', + 'anchors' + ] + list(additional_output_tensors)), + restore_op_name='save/restore_all', + filename_tensor_name='save/Const:0', + clear_devices=True, + output_graph='', + initializer_nodes='') + + # Add new operation to do post processing in a custom op (TF Lite only) + if add_postprocessing_op: + transformed_graph_def = append_postprocessing_op( + frozen_graph_def, + max_detections, + max_classes_per_detection, + nms_score_threshold, + nms_iou_threshold, + num_classes, + scale_values, + detections_per_class, + use_regular_nms, + additional_output_tensors=additional_output_tensors) + else: + # Return frozen without adding post-processing custom op + transformed_graph_def = frozen_graph_def + + binary_graph = os.path.join(output_dir, binary_graph_name) + with tf.gfile.GFile(binary_graph, 'wb') as f: + f.write(transformed_graph_def.SerializeToString()) + txt_graph = os.path.join(output_dir, txt_graph_name) + with tf.gfile.GFile(txt_graph, 'w') as f: + f.write(str(transformed_graph_def)) diff --git a/export_tflite_ssd_graph_lib_test.py b/export_tflite_ssd_graph_lib_test.py new file mode 100644 index 0000000..b469d59 --- /dev/null +++ b/export_tflite_ssd_graph_lib_test.py @@ -0,0 +1,414 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for object_detection.export_tflite_ssd_graph.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import os +import numpy as np +import six +import tensorflow as tf +from tensorflow.core.framework import types_pb2 +from object_detection import export_tflite_ssd_graph_lib +from object_detection import exporter +from object_detection.builders import graph_rewriter_builder +from object_detection.builders import model_builder +from object_detection.core import model +from object_detection.protos import graph_rewriter_pb2 +from object_detection.protos import pipeline_pb2 +from object_detection.protos import post_processing_pb2 + +if six.PY2: + import mock # pylint: disable=g-import-not-at-top +else: + from unittest import mock # pylint: disable=g-import-not-at-top + + +class FakeModel(model.DetectionModel): + + def __init__(self, add_detection_masks=False): + self._add_detection_masks = add_detection_masks + + def preprocess(self, inputs): + pass + + def predict(self, preprocessed_inputs, true_image_shapes): + features = tf.contrib.slim.conv2d(preprocessed_inputs, 3, 1) + with tf.control_dependencies([features]): + prediction_tensors = { + 'box_encodings': + tf.constant([[[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.8, 0.8]]], + tf.float32), + 'class_predictions_with_background': + tf.constant([[[0.7, 0.6], [0.9, 0.0]]], tf.float32), + } + with tf.control_dependencies( + [tf.convert_to_tensor(features.get_shape().as_list()[1:3])]): + prediction_tensors['anchors'] = tf.constant( + [[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 1.0]], tf.float32) + return prediction_tensors + + def postprocess(self, prediction_tensors, true_image_shapes): + pass + + def restore_map(self, checkpoint_path, from_detection_checkpoint): + pass + + def loss(self, prediction_dict, true_image_shapes): + pass + + def regularization_losses(self): + pass + + def updates(self): + pass + + +class ExportTfliteGraphTest(tf.test.TestCase): + + def _save_checkpoint_from_mock_model(self, + checkpoint_path, + use_moving_averages, + quantize=False, + num_channels=3): + g = tf.Graph() + with g.as_default(): + mock_model = FakeModel() + inputs = tf.placeholder(tf.float32, shape=[1, 10, 10, num_channels]) + mock_model.predict(inputs, true_image_shapes=None) + if use_moving_averages: + tf.train.ExponentialMovingAverage(0.0).apply() + tf.train.get_or_create_global_step() + if quantize: + graph_rewriter_config = graph_rewriter_pb2.GraphRewriter() + graph_rewriter_config.quantization.delay = 500000 + graph_rewriter_fn = graph_rewriter_builder.build( + graph_rewriter_config, is_training=False) + graph_rewriter_fn() + + saver = tf.train.Saver() + init = tf.global_variables_initializer() + with self.test_session() as sess: + sess.run(init) + saver.save(sess, checkpoint_path) + + def _assert_quant_vars_exists(self, tflite_graph_file): + with tf.gfile.Open(tflite_graph_file) as f: + graph_string = f.read() + print(graph_string) + self.assertTrue('quant' in graph_string) + + def 
_import_graph_and_run_inference(self, tflite_graph_file, num_channels=3): + """Imports a tflite graph, runs single inference and returns outputs.""" + graph = tf.Graph() + with graph.as_default(): + graph_def = tf.GraphDef() + with tf.gfile.Open(tflite_graph_file) as f: + graph_def.ParseFromString(f.read()) + tf.import_graph_def(graph_def, name='') + input_tensor = graph.get_tensor_by_name('normalized_input_image_tensor:0') + box_encodings = graph.get_tensor_by_name('raw_outputs/box_encodings:0') + class_predictions = graph.get_tensor_by_name( + 'raw_outputs/class_predictions:0') + with self.test_session(graph) as sess: + [box_encodings_np, class_predictions_np] = sess.run( + [box_encodings, class_predictions], + feed_dict={input_tensor: np.random.rand(1, 10, 10, num_channels)}) + return box_encodings_np, class_predictions_np + + def _export_graph(self, + pipeline_config, + num_channels=3, + additional_output_tensors=()): + """Exports a tflite graph.""" + output_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(output_dir, 'model.ckpt') + tflite_graph_file = os.path.join(output_dir, 'tflite_graph.pb') + + quantize = pipeline_config.HasField('graph_rewriter') + self._save_checkpoint_from_mock_model( + trained_checkpoint_prefix, + use_moving_averages=pipeline_config.eval_config.use_moving_averages, + quantize=quantize, + num_channels=num_channels) + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + + with tf.Graph().as_default(): + tf.identity( + tf.constant([[1, 2], [3, 4]], tf.uint8), name='UnattachedTensor') + export_tflite_ssd_graph_lib.export_tflite_graph( + pipeline_config=pipeline_config, + trained_checkpoint_prefix=trained_checkpoint_prefix, + output_dir=output_dir, + add_postprocessing_op=False, + max_detections=10, + max_classes_per_detection=1, + additional_output_tensors=additional_output_tensors) + return tflite_graph_file + + def _export_graph_with_postprocessing_op(self, + pipeline_config, + num_channels=3, + additional_output_tensors=()): + """Exports a tflite graph with custom postprocessing op.""" + output_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(output_dir, 'model.ckpt') + tflite_graph_file = os.path.join(output_dir, 'tflite_graph.pb') + + quantize = pipeline_config.HasField('graph_rewriter') + self._save_checkpoint_from_mock_model( + trained_checkpoint_prefix, + use_moving_averages=pipeline_config.eval_config.use_moving_averages, + quantize=quantize, + num_channels=num_channels) + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + + with tf.Graph().as_default(): + tf.identity( + tf.constant([[1, 2], [3, 4]], tf.uint8), name='UnattachedTensor') + export_tflite_ssd_graph_lib.export_tflite_graph( + pipeline_config=pipeline_config, + trained_checkpoint_prefix=trained_checkpoint_prefix, + output_dir=output_dir, + add_postprocessing_op=True, + max_detections=10, + max_classes_per_detection=1, + additional_output_tensors=additional_output_tensors) + return tflite_graph_file + + def test_export_tflite_graph_with_moving_averages(self): + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = True + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.height = 10 + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.width = 10 + pipeline_config.model.ssd.num_classes = 2 + 
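+    # The four scale values below follow the standard SSD / Faster R-CNN box
+    # coder convention: center offsets are encoded as
+    # ty = 10 * (y_center - y_anchor) / anchor_h (likewise tx), and sizes as
+    # th = 5 * log(h / anchor_h) (likewise tw).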
pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale = 10.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale = 10.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale = 5.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale = 5.0 + tflite_graph_file = self._export_graph(pipeline_config) + self.assertTrue(os.path.exists(tflite_graph_file)) + + (box_encodings_np, class_predictions_np + ) = self._import_graph_and_run_inference(tflite_graph_file) + self.assertAllClose(box_encodings_np, + [[[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.8, 0.8]]]) + self.assertAllClose(class_predictions_np, [[[0.7, 0.6], [0.9, 0.0]]]) + + def test_export_tflite_graph_without_moving_averages(self): + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.height = 10 + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.width = 10 + pipeline_config.model.ssd.num_classes = 2 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale = 10.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale = 10.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale = 5.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale = 5.0 + tflite_graph_file = self._export_graph(pipeline_config) + self.assertTrue(os.path.exists(tflite_graph_file)) + (box_encodings_np, class_predictions_np + ) = self._import_graph_and_run_inference(tflite_graph_file) + self.assertAllClose(box_encodings_np, + [[[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.8, 0.8]]]) + self.assertAllClose(class_predictions_np, [[[0.7, 0.6], [0.9, 0.0]]]) + + def test_export_tflite_graph_grayscale(self): + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.height = 10 + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.width = 10 + (pipeline_config.model.ssd.image_resizer.fixed_shape_resizer + ).convert_to_grayscale = True + pipeline_config.model.ssd.num_classes = 2 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale = 10.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale = 10.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale = 5.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale = 5.0 + tflite_graph_file = self._export_graph(pipeline_config, num_channels=1) + self.assertTrue(os.path.exists(tflite_graph_file)) + (box_encodings_np, + class_predictions_np) = self._import_graph_and_run_inference( + tflite_graph_file, num_channels=1) + self.assertAllClose(box_encodings_np, + [[[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.8, 0.8]]]) + self.assertAllClose(class_predictions_np, [[[0.7, 0.6], [0.9, 0.0]]]) + + def test_export_tflite_graph_with_quantization(self): + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.height = 10 + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.width = 10 + pipeline_config.graph_rewriter.quantization.delay = 500000 + pipeline_config.model.ssd.num_classes = 2 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale = 10.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale = 10.0 + 
pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale = 5.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale = 5.0 + tflite_graph_file = self._export_graph(pipeline_config) + self.assertTrue(os.path.exists(tflite_graph_file)) + self._assert_quant_vars_exists(tflite_graph_file) + (box_encodings_np, class_predictions_np + ) = self._import_graph_and_run_inference(tflite_graph_file) + self.assertAllClose(box_encodings_np, + [[[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.8, 0.8]]]) + self.assertAllClose(class_predictions_np, [[[0.7, 0.6], [0.9, 0.0]]]) + + def test_export_tflite_graph_with_softmax_score_conversion(self): + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + pipeline_config.model.ssd.post_processing.score_converter = ( + post_processing_pb2.PostProcessing.SOFTMAX) + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.height = 10 + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.width = 10 + pipeline_config.model.ssd.num_classes = 2 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale = 10.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale = 10.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale = 5.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale = 5.0 + tflite_graph_file = self._export_graph(pipeline_config) + self.assertTrue(os.path.exists(tflite_graph_file)) + (box_encodings_np, class_predictions_np + ) = self._import_graph_and_run_inference(tflite_graph_file) + self.assertAllClose(box_encodings_np, + [[[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.8, 0.8]]]) + self.assertAllClose(class_predictions_np, + [[[0.524979, 0.475021], [0.710949, 0.28905]]]) + + def test_export_tflite_graph_with_sigmoid_score_conversion(self): + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + pipeline_config.model.ssd.post_processing.score_converter = ( + post_processing_pb2.PostProcessing.SIGMOID) + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.height = 10 + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.width = 10 + pipeline_config.model.ssd.num_classes = 2 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale = 10.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale = 10.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale = 5.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale = 5.0 + tflite_graph_file = self._export_graph(pipeline_config) + self.assertTrue(os.path.exists(tflite_graph_file)) + (box_encodings_np, class_predictions_np + ) = self._import_graph_and_run_inference(tflite_graph_file) + self.assertAllClose(box_encodings_np, + [[[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.8, 0.8]]]) + self.assertAllClose(class_predictions_np, + [[[0.668188, 0.645656], [0.710949, 0.5]]]) + + def test_export_tflite_graph_with_postprocessing_op(self): + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + pipeline_config.model.ssd.post_processing.score_converter = ( + post_processing_pb2.PostProcessing.SIGMOID) + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.height = 10 + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.width = 10 + pipeline_config.model.ssd.num_classes = 2 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale = 10.0 + 
pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale = 10.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale = 5.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale = 5.0 + tflite_graph_file = self._export_graph_with_postprocessing_op( + pipeline_config) + self.assertTrue(os.path.exists(tflite_graph_file)) + graph = tf.Graph() + with graph.as_default(): + graph_def = tf.GraphDef() + with tf.gfile.Open(tflite_graph_file) as f: + graph_def.ParseFromString(f.read()) + all_op_names = [node.name for node in graph_def.node] + self.assertIn('TFLite_Detection_PostProcess', all_op_names) + self.assertNotIn('UnattachedTensor', all_op_names) + for node in graph_def.node: + if node.name == 'TFLite_Detection_PostProcess': + self.assertTrue(node.attr['_output_quantized'].b is True) + self.assertTrue( + node.attr['_support_output_type_float_in_quantized_op'].b is True) + self.assertTrue(node.attr['y_scale'].f == 10.0) + self.assertTrue(node.attr['x_scale'].f == 10.0) + self.assertTrue(node.attr['h_scale'].f == 5.0) + self.assertTrue(node.attr['w_scale'].f == 5.0) + self.assertTrue(node.attr['num_classes'].i == 2) + self.assertTrue( + all([ + t == types_pb2.DT_FLOAT + for t in node.attr['_output_types'].list.type + ])) + + def test_export_tflite_graph_with_additional_tensors(self): + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.height = 10 + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.width = 10 + tflite_graph_file = self._export_graph( + pipeline_config, additional_output_tensors=['UnattachedTensor']) + self.assertTrue(os.path.exists(tflite_graph_file)) + graph = tf.Graph() + with graph.as_default(): + graph_def = tf.GraphDef() + with tf.gfile.Open(tflite_graph_file) as f: + graph_def.ParseFromString(f.read()) + all_op_names = [node.name for node in graph_def.node] + self.assertIn('UnattachedTensor', all_op_names) + + def test_export_tflite_graph_with_postprocess_op_and_additional_tensors(self): + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + pipeline_config.model.ssd.post_processing.score_converter = ( + post_processing_pb2.PostProcessing.SIGMOID) + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.height = 10 + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.width = 10 + pipeline_config.model.ssd.num_classes = 2 + tflite_graph_file = self._export_graph_with_postprocessing_op( + pipeline_config, additional_output_tensors=['UnattachedTensor']) + self.assertTrue(os.path.exists(tflite_graph_file)) + graph = tf.Graph() + with graph.as_default(): + graph_def = tf.GraphDef() + with tf.gfile.Open(tflite_graph_file) as f: + graph_def.ParseFromString(f.read()) + all_op_names = [node.name for node in graph_def.node] + self.assertIn('TFLite_Detection_PostProcess', all_op_names) + self.assertIn('UnattachedTensor', all_op_names) + + @mock.patch.object(exporter, 'rewrite_nn_resize_op') + def test_export_with_nn_resize_op_not_called_without_fpn(self, mock_get): + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.height = 10 + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.width = 10 + tflite_graph_file = self._export_graph_with_postprocessing_op( + pipeline_config) + self.assertTrue(os.path.exists(tflite_graph_file)) + 
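+    # With no fpn field set on the feature extractor, the exporter is
+    # expected to skip the nearest-neighbor resize rewrite entirely.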
mock_get.assert_not_called() + + @mock.patch.object(exporter, 'rewrite_nn_resize_op') + def test_export_with_nn_resize_op_called_with_fpn(self, mock_get): + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.height = 10 + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.width = 10 + pipeline_config.model.ssd.feature_extractor.fpn.min_level = 3 + pipeline_config.model.ssd.feature_extractor.fpn.max_level = 7 + tflite_graph_file = self._export_graph_with_postprocessing_op( + pipeline_config) + self.assertTrue(os.path.exists(tflite_graph_file)) + mock_get.assert_called_once() + + +if __name__ == '__main__': + tf.test.main() diff --git a/exporter.py b/exporter.py new file mode 100644 index 0000000..a109d3b --- /dev/null +++ b/exporter.py @@ -0,0 +1,543 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Functions to export object detection inference graph.""" +import os +import tempfile +import tensorflow as tf +from tensorflow.contrib.quantize.python import graph_matcher +from tensorflow.core.protobuf import saver_pb2 +from tensorflow.python.tools import freeze_graph # pylint: disable=g-direct-tensorflow-import +from object_detection.builders import graph_rewriter_builder +from object_detection.builders import model_builder +from object_detection.core import standard_fields as fields +from object_detection.data_decoders import tf_example_decoder +from object_detection.utils import config_util +from object_detection.utils import shape_utils + +slim = tf.contrib.slim + +freeze_graph_with_def_protos = freeze_graph.freeze_graph_with_def_protos + + +def rewrite_nn_resize_op(is_quantized=False): + """Replaces a custom nearest-neighbor resize op with the Tensorflow version. + + Some graphs use this custom version for TPU-compatibility. + + Args: + is_quantized: True if the default graph is quantized. 
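+
+  Example (a sketch; `build_fpn_graph` is a stand-in for any graph
+  construction that emits the Pack/Reshape upsampling pattern this
+  rewrite targets):
+
+    with tf.Graph().as_default():
+      build_fpn_graph()  # hypothetical helper, not part of this module
+      rewrite_nn_resize_op(is_quantized=False)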
+ """ + def remove_nn(): + """Remove nearest neighbor upsampling structure and replace with TF op.""" + input_pattern = graph_matcher.OpTypePattern( + 'FakeQuantWithMinMaxVars' if is_quantized else '*') + stack_1_pattern = graph_matcher.OpTypePattern( + 'Pack', inputs=[input_pattern, input_pattern], ordered_inputs=False) + stack_2_pattern = graph_matcher.OpTypePattern( + 'Pack', inputs=[stack_1_pattern, stack_1_pattern], ordered_inputs=False) + reshape_pattern = graph_matcher.OpTypePattern( + 'Reshape', inputs=[stack_2_pattern, 'Const'], ordered_inputs=False) + consumer_pattern = graph_matcher.OpTypePattern( + 'Add|AddV2|Max|Mul', inputs=[reshape_pattern, '*'], + ordered_inputs=False) + + match_counter = 0 + matcher = graph_matcher.GraphMatcher(consumer_pattern) + for match in matcher.match_graph(tf.get_default_graph()): + match_counter += 1 + projection_op = match.get_op(input_pattern) + reshape_op = match.get_op(reshape_pattern) + consumer_op = match.get_op(consumer_pattern) + nn_resize = tf.image.resize_nearest_neighbor( + projection_op.outputs[0], + reshape_op.outputs[0].shape.dims[1:3], + align_corners=False, + name=os.path.split(reshape_op.name)[0] + '/resize_nearest_neighbor') + + for index, op_input in enumerate(consumer_op.inputs): + if op_input == reshape_op.outputs[0]: + consumer_op._update_input(index, nn_resize) # pylint: disable=protected-access + break + + tf.logging.info('Found and fixed {} matches'.format(match_counter)) + return match_counter + + # Applying twice because both inputs to Add could be NN pattern + total_removals = 0 + while remove_nn(): + total_removals += 1 + # This number is chosen based on the nas-fpn architecture. + if total_removals > 4: + raise ValueError('Graph removal encountered a infinite loop.') + + +def replace_variable_values_with_moving_averages(graph, + current_checkpoint_file, + new_checkpoint_file, + no_ema_collection=None): + """Replaces variable values in the checkpoint with their moving averages. + + If the current checkpoint has shadow variables maintaining moving averages of + the variables defined in the graph, this function generates a new checkpoint + where the variables contain the values of their moving averages. + + Args: + graph: a tf.Graph object. + current_checkpoint_file: a checkpoint containing both original variables and + their moving averages. + new_checkpoint_file: file path to write a new checkpoint. + no_ema_collection: A list of namescope substrings to match the variables + to eliminate EMA. + """ + with graph.as_default(): + variable_averages = tf.train.ExponentialMovingAverage(0.0) + ema_variables_to_restore = variable_averages.variables_to_restore() + ema_variables_to_restore = config_util.remove_unecessary_ema( + ema_variables_to_restore, no_ema_collection) + with tf.Session() as sess: + read_saver = tf.train.Saver(ema_variables_to_restore) + read_saver.restore(sess, current_checkpoint_file) + write_saver = tf.train.Saver() + write_saver.save(sess, new_checkpoint_file) + + +def _image_tensor_input_placeholder(input_shape=None): + """Returns input placeholder and a 4-D uint8 image tensor.""" + if input_shape is None: + input_shape = (None, None, None, 3) + input_tensor = tf.placeholder( + dtype=tf.uint8, shape=input_shape, name='image_tensor') + return input_tensor, input_tensor + + +def _tf_example_input_placeholder(input_shape=None): + """Returns input that accepts a batch of strings with tf examples. + + Args: + input_shape: the shape to resize the output decoded images to (optional). 
+ + Returns: + a tuple of input placeholder and the output decoded images. + """ + batch_tf_example_placeholder = tf.placeholder( + tf.string, shape=[None], name='tf_example') + def decode(tf_example_string_tensor): + tensor_dict = tf_example_decoder.TfExampleDecoder().decode( + tf_example_string_tensor) + image_tensor = tensor_dict[fields.InputDataFields.image] + if input_shape is not None: + image_tensor = tf.image.resize(image_tensor, input_shape[1:3]) + return image_tensor + return (batch_tf_example_placeholder, + shape_utils.static_or_dynamic_map_fn( + decode, + elems=batch_tf_example_placeholder, + dtype=tf.uint8, + parallel_iterations=32, + back_prop=False)) + + +def _encoded_image_string_tensor_input_placeholder(input_shape=None): + """Returns input that accepts a batch of PNG or JPEG strings. + + Args: + input_shape: the shape to resize the output decoded images to (optional). + + Returns: + a tuple of input placeholder and the output decoded images. + """ + batch_image_str_placeholder = tf.placeholder( + dtype=tf.string, + shape=[None], + name='encoded_image_string_tensor') + def decode(encoded_image_string_tensor): + image_tensor = tf.image.decode_image(encoded_image_string_tensor, + channels=3) + image_tensor.set_shape((None, None, 3)) + if input_shape is not None: + image_tensor = tf.image.resize(image_tensor, input_shape[1:3]) + return image_tensor + return (batch_image_str_placeholder, + tf.map_fn( + decode, + elems=batch_image_str_placeholder, + dtype=tf.uint8, + parallel_iterations=32, + back_prop=False)) + + +input_placeholder_fn_map = { + 'image_tensor': _image_tensor_input_placeholder, + 'encoded_image_string_tensor': + _encoded_image_string_tensor_input_placeholder, + 'tf_example': _tf_example_input_placeholder, +} + + +def add_output_tensor_nodes(postprocessed_tensors, + output_collection_name='inference_op'): + """Adds output nodes for detection boxes and scores. + + Adds the following nodes for output tensors - + * num_detections: float32 tensor of shape [batch_size]. + * detection_boxes: float32 tensor of shape [batch_size, num_boxes, 4] + containing detected boxes. + * detection_scores: float32 tensor of shape [batch_size, num_boxes] + containing scores for the detected boxes. + * detection_multiclass_scores: (Optional) float32 tensor of shape + [batch_size, num_boxes, num_classes_with_background] for containing class + score distribution for detected boxes including background if any. + * detection_features: (Optional) float32 tensor of shape + [batch, num_boxes, roi_height, roi_width, depth] + containing classifier features + for each detected box + * detection_classes: float32 tensor of shape [batch_size, num_boxes] + containing class predictions for the detected boxes. + * detection_keypoints: (Optional) float32 tensor of shape + [batch_size, num_boxes, num_keypoints, 2] containing keypoints for each + detection box. + * detection_masks: (Optional) float32 tensor of shape + [batch_size, num_boxes, mask_height, mask_width] containing masks for each + detection box. + + Args: + postprocessed_tensors: a dictionary containing the following fields + 'detection_boxes': [batch, max_detections, 4] + 'detection_scores': [batch, max_detections] + 'detection_multiclass_scores': [batch, max_detections, + num_classes_with_background] + 'detection_features': [batch, num_boxes, roi_height, roi_width, depth] + 'detection_classes': [batch, max_detections] + 'detection_masks': [batch, max_detections, mask_height, mask_width] + (optional). 
+      'detection_keypoints': [batch, max_detections, num_keypoints, 2]
+        (optional).
+      'num_detections': [batch]
+    output_collection_name: Name of collection to add output tensors to.
+
+  Returns:
+    A tensor dict containing the added output tensor nodes.
+  """
+  detection_fields = fields.DetectionResultFields
+  label_id_offset = 1
+  boxes = postprocessed_tensors.get(detection_fields.detection_boxes)
+  scores = postprocessed_tensors.get(detection_fields.detection_scores)
+  multiclass_scores = postprocessed_tensors.get(
+      detection_fields.detection_multiclass_scores)
+  box_classifier_features = postprocessed_tensors.get(
+      detection_fields.detection_features)
+  raw_boxes = postprocessed_tensors.get(detection_fields.raw_detection_boxes)
+  raw_scores = postprocessed_tensors.get(detection_fields.raw_detection_scores)
+  classes = postprocessed_tensors.get(
+      detection_fields.detection_classes) + label_id_offset
+  keypoints = postprocessed_tensors.get(detection_fields.detection_keypoints)
+  masks = postprocessed_tensors.get(detection_fields.detection_masks)
+  num_detections = postprocessed_tensors.get(detection_fields.num_detections)
+  outputs = {}
+  outputs[detection_fields.detection_boxes] = tf.identity(
+      boxes, name=detection_fields.detection_boxes)
+  outputs[detection_fields.detection_scores] = tf.identity(
+      scores, name=detection_fields.detection_scores)
+  if multiclass_scores is not None:
+    outputs[detection_fields.detection_multiclass_scores] = tf.identity(
+        multiclass_scores, name=detection_fields.detection_multiclass_scores)
+  if box_classifier_features is not None:
+    outputs[detection_fields.detection_features] = tf.identity(
+        box_classifier_features,
+        name=detection_fields.detection_features)
+  outputs[detection_fields.detection_classes] = tf.identity(
+      classes, name=detection_fields.detection_classes)
+  outputs[detection_fields.num_detections] = tf.identity(
+      num_detections, name=detection_fields.num_detections)
+  if raw_boxes is not None:
+    outputs[detection_fields.raw_detection_boxes] = tf.identity(
+        raw_boxes, name=detection_fields.raw_detection_boxes)
+  if raw_scores is not None:
+    outputs[detection_fields.raw_detection_scores] = tf.identity(
+        raw_scores, name=detection_fields.raw_detection_scores)
+  if keypoints is not None:
+    outputs[detection_fields.detection_keypoints] = tf.identity(
+        keypoints, name=detection_fields.detection_keypoints)
+  if masks is not None:
+    outputs[detection_fields.detection_masks] = tf.identity(
+        masks, name=detection_fields.detection_masks)
+  for output_key in outputs:
+    tf.add_to_collection(output_collection_name, outputs[output_key])
+
+  return outputs
+
+
+def write_saved_model(saved_model_path,
+                      frozen_graph_def,
+                      inputs,
+                      outputs):
+  """Writes SavedModel to disk.
+
+  Because the weights are baked into frozen_graph_def as constants, the
+  resulting SavedModel needs no checkpoint files at inference time. If the
+  model was trained with moving averages, they should already have been
+  folded into frozen_graph_def (e.g. via
+  replace_variable_values_with_moving_averages) before calling this function.
+
+  Args:
+    saved_model_path: Path to write SavedModel.
+    frozen_graph_def: tf.GraphDef holding frozen graph.
+    inputs: The input placeholder tensor.
+    outputs: A tensor dictionary containing the outputs of a DetectionModel.
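+
+  Example (a sketch; assumes `frozen_graph_def`, `placeholder_tensor` and
+  `outputs` were produced by a prior freeze_graph_with_def_protos /
+  build_detection_graph call):
+
+    write_saved_model(os.path.join(output_directory, 'saved_model'),
+                      frozen_graph_def, placeholder_tensor, outputs)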
+ """ + with tf.Graph().as_default(): + with tf.Session() as sess: + + tf.import_graph_def(frozen_graph_def, name='') + + builder = tf.saved_model.builder.SavedModelBuilder(saved_model_path) + + tensor_info_inputs = { + 'inputs': tf.saved_model.utils.build_tensor_info(inputs)} + tensor_info_outputs = {} + for k, v in outputs.items(): + tensor_info_outputs[k] = tf.saved_model.utils.build_tensor_info(v) + + detection_signature = ( + tf.saved_model.signature_def_utils.build_signature_def( + inputs=tensor_info_inputs, + outputs=tensor_info_outputs, + method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME + )) + + builder.add_meta_graph_and_variables( + sess, + [tf.saved_model.tag_constants.SERVING], + signature_def_map={ + tf.saved_model.signature_constants + .DEFAULT_SERVING_SIGNATURE_DEF_KEY: + detection_signature, + }, + ) + builder.save() + + +def write_graph_and_checkpoint(inference_graph_def, + model_path, + input_saver_def, + trained_checkpoint_prefix): + """Writes the graph and the checkpoint into disk.""" + for node in inference_graph_def.node: + node.device = '' + with tf.Graph().as_default(): + tf.import_graph_def(inference_graph_def, name='') + with tf.Session() as sess: + saver = tf.train.Saver( + saver_def=input_saver_def, save_relative_paths=True) + saver.restore(sess, trained_checkpoint_prefix) + saver.save(sess, model_path) + + +def _get_outputs_from_inputs(input_tensors, detection_model, + output_collection_name): + inputs = tf.cast(input_tensors, dtype=tf.float32) + preprocessed_inputs, true_image_shapes = detection_model.preprocess(inputs) + output_tensors = detection_model.predict( + preprocessed_inputs, true_image_shapes) + postprocessed_tensors = detection_model.postprocess( + output_tensors, true_image_shapes) + return add_output_tensor_nodes(postprocessed_tensors, + output_collection_name) + + +def build_detection_graph(input_type, detection_model, input_shape, + output_collection_name, graph_hook_fn): + """Build the detection graph.""" + if input_type not in input_placeholder_fn_map: + raise ValueError('Unknown input type: {}'.format(input_type)) + placeholder_args = {} + if input_shape is not None: + if (input_type != 'image_tensor' and + input_type != 'encoded_image_string_tensor' and + input_type != 'tf_example'): + raise ValueError('Can only specify input shape for `image_tensor`, ' + '`encoded_image_string_tensor`, or `tf_example` ' + 'inputs.') + placeholder_args['input_shape'] = input_shape + placeholder_tensor, input_tensors = input_placeholder_fn_map[input_type]( + **placeholder_args) + outputs = _get_outputs_from_inputs( + input_tensors=input_tensors, + detection_model=detection_model, + output_collection_name=output_collection_name) + + # Add global step to the graph. 
+  slim.get_or_create_global_step()
+
+  if graph_hook_fn:
+    graph_hook_fn()
+
+  return outputs, placeholder_tensor
+
+
+def _export_inference_graph(input_type,
+                            detection_model,
+                            use_moving_averages,
+                            trained_checkpoint_prefix,
+                            output_directory,
+                            additional_output_tensor_names=None,
+                            input_shape=None,
+                            output_collection_name='inference_op',
+                            graph_hook_fn=None,
+                            write_inference_graph=False,
+                            temp_checkpoint_prefix=''):
+  """Export helper."""
+  tf.gfile.MakeDirs(output_directory)
+  frozen_graph_path = os.path.join(output_directory,
+                                   'frozen_inference_graph.pb')
+  saved_model_path = os.path.join(output_directory, 'saved_model')
+  model_path = os.path.join(output_directory, 'model.ckpt')
+
+  outputs, placeholder_tensor = build_detection_graph(
+      input_type=input_type,
+      detection_model=detection_model,
+      input_shape=input_shape,
+      output_collection_name=output_collection_name,
+      graph_hook_fn=graph_hook_fn)
+
+  profile_inference_graph(tf.get_default_graph())
+  saver_kwargs = {}
+  if use_moving_averages:
+    if not temp_checkpoint_prefix:
+      # This check is to be compatible with both versions of SaverDef.
+      if os.path.isfile(trained_checkpoint_prefix):
+        saver_kwargs['write_version'] = saver_pb2.SaverDef.V1
+        temp_checkpoint_prefix = tempfile.NamedTemporaryFile().name
+      else:
+        temp_checkpoint_prefix = tempfile.mkdtemp()
+    replace_variable_values_with_moving_averages(
+        tf.get_default_graph(), trained_checkpoint_prefix,
+        temp_checkpoint_prefix)
+    checkpoint_to_use = temp_checkpoint_prefix
+  else:
+    checkpoint_to_use = trained_checkpoint_prefix
+
+  saver = tf.train.Saver(**saver_kwargs)
+  input_saver_def = saver.as_saver_def()
+
+  write_graph_and_checkpoint(
+      inference_graph_def=tf.get_default_graph().as_graph_def(),
+      model_path=model_path,
+      input_saver_def=input_saver_def,
+      trained_checkpoint_prefix=checkpoint_to_use)
+  if write_inference_graph:
+    inference_graph_def = tf.get_default_graph().as_graph_def()
+    inference_graph_path = os.path.join(output_directory,
+                                        'inference_graph.pbtxt')
+    for node in inference_graph_def.node:
+      node.device = ''
+    # The .pbtxt is a text-format proto, so write it in text mode.
+    with tf.gfile.GFile(inference_graph_path, 'w') as f:
+      f.write(str(inference_graph_def))
+
+  if additional_output_tensor_names is not None:
+    output_node_names = ','.join(
+        list(outputs.keys()) + list(additional_output_tensor_names))
+  else:
+    output_node_names = ','.join(outputs.keys())
+
+  frozen_graph_def = freeze_graph.freeze_graph_with_def_protos(
+      input_graph_def=tf.get_default_graph().as_graph_def(),
+      input_saver_def=input_saver_def,
+      input_checkpoint=checkpoint_to_use,
+      output_node_names=output_node_names,
+      restore_op_name='save/restore_all',
+      filename_tensor_name='save/Const:0',
+      output_graph=frozen_graph_path,
+      clear_devices=True,
+      initializer_nodes='')
+
+  write_saved_model(saved_model_path, frozen_graph_def,
+                    placeholder_tensor, outputs)
+
+
+def export_inference_graph(input_type,
+                           pipeline_config,
+                           trained_checkpoint_prefix,
+                           output_directory,
+                           input_shape=None,
+                           output_collection_name='inference_op',
+                           additional_output_tensor_names=None,
+                           write_inference_graph=False):
+  """Exports inference graph for the model specified in the pipeline config.
+
+  Args:
+    input_type: Type of input for the graph. Can be one of ['image_tensor',
+      'encoded_image_string_tensor', 'tf_example'].
+    pipeline_config: pipeline_pb2.TrainEvalPipelineConfig proto.
+    trained_checkpoint_prefix: Path prefix of the trained checkpoint files.
+    output_directory: Path to write outputs.
+    input_shape: Sets a fixed shape for an `image_tensor` input.
If not + specified, will default to [None, None, None, 3]. + output_collection_name: Name of collection to add output tensors to. + If None, does not add output tensors to a collection. + additional_output_tensor_names: list of additional output + tensors to include in the frozen graph. + write_inference_graph: If true, writes inference graph to disk. + """ + detection_model = model_builder.build(pipeline_config.model, + is_training=False) + graph_rewriter_fn = None + if pipeline_config.HasField('graph_rewriter'): + graph_rewriter_config = pipeline_config.graph_rewriter + graph_rewriter_fn = graph_rewriter_builder.build(graph_rewriter_config, + is_training=False) + _export_inference_graph( + input_type, + detection_model, + pipeline_config.eval_config.use_moving_averages, + trained_checkpoint_prefix, + output_directory, + additional_output_tensor_names, + input_shape, + output_collection_name, + graph_hook_fn=graph_rewriter_fn, + write_inference_graph=write_inference_graph) + pipeline_config.eval_config.use_moving_averages = False + config_util.save_pipeline_config(pipeline_config, output_directory) + + +def profile_inference_graph(graph): + """Profiles the inference graph. + + Prints model parameters and computation FLOPs given an inference graph. + BatchNorms are excluded from the parameter count due to the fact that + BatchNorms are usually folded. BatchNorm, Initializer, Regularizer + and BiasAdd are not considered in FLOP count. + + Args: + graph: the inference graph. + """ + tfprof_vars_option = ( + tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS) + tfprof_flops_option = tf.contrib.tfprof.model_analyzer.FLOAT_OPS_OPTIONS + + # Batchnorm is usually folded during inference. + tfprof_vars_option['trim_name_regexes'] = ['.*BatchNorm.*'] + # Initializer and Regularizer are only used in training. + tfprof_flops_option['trim_name_regexes'] = [ + '.*BatchNorm.*', '.*Initializer.*', '.*Regularizer.*', '.*BiasAdd.*' + ] + + tf.contrib.tfprof.model_analyzer.print_model_analysis( + graph, + tfprof_options=tfprof_vars_option) + + tf.contrib.tfprof.model_analyzer.print_model_analysis( + graph, + tfprof_options=tfprof_flops_option) diff --git a/exporter_test.py b/exporter_test.py new file mode 100644 index 0000000..66c3963 --- /dev/null +++ b/exporter_test.py @@ -0,0 +1,1144 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for object_detection.export_inference_graph.""" +import os +import numpy as np +import six +import tensorflow as tf +from google.protobuf import text_format +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.tools import strip_unused_lib +from object_detection import exporter +from object_detection.builders import graph_rewriter_builder +from object_detection.builders import model_builder +from object_detection.core import model +from object_detection.protos import graph_rewriter_pb2 +from object_detection.protos import pipeline_pb2 +from object_detection.utils import ops +from object_detection.utils import variables_helper + +if six.PY2: + import mock # pylint: disable=g-import-not-at-top +else: + from unittest import mock # pylint: disable=g-import-not-at-top + +slim = tf.contrib.slim + + +class FakeModel(model.DetectionModel): + + def __init__(self, add_detection_keypoints=False, add_detection_masks=False, + add_detection_features=False): + self._add_detection_keypoints = add_detection_keypoints + self._add_detection_masks = add_detection_masks + self._add_detection_features = add_detection_features + + def preprocess(self, inputs): + true_image_shapes = [] # Doesn't matter for the fake model. + return tf.identity(inputs), true_image_shapes + + def predict(self, preprocessed_inputs, true_image_shapes): + return {'image': tf.layers.conv2d(preprocessed_inputs, 3, 1)} + + def postprocess(self, prediction_dict, true_image_shapes): + with tf.control_dependencies(prediction_dict.values()): + postprocessed_tensors = { + 'detection_boxes': tf.constant([[[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 0.8, 0.8]], + [[0.5, 0.5, 1.0, 1.0], + [0.0, 0.0, 0.0, 0.0]]], tf.float32), + 'detection_scores': tf.constant([[0.7, 0.6], + [0.9, 0.0]], tf.float32), + 'detection_multiclass_scores': tf.constant([[[0.3, 0.7], [0.4, 0.6]], + [[0.1, 0.9], [0.0, 0.0]]], + tf.float32), + 'detection_classes': tf.constant([[0, 1], + [1, 0]], tf.float32), + 'num_detections': tf.constant([2, 1], tf.float32), + 'raw_detection_boxes': tf.constant([[[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 0.8, 0.8]], + [[0.5, 0.5, 1.0, 1.0], + [0.0, 0.5, 0.0, 0.5]]], + tf.float32), + 'raw_detection_scores': tf.constant([[0.7, 0.6], + [0.9, 0.5]], tf.float32), + } + if self._add_detection_keypoints: + postprocessed_tensors['detection_keypoints'] = tf.constant( + np.arange(48).reshape([2, 2, 6, 2]), tf.float32) + if self._add_detection_masks: + postprocessed_tensors['detection_masks'] = tf.constant( + np.arange(64).reshape([2, 2, 4, 4]), tf.float32) + if self._add_detection_features: + # let fake detection features have shape [4, 4, 10] + postprocessed_tensors['detection_features'] = tf.constant( + np.ones((2, 2, 4, 4, 10)), tf.float32) + + return postprocessed_tensors + + def restore_map(self, checkpoint_path, fine_tune_checkpoint_type): + pass + + def loss(self, prediction_dict, true_image_shapes): + pass + + def regularization_losses(self): + pass + + def updates(self): + pass + + +class ExportInferenceGraphTest(tf.test.TestCase): + + def _save_checkpoint_from_mock_model(self, + checkpoint_path, + use_moving_averages, + enable_quantization=False): + g = tf.Graph() + with g.as_default(): + mock_model = FakeModel() + preprocessed_inputs, true_image_shapes = mock_model.preprocess( + tf.placeholder(tf.float32, shape=[None, None, None, 3])) + predictions = mock_model.predict(preprocessed_inputs, 
true_image_shapes) + mock_model.postprocess(predictions, true_image_shapes) + if use_moving_averages: + tf.train.ExponentialMovingAverage(0.0).apply() + tf.train.get_or_create_global_step() + if enable_quantization: + graph_rewriter_config = graph_rewriter_pb2.GraphRewriter() + graph_rewriter_config.quantization.delay = 500000 + graph_rewriter_fn = graph_rewriter_builder.build( + graph_rewriter_config, is_training=False) + graph_rewriter_fn() + saver = tf.train.Saver() + init = tf.global_variables_initializer() + with self.test_session() as sess: + sess.run(init) + saver.save(sess, checkpoint_path) + + def _load_inference_graph(self, inference_graph_path, is_binary=True): + od_graph = tf.Graph() + with od_graph.as_default(): + od_graph_def = tf.GraphDef() + with tf.gfile.GFile(inference_graph_path) as fid: + if is_binary: + od_graph_def.ParseFromString(fid.read()) + else: + text_format.Parse(fid.read(), od_graph_def) + tf.import_graph_def(od_graph_def, name='') + return od_graph + + def _create_tf_example(self, image_array): + with self.test_session(): + encoded_image = tf.image.encode_jpeg(tf.constant(image_array)).eval() + def _bytes_feature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + example = tf.train.Example(features=tf.train.Features(feature={ + 'image/encoded': _bytes_feature(encoded_image), + 'image/format': _bytes_feature('jpg'), + 'image/source_id': _bytes_feature('image_id') + })).SerializeToString() + return example + + def test_export_graph_with_image_tensor_input(self): + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, + use_moving_averages=False) + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + output_directory = os.path.join(tmp_dir, 'output') + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + exporter.export_inference_graph( + input_type='image_tensor', + pipeline_config=pipeline_config, + trained_checkpoint_prefix=trained_checkpoint_prefix, + output_directory=output_directory) + self.assertTrue(os.path.exists(os.path.join( + output_directory, 'saved_model', 'saved_model.pb'))) + + def test_write_inference_graph(self): + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, + use_moving_averages=False) + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + output_directory = os.path.join(tmp_dir, 'output') + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + exporter.export_inference_graph( + input_type='image_tensor', + pipeline_config=pipeline_config, + trained_checkpoint_prefix=trained_checkpoint_prefix, + output_directory=output_directory, + write_inference_graph=True) + self.assertTrue(os.path.exists(os.path.join( + output_directory, 'inference_graph.pbtxt'))) + + def test_export_graph_with_fixed_size_image_tensor_input(self): + input_shape = [1, 320, 320, 3] + + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model( + trained_checkpoint_prefix, use_moving_averages=False) + with mock.patch.object( + model_builder, 'build', autospec=True) as 
mock_builder: + mock_builder.return_value = FakeModel() + output_directory = os.path.join(tmp_dir, 'output') + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + exporter.export_inference_graph( + input_type='image_tensor', + pipeline_config=pipeline_config, + trained_checkpoint_prefix=trained_checkpoint_prefix, + output_directory=output_directory, + input_shape=input_shape) + saved_model_path = os.path.join(output_directory, 'saved_model') + self.assertTrue( + os.path.exists(os.path.join(saved_model_path, 'saved_model.pb'))) + + with tf.Graph().as_default() as od_graph: + with self.test_session(graph=od_graph) as sess: + meta_graph = tf.saved_model.loader.load( + sess, [tf.saved_model.tag_constants.SERVING], saved_model_path) + signature = meta_graph.signature_def['serving_default'] + input_tensor_name = signature.inputs['inputs'].name + image_tensor = od_graph.get_tensor_by_name(input_tensor_name) + self.assertSequenceEqual(image_tensor.get_shape().as_list(), + input_shape) + + def test_export_graph_with_tf_example_input(self): + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, + use_moving_averages=False) + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + output_directory = os.path.join(tmp_dir, 'output') + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + exporter.export_inference_graph( + input_type='tf_example', + pipeline_config=pipeline_config, + trained_checkpoint_prefix=trained_checkpoint_prefix, + output_directory=output_directory) + self.assertTrue(os.path.exists(os.path.join( + output_directory, 'saved_model', 'saved_model.pb'))) + + def test_export_graph_with_fixed_size_tf_example_input(self): + input_shape = [1, 320, 320, 3] + + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model( + trained_checkpoint_prefix, use_moving_averages=False) + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + output_directory = os.path.join(tmp_dir, 'output') + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + exporter.export_inference_graph( + input_type='tf_example', + pipeline_config=pipeline_config, + trained_checkpoint_prefix=trained_checkpoint_prefix, + output_directory=output_directory, + input_shape=input_shape) + saved_model_path = os.path.join(output_directory, 'saved_model') + self.assertTrue( + os.path.exists(os.path.join(saved_model_path, 'saved_model.pb'))) + + def test_export_graph_with_encoded_image_string_input(self): + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, + use_moving_averages=False) + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + output_directory = os.path.join(tmp_dir, 'output') + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + exporter.export_inference_graph( + input_type='encoded_image_string_tensor', + pipeline_config=pipeline_config, + 
trained_checkpoint_prefix=trained_checkpoint_prefix, + output_directory=output_directory) + self.assertTrue(os.path.exists(os.path.join( + output_directory, 'saved_model', 'saved_model.pb'))) + + def test_export_graph_with_fixed_size_encoded_image_string_input(self): + input_shape = [1, 320, 320, 3] + + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model( + trained_checkpoint_prefix, use_moving_averages=False) + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + output_directory = os.path.join(tmp_dir, 'output') + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + exporter.export_inference_graph( + input_type='encoded_image_string_tensor', + pipeline_config=pipeline_config, + trained_checkpoint_prefix=trained_checkpoint_prefix, + output_directory=output_directory, + input_shape=input_shape) + saved_model_path = os.path.join(output_directory, 'saved_model') + self.assertTrue( + os.path.exists(os.path.join(saved_model_path, 'saved_model.pb'))) + + def _get_variables_in_checkpoint(self, checkpoint_file): + return set([ + var_name + for var_name, _ in tf.train.list_variables(checkpoint_file)]) + + def test_replace_variable_values_with_moving_averages(self): + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + new_checkpoint_prefix = os.path.join(tmp_dir, 'new.ckpt') + self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, + use_moving_averages=True) + graph = tf.Graph() + with graph.as_default(): + fake_model = FakeModel() + preprocessed_inputs, true_image_shapes = fake_model.preprocess( + tf.placeholder(dtype=tf.float32, shape=[None, None, None, 3])) + predictions = fake_model.predict(preprocessed_inputs, true_image_shapes) + fake_model.postprocess(predictions, true_image_shapes) + exporter.replace_variable_values_with_moving_averages( + graph, trained_checkpoint_prefix, new_checkpoint_prefix) + + expected_variables = set(['conv2d/bias', 'conv2d/kernel']) + variables_in_old_ckpt = self._get_variables_in_checkpoint( + trained_checkpoint_prefix) + self.assertIn('conv2d/bias/ExponentialMovingAverage', + variables_in_old_ckpt) + self.assertIn('conv2d/kernel/ExponentialMovingAverage', + variables_in_old_ckpt) + variables_in_new_ckpt = self._get_variables_in_checkpoint( + new_checkpoint_prefix) + self.assertTrue(expected_variables.issubset(variables_in_new_ckpt)) + self.assertNotIn('conv2d/bias/ExponentialMovingAverage', + variables_in_new_ckpt) + self.assertNotIn('conv2d/kernel/ExponentialMovingAverage', + variables_in_new_ckpt) + + def test_export_graph_with_moving_averages(self): + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, + use_moving_averages=True) + output_directory = os.path.join(tmp_dir, 'output') + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = True + exporter.export_inference_graph( + input_type='image_tensor', + pipeline_config=pipeline_config, + trained_checkpoint_prefix=trained_checkpoint_prefix, + output_directory=output_directory) + self.assertTrue(os.path.exists(os.path.join( + output_directory, 
'saved_model', 'saved_model.pb'))) + expected_variables = set(['conv2d/bias', 'conv2d/kernel', 'global_step']) + actual_variables = set( + [var_name for var_name, _ in tf.train.list_variables(output_directory)]) + self.assertTrue(expected_variables.issubset(actual_variables)) + + def test_export_model_with_quantization_nodes(self): + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model( + trained_checkpoint_prefix, + use_moving_averages=False, + enable_quantization=True) + output_directory = os.path.join(tmp_dir, 'output') + inference_graph_path = os.path.join(output_directory, + 'inference_graph.pbtxt') + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + text_format.Merge( + """graph_rewriter { + quantization { + delay: 50000 + activation_bits: 8 + weight_bits: 8 + } + }""", pipeline_config) + exporter.export_inference_graph( + input_type='image_tensor', + pipeline_config=pipeline_config, + trained_checkpoint_prefix=trained_checkpoint_prefix, + output_directory=output_directory, + write_inference_graph=True) + self._load_inference_graph(inference_graph_path, is_binary=False) + has_quant_nodes = False + for v in variables_helper.get_global_variables_safely(): + if v.op.name.endswith('act_quant/min'): + has_quant_nodes = True + break + self.assertTrue(has_quant_nodes) + + def test_export_model_with_all_output_nodes(self): + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, + use_moving_averages=True) + output_directory = os.path.join(tmp_dir, 'output') + inference_graph_path = os.path.join(output_directory, + 'frozen_inference_graph.pb') + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel( + add_detection_keypoints=True, add_detection_masks=True, + add_detection_features=True) + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + exporter.export_inference_graph( + input_type='image_tensor', + pipeline_config=pipeline_config, + trained_checkpoint_prefix=trained_checkpoint_prefix, + output_directory=output_directory) + inference_graph = self._load_inference_graph(inference_graph_path) + with self.test_session(graph=inference_graph): + inference_graph.get_tensor_by_name('image_tensor:0') + inference_graph.get_tensor_by_name('detection_boxes:0') + inference_graph.get_tensor_by_name('detection_scores:0') + inference_graph.get_tensor_by_name('detection_multiclass_scores:0') + inference_graph.get_tensor_by_name('detection_classes:0') + inference_graph.get_tensor_by_name('detection_keypoints:0') + inference_graph.get_tensor_by_name('detection_masks:0') + inference_graph.get_tensor_by_name('num_detections:0') + inference_graph.get_tensor_by_name('detection_features:0') + + def test_export_model_with_detection_only_nodes(self): + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, + use_moving_averages=True) + output_directory = os.path.join(tmp_dir, 'output') + inference_graph_path = os.path.join(output_directory, + 'frozen_inference_graph.pb') + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = 
FakeModel(add_detection_masks=False) + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + exporter.export_inference_graph( + input_type='image_tensor', + pipeline_config=pipeline_config, + trained_checkpoint_prefix=trained_checkpoint_prefix, + output_directory=output_directory) + inference_graph = self._load_inference_graph(inference_graph_path) + with self.test_session(graph=inference_graph): + inference_graph.get_tensor_by_name('image_tensor:0') + inference_graph.get_tensor_by_name('detection_boxes:0') + inference_graph.get_tensor_by_name('detection_scores:0') + inference_graph.get_tensor_by_name('detection_multiclass_scores:0') + inference_graph.get_tensor_by_name('detection_classes:0') + inference_graph.get_tensor_by_name('num_detections:0') + with self.assertRaises(KeyError): + inference_graph.get_tensor_by_name('detection_keypoints:0') + inference_graph.get_tensor_by_name('detection_masks:0') + + def test_export_model_with_detection_only_nodes_and_detection_features(self): + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, + use_moving_averages=True) + output_directory = os.path.join(tmp_dir, 'output') + inference_graph_path = os.path.join(output_directory, + 'frozen_inference_graph.pb') + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel(add_detection_features=True) + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + exporter.export_inference_graph( + input_type='image_tensor', + pipeline_config=pipeline_config, + trained_checkpoint_prefix=trained_checkpoint_prefix, + output_directory=output_directory) + inference_graph = self._load_inference_graph(inference_graph_path) + with self.test_session(graph=inference_graph): + inference_graph.get_tensor_by_name('image_tensor:0') + inference_graph.get_tensor_by_name('detection_boxes:0') + inference_graph.get_tensor_by_name('detection_scores:0') + inference_graph.get_tensor_by_name('detection_multiclass_scores:0') + inference_graph.get_tensor_by_name('detection_classes:0') + inference_graph.get_tensor_by_name('num_detections:0') + inference_graph.get_tensor_by_name('detection_features:0') + with self.assertRaises(KeyError): + inference_graph.get_tensor_by_name('detection_keypoints:0') + inference_graph.get_tensor_by_name('detection_masks:0') + + def test_export_and_run_inference_with_image_tensor(self): + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, + use_moving_averages=True) + output_directory = os.path.join(tmp_dir, 'output') + inference_graph_path = os.path.join(output_directory, + 'frozen_inference_graph.pb') + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel( + add_detection_keypoints=True, add_detection_masks=True) + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + exporter.export_inference_graph( + input_type='image_tensor', + pipeline_config=pipeline_config, + trained_checkpoint_prefix=trained_checkpoint_prefix, + output_directory=output_directory) + + inference_graph = self._load_inference_graph(inference_graph_path) + with self.test_session(graph=inference_graph) as sess: + image_tensor = inference_graph.get_tensor_by_name('image_tensor:0') + boxes = 
inference_graph.get_tensor_by_name('detection_boxes:0') + scores = inference_graph.get_tensor_by_name('detection_scores:0') + classes = inference_graph.get_tensor_by_name('detection_classes:0') + keypoints = inference_graph.get_tensor_by_name('detection_keypoints:0') + masks = inference_graph.get_tensor_by_name('detection_masks:0') + num_detections = inference_graph.get_tensor_by_name('num_detections:0') + (boxes_np, scores_np, classes_np, keypoints_np, masks_np, + num_detections_np) = sess.run( + [boxes, scores, classes, keypoints, masks, num_detections], + feed_dict={image_tensor: np.ones((2, 4, 4, 3)).astype(np.uint8)}) + self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 0.8, 0.8]], + [[0.5, 0.5, 1.0, 1.0], + [0.0, 0.0, 0.0, 0.0]]]) + self.assertAllClose(scores_np, [[0.7, 0.6], + [0.9, 0.0]]) + self.assertAllClose(classes_np, [[1, 2], + [2, 1]]) + self.assertAllClose(keypoints_np, np.arange(48).reshape([2, 2, 6, 2])) + self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4])) + self.assertAllClose(num_detections_np, [2, 1]) + + def _create_encoded_image_string(self, image_array_np, encoding_format): + od_graph = tf.Graph() + with od_graph.as_default(): + if encoding_format == 'jpg': + encoded_string = tf.image.encode_jpeg(image_array_np) + elif encoding_format == 'png': + encoded_string = tf.image.encode_png(image_array_np) + else: + raise ValueError('Supports only the following formats: `jpg`, `png`') + with self.test_session(graph=od_graph): + return encoded_string.eval() + + def test_export_and_run_inference_with_encoded_image_string_tensor(self): + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, + use_moving_averages=True) + output_directory = os.path.join(tmp_dir, 'output') + inference_graph_path = os.path.join(output_directory, + 'frozen_inference_graph.pb') + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel( + add_detection_keypoints=True, add_detection_masks=True) + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + exporter.export_inference_graph( + input_type='encoded_image_string_tensor', + pipeline_config=pipeline_config, + trained_checkpoint_prefix=trained_checkpoint_prefix, + output_directory=output_directory) + + inference_graph = self._load_inference_graph(inference_graph_path) + jpg_image_str = self._create_encoded_image_string( + np.ones((4, 4, 3)).astype(np.uint8), 'jpg') + png_image_str = self._create_encoded_image_string( + np.ones((4, 4, 3)).astype(np.uint8), 'png') + with self.test_session(graph=inference_graph) as sess: + image_str_tensor = inference_graph.get_tensor_by_name( + 'encoded_image_string_tensor:0') + boxes = inference_graph.get_tensor_by_name('detection_boxes:0') + scores = inference_graph.get_tensor_by_name('detection_scores:0') + multiclass_scores = inference_graph.get_tensor_by_name( + 'detection_multiclass_scores:0') + classes = inference_graph.get_tensor_by_name('detection_classes:0') + keypoints = inference_graph.get_tensor_by_name('detection_keypoints:0') + masks = inference_graph.get_tensor_by_name('detection_masks:0') + num_detections = inference_graph.get_tensor_by_name('num_detections:0') + for image_str in [jpg_image_str, png_image_str]: + image_str_batch_np = np.hstack([image_str]* 2) + (boxes_np, scores_np, multiclass_scores_np, classes_np, keypoints_np, + 
masks_np, num_detections_np) = sess.run( + [ + boxes, scores, multiclass_scores, classes, keypoints, masks, + num_detections + ], + feed_dict={image_str_tensor: image_str_batch_np}) + self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 0.8, 0.8]], + [[0.5, 0.5, 1.0, 1.0], + [0.0, 0.0, 0.0, 0.0]]]) + self.assertAllClose(scores_np, [[0.7, 0.6], + [0.9, 0.0]]) + self.assertAllClose(multiclass_scores_np, [[[0.3, 0.7], [0.4, 0.6]], + [[0.1, 0.9], [0.0, 0.0]]]) + self.assertAllClose(classes_np, [[1, 2], + [2, 1]]) + self.assertAllClose(keypoints_np, np.arange(48).reshape([2, 2, 6, 2])) + self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4])) + self.assertAllClose(num_detections_np, [2, 1]) + + def test_raise_runtime_error_on_images_with_different_sizes(self): + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, + use_moving_averages=True) + output_directory = os.path.join(tmp_dir, 'output') + inference_graph_path = os.path.join(output_directory, + 'frozen_inference_graph.pb') + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel( + add_detection_keypoints=True, add_detection_masks=True) + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + exporter.export_inference_graph( + input_type='encoded_image_string_tensor', + pipeline_config=pipeline_config, + trained_checkpoint_prefix=trained_checkpoint_prefix, + output_directory=output_directory) + + inference_graph = self._load_inference_graph(inference_graph_path) + large_image = self._create_encoded_image_string( + np.ones((4, 4, 3)).astype(np.uint8), 'jpg') + small_image = self._create_encoded_image_string( + np.ones((2, 2, 3)).astype(np.uint8), 'jpg') + + image_str_batch_np = np.hstack([large_image, small_image]) + with self.test_session(graph=inference_graph) as sess: + image_str_tensor = inference_graph.get_tensor_by_name( + 'encoded_image_string_tensor:0') + boxes = inference_graph.get_tensor_by_name('detection_boxes:0') + scores = inference_graph.get_tensor_by_name('detection_scores:0') + classes = inference_graph.get_tensor_by_name('detection_classes:0') + keypoints = inference_graph.get_tensor_by_name('detection_keypoints:0') + masks = inference_graph.get_tensor_by_name('detection_masks:0') + num_detections = inference_graph.get_tensor_by_name('num_detections:0') + with self.assertRaisesRegexp(tf.errors.InvalidArgumentError, + 'TensorArray.*shape'): + sess.run( + [boxes, scores, classes, keypoints, masks, num_detections], + feed_dict={image_str_tensor: image_str_batch_np}) + + def test_export_and_run_inference_with_tf_example(self): + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, + use_moving_averages=True) + output_directory = os.path.join(tmp_dir, 'output') + inference_graph_path = os.path.join(output_directory, + 'frozen_inference_graph.pb') + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel( + add_detection_keypoints=True, add_detection_masks=True) + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + exporter.export_inference_graph( + input_type='tf_example', + pipeline_config=pipeline_config, + 
trained_checkpoint_prefix=trained_checkpoint_prefix, + output_directory=output_directory) + + inference_graph = self._load_inference_graph(inference_graph_path) + tf_example_np = np.expand_dims(self._create_tf_example( + np.ones((4, 4, 3)).astype(np.uint8)), axis=0) + with self.test_session(graph=inference_graph) as sess: + tf_example = inference_graph.get_tensor_by_name('tf_example:0') + boxes = inference_graph.get_tensor_by_name('detection_boxes:0') + scores = inference_graph.get_tensor_by_name('detection_scores:0') + classes = inference_graph.get_tensor_by_name('detection_classes:0') + keypoints = inference_graph.get_tensor_by_name('detection_keypoints:0') + masks = inference_graph.get_tensor_by_name('detection_masks:0') + num_detections = inference_graph.get_tensor_by_name('num_detections:0') + (boxes_np, scores_np, classes_np, keypoints_np, masks_np, + num_detections_np) = sess.run( + [boxes, scores, classes, keypoints, masks, num_detections], + feed_dict={tf_example: tf_example_np}) + self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 0.8, 0.8]], + [[0.5, 0.5, 1.0, 1.0], + [0.0, 0.0, 0.0, 0.0]]]) + self.assertAllClose(scores_np, [[0.7, 0.6], + [0.9, 0.0]]) + self.assertAllClose(classes_np, [[1, 2], + [2, 1]]) + self.assertAllClose(keypoints_np, np.arange(48).reshape([2, 2, 6, 2])) + self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4])) + self.assertAllClose(num_detections_np, [2, 1]) + + def test_write_frozen_graph(self): + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, + use_moving_averages=True) + output_directory = os.path.join(tmp_dir, 'output') + inference_graph_path = os.path.join(output_directory, + 'frozen_inference_graph.pb') + tf.gfile.MakeDirs(output_directory) + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel( + add_detection_keypoints=True, add_detection_masks=True) + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + detection_model = model_builder.build(pipeline_config.model, + is_training=False) + outputs, _ = exporter.build_detection_graph( + input_type='tf_example', + detection_model=detection_model, + input_shape=None, + output_collection_name='inference_op', + graph_hook_fn=None) + output_node_names = ','.join(outputs.keys()) + saver = tf.train.Saver() + input_saver_def = saver.as_saver_def() + exporter.freeze_graph_with_def_protos( + input_graph_def=tf.get_default_graph().as_graph_def(), + input_saver_def=input_saver_def, + input_checkpoint=trained_checkpoint_prefix, + output_node_names=output_node_names, + restore_op_name='save/restore_all', + filename_tensor_name='save/Const:0', + output_graph=inference_graph_path, + clear_devices=True, + initializer_nodes='') + + inference_graph = self._load_inference_graph(inference_graph_path) + tf_example_np = np.expand_dims(self._create_tf_example( + np.ones((4, 4, 3)).astype(np.uint8)), axis=0) + with self.test_session(graph=inference_graph) as sess: + tf_example = inference_graph.get_tensor_by_name('tf_example:0') + boxes = inference_graph.get_tensor_by_name('detection_boxes:0') + scores = inference_graph.get_tensor_by_name('detection_scores:0') + classes = inference_graph.get_tensor_by_name('detection_classes:0') + keypoints = inference_graph.get_tensor_by_name('detection_keypoints:0') + masks = 
inference_graph.get_tensor_by_name('detection_masks:0') + num_detections = inference_graph.get_tensor_by_name('num_detections:0') + (boxes_np, scores_np, classes_np, keypoints_np, masks_np, + num_detections_np) = sess.run( + [boxes, scores, classes, keypoints, masks, num_detections], + feed_dict={tf_example: tf_example_np}) + self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 0.8, 0.8]], + [[0.5, 0.5, 1.0, 1.0], + [0.0, 0.0, 0.0, 0.0]]]) + self.assertAllClose(scores_np, [[0.7, 0.6], + [0.9, 0.0]]) + self.assertAllClose(classes_np, [[1, 2], + [2, 1]]) + self.assertAllClose(keypoints_np, np.arange(48).reshape([2, 2, 6, 2])) + self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4])) + self.assertAllClose(num_detections_np, [2, 1]) + + def test_export_graph_saves_pipeline_file(self): + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, + use_moving_averages=True) + output_directory = os.path.join(tmp_dir, 'output') + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + exporter.export_inference_graph( + input_type='image_tensor', + pipeline_config=pipeline_config, + trained_checkpoint_prefix=trained_checkpoint_prefix, + output_directory=output_directory) + expected_pipeline_path = os.path.join( + output_directory, 'pipeline.config') + self.assertTrue(os.path.exists(expected_pipeline_path)) + + written_pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + with tf.gfile.GFile(expected_pipeline_path, 'r') as f: + proto_str = f.read() + text_format.Merge(proto_str, written_pipeline_config) + self.assertProtoEquals(pipeline_config, written_pipeline_config) + + def test_export_saved_model_and_run_inference(self): + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, + use_moving_averages=False) + output_directory = os.path.join(tmp_dir, 'output') + saved_model_path = os.path.join(output_directory, 'saved_model') + + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel( + add_detection_keypoints=True, add_detection_masks=True) + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + exporter.export_inference_graph( + input_type='tf_example', + pipeline_config=pipeline_config, + trained_checkpoint_prefix=trained_checkpoint_prefix, + output_directory=output_directory) + + tf_example_np = np.hstack([self._create_tf_example( + np.ones((4, 4, 3)).astype(np.uint8))] * 2) + with tf.Graph().as_default() as od_graph: + with self.test_session(graph=od_graph) as sess: + meta_graph = tf.saved_model.loader.load( + sess, [tf.saved_model.tag_constants.SERVING], saved_model_path) + + signature = meta_graph.signature_def['serving_default'] + input_tensor_name = signature.inputs['inputs'].name + tf_example = od_graph.get_tensor_by_name(input_tensor_name) + + boxes = od_graph.get_tensor_by_name( + signature.outputs['detection_boxes'].name) + scores = od_graph.get_tensor_by_name( + signature.outputs['detection_scores'].name) + multiclass_scores = od_graph.get_tensor_by_name( + signature.outputs['detection_multiclass_scores'].name) + classes = od_graph.get_tensor_by_name( + 
signature.outputs['detection_classes'].name) + keypoints = od_graph.get_tensor_by_name( + signature.outputs['detection_keypoints'].name) + masks = od_graph.get_tensor_by_name( + signature.outputs['detection_masks'].name) + num_detections = od_graph.get_tensor_by_name( + signature.outputs['num_detections'].name) + + (boxes_np, scores_np, multiclass_scores_np, classes_np, keypoints_np, + masks_np, num_detections_np) = sess.run( + [boxes, scores, multiclass_scores, classes, keypoints, masks, + num_detections], + feed_dict={tf_example: tf_example_np}) + self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 0.8, 0.8]], + [[0.5, 0.5, 1.0, 1.0], + [0.0, 0.0, 0.0, 0.0]]]) + self.assertAllClose(scores_np, [[0.7, 0.6], + [0.9, 0.0]]) + self.assertAllClose(multiclass_scores_np, [[[0.3, 0.7], [0.4, 0.6]], + [[0.1, 0.9], [0.0, 0.0]]]) + self.assertAllClose(classes_np, [[1, 2], + [2, 1]]) + self.assertAllClose(keypoints_np, np.arange(48).reshape([2, 2, 6, 2])) + self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4])) + self.assertAllClose(num_detections_np, [2, 1]) + + def test_write_saved_model(self): + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, + use_moving_averages=False) + output_directory = os.path.join(tmp_dir, 'output') + saved_model_path = os.path.join(output_directory, 'saved_model') + tf.gfile.MakeDirs(output_directory) + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel( + add_detection_keypoints=True, add_detection_masks=True) + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + detection_model = model_builder.build(pipeline_config.model, + is_training=False) + outputs, placeholder_tensor = exporter.build_detection_graph( + input_type='tf_example', + detection_model=detection_model, + input_shape=None, + output_collection_name='inference_op', + graph_hook_fn=None) + output_node_names = ','.join(outputs.keys()) + saver = tf.train.Saver() + input_saver_def = saver.as_saver_def() + frozen_graph_def = exporter.freeze_graph_with_def_protos( + input_graph_def=tf.get_default_graph().as_graph_def(), + input_saver_def=input_saver_def, + input_checkpoint=trained_checkpoint_prefix, + output_node_names=output_node_names, + restore_op_name='save/restore_all', + filename_tensor_name='save/Const:0', + output_graph='', + clear_devices=True, + initializer_nodes='') + exporter.write_saved_model( + saved_model_path=saved_model_path, + frozen_graph_def=frozen_graph_def, + inputs=placeholder_tensor, + outputs=outputs) + + tf_example_np = np.hstack([self._create_tf_example( + np.ones((4, 4, 3)).astype(np.uint8))] * 2) + with tf.Graph().as_default() as od_graph: + with self.test_session(graph=od_graph) as sess: + meta_graph = tf.saved_model.loader.load( + sess, [tf.saved_model.tag_constants.SERVING], saved_model_path) + + signature = meta_graph.signature_def['serving_default'] + input_tensor_name = signature.inputs['inputs'].name + tf_example = od_graph.get_tensor_by_name(input_tensor_name) + + boxes = od_graph.get_tensor_by_name( + signature.outputs['detection_boxes'].name) + scores = od_graph.get_tensor_by_name( + signature.outputs['detection_scores'].name) + classes = od_graph.get_tensor_by_name( + signature.outputs['detection_classes'].name) + keypoints = od_graph.get_tensor_by_name( + signature.outputs['detection_keypoints'].name) + 
masks = od_graph.get_tensor_by_name( + signature.outputs['detection_masks'].name) + num_detections = od_graph.get_tensor_by_name( + signature.outputs['num_detections'].name) + + (boxes_np, scores_np, classes_np, keypoints_np, masks_np, + num_detections_np) = sess.run( + [boxes, scores, classes, keypoints, masks, num_detections], + feed_dict={tf_example: tf_example_np}) + self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 0.8, 0.8]], + [[0.5, 0.5, 1.0, 1.0], + [0.0, 0.0, 0.0, 0.0]]]) + self.assertAllClose(scores_np, [[0.7, 0.6], + [0.9, 0.0]]) + self.assertAllClose(classes_np, [[1, 2], + [2, 1]]) + self.assertAllClose(keypoints_np, np.arange(48).reshape([2, 2, 6, 2])) + self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4])) + self.assertAllClose(num_detections_np, [2, 1]) + + def test_export_checkpoint_and_run_inference(self): + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, + use_moving_averages=False) + output_directory = os.path.join(tmp_dir, 'output') + model_path = os.path.join(output_directory, 'model.ckpt') + meta_graph_path = model_path + '.meta' + + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel( + add_detection_keypoints=True, add_detection_masks=True) + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + exporter.export_inference_graph( + input_type='tf_example', + pipeline_config=pipeline_config, + trained_checkpoint_prefix=trained_checkpoint_prefix, + output_directory=output_directory) + + tf_example_np = np.hstack([self._create_tf_example( + np.ones((4, 4, 3)).astype(np.uint8))] * 2) + with tf.Graph().as_default() as od_graph: + with self.test_session(graph=od_graph) as sess: + new_saver = tf.train.import_meta_graph(meta_graph_path) + new_saver.restore(sess, model_path) + + tf_example = od_graph.get_tensor_by_name('tf_example:0') + boxes = od_graph.get_tensor_by_name('detection_boxes:0') + scores = od_graph.get_tensor_by_name('detection_scores:0') + classes = od_graph.get_tensor_by_name('detection_classes:0') + keypoints = od_graph.get_tensor_by_name('detection_keypoints:0') + masks = od_graph.get_tensor_by_name('detection_masks:0') + num_detections = od_graph.get_tensor_by_name('num_detections:0') + (boxes_np, scores_np, classes_np, keypoints_np, masks_np, + num_detections_np) = sess.run( + [boxes, scores, classes, keypoints, masks, num_detections], + feed_dict={tf_example: tf_example_np}) + self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 0.8, 0.8]], + [[0.5, 0.5, 1.0, 1.0], + [0.0, 0.0, 0.0, 0.0]]]) + self.assertAllClose(scores_np, [[0.7, 0.6], + [0.9, 0.0]]) + self.assertAllClose(classes_np, [[1, 2], + [2, 1]]) + self.assertAllClose(keypoints_np, np.arange(48).reshape([2, 2, 6, 2])) + self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4])) + self.assertAllClose(num_detections_np, [2, 1]) + + def test_write_graph_and_checkpoint(self): + tmp_dir = self.get_temp_dir() + trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') + self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, + use_moving_averages=False) + output_directory = os.path.join(tmp_dir, 'output') + model_path = os.path.join(output_directory, 'model.ckpt') + meta_graph_path = model_path + '.meta' + tf.gfile.MakeDirs(output_directory) + with mock.patch.object( + model_builder, 'build', 
autospec=True) as mock_builder: + mock_builder.return_value = FakeModel( + add_detection_keypoints=True, add_detection_masks=True) + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.eval_config.use_moving_averages = False + detection_model = model_builder.build(pipeline_config.model, + is_training=False) + exporter.build_detection_graph( + input_type='tf_example', + detection_model=detection_model, + input_shape=None, + output_collection_name='inference_op', + graph_hook_fn=None) + saver = tf.train.Saver() + input_saver_def = saver.as_saver_def() + exporter.write_graph_and_checkpoint( + inference_graph_def=tf.get_default_graph().as_graph_def(), + model_path=model_path, + input_saver_def=input_saver_def, + trained_checkpoint_prefix=trained_checkpoint_prefix) + + tf_example_np = np.hstack([self._create_tf_example( + np.ones((4, 4, 3)).astype(np.uint8))] * 2) + with tf.Graph().as_default() as od_graph: + with self.test_session(graph=od_graph) as sess: + new_saver = tf.train.import_meta_graph(meta_graph_path) + new_saver.restore(sess, model_path) + + tf_example = od_graph.get_tensor_by_name('tf_example:0') + boxes = od_graph.get_tensor_by_name('detection_boxes:0') + scores = od_graph.get_tensor_by_name('detection_scores:0') + raw_boxes = od_graph.get_tensor_by_name('raw_detection_boxes:0') + raw_scores = od_graph.get_tensor_by_name('raw_detection_scores:0') + classes = od_graph.get_tensor_by_name('detection_classes:0') + keypoints = od_graph.get_tensor_by_name('detection_keypoints:0') + masks = od_graph.get_tensor_by_name('detection_masks:0') + num_detections = od_graph.get_tensor_by_name('num_detections:0') + (boxes_np, scores_np, raw_boxes_np, raw_scores_np, classes_np, + keypoints_np, masks_np, num_detections_np) = sess.run( + [boxes, scores, raw_boxes, raw_scores, classes, keypoints, masks, + num_detections], + feed_dict={tf_example: tf_example_np}) + self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 0.8, 0.8]], + [[0.5, 0.5, 1.0, 1.0], + [0.0, 0.0, 0.0, 0.0]]]) + self.assertAllClose(scores_np, [[0.7, 0.6], + [0.9, 0.0]]) + self.assertAllClose(raw_boxes_np, [[[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 0.8, 0.8]], + [[0.5, 0.5, 1.0, 1.0], + [0.0, 0.5, 0.0, 0.5]]]) + self.assertAllClose(raw_scores_np, [[0.7, 0.6], + [0.9, 0.5]]) + self.assertAllClose(classes_np, [[1, 2], + [2, 1]]) + self.assertAllClose(keypoints_np, np.arange(48).reshape([2, 2, 6, 2])) + self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4])) + self.assertAllClose(num_detections_np, [2, 1]) + + def test_rewrite_nn_resize_op(self): + g = tf.Graph() + with g.as_default(): + x = array_ops.placeholder(dtypes.float32, shape=(8, 10, 10, 8)) + y = array_ops.placeholder(dtypes.float32, shape=(8, 20, 20, 8)) + s = ops.nearest_neighbor_upsampling(x, 2) + t = s + y + exporter.rewrite_nn_resize_op() + + resize_op_found = False + for op in g.get_operations(): + if op.type == 'ResizeNearestNeighbor': + resize_op_found = True + self.assertEqual(op.inputs[0], x) + self.assertEqual(op.outputs[0].consumers()[0], t.op) + break + + self.assertTrue(resize_op_found) + + def test_rewrite_nn_resize_op_quantized(self): + g = tf.Graph() + with g.as_default(): + x = array_ops.placeholder(dtypes.float32, shape=(8, 10, 10, 8)) + x_conv = tf.contrib.slim.conv2d(x, 8, 1) + y = array_ops.placeholder(dtypes.float32, shape=(8, 20, 20, 8)) + s = ops.nearest_neighbor_upsampling(x_conv, 2) + t = s + y + + graph_rewriter_config = graph_rewriter_pb2.GraphRewriter() + graph_rewriter_config.quantization.delay = 500000 
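+      # Building and applying the graph rewriter wraps eligible tensors in
+      # FakeQuantWithMinMaxVars ops; the quantized resize rewrite below is
+      # expected to consume one of those tensors (see the assertion on
+      # op.inputs[0].op.type).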
+ graph_rewriter_fn = graph_rewriter_builder.build( + graph_rewriter_config, is_training=False) + graph_rewriter_fn() + + exporter.rewrite_nn_resize_op(is_quantized=True) + + resize_op_found = False + for op in g.get_operations(): + if op.type == 'ResizeNearestNeighbor': + resize_op_found = True + self.assertEqual(op.inputs[0].op.type, 'FakeQuantWithMinMaxVars') + self.assertEqual(op.outputs[0].consumers()[0], t.op) + break + + self.assertTrue(resize_op_found) + + def test_rewrite_nn_resize_op_multiple_path(self): + g = tf.Graph() + with g.as_default(): + with tf.name_scope('nearest_upsampling'): + x = array_ops.placeholder(dtypes.float32, shape=(8, 10, 10, 8)) + x_stack = tf.stack([tf.stack([x] * 2, axis=3)] * 2, axis=2) + x_reshape = tf.reshape(x_stack, [8, 20, 20, 8]) + + with tf.name_scope('nearest_upsampling'): + x_2 = array_ops.placeholder(dtypes.float32, shape=(8, 10, 10, 8)) + x_stack_2 = tf.stack([tf.stack([x_2] * 2, axis=3)] * 2, axis=2) + x_reshape_2 = tf.reshape(x_stack_2, [8, 20, 20, 8]) + + t = x_reshape + x_reshape_2 + + exporter.rewrite_nn_resize_op() + + graph_def = g.as_graph_def() + graph_def = strip_unused_lib.strip_unused( + graph_def, + input_node_names=[ + 'nearest_upsampling/Placeholder', 'nearest_upsampling_1/Placeholder' + ], + output_node_names=['add'], + placeholder_type_enum=dtypes.float32.as_datatype_enum) + + counter_resize_op = 0 + t_input_ops = [op.name for op in t.op.inputs] + for node in graph_def.node: + # Make sure Stacks are replaced. + self.assertNotEqual(node.op, 'Pack') + if node.op == 'ResizeNearestNeighbor': + counter_resize_op += 1 + self.assertIn(node.name + ':0', t_input_ops) + self.assertEqual(counter_resize_op, 2) + + +if __name__ == '__main__': + tf.test.main() diff --git a/inputs.py b/inputs.py new file mode 100644 index 0000000..fd80779 --- /dev/null +++ b/inputs.py @@ -0,0 +1,780 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+"""Model input function for tf-learn object detection model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import functools
+
+import tensorflow as tf
+from object_detection.builders import dataset_builder
+from object_detection.builders import image_resizer_builder
+from object_detection.builders import model_builder
+from object_detection.builders import preprocessor_builder
+from object_detection.core import box_list
+from object_detection.core import box_list_ops
+from object_detection.core import keypoint_ops
+from object_detection.core import preprocessor
+from object_detection.core import standard_fields as fields
+from object_detection.data_decoders import tf_example_decoder
+from object_detection.protos import eval_pb2
+from object_detection.protos import image_resizer_pb2
+from object_detection.protos import input_reader_pb2
+from object_detection.protos import model_pb2
+from object_detection.protos import train_pb2
+from object_detection.utils import config_util
+from object_detection.utils import ops as util_ops
+from object_detection.utils import shape_utils
+
+HASH_KEY = 'hash'
+HASH_BINS = 1 << 31
+SERVING_FED_EXAMPLE_KEY = 'serialized_example'
+
+# A map of names to methods that help build the input pipeline.
+INPUT_BUILDER_UTIL_MAP = {
+    'dataset_build': dataset_builder.build,
+    'model_build': model_builder.build,
+}
+
+
+def _multiclass_scores_or_one_hot_labels(multiclass_scores,
+                                         groundtruth_boxes,
+                                         groundtruth_classes, num_classes):
+  """Returns one-hot encoding of classes when multiclass_scores is empty."""
+  # Replace groundtruth_classes tensor with multiclass_scores tensor when it's
+  # non-empty. If multiclass_scores is empty, fall back on groundtruth_classes
+  # tensor.
+  def true_fn():
+    return tf.reshape(multiclass_scores,
+                      [tf.shape(groundtruth_boxes)[0], num_classes])
+  def false_fn():
+    return tf.one_hot(groundtruth_classes, num_classes)
+
+  return tf.cond(tf.size(multiclass_scores) > 0, true_fn, false_fn)
+
+
+def transform_input_data(tensor_dict,
+                         model_preprocess_fn,
+                         image_resizer_fn,
+                         num_classes,
+                         data_augmentation_fn=None,
+                         merge_multiple_boxes=False,
+                         retain_original_image=False,
+                         use_multiclass_scores=False,
+                         use_bfloat16=False,
+                         retain_original_image_additional_channels=False):
+  """A single function that is responsible for all input data transformations.
+
+  Data transformation functions are applied in the following order.
+  1. If key fields.InputDataFields.image_additional_channels is present in
+     tensor_dict, the additional channels will be merged into
+     fields.InputDataFields.image.
+  2. data_augmentation_fn (optional): applied on tensor_dict.
+  3. model_preprocess_fn: applied only on image tensor in tensor_dict.
+  4. image_resizer_fn: applied on original image and instance mask tensor in
+     tensor_dict.
+  5. one_hot_encoding: applied to classes tensor in tensor_dict.
+  6. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
+     same they can be merged into a single box with an associated k-hot class
+     label.
+
+  Args:
+    tensor_dict: dictionary containing input tensors keyed by
+      fields.InputDataFields.
+    model_preprocess_fn: model's preprocess function to apply on image tensor.
+      This function must take in a 4-D float tensor and return a 4-D
+      preprocessed float tensor and a tensor containing the true image shape.
+    image_resizer_fn: image resizer function to apply on groundtruth instance
+      masks. This function must take a 3-D float tensor of an image and a 3-D
+      tensor of instance masks and return a resized version of these along
+      with the true shapes.
+    num_classes: max number of classes to one-hot (or k-hot) encode the class
+      labels.
+    data_augmentation_fn: (optional) data augmentation function to apply on
+      input `tensor_dict`.
+    merge_multiple_boxes: (optional) whether to merge multiple groundtruth
+      boxes and classes for a given image if the boxes are exactly the same.
+    retain_original_image: (optional) whether to retain original image in the
+      output dictionary.
+    use_multiclass_scores: whether to use multiclass scores as class targets
+      instead of one-hot encoding of `groundtruth_classes`. When this is True
+      and multiclass_scores is empty, one-hot encoding of
+      `groundtruth_classes` is used as a fallback.
+    use_bfloat16: (optional) a bool, whether to use bfloat16 in training.
+    retain_original_image_additional_channels: (optional) whether to retain
+      original image additional channels in the output dictionary.
+
+  Returns:
+    A dictionary keyed by fields.InputDataFields containing the tensors
+    obtained after applying all the transformations.
+  """
+  out_tensor_dict = tensor_dict.copy()
+  if fields.InputDataFields.multiclass_scores in out_tensor_dict:
+    out_tensor_dict[
+        fields.InputDataFields
+        .multiclass_scores] = _multiclass_scores_or_one_hot_labels(
+            out_tensor_dict[fields.InputDataFields.multiclass_scores],
+            out_tensor_dict[fields.InputDataFields.groundtruth_boxes],
+            out_tensor_dict[fields.InputDataFields.groundtruth_classes],
+            num_classes)
+
+  if fields.InputDataFields.groundtruth_boxes in out_tensor_dict:
+    out_tensor_dict = util_ops.filter_groundtruth_with_nan_box_coordinates(
+        out_tensor_dict)
+    out_tensor_dict = util_ops.filter_unrecognized_classes(out_tensor_dict)
+
+  if retain_original_image:
+    out_tensor_dict[fields.InputDataFields.original_image] = tf.cast(
+        image_resizer_fn(out_tensor_dict[fields.InputDataFields.image],
+                         None)[0], tf.uint8)
+
+  if fields.InputDataFields.image_additional_channels in out_tensor_dict:
+    channels = out_tensor_dict[fields.InputDataFields.image_additional_channels]
+    out_tensor_dict[fields.InputDataFields.image] = tf.concat(
+        [out_tensor_dict[fields.InputDataFields.image], channels], axis=2)
+    if retain_original_image_additional_channels:
+      out_tensor_dict[
+          fields.InputDataFields.image_additional_channels] = tf.cast(
+              image_resizer_fn(channels, None)[0], tf.uint8)
+
+  # Apply data augmentation ops.
+  if data_augmentation_fn is not None:
+    out_tensor_dict = data_augmentation_fn(out_tensor_dict)
+
+  # Apply model preprocessing ops and resize instance masks.
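+  # The preprocess function may pad the image, so groundtruth boxes and
+  # keypoints (normalized w.r.t. the true image extent) are re-normalized
+  # below w.r.t. the padded frame via `im_box`.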
+ image = out_tensor_dict[fields.InputDataFields.image] + preprocessed_resized_image, true_image_shape = model_preprocess_fn( + tf.expand_dims(tf.cast(image, dtype=tf.float32), axis=0)) + + preprocessed_shape = tf.shape(preprocessed_resized_image) + new_height, new_width = preprocessed_shape[1], preprocessed_shape[2] + + im_box = tf.stack([ + 0.0, 0.0, + tf.to_float(new_height) / tf.to_float(true_image_shape[0, 0]), + tf.to_float(new_width) / tf.to_float(true_image_shape[0, 1]) + ]) + + if fields.InputDataFields.groundtruth_boxes in tensor_dict: + bboxes = out_tensor_dict[fields.InputDataFields.groundtruth_boxes] + boxlist = box_list.BoxList(bboxes) + realigned_bboxes = box_list_ops.change_coordinate_frame(boxlist, im_box) + out_tensor_dict[ + fields.InputDataFields.groundtruth_boxes] = realigned_bboxes.get() + + if fields.InputDataFields.groundtruth_keypoints in tensor_dict: + keypoints = out_tensor_dict[fields.InputDataFields.groundtruth_keypoints] + realigned_keypoints = keypoint_ops.change_coordinate_frame(keypoints, + im_box) + out_tensor_dict[ + fields.InputDataFields.groundtruth_keypoints] = realigned_keypoints + + if use_bfloat16: + preprocessed_resized_image = tf.cast( + preprocessed_resized_image, tf.bfloat16) + out_tensor_dict[fields.InputDataFields.image] = tf.squeeze( + preprocessed_resized_image, axis=0) + out_tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze( + true_image_shape, axis=0) + if fields.InputDataFields.groundtruth_instance_masks in out_tensor_dict: + masks = out_tensor_dict[fields.InputDataFields.groundtruth_instance_masks] + _, resized_masks, _ = image_resizer_fn(image, masks) + if use_bfloat16: + resized_masks = tf.cast(resized_masks, tf.bfloat16) + out_tensor_dict[ + fields.InputDataFields.groundtruth_instance_masks] = resized_masks + + label_offset = 1 + zero_indexed_groundtruth_classes = out_tensor_dict[ + fields.InputDataFields.groundtruth_classes] - label_offset + if use_multiclass_scores: + out_tensor_dict[ + fields.InputDataFields.groundtruth_classes] = out_tensor_dict[ + fields.InputDataFields.multiclass_scores] + else: + out_tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot( + zero_indexed_groundtruth_classes, num_classes) + out_tensor_dict.pop(fields.InputDataFields.multiclass_scores, None) + + if fields.InputDataFields.groundtruth_confidences in out_tensor_dict: + groundtruth_confidences = out_tensor_dict[ + fields.InputDataFields.groundtruth_confidences] + # Map the confidences to the one-hot encoding of classes + out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = ( + tf.reshape(groundtruth_confidences, [-1, 1]) * + out_tensor_dict[fields.InputDataFields.groundtruth_classes]) + else: + groundtruth_confidences = tf.ones_like( + zero_indexed_groundtruth_classes, dtype=tf.float32) + out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = ( + out_tensor_dict[fields.InputDataFields.groundtruth_classes]) + + if merge_multiple_boxes: + merged_boxes, merged_classes, merged_confidences, _ = ( + util_ops.merge_boxes_with_multiple_labels( + out_tensor_dict[fields.InputDataFields.groundtruth_boxes], + zero_indexed_groundtruth_classes, + groundtruth_confidences, + num_classes)) + merged_classes = tf.cast(merged_classes, tf.float32) + out_tensor_dict[fields.InputDataFields.groundtruth_boxes] = merged_boxes + out_tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes + out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = ( + merged_confidences) + if 
fields.InputDataFields.groundtruth_boxes in out_tensor_dict:
+    out_tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
+        out_tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
+
+  return out_tensor_dict
+
+
+def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
+                                    spatial_image_shape=None):
+  """Pads input tensors to static shapes.
+
+  In case num_additional_channels > 0, we assume that the additional channels
+  have already been concatenated to the base image.
+
+  Args:
+    tensor_dict: Tensor dictionary of input data.
+    max_num_boxes: Max number of groundtruth boxes needed to compute shapes
+      for padding.
+    num_classes: Number of classes in the dataset needed to compute shapes
+      for padding.
+    spatial_image_shape: A list of two integers of the form [height, width]
+      containing expected spatial shape of the image.
+
+  Returns:
+    A dictionary keyed by fields.InputDataFields containing the input tensors
+    padded or clipped to the static shapes computed above.
+
+  Raises:
+    ValueError: If groundtruth classes is neither rank 1 nor rank 2, or if we
+      detect that additional channels have not been concatenated yet.
+  """
+
+  if not spatial_image_shape or spatial_image_shape == [-1, -1]:
+    height, width = None, None
+  else:
+    height, width = spatial_image_shape  # pylint: disable=unpacking-non-sequence
+
+  num_additional_channels = 0
+  if fields.InputDataFields.image_additional_channels in tensor_dict:
+    num_additional_channels = shape_utils.get_dim_as_int(tensor_dict[
+        fields.InputDataFields.image_additional_channels].shape[2])
+
+  # We assume that if num_additional_channels > 0, then it has already been
+  # concatenated to the base image (but not the ground truth).
+  num_channels = 3
+  if fields.InputDataFields.image in tensor_dict:
+    num_channels = shape_utils.get_dim_as_int(
+        tensor_dict[fields.InputDataFields.image].shape[2])
+
+  if num_additional_channels:
+    if num_additional_channels >= num_channels:
+      raise ValueError(
+          'Image must be already concatenated with additional channels.')
+
+    if (fields.InputDataFields.original_image in tensor_dict and
+        shape_utils.get_dim_as_int(
+            tensor_dict[fields.InputDataFields.original_image].shape[2]) ==
+        num_channels):
+      raise ValueError(
+          'Image must be already concatenated with additional channels.')
+
+  padding_shapes = {
+      fields.InputDataFields.image: [
+          height, width, num_channels
+      ],
+      fields.InputDataFields.original_image_spatial_shape: [2],
+      fields.InputDataFields.image_additional_channels: [
+          height, width, num_additional_channels
+      ],
+      fields.InputDataFields.source_id: [],
+      fields.InputDataFields.filename: [],
+      fields.InputDataFields.key: [],
+      fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
+      fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
+      fields.InputDataFields.groundtruth_classes: [max_num_boxes, num_classes],
+      fields.InputDataFields.groundtruth_instance_masks: [
+          max_num_boxes, height, width
+      ],
+      fields.InputDataFields.groundtruth_is_crowd: [max_num_boxes],
+      fields.InputDataFields.groundtruth_group_of: [max_num_boxes],
+      fields.InputDataFields.groundtruth_area: [max_num_boxes],
+      fields.InputDataFields.groundtruth_weights: [max_num_boxes],
+      fields.InputDataFields.groundtruth_confidences: [
+          max_num_boxes, num_classes
+      ],
+      fields.InputDataFields.num_groundtruth_boxes: [],
+      fields.InputDataFields.groundtruth_label_types: [max_num_boxes],
+      fields.InputDataFields.groundtruth_label_weights: [max_num_boxes],
+      fields.InputDataFields.true_image_shape: [3],
+      fields.InputDataFields.groundtruth_image_classes: [num_classes],
+      fields.InputDataFields.groundtruth_image_confidences: [num_classes],
+  }
+
+  if fields.InputDataFields.original_image in tensor_dict:
+    padding_shapes[fields.InputDataFields.original_image] = [
+        height, width,
+        shape_utils.get_dim_as_int(tensor_dict[fields.InputDataFields.
+                                               original_image].shape[2])
+    ]
+  if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
+    tensor_shape = (
+        tensor_dict[fields.InputDataFields.groundtruth_keypoints].shape)
+    padding_shape = [max_num_boxes,
+                     shape_utils.get_dim_as_int(tensor_shape[1]),
+                     shape_utils.get_dim_as_int(tensor_shape[2])]
+    padding_shapes[fields.InputDataFields.groundtruth_keypoints] = padding_shape
+  if fields.InputDataFields.groundtruth_keypoint_visibilities in tensor_dict:
+    tensor_shape = tensor_dict[fields.InputDataFields.
+                               groundtruth_keypoint_visibilities].shape
+    padding_shape = [max_num_boxes, shape_utils.get_dim_as_int(tensor_shape[1])]
+    padding_shapes[fields.InputDataFields.
+                   groundtruth_keypoint_visibilities] = padding_shape
+
+  padded_tensor_dict = {}
+  for tensor_name in tensor_dict:
+    padded_tensor_dict[tensor_name] = shape_utils.pad_or_clip_nd(
+        tensor_dict[tensor_name], padding_shapes[tensor_name])
+
+  # Make sure that the number of groundtruth boxes now reflects the
+  # padded/clipped tensors.
+  if fields.InputDataFields.num_groundtruth_boxes in padded_tensor_dict:
+    padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = (
+        tf.minimum(
+            padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes],
+            max_num_boxes))
+  return padded_tensor_dict
+
+
+def augment_input_data(tensor_dict, data_augmentation_options):
+  """Applies data augmentation ops to input tensors.
+
+  Args:
+    tensor_dict: A dictionary of input tensors keyed by
+      fields.InputDataFields.
+    data_augmentation_options: A list of tuples, where each tuple contains a
+      function and a dictionary that contains arguments and their values.
+      Usually, this is the output of builders/preprocessor_builder.build.
+
+  Returns:
+    A dictionary of tensors obtained by applying data augmentation ops to the
+    input tensor dictionary.
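+
+  Example (an illustrative sketch; `random_horizontal_flip` stands in for
+  whatever augmentation steps a train config specifies):
+
+    options = [(preprocessor.random_horizontal_flip, {})]
+    augmented_dict = augment_input_data(tensor_dict, options)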
+ """ + tensor_dict[fields.InputDataFields.image] = tf.expand_dims( + tf.cast(tensor_dict[fields.InputDataFields.image], dtype=tf.float32), 0) + + include_instance_masks = (fields.InputDataFields.groundtruth_instance_masks + in tensor_dict) + include_keypoints = (fields.InputDataFields.groundtruth_keypoints + in tensor_dict) + include_label_weights = (fields.InputDataFields.groundtruth_weights + in tensor_dict) + include_label_confidences = (fields.InputDataFields.groundtruth_confidences + in tensor_dict) + include_multiclass_scores = (fields.InputDataFields.multiclass_scores in + tensor_dict) + tensor_dict = preprocessor.preprocess( + tensor_dict, data_augmentation_options, + func_arg_map=preprocessor.get_default_func_arg_map( + include_label_weights=include_label_weights, + include_label_confidences=include_label_confidences, + include_multiclass_scores=include_multiclass_scores, + include_instance_masks=include_instance_masks, + include_keypoints=include_keypoints)) + tensor_dict[fields.InputDataFields.image] = tf.squeeze( + tensor_dict[fields.InputDataFields.image], axis=0) + return tensor_dict + + +def _get_labels_dict(input_dict): + """Extracts labels dict from input dict.""" + required_label_keys = [ + fields.InputDataFields.num_groundtruth_boxes, + fields.InputDataFields.groundtruth_boxes, + fields.InputDataFields.groundtruth_classes, + fields.InputDataFields.groundtruth_weights, + ] + labels_dict = {} + for key in required_label_keys: + labels_dict[key] = input_dict[key] + + optional_label_keys = [ + fields.InputDataFields.groundtruth_confidences, + fields.InputDataFields.groundtruth_keypoints, + fields.InputDataFields.groundtruth_instance_masks, + fields.InputDataFields.groundtruth_area, + fields.InputDataFields.groundtruth_is_crowd, + fields.InputDataFields.groundtruth_difficult + ] + + for key in optional_label_keys: + if key in input_dict: + labels_dict[key] = input_dict[key] + if fields.InputDataFields.groundtruth_difficult in labels_dict: + labels_dict[fields.InputDataFields.groundtruth_difficult] = tf.cast( + labels_dict[fields.InputDataFields.groundtruth_difficult], tf.int32) + return labels_dict + + +def _replace_empty_string_with_random_number(string_tensor): + """Returns string unchanged if non-empty, and random string tensor otherwise. + + The random string is an integer 0 and 2**63 - 1, casted as string. + + + Args: + string_tensor: A tf.tensor of dtype string. + + Returns: + out_string: A tf.tensor of dtype string. If string_tensor contains the empty + string, out_string will contain a random integer casted to a string. + Otherwise string_tensor is returned unchanged. 
+ + """ + + empty_string = tf.constant('', dtype=tf.string, name='EmptyString') + + random_source_id = tf.as_string( + tf.random_uniform(shape=[], maxval=2**63 - 1, dtype=tf.int64)) + + out_string = tf.cond( + tf.equal(string_tensor, empty_string), + true_fn=lambda: random_source_id, + false_fn=lambda: string_tensor) + + return out_string + + +def _get_features_dict(input_dict): + """Extracts features dict from input dict.""" + + source_id = _replace_empty_string_with_random_number( + input_dict[fields.InputDataFields.source_id]) + + hash_from_source_id = tf.string_to_hash_bucket_fast(source_id, HASH_BINS) + features = { + fields.InputDataFields.image: + input_dict[fields.InputDataFields.image], + HASH_KEY: tf.cast(hash_from_source_id, tf.int32), + fields.InputDataFields.true_image_shape: + input_dict[fields.InputDataFields.true_image_shape], + fields.InputDataFields.original_image_spatial_shape: + input_dict[fields.InputDataFields.original_image_spatial_shape] + } + if fields.InputDataFields.original_image in input_dict: + features[fields.InputDataFields.original_image] = input_dict[ + fields.InputDataFields.original_image] + if fields.InputDataFields.image_additional_channels in input_dict: + features[fields.InputDataFields.image_additional_channels] = input_dict[ + fields.InputDataFields.image_additional_channels] + return features + + +def create_train_input_fn(train_config, train_input_config, + model_config): + """Creates a train `input` function for `Estimator`. + + Args: + train_config: A train_pb2.TrainConfig. + train_input_config: An input_reader_pb2.InputReader. + model_config: A model_pb2.DetectionModel. + + Returns: + `input_fn` for `Estimator` in TRAIN mode. + """ + + def _train_input_fn(params=None): + return train_input(train_config, train_input_config, model_config, + params=params) + + return _train_input_fn + + +def train_input(train_config, train_input_config, + model_config, model=None, params=None): + """Returns `features` and `labels` tensor dictionaries for training. + + Args: + train_config: A train_pb2.TrainConfig. + train_input_config: An input_reader_pb2.InputReader. + model_config: A model_pb2.DetectionModel. + model: A pre-constructed Detection Model. + If None, one will be created from the config. + params: Parameter dictionary passed from the estimator. + + Returns: + A tf.data.Dataset that holds (features, labels) tuple. + + features: Dictionary of feature tensors. + features[fields.InputDataFields.image] is a [batch_size, H, W, C] + float32 tensor with preprocessed images. + features[HASH_KEY] is a [batch_size] int32 tensor representing unique + identifiers for the images. + features[fields.InputDataFields.true_image_shape] is a [batch_size, 3] + int32 tensor representing the true image shapes, as preprocessed + images could be padded. + features[fields.InputDataFields.original_image] (optional) is a + [batch_size, H, W, C] float32 tensor with original images. + labels: Dictionary of groundtruth tensors. + labels[fields.InputDataFields.num_groundtruth_boxes] is a [batch_size] + int32 tensor indicating the number of groundtruth boxes. + labels[fields.InputDataFields.groundtruth_boxes] is a + [batch_size, num_boxes, 4] float32 tensor containing the corners of + the groundtruth boxes. + labels[fields.InputDataFields.groundtruth_classes] is a + [batch_size, num_boxes, num_classes] float32 one-hot tensor of + classes. 
+      labels[fields.InputDataFields.groundtruth_weights] is a
+        [batch_size, num_boxes] float32 tensor containing groundtruth weights
+        for the boxes.
+      -- Optional --
+      labels[fields.InputDataFields.groundtruth_instance_masks] is a
+        [batch_size, num_boxes, H, W] float32 tensor containing only binary
+        values, which represent instance masks for objects.
+      labels[fields.InputDataFields.groundtruth_keypoints] is a
+        [batch_size, num_boxes, num_keypoints, 2] float32 tensor containing
+        keypoints for each box.
+
+  Raises:
+    TypeError: if the `train_config`, `train_input_config` or `model_config`
+      are not of the correct type.
+  """
+  if not isinstance(train_config, train_pb2.TrainConfig):
+    raise TypeError('For training mode, the `train_config` must be a '
+                    'train_pb2.TrainConfig.')
+  if not isinstance(train_input_config, input_reader_pb2.InputReader):
+    raise TypeError('The `train_input_config` must be an '
+                    'input_reader_pb2.InputReader.')
+  if not isinstance(model_config, model_pb2.DetectionModel):
+    raise TypeError('The `model_config` must be a '
+                    'model_pb2.DetectionModel.')
+
+  if model is None:
+    model_preprocess_fn = INPUT_BUILDER_UTIL_MAP['model_build'](
+        model_config, is_training=True).preprocess
+  else:
+    model_preprocess_fn = model.preprocess
+
+  def transform_and_pad_input_data_fn(tensor_dict):
+    """Combines transform and pad operation."""
+    data_augmentation_options = [
+        preprocessor_builder.build(step)
+        for step in train_config.data_augmentation_options
+    ]
+    data_augmentation_fn = functools.partial(
+        augment_input_data,
+        data_augmentation_options=data_augmentation_options)
+
+    image_resizer_config = config_util.get_image_resizer_config(model_config)
+    image_resizer_fn = image_resizer_builder.build(image_resizer_config)
+    transform_data_fn = functools.partial(
+        transform_input_data, model_preprocess_fn=model_preprocess_fn,
+        image_resizer_fn=image_resizer_fn,
+        num_classes=config_util.get_number_of_classes(model_config),
+        data_augmentation_fn=data_augmentation_fn,
+        merge_multiple_boxes=train_config.merge_multiple_label_boxes,
+        retain_original_image=train_config.retain_original_images,
+        use_multiclass_scores=train_config.use_multiclass_scores,
+        use_bfloat16=train_config.use_bfloat16)
+
+    tensor_dict = pad_input_data_to_static_shapes(
+        tensor_dict=transform_data_fn(tensor_dict),
+        max_num_boxes=train_input_config.max_number_of_boxes,
+        num_classes=config_util.get_number_of_classes(model_config),
+        spatial_image_shape=config_util.get_spatial_image_size(
+            image_resizer_config))
+    return (_get_features_dict(tensor_dict), _get_labels_dict(tensor_dict))
+
+  dataset = INPUT_BUILDER_UTIL_MAP['dataset_build'](
+      train_input_config,
+      transform_input_data_fn=transform_and_pad_input_data_fn,
+      batch_size=params['batch_size'] if params else train_config.batch_size)
+  return dataset
+
+
+def create_eval_input_fn(eval_config, eval_input_config, model_config):
+  """Creates an eval `input` function for `Estimator`.
+
+  Args:
+    eval_config: An eval_pb2.EvalConfig.
+    eval_input_config: An input_reader_pb2.InputReader.
+    model_config: A model_pb2.DetectionModel.
+
+  Returns:
+    `input_fn` for `Estimator` in EVAL mode.
+  """
+
+  def _eval_input_fn(params=None):
+    return eval_input(eval_config, eval_input_config, model_config,
+                      params=params)
+
+  return _eval_input_fn
+
+
+def eval_input(eval_config, eval_input_config, model_config,
+               model=None, params=None):
+  """Returns `features` and `labels` tensor dictionaries for evaluation.
+
+  Args:
+    eval_config: An eval_pb2.EvalConfig.
+    eval_input_config: An input_reader_pb2.InputReader.
+    model_config: A model_pb2.DetectionModel.
+    model: A pre-constructed Detection Model.
+      If None, one will be created from the config.
+    params: Parameter dictionary passed from the estimator.
+
+  Returns:
+    A tf.data.Dataset that holds a (features, labels) tuple.
+
+    features: Dictionary of feature tensors.
+      features[fields.InputDataFields.image] is a [1, H, W, C] float32 tensor
+        with preprocessed images.
+      features[HASH_KEY] is a [1] int32 tensor representing unique
+        identifiers for the images.
+      features[fields.InputDataFields.true_image_shape] is a [1, 3]
+        int32 tensor representing the true image shapes, as preprocessed
+        images could be padded.
+      features[fields.InputDataFields.original_image] is a [1, H', W', C]
+        float32 tensor with the original image.
+    labels: Dictionary of groundtruth tensors.
+      labels[fields.InputDataFields.groundtruth_boxes] is a [1, num_boxes, 4]
+        float32 tensor containing the corners of the groundtruth boxes.
+      labels[fields.InputDataFields.groundtruth_classes] is a
+        [1, num_boxes, num_classes] float32 one-hot tensor of classes.
+      labels[fields.InputDataFields.groundtruth_area] is a [1, num_boxes]
+        float32 tensor containing object areas.
+      labels[fields.InputDataFields.groundtruth_is_crowd] is a [1, num_boxes]
+        bool tensor indicating if the boxes enclose a crowd.
+      labels[fields.InputDataFields.groundtruth_difficult] is a [1, num_boxes]
+        int32 tensor indicating if the boxes represent difficult instances.
+      -- Optional --
+      labels[fields.InputDataFields.groundtruth_instance_masks] is a
+        [1, num_boxes, H, W] float32 tensor containing only binary values,
+        which represent instance masks for objects.
+
+  Raises:
+    TypeError: if the `eval_config`, `eval_input_config` or `model_config`
+      are not of the correct type.
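+
+  Example (an illustrative sketch; assumes `configs` came from
+  config_util.get_configs_from_pipeline_file, whose key names are the
+  conventions of that utility):
+
+    dataset = eval_input(configs['eval_config'],
+                         configs['eval_input_config'],
+                         configs['model'])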
+ """ + params = params or {} + if not isinstance(eval_config, eval_pb2.EvalConfig): + raise TypeError('For eval mode, the `eval_config` must be a ' + 'train_pb2.EvalConfig.') + if not isinstance(eval_input_config, input_reader_pb2.InputReader): + raise TypeError('The `eval_input_config` must be a ' + 'input_reader_pb2.InputReader.') + if not isinstance(model_config, model_pb2.DetectionModel): + raise TypeError('The `model_config` must be a ' + 'model_pb2.DetectionModel.') + + if eval_config.force_no_resize: + arch = model_config.WhichOneof('model') + arch_config = getattr(model_config, arch) + image_resizer_proto = image_resizer_pb2.ImageResizer() + image_resizer_proto.identity_resizer.CopyFrom( + image_resizer_pb2.IdentityResizer()) + arch_config.image_resizer.CopyFrom(image_resizer_proto) + + if model is None: + model_preprocess_fn = INPUT_BUILDER_UTIL_MAP['model_build']( + model_config, is_training=False).preprocess + else: + model_preprocess_fn = model.preprocess + + def transform_and_pad_input_data_fn(tensor_dict): + """Combines transform and pad operation.""" + num_classes = config_util.get_number_of_classes(model_config) + + image_resizer_config = config_util.get_image_resizer_config(model_config) + image_resizer_fn = image_resizer_builder.build(image_resizer_config) + + transform_data_fn = functools.partial( + transform_input_data, model_preprocess_fn=model_preprocess_fn, + image_resizer_fn=image_resizer_fn, + num_classes=num_classes, + data_augmentation_fn=None, + retain_original_image=eval_config.retain_original_images, + retain_original_image_additional_channels= + eval_config.retain_original_image_additional_channels) + tensor_dict = pad_input_data_to_static_shapes( + tensor_dict=transform_data_fn(tensor_dict), + max_num_boxes=eval_input_config.max_number_of_boxes, + num_classes=config_util.get_number_of_classes(model_config), + spatial_image_shape=config_util.get_spatial_image_size( + image_resizer_config)) + return (_get_features_dict(tensor_dict), _get_labels_dict(tensor_dict)) + dataset = INPUT_BUILDER_UTIL_MAP['dataset_build']( + eval_input_config, + batch_size=params['batch_size'] if params else eval_config.batch_size, + transform_input_data_fn=transform_and_pad_input_data_fn) + return dataset + + +def create_predict_input_fn(model_config, predict_input_config): + """Creates a predict `input` function for `Estimator`. + + Args: + model_config: A model_pb2.DetectionModel. + predict_input_config: An input_reader_pb2.InputReader. + + Returns: + `input_fn` for `Estimator` in PREDICT mode. + """ + + def _predict_input_fn(params=None): + """Decodes serialized tf.Examples and returns `ServingInputReceiver`. + + Args: + params: Parameter dictionary passed from the estimator. + + Returns: + `ServingInputReceiver`. 
+ """ + del params + example = tf.placeholder(dtype=tf.string, shape=[], name='tf_example') + + num_classes = config_util.get_number_of_classes(model_config) + model_preprocess_fn = INPUT_BUILDER_UTIL_MAP['model_build']( + model_config, is_training=False).preprocess + + image_resizer_config = config_util.get_image_resizer_config(model_config) + image_resizer_fn = image_resizer_builder.build(image_resizer_config) + + transform_fn = functools.partial( + transform_input_data, model_preprocess_fn=model_preprocess_fn, + image_resizer_fn=image_resizer_fn, + num_classes=num_classes, + data_augmentation_fn=None) + + decoder = tf_example_decoder.TfExampleDecoder( + load_instance_masks=False, + num_additional_channels=predict_input_config.num_additional_channels) + input_dict = transform_fn(decoder.decode(example)) + images = tf.cast(input_dict[fields.InputDataFields.image], dtype=tf.float32) + images = tf.expand_dims(images, axis=0) + true_image_shape = tf.expand_dims( + input_dict[fields.InputDataFields.true_image_shape], axis=0) + + return tf.estimator.export.ServingInputReceiver( + features={ + fields.InputDataFields.image: images, + fields.InputDataFields.true_image_shape: true_image_shape}, + receiver_tensors={SERVING_FED_EXAMPLE_KEY: example}) + + return _predict_input_fn diff --git a/inputs_test.py b/inputs_test.py new file mode 100644 index 0000000..3629bb2 --- /dev/null +++ b/inputs_test.py @@ -0,0 +1,1277 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for object_detection.tflearn.inputs.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import functools +import os +from absl.testing import parameterized + +import numpy as np +import tensorflow as tf + +from object_detection import inputs +from object_detection.core import preprocessor +from object_detection.core import standard_fields as fields +from object_detection.utils import config_util +from object_detection.utils import test_case + +FLAGS = tf.flags.FLAGS + + +def _get_configs_for_model(model_name): + """Returns configurations for model.""" + fname = os.path.join(tf.resource_loader.get_data_files_path(), + 'samples/configs/' + model_name + '.config') + label_map_path = os.path.join(tf.resource_loader.get_data_files_path(), + 'data/pet_label_map.pbtxt') + data_path = os.path.join(tf.resource_loader.get_data_files_path(), + 'test_data/pets_examples.record') + configs = config_util.get_configs_from_pipeline_file(fname) + override_dict = { + 'train_input_path': data_path, + 'eval_input_path': data_path, + 'label_map_path': label_map_path + } + return config_util.merge_external_params_with_configs( + configs, kwargs_dict=override_dict) + + +def _make_initializable_iterator(dataset): + """Creates an iterator, and initializes tables. + + Args: + dataset: A `tf.data.Dataset` object. + + Returns: + A `tf.data.Iterator`. 
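+
+  Example (illustrative sketch):
+
+    iterator = _make_initializable_iterator(dataset)
+    features, labels = iterator.get_next()
+    # Because the initializer is registered in TABLE_INITIALIZERS, running
+    # tf.tables_initializer() also initializes this iterator.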
+  """
+  iterator = dataset.make_initializable_iterator()
+  tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, iterator.initializer)
+  return iterator
+
+
+class InputsTest(test_case.TestCase, parameterized.TestCase):
+
+  def test_faster_rcnn_resnet50_train_input(self):
+    """Tests the training input function for FasterRcnnResnet50."""
+    configs = _get_configs_for_model('faster_rcnn_resnet50_pets')
+    model_config = configs['model']
+    model_config.faster_rcnn.num_classes = 37
+    train_input_fn = inputs.create_train_input_fn(
+        configs['train_config'], configs['train_input_config'], model_config)
+    features, labels = _make_initializable_iterator(train_input_fn()).get_next()
+
+    self.assertAllEqual([1, None, None, 3],
+                        features[fields.InputDataFields.image].shape.as_list())
+    self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype)
+    self.assertAllEqual([1],
+                        features[inputs.HASH_KEY].shape.as_list())
+    self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype)
+    self.assertAllEqual(
+        [1, 100, 4],
+        labels[fields.InputDataFields.groundtruth_boxes].shape.as_list())
+    self.assertEqual(tf.float32,
+                     labels[fields.InputDataFields.groundtruth_boxes].dtype)
+    self.assertAllEqual(
+        [1, 100, model_config.faster_rcnn.num_classes],
+        labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
+    self.assertEqual(tf.float32,
+                     labels[fields.InputDataFields.groundtruth_classes].dtype)
+    self.assertAllEqual(
+        [1, 100],
+        labels[fields.InputDataFields.groundtruth_weights].shape.as_list())
+    self.assertEqual(tf.float32,
+                     labels[fields.InputDataFields.groundtruth_weights].dtype)
+    self.assertAllEqual(
+        [1, 100, model_config.faster_rcnn.num_classes],
+        labels[fields.InputDataFields.groundtruth_confidences].shape.as_list())
+    self.assertEqual(
+        tf.float32,
+        labels[fields.InputDataFields.groundtruth_confidences].dtype)
+
+  def test_faster_rcnn_resnet50_train_input_with_additional_channels(self):
+    """Tests the training input function for FasterRcnnResnet50 with
+    additional channels."""
+    configs = _get_configs_for_model('faster_rcnn_resnet50_pets')
+    model_config = configs['model']
+    configs['train_input_config'].num_additional_channels = 2
+    configs['train_config'].retain_original_images = True
+    model_config.faster_rcnn.num_classes = 37
+    train_input_fn = inputs.create_train_input_fn(
+        configs['train_config'], configs['train_input_config'], model_config)
+    features, labels = _make_initializable_iterator(train_input_fn()).get_next()
+
+    self.assertAllEqual([1, None, None, 5],
+                        features[fields.InputDataFields.image].shape.as_list())
+    self.assertAllEqual(
+        [1, None, None, 3],
+        features[fields.InputDataFields.original_image].shape.as_list())
+    self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype)
+    self.assertAllEqual([1],
+                        features[inputs.HASH_KEY].shape.as_list())
+    self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype)
+    self.assertAllEqual(
+        [1, 100, 4],
+        labels[fields.InputDataFields.groundtruth_boxes].shape.as_list())
+    self.assertEqual(tf.float32,
+                     labels[fields.InputDataFields.groundtruth_boxes].dtype)
+    self.assertAllEqual(
+        [1, 100, model_config.faster_rcnn.num_classes],
+        labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
+    self.assertEqual(tf.float32,
+                     labels[fields.InputDataFields.groundtruth_classes].dtype)
+    self.assertAllEqual(
+        [1, 100],
+        labels[fields.InputDataFields.groundtruth_weights].shape.as_list())
+    self.assertEqual(tf.float32,
+                     labels[fields.InputDataFields.groundtruth_weights].dtype)
+    self.assertAllEqual(
+        [1, 100,
model_config.faster_rcnn.num_classes], + labels[fields.InputDataFields.groundtruth_confidences].shape.as_list()) + self.assertEqual( + tf.float32, + labels[fields.InputDataFields.groundtruth_confidences].dtype) + + @parameterized.parameters( + {'eval_batch_size': 1}, + {'eval_batch_size': 8} + ) + def test_faster_rcnn_resnet50_eval_input(self, eval_batch_size=1): + """Tests the eval input function for FasterRcnnResnet50.""" + configs = _get_configs_for_model('faster_rcnn_resnet50_pets') + model_config = configs['model'] + model_config.faster_rcnn.num_classes = 37 + eval_config = configs['eval_config'] + eval_config.batch_size = eval_batch_size + eval_input_fn = inputs.create_eval_input_fn( + eval_config, configs['eval_input_configs'][0], model_config) + features, labels = _make_initializable_iterator(eval_input_fn()).get_next() + self.assertAllEqual([eval_batch_size, None, None, 3], + features[fields.InputDataFields.image].shape.as_list()) + self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype) + self.assertAllEqual( + [eval_batch_size, None, None, 3], + features[fields.InputDataFields.original_image].shape.as_list()) + self.assertEqual(tf.uint8, + features[fields.InputDataFields.original_image].dtype) + self.assertAllEqual([eval_batch_size], + features[inputs.HASH_KEY].shape.as_list()) + self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype) + self.assertAllEqual( + [eval_batch_size, 100, 4], + labels[fields.InputDataFields.groundtruth_boxes].shape.as_list()) + self.assertEqual(tf.float32, + labels[fields.InputDataFields.groundtruth_boxes].dtype) + self.assertAllEqual( + [eval_batch_size, 100, model_config.faster_rcnn.num_classes], + labels[fields.InputDataFields.groundtruth_classes].shape.as_list()) + self.assertEqual(tf.float32, + labels[fields.InputDataFields.groundtruth_classes].dtype) + self.assertAllEqual( + [eval_batch_size, 100], + labels[fields.InputDataFields.groundtruth_weights].shape.as_list()) + self.assertEqual( + tf.float32, + labels[fields.InputDataFields.groundtruth_weights].dtype) + self.assertAllEqual( + [eval_batch_size, 100], + labels[fields.InputDataFields.groundtruth_area].shape.as_list()) + self.assertEqual(tf.float32, + labels[fields.InputDataFields.groundtruth_area].dtype) + self.assertAllEqual( + [eval_batch_size, 100], + labels[fields.InputDataFields.groundtruth_is_crowd].shape.as_list()) + self.assertEqual( + tf.bool, labels[fields.InputDataFields.groundtruth_is_crowd].dtype) + self.assertAllEqual( + [eval_batch_size, 100], + labels[fields.InputDataFields.groundtruth_difficult].shape.as_list()) + self.assertEqual( + tf.int32, labels[fields.InputDataFields.groundtruth_difficult].dtype) + + def test_ssd_inceptionV2_train_input(self): + """Tests the training input function for SSDInceptionV2.""" + configs = _get_configs_for_model('ssd_inception_v2_pets') + model_config = configs['model'] + model_config.ssd.num_classes = 37 + batch_size = configs['train_config'].batch_size + train_input_fn = inputs.create_train_input_fn( + configs['train_config'], configs['train_input_config'], model_config) + features, labels = _make_initializable_iterator(train_input_fn()).get_next() + + self.assertAllEqual([batch_size, 300, 300, 3], + features[fields.InputDataFields.image].shape.as_list()) + self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype) + self.assertAllEqual([batch_size], + features[inputs.HASH_KEY].shape.as_list()) + self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype) + self.assertAllEqual( + [batch_size], + 
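+        # num_groundtruth_boxes records the unpadded per-example box count;
+        # the box/class tensors checked below are zero-padded to 100 entries.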
labels[fields.InputDataFields.num_groundtruth_boxes].shape.as_list()) + self.assertEqual(tf.int32, + labels[fields.InputDataFields.num_groundtruth_boxes].dtype) + self.assertAllEqual( + [batch_size, 100, 4], + labels[fields.InputDataFields.groundtruth_boxes].shape.as_list()) + self.assertEqual(tf.float32, + labels[fields.InputDataFields.groundtruth_boxes].dtype) + self.assertAllEqual( + [batch_size, 100, model_config.ssd.num_classes], + labels[fields.InputDataFields.groundtruth_classes].shape.as_list()) + self.assertEqual(tf.float32, + labels[fields.InputDataFields.groundtruth_classes].dtype) + self.assertAllEqual( + [batch_size, 100], + labels[ + fields.InputDataFields.groundtruth_weights].shape.as_list()) + self.assertEqual( + tf.float32, + labels[fields.InputDataFields.groundtruth_weights].dtype) + + @parameterized.parameters( + {'eval_batch_size': 1}, + {'eval_batch_size': 8} + ) + def test_ssd_inceptionV2_eval_input(self, eval_batch_size=1): + """Tests the eval input function for SSDInceptionV2.""" + configs = _get_configs_for_model('ssd_inception_v2_pets') + model_config = configs['model'] + model_config.ssd.num_classes = 37 + eval_config = configs['eval_config'] + eval_config.batch_size = eval_batch_size + eval_input_fn = inputs.create_eval_input_fn( + eval_config, configs['eval_input_configs'][0], model_config) + features, labels = _make_initializable_iterator(eval_input_fn()).get_next() + self.assertAllEqual([eval_batch_size, 300, 300, 3], + features[fields.InputDataFields.image].shape.as_list()) + self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype) + self.assertAllEqual( + [eval_batch_size, 300, 300, 3], + features[fields.InputDataFields.original_image].shape.as_list()) + self.assertEqual(tf.uint8, + features[fields.InputDataFields.original_image].dtype) + self.assertAllEqual([eval_batch_size], + features[inputs.HASH_KEY].shape.as_list()) + self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype) + self.assertAllEqual( + [eval_batch_size, 100, 4], + labels[fields.InputDataFields.groundtruth_boxes].shape.as_list()) + self.assertEqual(tf.float32, + labels[fields.InputDataFields.groundtruth_boxes].dtype) + self.assertAllEqual( + [eval_batch_size, 100, model_config.ssd.num_classes], + labels[fields.InputDataFields.groundtruth_classes].shape.as_list()) + self.assertEqual(tf.float32, + labels[fields.InputDataFields.groundtruth_classes].dtype) + self.assertAllEqual( + [eval_batch_size, 100], + labels[ + fields.InputDataFields.groundtruth_weights].shape.as_list()) + self.assertEqual( + tf.float32, + labels[fields.InputDataFields.groundtruth_weights].dtype) + self.assertAllEqual( + [eval_batch_size, 100], + labels[fields.InputDataFields.groundtruth_area].shape.as_list()) + self.assertEqual(tf.float32, + labels[fields.InputDataFields.groundtruth_area].dtype) + self.assertAllEqual( + [eval_batch_size, 100], + labels[fields.InputDataFields.groundtruth_is_crowd].shape.as_list()) + self.assertEqual( + tf.bool, labels[fields.InputDataFields.groundtruth_is_crowd].dtype) + self.assertAllEqual( + [eval_batch_size, 100], + labels[fields.InputDataFields.groundtruth_difficult].shape.as_list()) + self.assertEqual( + tf.int32, labels[fields.InputDataFields.groundtruth_difficult].dtype) + + def test_ssd_inceptionV2_eval_input_with_additional_channels( + self, eval_batch_size=1): + """Tests the eval input function for SSDInceptionV2 with additional channels. + + Args: + eval_batch_size: Batch size for eval set. 
+ """ + configs = _get_configs_for_model('ssd_inception_v2_pets') + model_config = configs['model'] + model_config.ssd.num_classes = 37 + configs['eval_input_configs'][0].num_additional_channels = 1 + eval_config = configs['eval_config'] + eval_config.batch_size = eval_batch_size + eval_config.retain_original_image_additional_channels = True + eval_input_fn = inputs.create_eval_input_fn( + eval_config, configs['eval_input_configs'][0], model_config) + features, labels = _make_initializable_iterator(eval_input_fn()).get_next() + self.assertAllEqual([eval_batch_size, 300, 300, 4], + features[fields.InputDataFields.image].shape.as_list()) + self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype) + self.assertAllEqual( + [eval_batch_size, 300, 300, 3], + features[fields.InputDataFields.original_image].shape.as_list()) + self.assertEqual(tf.uint8, + features[fields.InputDataFields.original_image].dtype) + self.assertAllEqual([eval_batch_size, 300, 300, 1], features[ + fields.InputDataFields.image_additional_channels].shape.as_list()) + self.assertEqual( + tf.uint8, + features[fields.InputDataFields.image_additional_channels].dtype) + self.assertAllEqual([eval_batch_size], + features[inputs.HASH_KEY].shape.as_list()) + self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype) + self.assertAllEqual( + [eval_batch_size, 100, 4], + labels[fields.InputDataFields.groundtruth_boxes].shape.as_list()) + self.assertEqual(tf.float32, + labels[fields.InputDataFields.groundtruth_boxes].dtype) + self.assertAllEqual( + [eval_batch_size, 100, model_config.ssd.num_classes], + labels[fields.InputDataFields.groundtruth_classes].shape.as_list()) + self.assertEqual(tf.float32, + labels[fields.InputDataFields.groundtruth_classes].dtype) + self.assertAllEqual( + [eval_batch_size, 100], + labels[fields.InputDataFields.groundtruth_weights].shape.as_list()) + self.assertEqual(tf.float32, + labels[fields.InputDataFields.groundtruth_weights].dtype) + self.assertAllEqual( + [eval_batch_size, 100], + labels[fields.InputDataFields.groundtruth_area].shape.as_list()) + self.assertEqual(tf.float32, + labels[fields.InputDataFields.groundtruth_area].dtype) + self.assertAllEqual( + [eval_batch_size, 100], + labels[fields.InputDataFields.groundtruth_is_crowd].shape.as_list()) + self.assertEqual(tf.bool, + labels[fields.InputDataFields.groundtruth_is_crowd].dtype) + self.assertAllEqual( + [eval_batch_size, 100], + labels[fields.InputDataFields.groundtruth_difficult].shape.as_list()) + self.assertEqual(tf.int32, + labels[fields.InputDataFields.groundtruth_difficult].dtype) + + def test_predict_input(self): + """Tests the predict input function.""" + configs = _get_configs_for_model('ssd_inception_v2_pets') + predict_input_fn = inputs.create_predict_input_fn( + model_config=configs['model'], + predict_input_config=configs['eval_input_configs'][0]) + serving_input_receiver = predict_input_fn() + + image = serving_input_receiver.features[fields.InputDataFields.image] + receiver_tensors = serving_input_receiver.receiver_tensors[ + inputs.SERVING_FED_EXAMPLE_KEY] + self.assertEqual([1, 300, 300, 3], image.shape.as_list()) + self.assertEqual(tf.float32, image.dtype) + self.assertEqual(tf.string, receiver_tensors.dtype) + + def test_predict_input_with_additional_channels(self): + """Tests the predict input function with additional channels.""" + configs = _get_configs_for_model('ssd_inception_v2_pets') + configs['eval_input_configs'][0].num_additional_channels = 2 + predict_input_fn = inputs.create_predict_input_fn( + 
model_config=configs['model'], + predict_input_config=configs['eval_input_configs'][0]) + serving_input_receiver = predict_input_fn() + + image = serving_input_receiver.features[fields.InputDataFields.image] + receiver_tensors = serving_input_receiver.receiver_tensors[ + inputs.SERVING_FED_EXAMPLE_KEY] + # RGB + 2 additional channels = 5 channels. + self.assertEqual([1, 300, 300, 5], image.shape.as_list()) + self.assertEqual(tf.float32, image.dtype) + self.assertEqual(tf.string, receiver_tensors.dtype) + + def test_error_with_bad_train_config(self): + """Tests that a TypeError is raised with improper train config.""" + configs = _get_configs_for_model('ssd_inception_v2_pets') + configs['model'].ssd.num_classes = 37 + train_input_fn = inputs.create_train_input_fn( + train_config=configs['eval_config'], # Expecting `TrainConfig`. + train_input_config=configs['train_input_config'], + model_config=configs['model']) + with self.assertRaises(TypeError): + train_input_fn() + + def test_error_with_bad_train_input_config(self): + """Tests that a TypeError is raised with improper train input config.""" + configs = _get_configs_for_model('ssd_inception_v2_pets') + configs['model'].ssd.num_classes = 37 + train_input_fn = inputs.create_train_input_fn( + train_config=configs['train_config'], + train_input_config=configs['model'], # Expecting `InputReader`. + model_config=configs['model']) + with self.assertRaises(TypeError): + train_input_fn() + + def test_error_with_bad_train_model_config(self): + """Tests that a TypeError is raised with improper train model config.""" + configs = _get_configs_for_model('ssd_inception_v2_pets') + configs['model'].ssd.num_classes = 37 + train_input_fn = inputs.create_train_input_fn( + train_config=configs['train_config'], + train_input_config=configs['train_input_config'], + model_config=configs['train_config']) # Expecting `DetectionModel`. + with self.assertRaises(TypeError): + train_input_fn() + + def test_error_with_bad_eval_config(self): + """Tests that a TypeError is raised with improper eval config.""" + configs = _get_configs_for_model('ssd_inception_v2_pets') + configs['model'].ssd.num_classes = 37 + eval_input_fn = inputs.create_eval_input_fn( + eval_config=configs['train_config'], # Expecting `EvalConfig`. + eval_input_config=configs['eval_input_configs'][0], + model_config=configs['model']) + with self.assertRaises(TypeError): + eval_input_fn() + + def test_error_with_bad_eval_input_config(self): + """Tests that a TypeError is raised with improper eval input config.""" + configs = _get_configs_for_model('ssd_inception_v2_pets') + configs['model'].ssd.num_classes = 37 + eval_input_fn = inputs.create_eval_input_fn( + eval_config=configs['eval_config'], + eval_input_config=configs['model'], # Expecting `InputReader`. + model_config=configs['model']) + with self.assertRaises(TypeError): + eval_input_fn() + + def test_error_with_bad_eval_model_config(self): + """Tests that a TypeError is raised with improper eval model config.""" + configs = _get_configs_for_model('ssd_inception_v2_pets') + configs['model'].ssd.num_classes = 37 + eval_input_fn = inputs.create_eval_input_fn( + eval_config=configs['eval_config'], + eval_input_config=configs['eval_input_configs'][0], + model_config=configs['eval_config']) # Expecting `DetectionModel`. 
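+    # Note: the isinstance checks live inside the returned input_fn, so the
+    # TypeError surfaces only when eval_input_fn() is invoked below.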
+    with self.assertRaises(TypeError):
+      eval_input_fn()
+
+  def test_output_equal_in_replace_empty_string_with_random_number(self):
+    string_placeholder = tf.placeholder(tf.string, shape=[])
+    replaced_string = inputs._replace_empty_string_with_random_number(
+        string_placeholder)
+
+    test_string = b'hello world'
+    feed_dict = {string_placeholder: test_string}
+
+    with self.test_session() as sess:
+      out_string = sess.run(replaced_string, feed_dict=feed_dict)
+
+    self.assertEqual(test_string, out_string)
+
+  def test_output_is_integer_in_replace_empty_string_with_random_number(self):
+
+    string_placeholder = tf.placeholder(tf.string, shape=[])
+    replaced_string = inputs._replace_empty_string_with_random_number(
+        string_placeholder)
+
+    empty_string = ''
+    feed_dict = {string_placeholder: empty_string}
+
+    tf.set_random_seed(0)
+
+    with self.test_session() as sess:
+      out_string = sess.run(replaced_string, feed_dict=feed_dict)
+
+    # Test whether out_string is a string which represents an integer.
+    int(out_string)  # throws an error if out_string is not castable to int.
+
+    self.assertEqual(out_string, b'2798129067578209328')
+
+  def test_force_no_resize(self):
+    """Tests the functionality of the force_no_resize option."""
+    configs = _get_configs_for_model('ssd_inception_v2_pets')
+    configs['eval_config'].force_no_resize = True
+
+    eval_input_fn = inputs.create_eval_input_fn(
+        eval_config=configs['eval_config'],
+        eval_input_config=configs['eval_input_configs'][0],
+        model_config=configs['model']
+    )
+    train_input_fn = inputs.create_train_input_fn(
+        train_config=configs['train_config'],
+        train_input_config=configs['train_input_config'],
+        model_config=configs['model']
+    )
+
+    features_train, _ = _make_initializable_iterator(
+        train_input_fn()).get_next()
+
+    features_eval, _ = _make_initializable_iterator(
+        eval_input_fn()).get_next()
+
+    images_train, images_eval = features_train['image'], features_eval['image']
+
+    self.assertEqual([1, None, None, 3], images_eval.shape.as_list())
+    self.assertEqual([24, 300, 300, 3], images_train.shape.as_list())
+
+
+class DataAugmentationFnTest(test_case.TestCase):
+
+  def test_apply_image_and_box_augmentation(self):
+    data_augmentation_options = [
+        (preprocessor.resize_image, {
+            'new_height': 20,
+            'new_width': 20,
+            'method': tf.image.ResizeMethod.NEAREST_NEIGHBOR
+        }),
+        (preprocessor.scale_boxes_to_pixel_coordinates, {}),
+    ]
+    data_augmentation_fn = functools.partial(
+        inputs.augment_input_data,
+        data_augmentation_options=data_augmentation_options)
+    tensor_dict = {
+        fields.InputDataFields.image:
+            tf.constant(np.random.rand(10, 10, 3).astype(np.float32)),
+        fields.InputDataFields.groundtruth_boxes:
+            tf.constant(np.array([[.5, .5, 1., 1.]], np.float32))
+    }
+    augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict)
+    with self.test_session() as sess:
+      augmented_tensor_dict_out = sess.run(augmented_tensor_dict)
+
+    self.assertAllEqual(
+        augmented_tensor_dict_out[fields.InputDataFields.image].shape,
+        [20, 20, 3]
+    )
+    self.assertAllClose(
+        augmented_tensor_dict_out[fields.InputDataFields.groundtruth_boxes],
+        [[10, 10, 20, 20]]
+    )
+
+  def test_apply_image_and_box_augmentation_with_scores(self):
+    data_augmentation_options = [
+        (preprocessor.resize_image, {
+            'new_height': 20,
+            'new_width': 20,
+            'method': tf.image.ResizeMethod.NEAREST_NEIGHBOR
+        }),
+        (preprocessor.scale_boxes_to_pixel_coordinates, {}),
+    ]
+    data_augmentation_fn = functools.partial(
+        inputs.augment_input_data,
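+        # augment_input_data applies each (preprocessor_fn, kwargs) option in
+        # order via preprocessor.preprocess.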
data_augmentation_options=data_augmentation_options) + tensor_dict = { + fields.InputDataFields.image: + tf.constant(np.random.rand(10, 10, 3).astype(np.float32)), + fields.InputDataFields.groundtruth_boxes: + tf.constant(np.array([[.5, .5, 1., 1.]], np.float32)), + fields.InputDataFields.groundtruth_classes: + tf.constant(np.array([1.0], np.float32)), + fields.InputDataFields.groundtruth_weights: + tf.constant(np.array([0.8], np.float32)), + } + augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict) + with self.test_session() as sess: + augmented_tensor_dict_out = sess.run(augmented_tensor_dict) + + self.assertAllEqual( + augmented_tensor_dict_out[fields.InputDataFields.image].shape, + [20, 20, 3] + ) + self.assertAllClose( + augmented_tensor_dict_out[fields.InputDataFields.groundtruth_boxes], + [[10, 10, 20, 20]] + ) + self.assertAllClose( + augmented_tensor_dict_out[fields.InputDataFields.groundtruth_classes], + [1.0] + ) + self.assertAllClose( + augmented_tensor_dict_out[ + fields.InputDataFields.groundtruth_weights], + [0.8] + ) + + def test_include_masks_in_data_augmentation(self): + data_augmentation_options = [ + (preprocessor.resize_image, { + 'new_height': 20, + 'new_width': 20, + 'method': tf.image.ResizeMethod.NEAREST_NEIGHBOR + }) + ] + data_augmentation_fn = functools.partial( + inputs.augment_input_data, + data_augmentation_options=data_augmentation_options) + tensor_dict = { + fields.InputDataFields.image: + tf.constant(np.random.rand(10, 10, 3).astype(np.float32)), + fields.InputDataFields.groundtruth_instance_masks: + tf.constant(np.zeros([2, 10, 10], np.uint8)) + } + augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict) + with self.test_session() as sess: + augmented_tensor_dict_out = sess.run(augmented_tensor_dict) + + self.assertAllEqual( + augmented_tensor_dict_out[fields.InputDataFields.image].shape, + [20, 20, 3]) + self.assertAllEqual(augmented_tensor_dict_out[ + fields.InputDataFields.groundtruth_instance_masks].shape, [2, 20, 20]) + + def test_include_keypoints_in_data_augmentation(self): + data_augmentation_options = [ + (preprocessor.resize_image, { + 'new_height': 20, + 'new_width': 20, + 'method': tf.image.ResizeMethod.NEAREST_NEIGHBOR + }), + (preprocessor.scale_boxes_to_pixel_coordinates, {}), + ] + data_augmentation_fn = functools.partial( + inputs.augment_input_data, + data_augmentation_options=data_augmentation_options) + tensor_dict = { + fields.InputDataFields.image: + tf.constant(np.random.rand(10, 10, 3).astype(np.float32)), + fields.InputDataFields.groundtruth_boxes: + tf.constant(np.array([[.5, .5, 1., 1.]], np.float32)), + fields.InputDataFields.groundtruth_keypoints: + tf.constant(np.array([[[0.5, 1.0], [0.5, 0.5]]], np.float32)) + } + augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict) + with self.test_session() as sess: + augmented_tensor_dict_out = sess.run(augmented_tensor_dict) + + self.assertAllEqual( + augmented_tensor_dict_out[fields.InputDataFields.image].shape, + [20, 20, 3] + ) + self.assertAllClose( + augmented_tensor_dict_out[fields.InputDataFields.groundtruth_boxes], + [[10, 10, 20, 20]] + ) + self.assertAllClose( + augmented_tensor_dict_out[fields.InputDataFields.groundtruth_keypoints], + [[[10, 20], [10, 10]]] + ) + + +def _fake_model_preprocessor_fn(image): + return (image, tf.expand_dims(tf.shape(image)[1:], axis=0)) + + +def _fake_image_resizer_fn(image, mask): + return (image, mask, tf.shape(image)) + + +def _fake_resize50_preprocess_fn(image): + image = image[0] + image, shape 
= preprocessor.resize_to_range( + image, min_dimension=50, max_dimension=50, pad_to_max_dimension=True) + + return tf.expand_dims(image, 0), tf.expand_dims(shape, axis=0) + + +class DataTransformationFnTest(test_case.TestCase): + + def test_combine_additional_channels_if_present(self): + image = np.random.rand(4, 4, 3).astype(np.float32) + additional_channels = np.random.rand(4, 4, 2).astype(np.float32) + tensor_dict = { + fields.InputDataFields.image: + tf.constant(image), + fields.InputDataFields.image_additional_channels: + tf.constant(additional_channels), + fields.InputDataFields.groundtruth_classes: + tf.constant(np.array([1, 1], np.int32)) + } + + input_transformation_fn = functools.partial( + inputs.transform_input_data, + model_preprocess_fn=_fake_model_preprocessor_fn, + image_resizer_fn=_fake_image_resizer_fn, + num_classes=1) + with self.test_session() as sess: + transformed_inputs = sess.run( + input_transformation_fn(tensor_dict=tensor_dict)) + self.assertAllEqual(transformed_inputs[fields.InputDataFields.image].dtype, + tf.float32) + self.assertAllEqual(transformed_inputs[fields.InputDataFields.image].shape, + [4, 4, 5]) + self.assertAllClose(transformed_inputs[fields.InputDataFields.image], + np.concatenate((image, additional_channels), axis=2)) + + def test_use_multiclass_scores_when_present(self): + image = np.random.rand(4, 4, 3).astype(np.float32) + tensor_dict = { + fields.InputDataFields.image: + tf.constant(image), + fields.InputDataFields.groundtruth_boxes: + tf.constant(np.array([[.5, .5, 1, 1], [.5, .5, 1, 1]], np.float32)), + fields.InputDataFields.multiclass_scores: + tf.constant(np.array([0.2, 0.3, 0.5, 0.1, 0.6, 0.3], np.float32)), + fields.InputDataFields.groundtruth_classes: + tf.constant(np.array([1, 2], np.int32)) + } + + input_transformation_fn = functools.partial( + inputs.transform_input_data, + model_preprocess_fn=_fake_model_preprocessor_fn, + image_resizer_fn=_fake_image_resizer_fn, + num_classes=3, use_multiclass_scores=True) + with self.test_session() as sess: + transformed_inputs = sess.run( + input_transformation_fn(tensor_dict=tensor_dict)) + + self.assertAllClose( + np.array([[0.2, 0.3, 0.5], [0.1, 0.6, 0.3]], np.float32), + transformed_inputs[fields.InputDataFields.groundtruth_classes]) + + def test_use_multiclass_scores_when_not_present(self): + image = np.random.rand(4, 4, 3).astype(np.float32) + tensor_dict = { + fields.InputDataFields.image: + tf.constant(image), + fields.InputDataFields.groundtruth_boxes: + tf.constant(np.array([[.5, .5, 1, 1], [.5, .5, 1, 1]], np.float32)), + fields.InputDataFields.multiclass_scores: + tf.placeholder(tf.float32), + fields.InputDataFields.groundtruth_classes: + tf.constant(np.array([1, 2], np.int32)) + } + + input_transformation_fn = functools.partial( + inputs.transform_input_data, + model_preprocess_fn=_fake_model_preprocessor_fn, + image_resizer_fn=_fake_image_resizer_fn, + num_classes=3, use_multiclass_scores=True) + with self.test_session() as sess: + transformed_inputs = sess.run( + input_transformation_fn(tensor_dict=tensor_dict), + feed_dict={ + tensor_dict[fields.InputDataFields.multiclass_scores]: + np.array([], dtype=np.float32) + }) + + self.assertAllClose( + np.array([[0, 1, 0], [0, 0, 1]], np.float32), + transformed_inputs[fields.InputDataFields.groundtruth_classes]) + + def test_returns_correct_class_label_encodings(self): + tensor_dict = { + fields.InputDataFields.image: + tf.constant(np.random.rand(4, 4, 3).astype(np.float32)), + fields.InputDataFields.groundtruth_boxes: + 
tf.constant(np.array([[0, 0, 1, 1], [.5, .5, 1, 1]], np.float32)), + fields.InputDataFields.groundtruth_classes: + tf.constant(np.array([3, 1], np.int32)) + } + num_classes = 3 + input_transformation_fn = functools.partial( + inputs.transform_input_data, + model_preprocess_fn=_fake_model_preprocessor_fn, + image_resizer_fn=_fake_image_resizer_fn, + num_classes=num_classes) + with self.test_session() as sess: + transformed_inputs = sess.run( + input_transformation_fn(tensor_dict=tensor_dict)) + + self.assertAllClose( + transformed_inputs[fields.InputDataFields.groundtruth_classes], + [[0, 0, 1], [1, 0, 0]]) + self.assertAllClose( + transformed_inputs[fields.InputDataFields.groundtruth_confidences], + [[0, 0, 1], [1, 0, 0]]) + + def test_returns_correct_labels_with_unrecognized_class(self): + tensor_dict = { + fields.InputDataFields.image: + tf.constant(np.random.rand(4, 4, 3).astype(np.float32)), + fields.InputDataFields.groundtruth_boxes: + tf.constant( + np.array([[0, 0, 1, 1], [.2, .2, 4, 4], [.5, .5, 1, 1]], + np.float32)), + fields.InputDataFields.groundtruth_area: + tf.constant(np.array([.5, .4, .3])), + fields.InputDataFields.groundtruth_classes: + tf.constant(np.array([3, -1, 1], np.int32)), + fields.InputDataFields.groundtruth_keypoints: + tf.constant( + np.array([[[.1, .1]], [[.2, .2]], [[.5, .5]]], + np.float32)), + fields.InputDataFields.groundtruth_keypoint_visibilities: + tf.constant([True, False, True]), + fields.InputDataFields.groundtruth_instance_masks: + tf.constant(np.random.rand(3, 4, 4).astype(np.float32)), + fields.InputDataFields.groundtruth_is_crowd: + tf.constant([False, True, False]), + fields.InputDataFields.groundtruth_difficult: + tf.constant(np.array([0, 0, 1], np.int32)) + } + + num_classes = 3 + input_transformation_fn = functools.partial( + inputs.transform_input_data, + model_preprocess_fn=_fake_model_preprocessor_fn, + image_resizer_fn=_fake_image_resizer_fn, + num_classes=num_classes) + with self.test_session() as sess: + transformed_inputs = sess.run( + input_transformation_fn(tensor_dict=tensor_dict)) + + self.assertAllClose( + transformed_inputs[fields.InputDataFields.groundtruth_classes], + [[0, 0, 1], [1, 0, 0]]) + self.assertAllEqual( + transformed_inputs[fields.InputDataFields.num_groundtruth_boxes], 2) + self.assertAllClose( + transformed_inputs[fields.InputDataFields.groundtruth_area], [.5, .3]) + self.assertAllEqual( + transformed_inputs[fields.InputDataFields.groundtruth_confidences], + [[0, 0, 1], [1, 0, 0]]) + self.assertAllClose( + transformed_inputs[fields.InputDataFields.groundtruth_boxes], + [[0, 0, 1, 1], [.5, .5, 1, 1]]) + self.assertAllClose( + transformed_inputs[fields.InputDataFields.groundtruth_keypoints], + [[[.1, .1]], [[.5, .5]]]) + self.assertAllEqual( + transformed_inputs[ + fields.InputDataFields.groundtruth_keypoint_visibilities], + [True, True]) + self.assertAllEqual( + transformed_inputs[ + fields.InputDataFields.groundtruth_instance_masks].shape, [2, 4, 4]) + self.assertAllEqual( + transformed_inputs[fields.InputDataFields.groundtruth_is_crowd], + [False, False]) + self.assertAllEqual( + transformed_inputs[fields.InputDataFields.groundtruth_difficult], + [0, 1]) + + def test_returns_correct_merged_boxes(self): + tensor_dict = { + fields.InputDataFields.image: + tf.constant(np.random.rand(4, 4, 3).astype(np.float32)), + fields.InputDataFields.groundtruth_boxes: + tf.constant(np.array([[.5, .5, 1, 1], [.5, .5, 1, 1]], np.float32)), + fields.InputDataFields.groundtruth_classes: + tf.constant(np.array([3, 1], np.int32)) + } + 
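+    # Both boxes above share identical corners, so merge_multiple_boxes=True
+    # should collapse them into one box whose class vector is the multi-hot
+    # union of classes 1 and 3.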
+ num_classes = 3 + input_transformation_fn = functools.partial( + inputs.transform_input_data, + model_preprocess_fn=_fake_model_preprocessor_fn, + image_resizer_fn=_fake_image_resizer_fn, + num_classes=num_classes, + merge_multiple_boxes=True) + + with self.test_session() as sess: + transformed_inputs = sess.run( + input_transformation_fn(tensor_dict=tensor_dict)) + self.assertAllClose( + transformed_inputs[fields.InputDataFields.groundtruth_boxes], + [[.5, .5, 1., 1.]]) + self.assertAllClose( + transformed_inputs[fields.InputDataFields.groundtruth_classes], + [[1, 0, 1]]) + self.assertAllClose( + transformed_inputs[fields.InputDataFields.groundtruth_confidences], + [[1, 0, 1]]) + self.assertAllClose( + transformed_inputs[fields.InputDataFields.num_groundtruth_boxes], + 1) + + def test_returns_correct_groundtruth_confidences_when_input_present(self): + tensor_dict = { + fields.InputDataFields.image: + tf.constant(np.random.rand(4, 4, 3).astype(np.float32)), + fields.InputDataFields.groundtruth_boxes: + tf.constant(np.array([[0, 0, 1, 1], [.5, .5, 1, 1]], np.float32)), + fields.InputDataFields.groundtruth_classes: + tf.constant(np.array([3, 1], np.int32)), + fields.InputDataFields.groundtruth_confidences: + tf.constant(np.array([1.0, -1.0], np.float32)) + } + num_classes = 3 + input_transformation_fn = functools.partial( + inputs.transform_input_data, + model_preprocess_fn=_fake_model_preprocessor_fn, + image_resizer_fn=_fake_image_resizer_fn, + num_classes=num_classes) + with self.test_session() as sess: + transformed_inputs = sess.run( + input_transformation_fn(tensor_dict=tensor_dict)) + + self.assertAllClose( + transformed_inputs[fields.InputDataFields.groundtruth_classes], + [[0, 0, 1], [1, 0, 0]]) + self.assertAllClose( + transformed_inputs[fields.InputDataFields.groundtruth_confidences], + [[0, 0, 1], [-1, 0, 0]]) + + def test_returns_resized_masks(self): + tensor_dict = { + fields.InputDataFields.image: + tf.constant(np.random.rand(4, 4, 3).astype(np.float32)), + fields.InputDataFields.groundtruth_instance_masks: + tf.constant(np.random.rand(2, 4, 4).astype(np.float32)), + fields.InputDataFields.groundtruth_classes: + tf.constant(np.array([3, 1], np.int32)), + fields.InputDataFields.original_image_spatial_shape: + tf.constant(np.array([4, 4], np.int32)) + } + + def fake_image_resizer_fn(image, masks=None): + resized_image = tf.image.resize_images(image, [8, 8]) + results = [resized_image] + if masks is not None: + resized_masks = tf.transpose( + tf.image.resize_images(tf.transpose(masks, [1, 2, 0]), [8, 8]), + [2, 0, 1]) + results.append(resized_masks) + results.append(tf.shape(resized_image)) + return results + + num_classes = 3 + input_transformation_fn = functools.partial( + inputs.transform_input_data, + model_preprocess_fn=_fake_model_preprocessor_fn, + image_resizer_fn=fake_image_resizer_fn, + num_classes=num_classes, + retain_original_image=True) + with self.test_session() as sess: + transformed_inputs = sess.run( + input_transformation_fn(tensor_dict=tensor_dict)) + self.assertAllEqual(transformed_inputs[ + fields.InputDataFields.original_image].dtype, tf.uint8) + self.assertAllEqual(transformed_inputs[ + fields.InputDataFields.original_image_spatial_shape], [4, 4]) + self.assertAllEqual(transformed_inputs[ + fields.InputDataFields.original_image].shape, [8, 8, 3]) + self.assertAllEqual(transformed_inputs[ + fields.InputDataFields.groundtruth_instance_masks].shape, [2, 8, 8]) + + def test_applies_model_preprocess_fn_to_image_tensor(self): + np_image = 
np.random.randint(256, size=(4, 4, 3)) + tensor_dict = { + fields.InputDataFields.image: + tf.constant(np_image), + fields.InputDataFields.groundtruth_classes: + tf.constant(np.array([3, 1], np.int32)) + } + + def fake_model_preprocessor_fn(image): + return (image / 255., tf.expand_dims(tf.shape(image)[1:], axis=0)) + + num_classes = 3 + input_transformation_fn = functools.partial( + inputs.transform_input_data, + model_preprocess_fn=fake_model_preprocessor_fn, + image_resizer_fn=_fake_image_resizer_fn, + num_classes=num_classes) + + with self.test_session() as sess: + transformed_inputs = sess.run( + input_transformation_fn(tensor_dict=tensor_dict)) + self.assertAllClose(transformed_inputs[fields.InputDataFields.image], + np_image / 255.) + self.assertAllClose(transformed_inputs[fields.InputDataFields. + true_image_shape], + [4, 4, 3]) + + def test_applies_data_augmentation_fn_to_tensor_dict(self): + np_image = np.random.randint(256, size=(4, 4, 3)) + tensor_dict = { + fields.InputDataFields.image: + tf.constant(np_image), + fields.InputDataFields.groundtruth_classes: + tf.constant(np.array([3, 1], np.int32)) + } + + def add_one_data_augmentation_fn(tensor_dict): + return {key: value + 1 for key, value in tensor_dict.items()} + + num_classes = 4 + input_transformation_fn = functools.partial( + inputs.transform_input_data, + model_preprocess_fn=_fake_model_preprocessor_fn, + image_resizer_fn=_fake_image_resizer_fn, + num_classes=num_classes, + data_augmentation_fn=add_one_data_augmentation_fn) + with self.test_session() as sess: + augmented_tensor_dict = sess.run( + input_transformation_fn(tensor_dict=tensor_dict)) + + self.assertAllEqual(augmented_tensor_dict[fields.InputDataFields.image], + np_image + 1) + self.assertAllEqual( + augmented_tensor_dict[fields.InputDataFields.groundtruth_classes], + [[0, 0, 0, 1], [0, 1, 0, 0]]) + + def test_applies_data_augmentation_fn_before_model_preprocess_fn(self): + np_image = np.random.randint(256, size=(4, 4, 3)) + tensor_dict = { + fields.InputDataFields.image: + tf.constant(np_image), + fields.InputDataFields.groundtruth_classes: + tf.constant(np.array([3, 1], np.int32)) + } + + def mul_two_model_preprocessor_fn(image): + return (image * 2, tf.expand_dims(tf.shape(image)[1:], axis=0)) + + def add_five_to_image_data_augmentation_fn(tensor_dict): + tensor_dict[fields.InputDataFields.image] += 5 + return tensor_dict + + num_classes = 4 + input_transformation_fn = functools.partial( + inputs.transform_input_data, + model_preprocess_fn=mul_two_model_preprocessor_fn, + image_resizer_fn=_fake_image_resizer_fn, + num_classes=num_classes, + data_augmentation_fn=add_five_to_image_data_augmentation_fn) + with self.test_session() as sess: + augmented_tensor_dict = sess.run( + input_transformation_fn(tensor_dict=tensor_dict)) + + self.assertAllEqual(augmented_tensor_dict[fields.InputDataFields.image], + (np_image + 5) * 2) + + def test_resize_with_padding(self): + + tensor_dict = { + fields.InputDataFields.image: + tf.constant(np.random.rand(100, 50, 3).astype(np.float32)), + fields.InputDataFields.groundtruth_boxes: + tf.constant(np.array([[.5, .5, 1, 1], [.0, .0, .5, .5]], + np.float32)), + fields.InputDataFields.groundtruth_classes: + tf.constant(np.array([1, 2], np.int32)), + fields.InputDataFields.groundtruth_keypoints: + tf.constant([[0.1, 0.2], [0.3, 0.4]]), + } + + num_classes = 3 + input_transformation_fn = functools.partial( + inputs.transform_input_data, + model_preprocess_fn=_fake_resize50_preprocess_fn, + image_resizer_fn=_fake_image_resizer_fn, + 
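+        # _fake_resize50_preprocess_fn scales the 100x50 input to 50x25, then
+        # pads it to 50x50; normalized x-coordinates are therefore halved
+        # while y-coordinates are unchanged (see the expected values below).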
num_classes=num_classes,) + + with self.test_session() as sess: + transformed_inputs = sess.run( + input_transformation_fn(tensor_dict=tensor_dict)) + self.assertAllClose( + transformed_inputs[fields.InputDataFields.groundtruth_boxes], + [[.5, .25, 1., .5], [.0, .0, .5, .25]]) + self.assertAllClose( + transformed_inputs[fields.InputDataFields.groundtruth_keypoints], + [[[.1, .1], [.3, .2]]]) + + +class PadInputDataToStaticShapesFnTest(test_case.TestCase): + + def test_pad_images_boxes_and_classes(self): + input_tensor_dict = { + fields.InputDataFields.image: + tf.placeholder(tf.float32, [None, None, 3]), + fields.InputDataFields.groundtruth_boxes: + tf.placeholder(tf.float32, [None, 4]), + fields.InputDataFields.groundtruth_classes: + tf.placeholder(tf.int32, [None, 3]), + fields.InputDataFields.true_image_shape: + tf.placeholder(tf.int32, [3]), + fields.InputDataFields.original_image_spatial_shape: + tf.placeholder(tf.int32, [2]) + } + padded_tensor_dict = inputs.pad_input_data_to_static_shapes( + tensor_dict=input_tensor_dict, + max_num_boxes=3, + num_classes=3, + spatial_image_shape=[5, 6]) + + self.assertAllEqual( + padded_tensor_dict[fields.InputDataFields.image].shape.as_list(), + [5, 6, 3]) + self.assertAllEqual( + padded_tensor_dict[fields.InputDataFields.true_image_shape] + .shape.as_list(), [3]) + self.assertAllEqual( + padded_tensor_dict[fields.InputDataFields.original_image_spatial_shape] + .shape.as_list(), [2]) + self.assertAllEqual( + padded_tensor_dict[fields.InputDataFields.groundtruth_boxes] + .shape.as_list(), [3, 4]) + self.assertAllEqual( + padded_tensor_dict[fields.InputDataFields.groundtruth_classes] + .shape.as_list(), [3, 3]) + + def test_clip_boxes_and_classes(self): + input_tensor_dict = { + fields.InputDataFields.groundtruth_boxes: + tf.placeholder(tf.float32, [None, 4]), + fields.InputDataFields.groundtruth_classes: + tf.placeholder(tf.int32, [None, 3]), + fields.InputDataFields.num_groundtruth_boxes: + tf.placeholder(tf.int32, []) + } + padded_tensor_dict = inputs.pad_input_data_to_static_shapes( + tensor_dict=input_tensor_dict, + max_num_boxes=3, + num_classes=3, + spatial_image_shape=[5, 6]) + + self.assertAllEqual( + padded_tensor_dict[fields.InputDataFields.groundtruth_boxes] + .shape.as_list(), [3, 4]) + self.assertAllEqual( + padded_tensor_dict[fields.InputDataFields.groundtruth_classes] + .shape.as_list(), [3, 3]) + + with self.test_session() as sess: + out_tensor_dict = sess.run( + padded_tensor_dict, + feed_dict={ + input_tensor_dict[fields.InputDataFields.groundtruth_boxes]: + np.random.rand(5, 4), + input_tensor_dict[fields.InputDataFields.groundtruth_classes]: + np.random.rand(2, 3), + input_tensor_dict[fields.InputDataFields.num_groundtruth_boxes]: + 5, + }) + + self.assertAllEqual( + out_tensor_dict[fields.InputDataFields.groundtruth_boxes].shape, [3, 4]) + self.assertAllEqual( + out_tensor_dict[fields.InputDataFields.groundtruth_classes].shape, + [3, 3]) + self.assertEqual( + out_tensor_dict[fields.InputDataFields.num_groundtruth_boxes], + 3) + + def test_do_not_pad_dynamic_images(self): + input_tensor_dict = { + fields.InputDataFields.image: + tf.placeholder(tf.float32, [None, None, 3]), + } + padded_tensor_dict = inputs.pad_input_data_to_static_shapes( + tensor_dict=input_tensor_dict, + max_num_boxes=3, + num_classes=3, + spatial_image_shape=[None, None]) + + self.assertAllEqual( + padded_tensor_dict[fields.InputDataFields.image].shape.as_list(), + [None, None, 3]) + + def test_images_and_additional_channels(self): + input_tensor_dict = { + 
fields.InputDataFields.image: + tf.placeholder(tf.float32, [None, None, 5]), + fields.InputDataFields.image_additional_channels: + tf.placeholder(tf.float32, [None, None, 2]), + } + padded_tensor_dict = inputs.pad_input_data_to_static_shapes( + tensor_dict=input_tensor_dict, + max_num_boxes=3, + num_classes=3, + spatial_image_shape=[5, 6]) + + # pad_input_data_to_static_shape assumes that image is already concatenated + # with additional channels. + self.assertAllEqual( + padded_tensor_dict[fields.InputDataFields.image].shape.as_list(), + [5, 6, 5]) + self.assertAllEqual( + padded_tensor_dict[fields.InputDataFields.image_additional_channels] + .shape.as_list(), [5, 6, 2]) + + def test_images_and_additional_channels_errors(self): + input_tensor_dict = { + fields.InputDataFields.image: + tf.placeholder(tf.float32, [None, None, 3]), + fields.InputDataFields.image_additional_channels: + tf.placeholder(tf.float32, [None, None, 2]), + fields.InputDataFields.original_image: + tf.placeholder(tf.float32, [None, None, 3]), + } + with self.assertRaises(ValueError): + _ = inputs.pad_input_data_to_static_shapes( + tensor_dict=input_tensor_dict, + max_num_boxes=3, + num_classes=3, + spatial_image_shape=[5, 6]) + + def test_gray_images(self): + input_tensor_dict = { + fields.InputDataFields.image: + tf.placeholder(tf.float32, [None, None, 1]), + } + padded_tensor_dict = inputs.pad_input_data_to_static_shapes( + tensor_dict=input_tensor_dict, + max_num_boxes=3, + num_classes=3, + spatial_image_shape=[5, 6]) + + self.assertAllEqual( + padded_tensor_dict[fields.InputDataFields.image].shape.as_list(), + [5, 6, 1]) + + def test_gray_images_and_additional_channels(self): + input_tensor_dict = { + fields.InputDataFields.image: + tf.placeholder(tf.float32, [None, None, 3]), + fields.InputDataFields.image_additional_channels: + tf.placeholder(tf.float32, [None, None, 2]), + } + # pad_input_data_to_static_shape assumes that image is already concatenated + # with additional channels. + padded_tensor_dict = inputs.pad_input_data_to_static_shapes( + tensor_dict=input_tensor_dict, + max_num_boxes=3, + num_classes=3, + spatial_image_shape=[5, 6]) + + self.assertAllEqual( + padded_tensor_dict[fields.InputDataFields.image].shape.as_list(), + [5, 6, 3]) + self.assertAllEqual( + padded_tensor_dict[fields.InputDataFields.image_additional_channels] + .shape.as_list(), [5, 6, 2]) + + def test_keypoints(self): + input_tensor_dict = { + fields.InputDataFields.groundtruth_keypoints: + tf.placeholder(tf.float32, [None, 16, 4]), + fields.InputDataFields.groundtruth_keypoint_visibilities: + tf.placeholder(tf.bool, [None, 16]), + } + padded_tensor_dict = inputs.pad_input_data_to_static_shapes( + tensor_dict=input_tensor_dict, + max_num_boxes=3, + num_classes=3, + spatial_image_shape=[5, 6]) + + self.assertAllEqual( + padded_tensor_dict[fields.InputDataFields.groundtruth_keypoints] + .shape.as_list(), [3, 16, 4]) + self.assertAllEqual( + padded_tensor_dict[ + fields.InputDataFields.groundtruth_keypoint_visibilities] + .shape.as_list(), [3, 16]) + + +if __name__ == '__main__': + tf.test.main() diff --git a/metrics/calibration_evaluation.py b/metrics/calibration_evaluation.py new file mode 100644 index 0000000..928c16a --- /dev/null +++ b/metrics/calibration_evaluation.py @@ -0,0 +1,228 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Class for evaluating object detections with calibration metrics.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from object_detection.box_coders import mean_stddev_box_coder +from object_detection.core import box_list +from object_detection.core import region_similarity_calculator +from object_detection.core import standard_fields +from object_detection.core import target_assigner +from object_detection.matchers import argmax_matcher +from object_detection.metrics import calibration_metrics +from object_detection.utils import object_detection_evaluation + + +# TODO(zbeaver): Implement metrics per category. +class CalibrationDetectionEvaluator( + object_detection_evaluation.DetectionEvaluator): + """Class to evaluate calibration detection metrics.""" + + def __init__(self, + categories, + iou_threshold=0.5): + """Constructor. + + Args: + categories: A list of dicts, each of which has the following keys - + 'id': (required) an integer id uniquely identifying this category. + 'name': (required) string representing category name e.g., 'cat', 'dog'. + iou_threshold: Threshold above which to consider a box as matched during + evaluation. + """ + super(CalibrationDetectionEvaluator, self).__init__(categories) + + # Constructing target_assigner to match detections to groundtruth. + similarity_calc = region_similarity_calculator.IouSimilarity() + matcher = argmax_matcher.ArgMaxMatcher( + matched_threshold=iou_threshold, unmatched_threshold=iou_threshold) + box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) + self._target_assigner = target_assigner.TargetAssigner( + similarity_calc, matcher, box_coder) + + def match_single_image_info(self, image_info): + """Match detections to groundtruth for a single image. + + Detections are matched to available groundtruth in the image based on the + IOU threshold from the constructor. The classes of the detections and + groundtruth matches are then compared. Detections that do not have IOU above + the required threshold or have different classes from their match are + considered negative matches. All inputs in `image_info` originate or are + inferred from the eval_dict passed to class method + `get_estimator_eval_metric_ops`. + + Args: + image_info: a tuple or list containing the following (in order): + - gt_boxes: tf.float32 tensor of groundtruth boxes. + - gt_classes: tf.int64 tensor of groundtruth classes associated with + groundtruth boxes. + - num_gt_box: scalar indicating the number of groundtruth boxes per + image. + - det_boxes: tf.float32 tensor of detection boxes. + - det_classes: tf.int64 tensor of detection classes associated with + detection boxes. + - num_det_box: scalar indicating the number of detection boxes per + image. + Returns: + is_class_matched: tf.int64 tensor identical in shape to det_boxes, + indicating whether detection boxes matched with and had the same + class as groundtruth annotations. 
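+
+      Example (illustrative): with the default iou_threshold of 0.5, a
+      detection at IOU 0.7 against a groundtruth box of the same class
+      yields 1, while an IOU of 0.3 or a class mismatch yields 0.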
+ """ + (gt_boxes, gt_classes, num_gt_box, det_boxes, det_classes, + num_det_box) = image_info + detection_boxes = det_boxes[:num_det_box] + detection_classes = det_classes[:num_det_box] + groundtruth_boxes = gt_boxes[:num_gt_box] + groundtruth_classes = gt_classes[:num_gt_box] + det_boxlist = box_list.BoxList(detection_boxes) + gt_boxlist = box_list.BoxList(groundtruth_boxes) + + # Target assigner requires classes in one-hot format. An additional + # dimension is required since gt_classes are 1-indexed; the zero index is + # provided to all non-matches. + one_hot_depth = tf.cast(tf.add(tf.reduce_max(groundtruth_classes), 1), + dtype=tf.int32) + gt_classes_one_hot = tf.one_hot( + groundtruth_classes, one_hot_depth, dtype=tf.float32) + one_hot_cls_targets, _, _, _, _ = self._target_assigner.assign( + det_boxlist, + gt_boxlist, + gt_classes_one_hot, + unmatched_class_label=tf.zeros(shape=one_hot_depth, dtype=tf.float32)) + # Transform from one-hot back to indexes. + cls_targets = tf.argmax(one_hot_cls_targets, axis=1) + is_class_matched = tf.cast( + tf.equal(tf.cast(cls_targets, tf.int64), detection_classes), + dtype=tf.int64) + return is_class_matched + + def get_estimator_eval_metric_ops(self, eval_dict): + """Returns a dictionary of eval metric ops. + + Note that once value_op is called, the detections and groundtruth added via + update_op are cleared. + + This function can take in groundtruth and detections for a batch of images, + or for a single image. For the latter case, the batch dimension for input + tensors need not be present. + + Args: + eval_dict: A dictionary that holds tensors for evaluating object detection + performance. For single-image evaluation, this dictionary may be + produced from eval_util.result_dict_for_single_example(). If multi-image + evaluation, `eval_dict` should contain the fields + 'num_groundtruth_boxes_per_image' and 'num_det_boxes_per_image' to + properly unpad the tensors from the batch. + + Returns: + a dictionary of metric names to tuple of value_op and update_op that can + be used as eval metric ops in tf.estimator.EstimatorSpec. Note that all + update ops must be run together and similarly all value ops must be run + together to guarantee correct behaviour. + """ + # Unpack items from the evaluation dictionary. + input_data_fields = standard_fields.InputDataFields + detection_fields = standard_fields.DetectionResultFields + image_id = eval_dict[input_data_fields.key] + groundtruth_boxes = eval_dict[input_data_fields.groundtruth_boxes] + groundtruth_classes = eval_dict[input_data_fields.groundtruth_classes] + detection_boxes = eval_dict[detection_fields.detection_boxes] + detection_scores = eval_dict[detection_fields.detection_scores] + detection_classes = eval_dict[detection_fields.detection_classes] + num_gt_boxes_per_image = eval_dict.get( + 'num_groundtruth_boxes_per_image', None) + num_det_boxes_per_image = eval_dict.get('num_det_boxes_per_image', None) + is_annotated_batched = eval_dict.get('is_annotated', None) + + if not image_id.shape.as_list(): + # Apply a batch dimension to all tensors. 
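+      # (An empty static shape for image_id means `eval_dict` holds a single
+      # unbatched image, e.g. from result_dict_for_single_example, so a
+      # leading batch dimension of 1 is added to every tensor below.)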
+ image_id = tf.expand_dims(image_id, 0) + groundtruth_boxes = tf.expand_dims(groundtruth_boxes, 0) + groundtruth_classes = tf.expand_dims(groundtruth_classes, 0) + detection_boxes = tf.expand_dims(detection_boxes, 0) + detection_scores = tf.expand_dims(detection_scores, 0) + detection_classes = tf.expand_dims(detection_classes, 0) + + if num_gt_boxes_per_image is None: + num_gt_boxes_per_image = tf.shape(groundtruth_boxes)[1:2] + else: + num_gt_boxes_per_image = tf.expand_dims(num_gt_boxes_per_image, 0) + + if num_det_boxes_per_image is None: + num_det_boxes_per_image = tf.shape(detection_boxes)[1:2] + else: + num_det_boxes_per_image = tf.expand_dims(num_det_boxes_per_image, 0) + + if is_annotated_batched is None: + is_annotated_batched = tf.constant([True]) + else: + is_annotated_batched = tf.expand_dims(is_annotated_batched, 0) + else: + if num_gt_boxes_per_image is None: + num_gt_boxes_per_image = tf.tile( + tf.shape(groundtruth_boxes)[1:2], + multiples=tf.shape(groundtruth_boxes)[0:1]) + if num_det_boxes_per_image is None: + num_det_boxes_per_image = tf.tile( + tf.shape(detection_boxes)[1:2], + multiples=tf.shape(detection_boxes)[0:1]) + if is_annotated_batched is None: + is_annotated_batched = tf.ones_like(image_id, dtype=tf.bool) + + # Filter images based on is_annotated_batched and match detections. + image_info = [tf.boolean_mask(tensor, is_annotated_batched) for tensor in + [groundtruth_boxes, groundtruth_classes, + num_gt_boxes_per_image, detection_boxes, detection_classes, + num_det_boxes_per_image]] + is_class_matched = tf.map_fn( + self.match_single_image_info, image_info, dtype=tf.int64) + y_true = tf.squeeze(is_class_matched) + y_pred = tf.squeeze(tf.boolean_mask(detection_scores, is_annotated_batched)) + ece, update_op = calibration_metrics.expected_calibration_error( + y_true, y_pred) + return {'CalibrationError/ExpectedCalibrationError': (ece, update_op)} + + def add_single_ground_truth_image_info(self, image_id, groundtruth_dict): + """Adds groundtruth for a single image to be used for evaluation. + + Args: + image_id: A unique string/integer identifier for the image. + groundtruth_dict: A dictionary of groundtruth numpy arrays required + for evaluations. + """ + raise NotImplementedError + + def add_single_detected_image_info(self, image_id, detections_dict): + """Adds detections for a single image to be used for evaluation. + + Args: + image_id: A unique string/integer identifier for the image. + detections_dict: A dictionary of detection numpy arrays required for + evaluation. + """ + raise NotImplementedError + + def evaluate(self): + """Evaluates detections and returns a dictionary of metrics.""" + raise NotImplementedError + + def clear(self): + """Clears the state to prepare for a fresh evaluation.""" + raise NotImplementedError diff --git a/metrics/calibration_evaluation_test.py b/metrics/calibration_evaluation_test.py new file mode 100644 index 0000000..422567e --- /dev/null +++ b/metrics/calibration_evaluation_test.py @@ -0,0 +1,200 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for tensorflow_models.object_detection.metrics.calibration_evaluation.""" # pylint: disable=line-too-long + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from object_detection.core import standard_fields +from object_detection.metrics import calibration_evaluation + + +def _get_categories_list(): + return [{ + 'id': 1, + 'name': 'person' + }, { + 'id': 2, + 'name': 'dog' + }, { + 'id': 3, + 'name': 'cat' + }] + + +class CalibrationDetectionEvaluationTest(tf.test.TestCase): + + def _get_ece(self, ece_op, update_op): + """Return scalar expected calibration error.""" + with self.test_session() as sess: + metrics_vars = tf.get_collection(tf.GraphKeys.METRIC_VARIABLES) + sess.run(tf.variables_initializer(var_list=metrics_vars)) + _ = sess.run(update_op) + return sess.run(ece_op) + + def testGetECEWithMatchingGroundtruthAndDetections(self): + """Tests that ECE is calculated correctly when box matches exist.""" + calibration_evaluator = calibration_evaluation.CalibrationDetectionEvaluator( + _get_categories_list(), iou_threshold=0.5) + input_data_fields = standard_fields.InputDataFields + detection_fields = standard_fields.DetectionResultFields + # All gt and detection boxes match. + base_eval_dict = { + input_data_fields.key: + tf.constant(['image_1', 'image_2', 'image_3']), + input_data_fields.groundtruth_boxes: + tf.constant([[[100., 100., 200., 200.]], + [[50., 50., 100., 100.]], + [[25., 25., 50., 50.]]], + dtype=tf.float32), + detection_fields.detection_boxes: + tf.constant([[[100., 100., 200., 200.]], + [[50., 50., 100., 100.]], + [[25., 25., 50., 50.]]], + dtype=tf.float32), + input_data_fields.groundtruth_classes: + tf.constant([[1], [2], [3]], dtype=tf.int64), + # Note that, in the zero ECE case, the detection class for image_2 + # should NOT match groundtruth, since the detection score is zero. + detection_fields.detection_scores: + tf.constant([[1.0], [0.0], [1.0]], dtype=tf.float32) + } + + # Zero ECE (perfectly calibrated). + zero_ece_eval_dict = base_eval_dict.copy() + zero_ece_eval_dict[detection_fields.detection_classes] = tf.constant( + [[1], [1], [3]], dtype=tf.int64) + zero_ece_op, zero_ece_update_op = ( + calibration_evaluator.get_estimator_eval_metric_ops(zero_ece_eval_dict) + ['CalibrationError/ExpectedCalibrationError']) + zero_ece = self._get_ece(zero_ece_op, zero_ece_update_op) + self.assertAlmostEqual(zero_ece, 0.0) + + # ECE of 1 (poorest calibration). + one_ece_eval_dict = base_eval_dict.copy() + one_ece_eval_dict[detection_fields.detection_classes] = tf.constant( + [[3], [2], [1]], dtype=tf.int64) + one_ece_op, one_ece_update_op = ( + calibration_evaluator.get_estimator_eval_metric_ops(one_ece_eval_dict) + ['CalibrationError/ExpectedCalibrationError']) + one_ece = self._get_ece(one_ece_op, one_ece_update_op) + self.assertAlmostEqual(one_ece, 1.0) + + def testGetECEWithUnmatchedGroundtruthAndDetections(self): + """Tests that ECE is correctly calculated when boxes are unmatched.""" + calibration_evaluator = calibration_evaluation.CalibrationDetectionEvaluator( + _get_categories_list(), iou_threshold=0.5) + input_data_fields = standard_fields.InputDataFields + detection_fields = standard_fields.DetectionResultFields + # No gt and detection boxes match. 
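+ # Arithmetic sketch (assuming the evaluator's default of 20 score bins): + # every detection score is 0.0, so all predictions land in the lowest bin + # with mean confidence 0.0; since none of the unmatched detections is a + # true positive, bin accuracy is also 0.0 and the calibration gap is zero.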
+ eval_dict = { + input_data_fields.key: + tf.constant(['image_1', 'image_2', 'image_3']), + input_data_fields.groundtruth_boxes: + tf.constant([[[100., 100., 200., 200.]], + [[50., 50., 100., 100.]], + [[25., 25., 50., 50.]]], + dtype=tf.float32), + detection_fields.detection_boxes: + tf.constant([[[50., 50., 100., 100.]], + [[25., 25., 50., 50.]], + [[100., 100., 200., 200.]]], + dtype=tf.float32), + input_data_fields.groundtruth_classes: + tf.constant([[1], [2], [3]], dtype=tf.int64), + detection_fields.detection_classes: + tf.constant([[1], [1], [3]], dtype=tf.int64), + # Detection scores of zero when boxes are unmatched = ECE of zero. + detection_fields.detection_scores: + tf.constant([[0.0], [0.0], [0.0]], dtype=tf.float32) + } + + ece_op, update_op = calibration_evaluator.get_estimator_eval_metric_ops( + eval_dict)['CalibrationError/ExpectedCalibrationError'] + ece = self._get_ece(ece_op, update_op) + self.assertAlmostEqual(ece, 0.0) + + def testGetECEWithBatchedDetections(self): + """Tests that ECE is correct with multiple detections per image.""" + calibration_evaluator = calibration_evaluation.CalibrationDetectionEvaluator( + _get_categories_list(), iou_threshold=0.5) + input_data_fields = standard_fields.InputDataFields + detection_fields = standard_fields.DetectionResultFields + # Note that image_2 has mismatched classes and detection scores but should + # still produce ECE of 0 because detection scores are also 0. + eval_dict = { + input_data_fields.key: + tf.constant(['image_1', 'image_2', 'image_3']), + input_data_fields.groundtruth_boxes: + tf.constant([[[100., 100., 200., 200.], [50., 50., 100., 100.]], + [[50., 50., 100., 100.], [100., 100., 200., 200.]], + [[25., 25., 50., 50.], [100., 100., 200., 200.]]], + dtype=tf.float32), + detection_fields.detection_boxes: + tf.constant([[[100., 100., 200., 200.], [50., 50., 100., 100.]], + [[50., 50., 100., 100.], [25., 25., 50., 50.]], + [[25., 25., 50., 50.], [100., 100., 200., 200.]]], + dtype=tf.float32), + input_data_fields.groundtruth_classes: + tf.constant([[1, 2], [2, 3], [3, 1]], dtype=tf.int64), + detection_fields.detection_classes: + tf.constant([[1, 2], [1, 1], [3, 1]], dtype=tf.int64), + detection_fields.detection_scores: + tf.constant([[1.0, 1.0], [0.0, 0.0], [1.0, 1.0]], dtype=tf.float32) + } + + ece_op, update_op = calibration_evaluator.get_estimator_eval_metric_ops( + eval_dict)['CalibrationError/ExpectedCalibrationError'] + ece = self._get_ece(ece_op, update_op) + self.assertAlmostEqual(ece, 0.0) + + def testGetECEWhenImagesFilteredByIsAnnotated(self): + """Tests that ECE is correct when detections filtered by is_annotated.""" + calibration_evaluator = calibration_evaluation.CalibrationDetectionEvaluator( + _get_categories_list(), iou_threshold=0.5) + input_data_fields = standard_fields.InputDataFields + detection_fields = standard_fields.DetectionResultFields + # ECE will be 0 only if the third image is filtered by is_annotated. 
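+ # Sketch of the contributions: image_1 predicts the correct class at + # score 1.0 (gap 0) and image_2 predicts a wrong class at score 0.0 + # (gap 0); left unfiltered, image_3's wrong class at score 1.0 would add + # a full-confidence error and make the ECE nonzero.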
+ eval_dict = { + input_data_fields.key: + tf.constant(['image_1', 'image_2', 'image_3']), + input_data_fields.groundtruth_boxes: + tf.constant([[[100., 100., 200., 200.]], + [[50., 50., 100., 100.]], + [[25., 25., 50., 50.]]], + dtype=tf.float32), + detection_fields.detection_boxes: + tf.constant([[[100., 100., 200., 200.]], + [[50., 50., 100., 100.]], + [[25., 25., 50., 50.]]], + dtype=tf.float32), + input_data_fields.groundtruth_classes: + tf.constant([[1], [2], [1]], dtype=tf.int64), + detection_fields.detection_classes: + tf.constant([[1], [1], [3]], dtype=tf.int64), + detection_fields.detection_scores: + tf.constant([[1.0], [0.0], [1.0]], dtype=tf.float32), + 'is_annotated': tf.constant([True, True, False], dtype=tf.bool) + } + + ece_op, update_op = calibration_evaluator.get_estimator_eval_metric_ops( + eval_dict)['CalibrationError/ExpectedCalibrationError'] + ece = self._get_ece(ece_op, update_op) + self.assertAlmostEqual(ece, 0.0) + +if __name__ == '__main__': + tf.test.main() diff --git a/metrics/calibration_metrics.py b/metrics/calibration_metrics.py new file mode 100644 index 0000000..a94f460 --- /dev/null +++ b/metrics/calibration_metrics.py @@ -0,0 +1,118 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Object detection calibration metrics. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.ops import metrics_impl + + +def _safe_div(numerator, denominator): + """Divides two tensors element-wise, returning 0 if the denominator is <= 0. + + Args: + numerator: A real `Tensor`. + denominator: A real `Tensor`, with dtype matching `numerator`. + + Returns: + 0 if `denominator` <= 0, else `numerator` / `denominator`. + """ + t = tf.truediv(numerator, denominator) + zero = tf.zeros_like(t, dtype=denominator.dtype) + condition = tf.greater(denominator, zero) + zero = tf.cast(zero, t.dtype) + return tf.where(condition, t, zero) + + +def _ece_from_bins(bin_counts, bin_true_sum, bin_preds_sum, name): + """Calculates Expected Calibration Error from accumulated statistics.""" + bin_accuracies = _safe_div(bin_true_sum, bin_counts) + bin_confidences = _safe_div(bin_preds_sum, bin_counts) + abs_bin_errors = tf.abs(bin_accuracies - bin_confidences) + bin_weights = _safe_div(bin_counts, tf.reduce_sum(bin_counts)) + return tf.reduce_sum(abs_bin_errors * bin_weights, name=name) + + +def expected_calibration_error(y_true, y_pred, nbins=20): + """Calculates Expected Calibration Error (ECE). + + ECE is a scalar summary statistic of calibration error. It is the + sample-weighted average of the difference between the predicted and true + probabilities of a positive detection across uniformly-spaced model + confidences [0, 1]. See referenced paper for a thorough explanation. + + Reference: + Guo et
al, "On Calibration of Modern Neural Networks" + Page 2, Expected Calibration Error (ECE). + https://arxiv.org/pdf/1706.04599.pdf + + This function creates three local variables, `bin_counts`, `bin_true_sum`, and + `bin_preds_sum` that are used to compute ECE. For estimation of the metric + over a stream of data, the function creates an `update_op` operation that + updates these variables and returns the ECE. + + Args: + y_true: 1-D tf.int64 Tensor of binarized ground truth, corresponding to each + prediction in y_pred. + y_pred: 1-D tf.float32 tensor of model confidence scores in range + [0.0, 1.0]. + nbins: int specifying the number of uniformly-spaced bins into which y_pred + will be bucketed. + + Returns: + value_op: A value metric op that returns ece. + update_op: An operation that increments the `bin_counts`, `bin_true_sum`, + and `bin_preds_sum` variables appropriately and whose value matches `ece`. + + Raises: + InvalidArgumentError: if y_pred is not in [0.0, 1.0]. + """ + bin_counts = metrics_impl.metric_variable( + [nbins], tf.float32, name='bin_counts') + bin_true_sum = metrics_impl.metric_variable( + [nbins], tf.float32, name='true_sum') + bin_preds_sum = metrics_impl.metric_variable( + [nbins], tf.float32, name='preds_sum') + + with tf.control_dependencies([ + tf.assert_greater_equal(y_pred, 0.0), + tf.assert_less_equal(y_pred, 1.0), + ]): + bin_ids = tf.histogram_fixed_width_bins(y_pred, [0.0, 1.0], nbins=nbins) + + with tf.control_dependencies([bin_ids]): + update_bin_counts_op = tf.assign_add( + bin_counts, tf.cast(tf.bincount(bin_ids, minlength=nbins), + dtype=tf.float32)) + update_bin_true_sum_op = tf.assign_add( + bin_true_sum, + tf.cast(tf.bincount(bin_ids, weights=y_true, minlength=nbins), + dtype=tf.float32)) + update_bin_preds_sum_op = tf.assign_add( + bin_preds_sum, + tf.cast(tf.bincount(bin_ids, weights=y_pred, minlength=nbins), + dtype=tf.float32)) + + ece_update_op = _ece_from_bins( + update_bin_counts_op, + update_bin_true_sum_op, + update_bin_preds_sum_op, + name='update_op') + ece = _ece_from_bins(bin_counts, bin_true_sum, bin_preds_sum, name='value') + return ece, ece_update_op diff --git a/metrics/calibration_metrics_test.py b/metrics/calibration_metrics_test.py new file mode 100644 index 0000000..4518293 --- /dev/null +++ b/metrics/calibration_metrics_test.py @@ -0,0 +1,109 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for calibration_metrics.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import tensorflow as tf +from object_detection.metrics import calibration_metrics + + +class CalibrationLibTest(tf.test.TestCase): + + @staticmethod + def _get_calibration_placeholders(): + """Returns TF placeholders for y_true and y_pred.""" + return (tf.placeholder(tf.int64, shape=(None)), + tf.placeholder(tf.float32, shape=(None))) + + def test_expected_calibration_error_all_bins_filled(self): + """Test expected calibration error when all bins contain predictions.""" + y_true, y_pred = self._get_calibration_placeholders() + expected_ece_op, update_op = calibration_metrics.expected_calibration_error( + y_true, y_pred, nbins=2) + with self.test_session() as sess: + metrics_vars = tf.get_collection(tf.GraphKeys.METRIC_VARIABLES) + sess.run(tf.variables_initializer(var_list=metrics_vars)) + # Bin calibration errors (|confidence - accuracy| * bin_weight): + # - [0,0.5): |0.2 - 0.333| * (3/5) = 0.08 + # - [0.5, 1]: |0.75 - 0.5| * (2/5) = 0.1 + sess.run( + update_op, + feed_dict={ + y_pred: np.array([0., 0.2, 0.4, 0.5, 1.0]), + y_true: np.array([0, 0, 1, 0, 1]) + }) + actual_ece = 0.08 + 0.1 + expected_ece = sess.run(expected_ece_op) + self.assertAlmostEqual(actual_ece, expected_ece) + + def test_expected_calibration_error_all_bins_not_filled(self): + """Test expected calibration error when one bin contains no predictions.""" + y_true, y_pred = self._get_calibration_placeholders() + expected_ece_op, update_op = calibration_metrics.expected_calibration_error( + y_true, y_pred, nbins=2) + with self.test_session() as sess: + metrics_vars = tf.get_collection(tf.GraphKeys.METRIC_VARIABLES) + sess.run(tf.variables_initializer(var_list=metrics_vars)) + # All three predictions fall into the [0, 0.5) bin (weight 3/3), while + # the [0.5, 1] bin is empty and contributes no error: + # - [0,0.5): |0.2 - 0.333| * (3/3) = 0.133 + sess.run( + update_op, + feed_dict={ + y_pred: np.array([0., 0.2, 0.4]), + y_true: np.array([0, 0, 1]) + }) + actual_ece = np.abs(0.2 - (1 / 3.)) + expected_ece = sess.run(expected_ece_op) + self.assertAlmostEqual(actual_ece, expected_ece) + + def test_expected_calibration_error_with_multiple_data_streams(self): + """Test expected calibration error when multiple data batches provided.""" + y_true, y_pred = self._get_calibration_placeholders() + expected_ece_op, update_op = calibration_metrics.expected_calibration_error( + y_true, y_pred, nbins=2) + with self.test_session() as sess: + metrics_vars = tf.get_collection(tf.GraphKeys.METRIC_VARIABLES) + sess.run(tf.variables_initializer(var_list=metrics_vars)) + # Identical data to test_expected_calibration_error_all_bins_filled, + # except split over three batches.
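+ # Because `bin_counts`, `bin_true_sum` and `bin_preds_sum` are + # accumulator variables, running update_op once per batch is equivalent + # to a single update over the concatenated data; e.g. bin_counts ends up + # as [3., 2.] either way.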
+ sess.run( + update_op, + feed_dict={ + y_pred: np.array([0., 0.2]), + y_true: np.array([0, 0]) + }) + sess.run( + update_op, + feed_dict={ + y_pred: np.array([0.4, 0.5]), + y_true: np.array([1, 0]) + }) + sess.run( + update_op, feed_dict={ + y_pred: np.array([1.0]), + y_true: np.array([1]) + }) + actual_ece = 0.08 + 0.1 + expected_ece = sess.run(expected_ece_op) + self.assertAlmostEqual(actual_ece, expected_ece) + + +if __name__ == '__main__': + tf.test.main() diff --git a/metrics/coco_evaluation.py b/metrics/coco_evaluation.py new file mode 100644 index 0000000..3749b27 --- /dev/null +++ b/metrics/coco_evaluation.py @@ -0,0 +1,762 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Class for evaluating object detections with COCO metrics.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +from six.moves import zip +import tensorflow as tf + +from object_detection.core import standard_fields +from object_detection.metrics import coco_tools +from object_detection.utils import json_utils +from object_detection.utils import object_detection_evaluation + + +class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): + """Class to evaluate COCO detection metrics.""" + + def __init__(self, + categories, + include_metrics_per_category=False, + all_metrics_per_category=False): + """Constructor. + + Args: + categories: A list of dicts, each of which has the following keys - + 'id': (required) an integer id uniquely identifying this category. + 'name': (required) string representing category name e.g., 'cat', 'dog'. + include_metrics_per_category: If True, include metrics for each category. + all_metrics_per_category: Whether to include all the summary metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than a handful of categories, because it will pollute + your mldash. + """ + super(CocoDetectionEvaluator, self).__init__(categories) + # _image_ids is a dictionary that maps unique image ids to Booleans which + # indicate whether a corresponding detection has been added. + self._image_ids = {} + self._groundtruth_list = [] + self._detection_boxes_list = [] + self._category_id_set = set([cat['id'] for cat in self._categories]) + self._annotation_id = 1 + self._metrics = None + self._include_metrics_per_category = include_metrics_per_category + self._all_metrics_per_category = all_metrics_per_category + + def clear(self): + """Clears the state to prepare for a fresh evaluation.""" + self._image_ids.clear() + self._groundtruth_list = [] + self._detection_boxes_list = [] + + def add_single_ground_truth_image_info(self, + image_id, + groundtruth_dict): + """Adds groundtruth for a single image to be used for evaluation. + + If the image has already been added, a warning is logged, and groundtruth is + ignored.
+ + Args: + image_id: A unique string/integer identifier for the image. + groundtruth_dict: A dictionary containing - + InputDataFields.groundtruth_boxes: float32 numpy array of shape + [num_boxes, 4] containing `num_boxes` groundtruth boxes of the format + [ymin, xmin, ymax, xmax] in absolute image coordinates. + InputDataFields.groundtruth_classes: integer numpy array of shape + [num_boxes] containing 1-indexed groundtruth classes for the boxes. + InputDataFields.groundtruth_is_crowd (optional): integer numpy array of + shape [num_boxes] containing iscrowd flag for groundtruth boxes. + """ + if image_id in self._image_ids: + tf.logging.warning('Ignoring ground truth with image id %s since it was ' + 'previously added', image_id) + return + + groundtruth_is_crowd = groundtruth_dict.get( + standard_fields.InputDataFields.groundtruth_is_crowd) + # Drop groundtruth_is_crowd if empty tensor. + if groundtruth_is_crowd is not None and not groundtruth_is_crowd.shape[0]: + groundtruth_is_crowd = None + + self._groundtruth_list.extend( + coco_tools.ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self._annotation_id, + category_id_set=self._category_id_set, + groundtruth_boxes=groundtruth_dict[ + standard_fields.InputDataFields.groundtruth_boxes], + groundtruth_classes=groundtruth_dict[ + standard_fields.InputDataFields.groundtruth_classes], + groundtruth_is_crowd=groundtruth_is_crowd)) + self._annotation_id += groundtruth_dict[standard_fields.InputDataFields. + groundtruth_boxes].shape[0] + # Boolean to indicate whether a detection has been added for this image. + self._image_ids[image_id] = False + + def add_single_detected_image_info(self, + image_id, + detections_dict): + """Adds detections for a single image to be used for evaluation. + + If a detection has already been added for this image id, a warning is + logged, and the detection is skipped. + + Args: + image_id: A unique string/integer identifier for the image. + detections_dict: A dictionary containing - + DetectionResultFields.detection_boxes: float32 numpy array of shape + [num_boxes, 4] containing `num_boxes` detection boxes of the format + [ymin, xmin, ymax, xmax] in absolute image coordinates. + DetectionResultFields.detection_scores: float32 numpy array of shape + [num_boxes] containing detection scores for the boxes. + DetectionResultFields.detection_classes: integer numpy array of shape + [num_boxes] containing 1-indexed detection classes for the boxes. + + Raises: + ValueError: If groundtruth for the image_id is not available. + """ + if image_id not in self._image_ids: + raise ValueError('Missing groundtruth for image id: {}'.format(image_id)) + + if self._image_ids[image_id]: + tf.logging.warning('Ignoring detection with image id %s since it was ' + 'previously added', image_id) + return + + self._detection_boxes_list.extend( + coco_tools.ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self._category_id_set, + detection_boxes=detections_dict[standard_fields. + DetectionResultFields + .detection_boxes], + detection_scores=detections_dict[standard_fields. + DetectionResultFields. + detection_scores], + detection_classes=detections_dict[standard_fields. + DetectionResultFields. + detection_classes])) + self._image_ids[image_id] = True + + def dump_detections_to_json_file(self, json_output_path): + """Saves the detections into json_output_path in the format used by MS COCO. + + Args: + json_output_path: String containing the output file's path. It can also + be None.
In that case nothing will be written to the output file. + """ + if json_output_path: + with tf.gfile.GFile(json_output_path, 'w') as fid: + tf.logging.info('Dumping detections to output json file.') + json_utils.Dump( + obj=self._detection_boxes_list, fid=fid, float_digits=4, indent=2) + + def evaluate(self): + """Evaluates the detection boxes and returns a dictionary of coco metrics. + + Returns: + A dictionary holding - + + 1. summary_metrics: + 'DetectionBoxes_Precision/mAP': mean average precision over classes + averaged over IOU thresholds ranging from .5 to .95 with .05 + increments. + 'DetectionBoxes_Precision/mAP@.50IOU': mean average precision at 50% IOU. + 'DetectionBoxes_Precision/mAP@.75IOU': mean average precision at 75% IOU. + 'DetectionBoxes_Precision/mAP (small)': mean average precision for small + objects (area < 32^2 pixels). + 'DetectionBoxes_Precision/mAP (medium)': mean average precision for + medium sized objects (32^2 pixels < area < 96^2 pixels). + 'DetectionBoxes_Precision/mAP (large)': mean average precision for large + objects (96^2 pixels < area < 10000^2 pixels). + 'DetectionBoxes_Recall/AR@1': average recall with 1 detection. + 'DetectionBoxes_Recall/AR@10': average recall with 10 detections. + 'DetectionBoxes_Recall/AR@100': average recall with 100 detections. + 'DetectionBoxes_Recall/AR@100 (small)': average recall for small objects + with 100 detections. + 'DetectionBoxes_Recall/AR@100 (medium)': average recall for medium objects + with 100 detections. + 'DetectionBoxes_Recall/AR@100 (large)': average recall for large objects + with 100 detections. + + 2. per_category_ap: if include_metrics_per_category is True, category + specific results with keys of the form: + 'Precision mAP ByCategory/category' (without the supercategory part if + no supercategories exist). For backward compatibility + 'PerformanceByCategory' is included in the output regardless of + all_metrics_per_category. + """ + tf.logging.info('Performing evaluation on %d images.', len(self._image_ids)) + groundtruth_dict = { + 'annotations': self._groundtruth_list, + 'images': [{'id': image_id} for image_id in self._image_ids], + 'categories': self._categories + } + coco_wrapped_groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( + self._detection_boxes_list) + box_evaluator = coco_tools.COCOEvalWrapper( + coco_wrapped_groundtruth, coco_wrapped_detections, agnostic_mode=False) + box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( + include_metrics_per_category=self._include_metrics_per_category, + all_metrics_per_category=self._all_metrics_per_category) + box_metrics.update(box_per_category_ap) + box_metrics = {'DetectionBoxes_' + key: value + for key, value in box_metrics.items()} + return box_metrics + + def add_eval_dict(self, eval_dict): + """Observes an evaluation result dict for a single example. + + When executing eagerly, once all observations have been observed by this + method you can use `.evaluate()` to get the final metrics. + + When using `tf.estimator.Estimator` for evaluation this function is used by + `get_estimator_eval_metric_ops()` to construct the metric update op. + + Args: + eval_dict: A dictionary that holds tensors for evaluating an object + detection model, returned from + eval_util.result_dict_for_single_example(). + + Returns: + None when executing eagerly, or an update_op that can be used to update + the eval metrics in `tf.estimator.EstimatorSpec`.
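+ + Example (illustrative sketch, graph mode; `evaluator`, `eval_dict`, `sess` + and `num_eval_batches` are assumed to be supplied by the caller): + + update_op = evaluator.add_eval_dict(eval_dict) + for _ in range(num_eval_batches): + sess.run(update_op) # accumulates detections and groundtruth + metrics = evaluator.evaluate() # COCO metrics over everything added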
+ """ + def update_op( + image_id_batched, + groundtruth_boxes_batched, + groundtruth_classes_batched, + groundtruth_is_crowd_batched, + num_gt_boxes_per_image, + detection_boxes_batched, + detection_scores_batched, + detection_classes_batched, + num_det_boxes_per_image, + is_annotated_batched): + """Update operation for adding batch of images to Coco evaluator.""" + + for (image_id, gt_box, gt_class, gt_is_crowd, num_gt_box, det_box, + det_score, det_class, num_det_box, is_annotated) in zip( + image_id_batched, groundtruth_boxes_batched, + groundtruth_classes_batched, groundtruth_is_crowd_batched, + num_gt_boxes_per_image, + detection_boxes_batched, detection_scores_batched, + detection_classes_batched, num_det_boxes_per_image, + is_annotated_batched): + if is_annotated: + self.add_single_ground_truth_image_info( + image_id, { + 'groundtruth_boxes': gt_box[:num_gt_box], + 'groundtruth_classes': gt_class[:num_gt_box], + 'groundtruth_is_crowd': gt_is_crowd[:num_gt_box] + }) + self.add_single_detected_image_info( + image_id, + {'detection_boxes': det_box[:num_det_box], + 'detection_scores': det_score[:num_det_box], + 'detection_classes': det_class[:num_det_box]}) + + # Unpack items from the evaluation dictionary. + input_data_fields = standard_fields.InputDataFields + detection_fields = standard_fields.DetectionResultFields + image_id = eval_dict[input_data_fields.key] + groundtruth_boxes = eval_dict[input_data_fields.groundtruth_boxes] + groundtruth_classes = eval_dict[input_data_fields.groundtruth_classes] + groundtruth_is_crowd = eval_dict.get( + input_data_fields.groundtruth_is_crowd, None) + detection_boxes = eval_dict[detection_fields.detection_boxes] + detection_scores = eval_dict[detection_fields.detection_scores] + detection_classes = eval_dict[detection_fields.detection_classes] + num_gt_boxes_per_image = eval_dict.get( + 'num_groundtruth_boxes_per_image', None) + num_det_boxes_per_image = eval_dict.get('num_det_boxes_per_image', None) + is_annotated = eval_dict.get('is_annotated', None) + + if groundtruth_is_crowd is None: + groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool) + if not image_id.shape.as_list(): + # Apply a batch dimension to all tensors. 
+ image_id = tf.expand_dims(image_id, 0) + groundtruth_boxes = tf.expand_dims(groundtruth_boxes, 0) + groundtruth_classes = tf.expand_dims(groundtruth_classes, 0) + groundtruth_is_crowd = tf.expand_dims(groundtruth_is_crowd, 0) + detection_boxes = tf.expand_dims(detection_boxes, 0) + detection_scores = tf.expand_dims(detection_scores, 0) + detection_classes = tf.expand_dims(detection_classes, 0) + + if num_gt_boxes_per_image is None: + num_gt_boxes_per_image = tf.shape(groundtruth_boxes)[1:2] + else: + num_gt_boxes_per_image = tf.expand_dims(num_gt_boxes_per_image, 0) + + if num_det_boxes_per_image is None: + num_det_boxes_per_image = tf.shape(detection_boxes)[1:2] + else: + num_det_boxes_per_image = tf.expand_dims(num_det_boxes_per_image, 0) + + if is_annotated is None: + is_annotated = tf.constant([True]) + else: + is_annotated = tf.expand_dims(is_annotated, 0) + else: + if num_gt_boxes_per_image is None: + num_gt_boxes_per_image = tf.tile( + tf.shape(groundtruth_boxes)[1:2], + multiples=tf.shape(groundtruth_boxes)[0:1]) + if num_det_boxes_per_image is None: + num_det_boxes_per_image = tf.tile( + tf.shape(detection_boxes)[1:2], + multiples=tf.shape(detection_boxes)[0:1]) + if is_annotated is None: + is_annotated = tf.ones_like(image_id, dtype=tf.bool) + + return tf.py_func(update_op, [image_id, + groundtruth_boxes, + groundtruth_classes, + groundtruth_is_crowd, + num_gt_boxes_per_image, + detection_boxes, + detection_scores, + detection_classes, + num_det_boxes_per_image, + is_annotated], []) + + def get_estimator_eval_metric_ops(self, eval_dict): + """Returns a dictionary of eval metric ops. + + Note that once value_op is called, the detections and groundtruth added via + update_op are cleared. + + This function can take in groundtruth and detections for a batch of images, + or for a single image. For the latter case, the batch dimension for input + tensors need not be present. + + Args: + eval_dict: A dictionary that holds tensors for evaluating object detection + performance. For single-image evaluation, this dictionary may be + produced from eval_util.result_dict_for_single_example(). If multi-image + evaluation, `eval_dict` should contain the fields + 'num_groundtruth_boxes_per_image' and 'num_det_boxes_per_image' to + properly unpad the tensors from the batch. + + Returns: + a dictionary of metric names to tuple of value_op and update_op that can + be used as eval metric ops in tf.estimator.EstimatorSpec. Note that all + update ops must be run together and similarly all value ops must be run + together to guarantee correct behaviour. 
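+ + Example (illustrative sketch; `evaluator`, `eval_dict` and `loss` are + assumed to be defined in the caller's model_fn): + + spec = tf.estimator.EstimatorSpec( + mode=tf.estimator.ModeKeys.EVAL, + loss=loss, + eval_metric_ops=evaluator.get_estimator_eval_metric_ops(eval_dict))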
+ """ + update_op = self.add_eval_dict(eval_dict) + metric_names = ['DetectionBoxes_Precision/mAP', + 'DetectionBoxes_Precision/mAP@.50IOU', + 'DetectionBoxes_Precision/mAP@.75IOU', + 'DetectionBoxes_Precision/mAP (large)', + 'DetectionBoxes_Precision/mAP (medium)', + 'DetectionBoxes_Precision/mAP (small)', + 'DetectionBoxes_Recall/AR@1', + 'DetectionBoxes_Recall/AR@10', + 'DetectionBoxes_Recall/AR@100', + 'DetectionBoxes_Recall/AR@100 (large)', + 'DetectionBoxes_Recall/AR@100 (medium)', + 'DetectionBoxes_Recall/AR@100 (small)'] + if self._include_metrics_per_category: + for category_dict in self._categories: + metric_names.append('DetectionBoxes_PerformanceByCategory/mAP/' + + category_dict['name']) + + def first_value_func(): + self._metrics = self.evaluate() + self.clear() + return np.float32(self._metrics[metric_names[0]]) + + def value_func_factory(metric_name): + def value_func(): + return np.float32(self._metrics[metric_name]) + return value_func + + # Ensure that the metrics are only evaluated once. + first_value_op = tf.py_func(first_value_func, [], tf.float32) + eval_metric_ops = {metric_names[0]: (first_value_op, update_op)} + with tf.control_dependencies([first_value_op]): + for metric_name in metric_names[1:]: + eval_metric_ops[metric_name] = (tf.py_func( + value_func_factory(metric_name), [], np.float32), update_op) + return eval_metric_ops + + +def _check_mask_type_and_value(array_name, masks): + """Checks whether mask dtype is uint8 and the values are either 0 or 1.""" + if masks.dtype != np.uint8: + raise ValueError('{} must be of type np.uint8. Found {}.'.format( + array_name, masks.dtype)) + if np.any(np.logical_and(masks != 0, masks != 1)): + raise ValueError('{} elements can only be either 0 or 1.'.format( + array_name)) + + +class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): + """Class to evaluate COCO detection metrics.""" + + def __init__(self, categories, include_metrics_per_category=False): + """Constructor. + + Args: + categories: A list of dicts, each of which has the following keys - + 'id': (required) an integer id uniquely identifying this category. + 'name': (required) string representing category name e.g., 'cat', 'dog'. + include_metrics_per_category: If True, include metrics for each category. + """ + super(CocoMaskEvaluator, self).__init__(categories) + self._image_id_to_mask_shape_map = {} + self._image_ids_with_detections = set([]) + self._groundtruth_list = [] + self._detection_masks_list = [] + self._category_id_set = set([cat['id'] for cat in self._categories]) + self._annotation_id = 1 + self._include_metrics_per_category = include_metrics_per_category + + def clear(self): + """Clears the state to prepare for a fresh evaluation.""" + self._image_id_to_mask_shape_map.clear() + self._image_ids_with_detections.clear() + self._groundtruth_list = [] + self._detection_masks_list = [] + + def add_single_ground_truth_image_info(self, + image_id, + groundtruth_dict): + """Adds groundtruth for a single image to be used for evaluation. + + If the image has already been added, a warning is logged, and groundtruth is + ignored. + + Args: + image_id: A unique string/integer identifier for the image. + groundtruth_dict: A dictionary containing - + InputDataFields.groundtruth_boxes: float32 numpy array of shape + [num_boxes, 4] containing `num_boxes` groundtruth boxes of the format + [ymin, xmin, ymax, xmax] in absolute image coordinates. 
+ InputDataFields.groundtruth_classes: integer numpy array of shape + [num_boxes] containing 1-indexed groundtruth classes for the boxes. + InputDataFields.groundtruth_instance_masks: uint8 numpy array of shape + [num_boxes, image_height, image_width] containing groundtruth masks + corresponding to the boxes. The elements of the array must be in + {0, 1}. + """ + if image_id in self._image_id_to_mask_shape_map: + tf.logging.warning('Ignoring ground truth with image id %s since it was ' + 'previously added', image_id) + return + + groundtruth_instance_masks = groundtruth_dict[ + standard_fields.InputDataFields.groundtruth_instance_masks] + _check_mask_type_and_value(standard_fields.InputDataFields. + groundtruth_instance_masks, + groundtruth_instance_masks) + self._groundtruth_list.extend( + coco_tools. + ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self._annotation_id, + category_id_set=self._category_id_set, + groundtruth_boxes=groundtruth_dict[standard_fields.InputDataFields. + groundtruth_boxes], + groundtruth_classes=groundtruth_dict[standard_fields. + InputDataFields. + groundtruth_classes], + groundtruth_masks=groundtruth_instance_masks)) + self._annotation_id += groundtruth_dict[standard_fields.InputDataFields. + groundtruth_boxes].shape[0] + self._image_id_to_mask_shape_map[image_id] = groundtruth_dict[ + standard_fields.InputDataFields.groundtruth_instance_masks].shape + + def add_single_detected_image_info(self, + image_id, + detections_dict): + """Adds detections for a single image to be used for evaluation. + + If a detection has already been added for this image id, a warning is + logged, and the detection is skipped. + + Args: + image_id: A unique string/integer identifier for the image. + detections_dict: A dictionary containing - + DetectionResultFields.detection_scores: float32 numpy array of shape + [num_boxes] containing detection scores for the boxes. + DetectionResultFields.detection_classes: integer numpy array of shape + [num_boxes] containing 1-indexed detection classes for the boxes. + DetectionResultFields.detection_masks: optional uint8 numpy array of + shape [num_boxes, image_height, image_width] containing instance + masks corresponding to the boxes. The elements of the array must be + in {0, 1}. + + Raises: + ValueError: If groundtruth for the image_id is not available or if + spatial shapes of groundtruth_instance_masks and detection_masks are + incompatible. + """ + if image_id not in self._image_id_to_mask_shape_map: + raise ValueError('Missing groundtruth for image id: {}'.format(image_id)) + + if image_id in self._image_ids_with_detections: + tf.logging.warning('Ignoring detection with image id %s since it was ' + 'previously added', image_id) + return + + groundtruth_masks_shape = self._image_id_to_mask_shape_map[image_id] + detection_masks = detections_dict[standard_fields.DetectionResultFields. + detection_masks] + if groundtruth_masks_shape[1:] != detection_masks.shape[1:]: + raise ValueError('Spatial shape of groundtruth masks and detection masks ' + 'are incompatible: {} vs {}'.format( + groundtruth_masks_shape, + detection_masks.shape)) + _check_mask_type_and_value(standard_fields.DetectionResultFields. + detection_masks, + detection_masks) + self._detection_masks_list.extend( + coco_tools.ExportSingleImageDetectionMasksToCoco( + image_id=image_id, + category_id_set=self._category_id_set, + detection_masks=detection_masks, + detection_scores=detections_dict[standard_fields. + DetectionResultFields. 
+ detection_scores], + detection_classes=detections_dict[standard_fields. + DetectionResultFields. + detection_classes])) + self._image_ids_with_detections.update([image_id]) + + def dump_detections_to_json_file(self, json_output_path): + """Saves the detections into json_output_path in the format used by MS COCO. + + Args: + json_output_path: String containing the output file's path. It can also + be None. In that case nothing will be written to the output file. + """ + if json_output_path: + tf.logging.info('Dumping detections to output json file.') + with tf.gfile.GFile(json_output_path, 'w') as fid: + json_utils.Dump( + obj=self._detection_masks_list, fid=fid, float_digits=4, indent=2) + + def evaluate(self): + """Evaluates the detection masks and returns a dictionary of coco metrics. + + Returns: + A dictionary holding - + + 1. summary_metrics: + 'DetectionMasks_Precision/mAP': mean average precision over classes + averaged over IOU thresholds ranging from .5 to .95 with .05 increments. + 'DetectionMasks_Precision/mAP@.50IOU': mean average precision at 50% IOU. + 'DetectionMasks_Precision/mAP@.75IOU': mean average precision at 75% IOU. + 'DetectionMasks_Precision/mAP (small)': mean average precision for small + objects (area < 32^2 pixels). + 'DetectionMasks_Precision/mAP (medium)': mean average precision for medium + sized objects (32^2 pixels < area < 96^2 pixels). + 'DetectionMasks_Precision/mAP (large)': mean average precision for large + objects (96^2 pixels < area < 10000^2 pixels). + 'DetectionMasks_Recall/AR@1': average recall with 1 detection. + 'DetectionMasks_Recall/AR@10': average recall with 10 detections. + 'DetectionMasks_Recall/AR@100': average recall with 100 detections. + 'DetectionMasks_Recall/AR@100 (small)': average recall for small objects + with 100 detections. + 'DetectionMasks_Recall/AR@100 (medium)': average recall for medium objects + with 100 detections. + 'DetectionMasks_Recall/AR@100 (large)': average recall for large objects + with 100 detections. + + 2. per_category_ap: if include_metrics_per_category is True, category + specific results with keys of the form: + 'Precision mAP ByCategory/category' (without the supercategory part if + no supercategories exist). For backward compatibility + 'PerformanceByCategory' is included in the output regardless of + all_metrics_per_category. + """ + groundtruth_dict = { + 'annotations': self._groundtruth_list, + 'images': [{'id': image_id, 'height': shape[1], 'width': shape[2]} + for image_id, shape in self._image_id_to_mask_shape_map. + items()], + 'categories': self._categories + } + coco_wrapped_groundtruth = coco_tools.COCOWrapper( + groundtruth_dict, detection_type='segmentation') + coco_wrapped_detection_masks = coco_wrapped_groundtruth.LoadAnnotations( + self._detection_masks_list) + mask_evaluator = coco_tools.COCOEvalWrapper( + coco_wrapped_groundtruth, coco_wrapped_detection_masks, + agnostic_mode=False, iou_type='segm') + mask_metrics, mask_per_category_ap = mask_evaluator.ComputeMetrics( + include_metrics_per_category=self._include_metrics_per_category) + mask_metrics.update(mask_per_category_ap) + mask_metrics = {'DetectionMasks_' + key: value + for key, value in mask_metrics.items()} + return mask_metrics + + def get_estimator_eval_metric_ops(self, eval_dict): + """Returns a dictionary of eval metric ops. + + Note that once value_op is called, the detections and groundtruth added via + update_op are cleared.
+ + Args: + eval_dict: A dictionary that holds tensors for evaluating object detection + performance. For single-image evaluation, this dictionary may be + produced from eval_util.result_dict_for_single_example(). If multi-image + evaluation, `eval_dict` should contain the fields + 'num_groundtruth_boxes_per_image' and 'num_det_boxes_per_image' to + properly unpad the tensors from the batch. + + Returns: + a dictionary of metric names to tuple of value_op and update_op that can + be used as eval metric ops in tf.estimator.EstimatorSpec. Note that all + update ops must be run together and similarly all value ops must be run + together to guarantee correct behaviour. + """ + + def update_op(image_id_batched, groundtruth_boxes_batched, + groundtruth_classes_batched, + groundtruth_instance_masks_batched, + groundtruth_is_crowd_batched, num_gt_boxes_per_image, + detection_scores_batched, detection_classes_batched, + detection_masks_batched, num_det_boxes_per_image): + """Update op for metrics.""" + + for (image_id, groundtruth_boxes, groundtruth_classes, + groundtruth_instance_masks, groundtruth_is_crowd, num_gt_box, + detection_scores, detection_classes, + detection_masks, num_det_box) in zip( + image_id_batched, groundtruth_boxes_batched, + groundtruth_classes_batched, groundtruth_instance_masks_batched, + groundtruth_is_crowd_batched, num_gt_boxes_per_image, + detection_scores_batched, detection_classes_batched, + detection_masks_batched, num_det_boxes_per_image): + self.add_single_ground_truth_image_info( + image_id, { + 'groundtruth_boxes': + groundtruth_boxes[:num_gt_box], + 'groundtruth_classes': + groundtruth_classes[:num_gt_box], + 'groundtruth_instance_masks': + groundtruth_instance_masks[:num_gt_box], + 'groundtruth_is_crowd': + groundtruth_is_crowd[:num_gt_box] + }) + self.add_single_detected_image_info( + image_id, { + 'detection_scores': detection_scores[:num_det_box], + 'detection_classes': detection_classes[:num_det_box], + 'detection_masks': detection_masks[:num_det_box] + }) + + # Unpack items from the evaluation dictionary. + input_data_fields = standard_fields.InputDataFields + detection_fields = standard_fields.DetectionResultFields + image_id = eval_dict[input_data_fields.key] + groundtruth_boxes = eval_dict[input_data_fields.groundtruth_boxes] + groundtruth_classes = eval_dict[input_data_fields.groundtruth_classes] + groundtruth_instance_masks = eval_dict[ + input_data_fields.groundtruth_instance_masks] + groundtruth_is_crowd = eval_dict.get( + input_data_fields.groundtruth_is_crowd, None) + num_gt_boxes_per_image = eval_dict.get( + input_data_fields.num_groundtruth_boxes, None) + detection_scores = eval_dict[detection_fields.detection_scores] + detection_classes = eval_dict[detection_fields.detection_classes] + detection_masks = eval_dict[detection_fields.detection_masks] + num_det_boxes_per_image = eval_dict.get(detection_fields.num_detections, + None) + + if groundtruth_is_crowd is None: + groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool) + + if not image_id.shape.as_list(): + # Apply a batch dimension to all tensors. 
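+ # Shape sketch: e.g. groundtruth_instance_masks of shape + # [num_boxes, height, width] becomes [1, num_boxes, height, width], so + # the padded multi-image logic below applies unchanged to the + # single-image case.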
+ image_id = tf.expand_dims(image_id, 0) + groundtruth_boxes = tf.expand_dims(groundtruth_boxes, 0) + groundtruth_classes = tf.expand_dims(groundtruth_classes, 0) + groundtruth_instance_masks = tf.expand_dims(groundtruth_instance_masks, 0) + groundtruth_is_crowd = tf.expand_dims(groundtruth_is_crowd, 0) + detection_scores = tf.expand_dims(detection_scores, 0) + detection_classes = tf.expand_dims(detection_classes, 0) + detection_masks = tf.expand_dims(detection_masks, 0) + + if num_gt_boxes_per_image is None: + num_gt_boxes_per_image = tf.shape(groundtruth_boxes)[1:2] + else: + num_gt_boxes_per_image = tf.expand_dims(num_gt_boxes_per_image, 0) + + if num_det_boxes_per_image is None: + num_det_boxes_per_image = tf.shape(detection_scores)[1:2] + else: + num_det_boxes_per_image = tf.expand_dims(num_det_boxes_per_image, 0) + else: + if num_gt_boxes_per_image is None: + num_gt_boxes_per_image = tf.tile( + tf.shape(groundtruth_boxes)[1:2], + multiples=tf.shape(groundtruth_boxes)[0:1]) + if num_det_boxes_per_image is None: + num_det_boxes_per_image = tf.tile( + tf.shape(detection_scores)[1:2], + multiples=tf.shape(detection_scores)[0:1]) + + update_op = tf.py_func(update_op, [ + image_id, groundtruth_boxes, groundtruth_classes, + groundtruth_instance_masks, groundtruth_is_crowd, + num_gt_boxes_per_image, detection_scores, detection_classes, + detection_masks, num_det_boxes_per_image + ], []) + + metric_names = ['DetectionMasks_Precision/mAP', + 'DetectionMasks_Precision/mAP@.50IOU', + 'DetectionMasks_Precision/mAP@.75IOU', + 'DetectionMasks_Precision/mAP (large)', + 'DetectionMasks_Precision/mAP (medium)', + 'DetectionMasks_Precision/mAP (small)', + 'DetectionMasks_Recall/AR@1', + 'DetectionMasks_Recall/AR@10', + 'DetectionMasks_Recall/AR@100', + 'DetectionMasks_Recall/AR@100 (large)', + 'DetectionMasks_Recall/AR@100 (medium)', + 'DetectionMasks_Recall/AR@100 (small)'] + if self._include_metrics_per_category: + for category_dict in self._categories: + metric_names.append('DetectionMasks_PerformanceByCategory/mAP/' + + category_dict['name']) + + def first_value_func(): + self._metrics = self.evaluate() + self.clear() + return np.float32(self._metrics[metric_names[0]]) + + def value_func_factory(metric_name): + def value_func(): + return np.float32(self._metrics[metric_name]) + return value_func + + # Ensure that the metrics are only evaluated once. + first_value_op = tf.py_func(first_value_func, [], tf.float32) + eval_metric_ops = {metric_names[0]: (first_value_op, update_op)} + with tf.control_dependencies([first_value_op]): + for metric_name in metric_names[1:]: + eval_metric_ops[metric_name] = (tf.py_func( + value_func_factory(metric_name), [], np.float32), update_op) + return eval_metric_ops diff --git a/metrics/coco_evaluation_test.py b/metrics/coco_evaluation_test.py new file mode 100644 index 0000000..0a567c5 --- /dev/null +++ b/metrics/coco_evaluation_test.py @@ -0,0 +1,951 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for tensorflow_models.object_detection.metrics.coco_evaluation.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import tensorflow as tf +from object_detection.core import standard_fields +from object_detection.metrics import coco_evaluation + + +def _get_categories_list(): + return [{ + 'id': 1, + 'name': 'person' + }, { + 'id': 2, + 'name': 'dog' + }, { + 'id': 3, + 'name': 'cat' + }] + + +class CocoDetectionEvaluationTest(tf.test.TestCase): + + def testGetOneMAPWithMatchingGroundtruthAndDetections(self): + """Tests that mAP is calculated correctly on GT and Detections.""" + coco_evaluator = coco_evaluation.CocoDetectionEvaluator( + _get_categories_list()) + coco_evaluator.add_single_ground_truth_image_info( + image_id='image1', + groundtruth_dict={ + standard_fields.InputDataFields.groundtruth_boxes: + np.array([[100., 100., 200., 200.]]), + standard_fields.InputDataFields.groundtruth_classes: np.array([1]) + }) + coco_evaluator.add_single_detected_image_info( + image_id='image1', + detections_dict={ + standard_fields.DetectionResultFields.detection_boxes: + np.array([[100., 100., 200., 200.]]), + standard_fields.DetectionResultFields.detection_scores: + np.array([.8]), + standard_fields.DetectionResultFields.detection_classes: + np.array([1]) + }) + coco_evaluator.add_single_ground_truth_image_info( + image_id='image2', + groundtruth_dict={ + standard_fields.InputDataFields.groundtruth_boxes: + np.array([[50., 50., 100., 100.]]), + standard_fields.InputDataFields.groundtruth_classes: np.array([1]) + }) + coco_evaluator.add_single_detected_image_info( + image_id='image2', + detections_dict={ + standard_fields.DetectionResultFields.detection_boxes: + np.array([[50., 50., 100., 100.]]), + standard_fields.DetectionResultFields.detection_scores: + np.array([.8]), + standard_fields.DetectionResultFields.detection_classes: + np.array([1]) + }) + coco_evaluator.add_single_ground_truth_image_info( + image_id='image3', + groundtruth_dict={ + standard_fields.InputDataFields.groundtruth_boxes: + np.array([[25., 25., 50., 50.]]), + standard_fields.InputDataFields.groundtruth_classes: np.array([1]) + }) + coco_evaluator.add_single_detected_image_info( + image_id='image3', + detections_dict={ + standard_fields.DetectionResultFields.detection_boxes: + np.array([[25., 25., 50., 50.]]), + standard_fields.DetectionResultFields.detection_scores: + np.array([.8]), + standard_fields.DetectionResultFields.detection_classes: + np.array([1]) + }) + metrics = coco_evaluator.evaluate() + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0) + + def testGetOneMAPWithMatchingGroundtruthAndDetectionsSkipCrowd(self): + """Tests computing mAP with is_crowd GT boxes skipped.""" + coco_evaluator = coco_evaluation.CocoDetectionEvaluator( + _get_categories_list()) + coco_evaluator.add_single_ground_truth_image_info( + image_id='image1', + groundtruth_dict={ + standard_fields.InputDataFields.groundtruth_boxes: + np.array([[100., 100., 200., 200.], [99., 99., 200., 200.]]), + standard_fields.InputDataFields.groundtruth_classes: + np.array([1, 2]), + standard_fields.InputDataFields.groundtruth_is_crowd: + np.array([0, 1]) + }) + coco_evaluator.add_single_detected_image_info( + image_id='image1', + detections_dict={ + standard_fields.DetectionResultFields.detection_boxes: + np.array([[100., 100., 200., 200.]]), + 
standard_fields.DetectionResultFields.detection_scores: + np.array([.8]), + standard_fields.DetectionResultFields.detection_classes: + np.array([1]) + }) + metrics = coco_evaluator.evaluate() + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0) + + def testGetOneMAPWithMatchingGroundtruthAndDetectionsEmptyCrowd(self): + """Tests computing mAP with empty is_crowd array passed in.""" + coco_evaluator = coco_evaluation.CocoDetectionEvaluator( + _get_categories_list()) + coco_evaluator.add_single_ground_truth_image_info( + image_id='image1', + groundtruth_dict={ + standard_fields.InputDataFields.groundtruth_boxes: + np.array([[100., 100., 200., 200.]]), + standard_fields.InputDataFields.groundtruth_classes: + np.array([1]), + standard_fields.InputDataFields.groundtruth_is_crowd: + np.array([]) + }) + coco_evaluator.add_single_detected_image_info( + image_id='image1', + detections_dict={ + standard_fields.DetectionResultFields.detection_boxes: + np.array([[100., 100., 200., 200.]]), + standard_fields.DetectionResultFields.detection_scores: + np.array([.8]), + standard_fields.DetectionResultFields.detection_classes: + np.array([1]) + }) + metrics = coco_evaluator.evaluate() + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0) + + def testRejectionOnDuplicateGroundtruth(self): + """Tests that groundtruth cannot be added more than once for an image.""" + coco_evaluator = coco_evaluation.CocoDetectionEvaluator( + _get_categories_list()) + # Add groundtruth + image_key1 = 'img1' + groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], + dtype=float) + groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int) + coco_evaluator.add_single_ground_truth_image_info(image_key1, { + standard_fields.InputDataFields.groundtruth_boxes: + groundtruth_boxes1, + standard_fields.InputDataFields.groundtruth_classes: + groundtruth_class_labels1 + }) + groundtruth_lists_len = len(coco_evaluator._groundtruth_list) + + # Add groundtruth with the same image id. + coco_evaluator.add_single_ground_truth_image_info(image_key1, { + standard_fields.InputDataFields.groundtruth_boxes: + groundtruth_boxes1, + standard_fields.InputDataFields.groundtruth_classes: + groundtruth_class_labels1 + }) + self.assertEqual(groundtruth_lists_len, + len(coco_evaluator._groundtruth_list)) + + def testRejectionOnDuplicateDetections(self): + """Tests that detections cannot be added more than once for an image.""" + coco_evaluator = coco_evaluation.CocoDetectionEvaluator( + _get_categories_list()) + # Add groundtruth + coco_evaluator.add_single_ground_truth_image_info( + image_id='image1', + groundtruth_dict={ + standard_fields.InputDataFields.groundtruth_boxes: + np.array([[99., 100., 200., 200.]]), + standard_fields.InputDataFields.groundtruth_classes: np.array([1]) + }) + coco_evaluator.add_single_detected_image_info( + image_id='image1', + detections_dict={ + standard_fields.DetectionResultFields.detection_boxes: + np.array([[100., 100., 200., 200.]]), + standard_fields.DetectionResultFields.detection_scores: + np.array([.8]), + standard_fields.DetectionResultFields.detection_classes: + np.array([1]) + }) + detections_lists_len = len(coco_evaluator._detection_boxes_list) + coco_evaluator.add_single_detected_image_info( + image_id='image1', # Note that this image id was previously added. 
+ detections_dict={ + standard_fields.DetectionResultFields.detection_boxes: + np.array([[100., 100., 200., 200.]]), + standard_fields.DetectionResultFields.detection_scores: + np.array([.8]), + standard_fields.DetectionResultFields.detection_classes: + np.array([1]) + }) + self.assertEqual(detections_lists_len, + len(coco_evaluator._detection_boxes_list)) + + def testExceptionRaisedWithMissingGroundtruth(self): + """Tests that exception is raised for detection with missing groundtruth.""" + coco_evaluator = coco_evaluation.CocoDetectionEvaluator( + _get_categories_list()) + with self.assertRaises(ValueError): + coco_evaluator.add_single_detected_image_info( + image_id='image1', + detections_dict={ + standard_fields.DetectionResultFields.detection_boxes: + np.array([[100., 100., 200., 200.]]), + standard_fields.DetectionResultFields.detection_scores: + np.array([.8]), + standard_fields.DetectionResultFields.detection_classes: + np.array([1]) + }) + + +class CocoEvaluationPyFuncTest(tf.test.TestCase): + + def testGetOneMAPWithMatchingGroundtruthAndDetections(self): + coco_evaluator = coco_evaluation.CocoDetectionEvaluator( + _get_categories_list()) + image_id = tf.placeholder(tf.string, shape=()) + groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4)) + groundtruth_classes = tf.placeholder(tf.float32, shape=(None)) + detection_boxes = tf.placeholder(tf.float32, shape=(None, 4)) + detection_scores = tf.placeholder(tf.float32, shape=(None)) + detection_classes = tf.placeholder(tf.float32, shape=(None)) + + input_data_fields = standard_fields.InputDataFields + detection_fields = standard_fields.DetectionResultFields + eval_dict = { + input_data_fields.key: image_id, + input_data_fields.groundtruth_boxes: groundtruth_boxes, + input_data_fields.groundtruth_classes: groundtruth_classes, + detection_fields.detection_boxes: detection_boxes, + detection_fields.detection_scores: detection_scores, + detection_fields.detection_classes: detection_classes + } + + eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(eval_dict) + + _, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP'] + + with self.test_session() as sess: + sess.run(update_op, + feed_dict={ + image_id: 'image1', + groundtruth_boxes: np.array([[100., 100., 200., 200.]]), + groundtruth_classes: np.array([1]), + detection_boxes: np.array([[100., 100., 200., 200.]]), + detection_scores: np.array([.8]), + detection_classes: np.array([1]) + }) + sess.run(update_op, + feed_dict={ + image_id: 'image2', + groundtruth_boxes: np.array([[50., 50., 100., 100.]]), + groundtruth_classes: np.array([3]), + detection_boxes: np.array([[50., 50., 100., 100.]]), + detection_scores: np.array([.7]), + detection_classes: np.array([3]) + }) + sess.run(update_op, + feed_dict={ + image_id: 'image3', + groundtruth_boxes: np.array([[25., 25., 50., 50.]]), + groundtruth_classes: np.array([2]), + detection_boxes: np.array([[25., 25., 50., 50.]]), + detection_scores: np.array([.9]), + detection_classes: np.array([2]) + }) + metrics = {} + for key, (value_op, _) in eval_metric_ops.items(): + metrics[key] = value_op + metrics = sess.run(metrics) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'], + 1.0) +
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'], + 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0) + self.assertFalse(coco_evaluator._groundtruth_list) + self.assertFalse(coco_evaluator._detection_boxes_list) + self.assertFalse(coco_evaluator._image_ids) + + def testGetOneMAPWithMatchingGroundtruthAndDetectionsIsAnnotated(self): + coco_evaluator = coco_evaluation.CocoDetectionEvaluator( + _get_categories_list()) + image_id = tf.placeholder(tf.string, shape=()) + groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4)) + groundtruth_classes = tf.placeholder(tf.float32, shape=(None)) + is_annotated = tf.placeholder(tf.bool, shape=()) + detection_boxes = tf.placeholder(tf.float32, shape=(None, 4)) + detection_scores = tf.placeholder(tf.float32, shape=(None)) + detection_classes = tf.placeholder(tf.float32, shape=(None)) + + input_data_fields = standard_fields.InputDataFields + detection_fields = standard_fields.DetectionResultFields + eval_dict = { + input_data_fields.key: image_id, + input_data_fields.groundtruth_boxes: groundtruth_boxes, + input_data_fields.groundtruth_classes: groundtruth_classes, + 'is_annotated': is_annotated, + detection_fields.detection_boxes: detection_boxes, + detection_fields.detection_scores: detection_scores, + detection_fields.detection_classes: detection_classes + } + + eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(eval_dict) + + _, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP'] + + with self.test_session() as sess: + sess.run(update_op, + feed_dict={ + image_id: 'image1', + groundtruth_boxes: np.array([[100., 100., 200., 200.]]), + groundtruth_classes: np.array([1]), + is_annotated: True, + detection_boxes: np.array([[100., 100., 200., 200.]]), + detection_scores: np.array([.8]), + detection_classes: np.array([1]) + }) + sess.run(update_op, + feed_dict={ + image_id: 'image2', + groundtruth_boxes: np.array([[50., 50., 100., 100.]]), + groundtruth_classes: np.array([3]), + is_annotated: True, + detection_boxes: np.array([[50., 50., 100., 100.]]), + detection_scores: np.array([.7]), + detection_classes: np.array([3]) + }) + sess.run(update_op, + feed_dict={ + image_id: 'image3', + groundtruth_boxes: np.array([[25., 25., 50., 50.]]), + groundtruth_classes: np.array([2]), + is_annotated: True, + detection_boxes: np.array([[25., 25., 50., 50.]]), + detection_scores: np.array([.9]), + detection_classes: np.array([2]) + }) + sess.run(update_op, + feed_dict={ + image_id: 'image4', + groundtruth_boxes: np.zeros((0, 4)), + groundtruth_classes: np.zeros((0)), + is_annotated: False, # Note that this image isn't annotated. 
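# The False flag just above is the point of this test. A sketch of the
# gating it implies (hypothetical helper; the op graph is assumed to skip
# unannotated images entirely, so image4's four detections below must not
# count as false positives against its empty groundtruth):

def maybe_add_example(evaluator, image_id, groundtruth_dict,
                      detections_dict, is_annotated):
  if not is_annotated:
    return  # Unannotated: contributes neither groundtruth nor detections.
  evaluator.add_single_ground_truth_image_info(image_id, groundtruth_dict)
  evaluator.add_single_detected_image_info(image_id, detections_dict)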
+ detection_boxes: np.array([[25., 25., 50., 50.], + [25., 25., 70., 50.], + [25., 25., 80., 50.], + [25., 25., 90., 50.]]), + detection_scores: np.array([0.6, 0.7, 0.8, 0.9]), + detection_classes: np.array([1, 2, 2, 3]) + }) + metrics = {} + for key, (value_op, _) in eval_metric_ops.iteritems(): + metrics[key] = value_op + metrics = sess.run(metrics) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'], + 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'], + 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0) + self.assertFalse(coco_evaluator._groundtruth_list) + self.assertFalse(coco_evaluator._detection_boxes_list) + self.assertFalse(coco_evaluator._image_ids) + + def testGetOneMAPWithMatchingGroundtruthAndDetectionsPadded(self): + coco_evaluator = coco_evaluation.CocoDetectionEvaluator( + _get_categories_list()) + image_id = tf.placeholder(tf.string, shape=()) + groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4)) + groundtruth_classes = tf.placeholder(tf.float32, shape=(None)) + detection_boxes = tf.placeholder(tf.float32, shape=(None, 4)) + detection_scores = tf.placeholder(tf.float32, shape=(None)) + detection_classes = tf.placeholder(tf.float32, shape=(None)) + + input_data_fields = standard_fields.InputDataFields + detection_fields = standard_fields.DetectionResultFields + eval_dict = { + input_data_fields.key: image_id, + input_data_fields.groundtruth_boxes: groundtruth_boxes, + input_data_fields.groundtruth_classes: groundtruth_classes, + detection_fields.detection_boxes: detection_boxes, + detection_fields.detection_scores: detection_scores, + detection_fields.detection_classes: detection_classes + } + + eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(eval_dict) + + _, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP'] + + with self.test_session() as sess: + sess.run( + update_op, + feed_dict={ + image_id: + 'image1', + groundtruth_boxes: + np.array([[100., 100., 200., 200.], [-1, -1, -1, -1]]), + groundtruth_classes: + np.array([1, -1]), + detection_boxes: + np.array([[100., 100., 200., 200.], [0., 0., 0., 0.]]), + detection_scores: + np.array([.8, 0.]), + detection_classes: + np.array([1, -1]) + }) + sess.run( + update_op, + feed_dict={ + image_id: + 'image2', + groundtruth_boxes: + np.array([[50., 50., 100., 100.], [-1, -1, -1, -1]]), + groundtruth_classes: + np.array([3, -1]), + detection_boxes: + np.array([[50., 50., 100., 100.], [0., 0., 0., 0.]]), + detection_scores: + np.array([.7, 0.]), + detection_classes: + np.array([3, -1]) + }) + sess.run( + update_op, + feed_dict={ + image_id: + 'image3', + groundtruth_boxes: + np.array([[25., 25., 50., 50.], [10., 10., 15., 15.]]), + groundtruth_classes: + np.array([2, 2]), + detection_boxes: + np.array([[25., 25., 50., 50.], [10., 10., 15., 15.]]), + 
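# The -1 rows fed in this test are padding. They vanish because class -1
# is never in the evaluator's category id set, so the export step drops
# those entries. A toy check of that filtering rule (values illustrative):
import numpy as np

category_id_set = {1, 2, 3}
padded_classes = np.array([1, -1])
keep = [i for i in range(padded_classes.shape[0])
        if padded_classes[i] in category_id_set]
assert keep == [0]  # Only the real annotation survives; the pad row is gone.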
detection_scores: + np.array([.95, .9]), + detection_classes: + np.array([2, 2]) + }) + metrics = {} + for key, (value_op, _) in eval_metric_ops.iteritems(): + metrics[key] = value_op + metrics = sess.run(metrics) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'], + 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 0.83333331) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'], + 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0) + self.assertFalse(coco_evaluator._groundtruth_list) + self.assertFalse(coco_evaluator._detection_boxes_list) + self.assertFalse(coco_evaluator._image_ids) + + def testGetOneMAPWithMatchingGroundtruthAndDetectionsBatched(self): + coco_evaluator = coco_evaluation.CocoDetectionEvaluator( + _get_categories_list()) + batch_size = 3 + image_id = tf.placeholder(tf.string, shape=(batch_size)) + groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4)) + groundtruth_classes = tf.placeholder(tf.float32, shape=(batch_size, None)) + detection_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4)) + detection_scores = tf.placeholder(tf.float32, shape=(batch_size, None)) + detection_classes = tf.placeholder(tf.float32, shape=(batch_size, None)) + + input_data_fields = standard_fields.InputDataFields + detection_fields = standard_fields.DetectionResultFields + eval_dict = { + input_data_fields.key: image_id, + input_data_fields.groundtruth_boxes: groundtruth_boxes, + input_data_fields.groundtruth_classes: groundtruth_classes, + detection_fields.detection_boxes: detection_boxes, + detection_fields.detection_scores: detection_scores, + detection_fields.detection_classes: detection_classes + } + + eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(eval_dict) + + _, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP'] + + with self.test_session() as sess: + sess.run(update_op, + feed_dict={ + image_id: ['image1', 'image2', 'image3'], + groundtruth_boxes: np.array([[[100., 100., 200., 200.]], + [[50., 50., 100., 100.]], + [[25., 25., 50., 50.]]]), + groundtruth_classes: np.array([[1], [3], [2]]), + detection_boxes: np.array([[[100., 100., 200., 200.]], + [[50., 50., 100., 100.]], + [[25., 25., 50., 50.]]]), + detection_scores: np.array([[.8], [.7], [.9]]), + detection_classes: np.array([[1], [3], [2]]) + }) + metrics = {} + for key, (value_op, _) in eval_metric_ops.iteritems(): + metrics[key] = value_op + metrics = sess.run(metrics) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'], + 1.0) + 
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'], + 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0) + self.assertFalse(coco_evaluator._groundtruth_list) + self.assertFalse(coco_evaluator._detection_boxes_list) + self.assertFalse(coco_evaluator._image_ids) + + def testGetOneMAPWithMatchingGroundtruthAndDetectionsPaddedBatches(self): + coco_evaluator = coco_evaluation.CocoDetectionEvaluator( + _get_categories_list()) + batch_size = 3 + image_id = tf.placeholder(tf.string, shape=(batch_size)) + groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4)) + groundtruth_classes = tf.placeholder(tf.float32, shape=(batch_size, None)) + num_gt_boxes_per_image = tf.placeholder(tf.int32, shape=(None)) + detection_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4)) + detection_scores = tf.placeholder(tf.float32, shape=(batch_size, None)) + detection_classes = tf.placeholder(tf.float32, shape=(batch_size, None)) + num_det_boxes_per_image = tf.placeholder(tf.int32, shape=(None)) + + input_data_fields = standard_fields.InputDataFields + detection_fields = standard_fields.DetectionResultFields + eval_dict = { + input_data_fields.key: image_id, + input_data_fields.groundtruth_boxes: groundtruth_boxes, + input_data_fields.groundtruth_classes: groundtruth_classes, + detection_fields.detection_boxes: detection_boxes, + detection_fields.detection_scores: detection_scores, + detection_fields.detection_classes: detection_classes, + 'num_groundtruth_boxes_per_image': num_gt_boxes_per_image, + 'num_det_boxes_per_image': num_det_boxes_per_image + } + + eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(eval_dict) + + _, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP'] + + with self.test_session() as sess: + sess.run( + update_op, + feed_dict={ + image_id: ['image1', 'image2', 'image3'], + groundtruth_boxes: + np.array([[[100., 100., 200., 200.], [-1, -1, -1, -1]], + [[50., 50., 100., 100.], [-1, -1, -1, -1]], + [[25., 25., 50., 50.], [10., 10., 15., 15.]]]), + groundtruth_classes: + np.array([[1, -1], [3, -1], [2, 2]]), + num_gt_boxes_per_image: + np.array([1, 1, 2]), + detection_boxes: + np.array([[[100., 100., 200., 200.], + [0., 0., 0., 0.], + [0., 0., 0., 0.]], + [[50., 50., 100., 100.], + [0., 0., 0., 0.], + [0., 0., 0., 0.]], + [[25., 25., 50., 50.], + [10., 10., 15., 15.], + [10., 10., 15., 15.]]]), + detection_scores: + np.array([[.8, 0., 0.], [.7, 0., 0.], [.95, .9, 0.9]]), + detection_classes: + np.array([[1, -1, -1], [3, -1, -1], [2, 2, 2]]), + num_det_boxes_per_image: + np.array([1, 1, 3]), + }) + + # Check the number of bounding boxes added. 
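# The two assertions below follow from the feeds above once padding is
# trimmed via num_gt_boxes_per_image / num_det_boxes_per_image:
#   groundtruth kept: 1 (image1) + 1 (image2) + 2 (image3) = 4 boxes
#   detections kept:  1 (image1) + 1 (image2) + 3 (image3) = 5 boxes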
+ self.assertEqual(len(coco_evaluator._groundtruth_list), 4) + self.assertEqual(len(coco_evaluator._detection_boxes_list), 5) + + metrics = {} + for key, (value_op, _) in eval_metric_ops.iteritems(): + metrics[key] = value_op + metrics = sess.run(metrics) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'], + 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 0.83333331) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'], + 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0) + self.assertFalse(coco_evaluator._groundtruth_list) + self.assertFalse(coco_evaluator._detection_boxes_list) + self.assertFalse(coco_evaluator._image_ids) + + +class CocoMaskEvaluationTest(tf.test.TestCase): + + def testGetOneMAPWithMatchingGroundtruthAndDetections(self): + coco_evaluator = coco_evaluation.CocoMaskEvaluator(_get_categories_list()) + coco_evaluator.add_single_ground_truth_image_info( + image_id='image1', + groundtruth_dict={ + standard_fields.InputDataFields.groundtruth_boxes: + np.array([[100., 100., 200., 200.]]), + standard_fields.InputDataFields.groundtruth_classes: np.array([1]), + standard_fields.InputDataFields.groundtruth_instance_masks: + np.pad(np.ones([1, 100, 100], dtype=np.uint8), + ((0, 0), (10, 10), (10, 10)), mode='constant') + }) + coco_evaluator.add_single_detected_image_info( + image_id='image1', + detections_dict={ + standard_fields.DetectionResultFields.detection_boxes: + np.array([[100., 100., 200., 200.]]), + standard_fields.DetectionResultFields.detection_scores: + np.array([.8]), + standard_fields.DetectionResultFields.detection_classes: + np.array([1]), + standard_fields.DetectionResultFields.detection_masks: + np.pad(np.ones([1, 100, 100], dtype=np.uint8), + ((0, 0), (10, 10), (10, 10)), mode='constant') + }) + coco_evaluator.add_single_ground_truth_image_info( + image_id='image2', + groundtruth_dict={ + standard_fields.InputDataFields.groundtruth_boxes: + np.array([[50., 50., 100., 100.]]), + standard_fields.InputDataFields.groundtruth_classes: np.array([1]), + standard_fields.InputDataFields.groundtruth_instance_masks: + np.pad(np.ones([1, 50, 50], dtype=np.uint8), + ((0, 0), (10, 10), (10, 10)), mode='constant') + }) + coco_evaluator.add_single_detected_image_info( + image_id='image2', + detections_dict={ + standard_fields.DetectionResultFields.detection_boxes: + np.array([[50., 50., 100., 100.]]), + standard_fields.DetectionResultFields.detection_scores: + np.array([.8]), + standard_fields.DetectionResultFields.detection_classes: + np.array([1]), + standard_fields.DetectionResultFields.detection_masks: + np.pad(np.ones([1, 50, 50], dtype=np.uint8), + ((0, 0), (10, 10), (10, 10)), mode='constant') + }) + coco_evaluator.add_single_ground_truth_image_info( + image_id='image3', + groundtruth_dict={ + standard_fields.InputDataFields.groundtruth_boxes: + np.array([[25., 25., 50., 50.]]), + 
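# Shape arithmetic for the np.pad pattern used throughout these mask
# tests, checked stand-alone: a [1, 25, 25] block of ones padded by 10
# pixels on every spatial side becomes a [1, 45, 45] canvas whose ones
# occupy rows/columns 10..34, i.e. a 25x25 instance mask.
import numpy as np

m = np.pad(np.ones([1, 25, 25], dtype=np.uint8),
           ((0, 0), (10, 10), (10, 10)), mode='constant')
assert m.shape == (1, 45, 45)
assert m.sum() == 25 * 25  # Padding only adds zeros around the instance.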
standard_fields.InputDataFields.groundtruth_classes: np.array([1]), + standard_fields.InputDataFields.groundtruth_instance_masks: + np.pad(np.ones([1, 25, 25], dtype=np.uint8), + ((0, 0), (10, 10), (10, 10)), mode='constant') + }) + coco_evaluator.add_single_detected_image_info( + image_id='image3', + detections_dict={ + standard_fields.DetectionResultFields.detection_boxes: + np.array([[25., 25., 50., 50.]]), + standard_fields.DetectionResultFields.detection_scores: + np.array([.8]), + standard_fields.DetectionResultFields.detection_classes: + np.array([1]), + standard_fields.DetectionResultFields.detection_masks: + np.pad(np.ones([1, 25, 25], dtype=np.uint8), + ((0, 0), (10, 10), (10, 10)), mode='constant') + }) + metrics = coco_evaluator.evaluate() + self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0) + coco_evaluator.clear() + self.assertFalse(coco_evaluator._image_id_to_mask_shape_map) + self.assertFalse(coco_evaluator._image_ids_with_detections) + self.assertFalse(coco_evaluator._groundtruth_list) + self.assertFalse(coco_evaluator._detection_masks_list) + + +class CocoMaskEvaluationPyFuncTest(tf.test.TestCase): + + def testGetOneMAPWithMatchingGroundtruthAndDetections(self): + coco_evaluator = coco_evaluation.CocoMaskEvaluator(_get_categories_list()) + image_id = tf.placeholder(tf.string, shape=()) + groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4)) + groundtruth_classes = tf.placeholder(tf.float32, shape=(None)) + groundtruth_masks = tf.placeholder(tf.uint8, shape=(None, None, None)) + detection_scores = tf.placeholder(tf.float32, shape=(None)) + detection_classes = tf.placeholder(tf.float32, shape=(None)) + detection_masks = tf.placeholder(tf.uint8, shape=(None, None, None)) + + input_data_fields = standard_fields.InputDataFields + detection_fields = standard_fields.DetectionResultFields + eval_dict = { + input_data_fields.key: image_id, + input_data_fields.groundtruth_boxes: groundtruth_boxes, + input_data_fields.groundtruth_classes: groundtruth_classes, + input_data_fields.groundtruth_instance_masks: groundtruth_masks, + detection_fields.detection_scores: detection_scores, + detection_fields.detection_classes: detection_classes, + detection_fields.detection_masks: detection_masks, + } + + eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(eval_dict) + + _, update_op = eval_metric_ops['DetectionMasks_Precision/mAP'] + + with self.test_session() as sess: + sess.run( + update_op, + feed_dict={ + image_id: + 'image1', + groundtruth_boxes: + np.array([[100., 100., 200., 200.], [50., 50., 100., 100.]]), + groundtruth_classes: + np.array([1, 2]), + groundtruth_masks: + np.stack([ + np.pad( + np.ones([100, 100], dtype=np.uint8), ((10, 10), + (10, 10)), + mode='constant'), + np.pad( + np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)), + mode='constant') + ]), + detection_scores: + np.array([.9, .8]), + detection_classes: + np.array([2, 1]), + detection_masks: + np.stack([ + np.pad( + np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)), + mode='constant'), + np.pad( + np.ones([100, 100], dtype=np.uint8), ((10, 10), + (10, 10)), + mode='constant'), + ]) + }) + sess.run(update_op, + feed_dict={ + image_id: 'image2', + groundtruth_boxes: np.array([[50., 50., 100., 100.]]), + groundtruth_classes: np.array([1]), + groundtruth_masks: np.pad(np.ones([1, 50, 50], + dtype=np.uint8), + ((0, 0), (10, 10), (10, 10)), + mode='constant'), + detection_scores: np.array([.8]), + detection_classes: np.array([1]), + detection_masks: np.pad(np.ones([1, 50, 50], 
dtype=np.uint8), + ((0, 0), (10, 10), (10, 10)), + mode='constant') + }) + sess.run(update_op, + feed_dict={ + image_id: 'image3', + groundtruth_boxes: np.array([[25., 25., 50., 50.]]), + groundtruth_classes: np.array([1]), + groundtruth_masks: np.pad(np.ones([1, 25, 25], + dtype=np.uint8), + ((0, 0), (10, 10), (10, 10)), + mode='constant'), + detection_scores: np.array([.8]), + detection_classes: np.array([1]), + detection_masks: np.pad(np.ones([1, 25, 25], + dtype=np.uint8), + ((0, 0), (10, 10), (10, 10)), + mode='constant') + }) + metrics = {} + for key, (value_op, _) in eval_metric_ops.iteritems(): + metrics[key] = value_op + metrics = sess.run(metrics) + self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP@.50IOU'], 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP@.75IOU'], 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (large)'], 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (medium)'], + 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (small)'], 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@1'], 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@10'], 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100'], 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (large)'], 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (medium)'], + 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (small)'], 1.0) + self.assertFalse(coco_evaluator._groundtruth_list) + self.assertFalse(coco_evaluator._image_ids_with_detections) + self.assertFalse(coco_evaluator._image_id_to_mask_shape_map) + self.assertFalse(coco_evaluator._detection_masks_list) + + def testGetOneMAPWithMatchingGroundtruthAndDetectionsBatched(self): + coco_evaluator = coco_evaluation.CocoMaskEvaluator(_get_categories_list()) + batch_size = 3 + image_id = tf.placeholder(tf.string, shape=(batch_size)) + groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4)) + groundtruth_classes = tf.placeholder(tf.float32, shape=(batch_size, None)) + groundtruth_masks = tf.placeholder( + tf.uint8, shape=(batch_size, None, None, None)) + detection_scores = tf.placeholder(tf.float32, shape=(batch_size, None)) + detection_classes = tf.placeholder(tf.float32, shape=(batch_size, None)) + detection_masks = tf.placeholder( + tf.uint8, shape=(batch_size, None, None, None)) + + input_data_fields = standard_fields.InputDataFields + detection_fields = standard_fields.DetectionResultFields + eval_dict = { + input_data_fields.key: image_id, + input_data_fields.groundtruth_boxes: groundtruth_boxes, + input_data_fields.groundtruth_classes: groundtruth_classes, + input_data_fields.groundtruth_instance_masks: groundtruth_masks, + detection_fields.detection_scores: detection_scores, + detection_fields.detection_classes: detection_classes, + detection_fields.detection_masks: detection_masks, + } + + eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(eval_dict) + + _, update_op = eval_metric_ops['DetectionMasks_Precision/mAP'] + + with self.test_session() as sess: + sess.run( + update_op, + feed_dict={ + image_id: ['image1', 'image2', 'image3'], + groundtruth_boxes: + np.array([[[100., 100., 200., 200.]], + [[50., 50., 100., 100.]], + [[25., 25., 50., 50.]]]), + groundtruth_classes: + np.array([[1], [1], [1]]), + groundtruth_masks: + np.stack([ + np.pad( + np.ones([1, 100, 100], 
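# The stacked masks in this batched test are all padded onto the same
# 100x100 canvas (50+25+25 and 25+37+38 both total 100), because a dense
# [batch, num_masks, height, width] array needs one common spatial shape.
# The same construction, checked stand-alone:
import numpy as np

batch = np.stack([
    np.pad(np.ones([1, 100, 100], np.uint8), ((0, 0), (0, 0), (0, 0)),
           mode='constant'),
    np.pad(np.ones([1, 50, 50], np.uint8), ((0, 0), (25, 25), (25, 25)),
           mode='constant'),
    np.pad(np.ones([1, 25, 25], np.uint8), ((0, 0), (37, 38), (37, 38)),
           mode='constant'),
], axis=0)
assert batch.shape == (3, 1, 100, 100)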
dtype=np.uint8), + ((0, 0), (0, 0), (0, 0)), + mode='constant'), + np.pad( + np.ones([1, 50, 50], dtype=np.uint8), + ((0, 0), (25, 25), (25, 25)), + mode='constant'), + np.pad( + np.ones([1, 25, 25], dtype=np.uint8), + ((0, 0), (37, 38), (37, 38)), + mode='constant') + ], + axis=0), + detection_scores: + np.array([[.8], [.8], [.8]]), + detection_classes: + np.array([[1], [1], [1]]), + detection_masks: + np.stack([ + np.pad( + np.ones([1, 100, 100], dtype=np.uint8), + ((0, 0), (0, 0), (0, 0)), + mode='constant'), + np.pad( + np.ones([1, 50, 50], dtype=np.uint8), + ((0, 0), (25, 25), (25, 25)), + mode='constant'), + np.pad( + np.ones([1, 25, 25], dtype=np.uint8), + ((0, 0), (37, 38), (37, 38)), + mode='constant') + ], + axis=0) + }) + metrics = {} + for key, (value_op, _) in eval_metric_ops.iteritems(): + metrics[key] = value_op + metrics = sess.run(metrics) + self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP@.50IOU'], 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP@.75IOU'], 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (large)'], 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (medium)'], + 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (small)'], 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@1'], 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@10'], 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100'], 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (large)'], 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (medium)'], + 1.0) + self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (small)'], 1.0) + self.assertFalse(coco_evaluator._groundtruth_list) + self.assertFalse(coco_evaluator._image_ids_with_detections) + self.assertFalse(coco_evaluator._image_id_to_mask_shape_map) + self.assertFalse(coco_evaluator._detection_masks_list) + + +if __name__ == '__main__': + tf.test.main() diff --git a/metrics/coco_tools.py b/metrics/coco_tools.py new file mode 100644 index 0000000..7e7bb16 --- /dev/null +++ b/metrics/coco_tools.py @@ -0,0 +1,856 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Wrappers for third party pycocotools to be used within object_detection. + +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. 
+ +TODO(jonathanhuang): wrap as a slim metric in metrics.py + + +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import OrderedDict +import copy +import time +import numpy as np + +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask + +from six.moves import range +from six.moves import zip +import tensorflow as tf + +from object_detection.utils import json_utils + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class.""" + + def __init__(self, dataset, detection_type='bbox'): + """COCOWrapper constructor. + + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations): + """Load annotations dictionary into COCO datastructure. + + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. + + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + + Returns: + a coco.COCO datastructure holding object detection annotations results + + Raises: + ValueError: if annotations is not a list + ValueError: if annotations do not correspond to the images contained + in self. 
+ """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + tf.logging.info('Loading and preparing annotation results...') + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + tf.logging.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, groundtruth=None, detections=None, agnostic_mode=False, + iou_type='bbox'): + """COCOEvalWrapper constructor. + + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + """ + cocoeval.COCOeval.__init__(self, groundtruth, detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + + def GetCategory(self, category_id): + """Fetches dictionary holding category information given category id. + + Args: + category_id: integer id + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self): + """Returns true if COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self): + """Returns list of valid category ids.""" + return self.params.catIds + + def ComputeMetrics(self, + include_metrics_per_category=False, + all_metrics_per_category=False): + """Computes detection metrics. + + Args: + include_metrics_per_category: If True, will include metrics per category. + all_metrics_per_category: If true, include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. + + Returns: + 1. 
summary_metrics: a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments + 'Precision/mAP@.50IOU': mean average precision at 50% IOU + 'Precision/mAP@.75IOU': mean average precision at 75% IOU + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels) + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels) + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels) + 'Recall/AR@1': average recall with 1 detection + 'Recall/AR@10': average recall with 10 detections + 'Recall/AR@100': average recall with 100 detections + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections + 2. per_category_ap: a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. + If evaluating class-agnostic mode, per_category_ap is an empty + dictionary. + + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), + ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + 
per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Converts a box in [ymin, xmin, ymax, xmax] format to COCO format. + + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + + Args: + box: a [ymin, xmin, ymax, xmax] numpy array + + Returns: + a list of floats representing [xmin, ymin, width, height] + """ + return [float(box[1]), float(box[0]), float(box[3] - box[1]), + float(box[2] - box[0])] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id, + next_annotation_id, + category_id_set, + groundtruth_boxes, + groundtruth_classes, + groundtruth_masks=None, + groundtruth_is_crowd=None): + """Export groundtruth of a single image to COCO format. + + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + + Returns: + a list of groundtruth annotations for a single image in the COCO format. 
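# Worked example for the two helpers defined above (values illustrative):
# _ConvertBoxToCOCOFormat swaps the [ymin, xmin, ymax, xmax] convention to
# COCO's [xmin, ymin, width, height], and _RleCompress relies on the
# Fortran-ordered copy that np.asfortranarray provides for mask.encode.
import numpy as np

box = np.array([10., 20., 50., 80.])  # ymin, xmin, ymax, xmax
assert _ConvertBoxToCOCOFormat(box) == [20.0, 10.0, 60.0, 40.0]
rle = _RleCompress(np.ones([4, 6], dtype=np.uint8))
# rle is a pycocotools RLE dict with 'size' and 'counts' keys.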
+ + Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' + 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' + 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' + 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError('Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % ( + groundtruth_classes.shape[0], + groundtruth_boxes.shape[0], image_id)) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + image_id, + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': + float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), + 'iscrowd': + iscrowd + } + if groundtruth_masks is not None: + export_dict['segmentation'] = _RleCompress(groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportGroundtruthToCOCO(image_ids, + groundtruth_boxes, + groundtruth_classes, + categories, + output_path=None): + """Export groundtruth detection annotations in numpy arrays to COCO API. + + This function converts a set of groundtruth detection annotations represented + as numpy arrays to dictionaries that can be ingested by the COCO API. + Inputs to this function are three lists: image ids for each groundtruth image, + groundtruth boxes for each image and groundtruth classes respectively. + Note that the image_ids provided here must match the ones given to the + ExportDetectionsToCOCO function in order for evaluation to work properly. + We assume that for each image, boxes, scores and classes are in + correspondence --- that is: image_id[i], groundtruth_boxes[i, :] and + groundtruth_classes[i] are associated with the same groundtruth annotation. + + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box and "iscrowd" fields are always set to 0. + TODO(jonathanhuang): pass in "iscrowd" array for evaluating on COCO dataset. + + Args: + image_ids: a list of unique image identifier either of type integer or + string. + groundtruth_boxes: list of numpy arrays with shape [num_gt_boxes, 4] + (note that num_gt_boxes can be different for each entry in the list) + groundtruth_classes: list of numpy arrays (int) with shape [num_gt_boxes] + (note that num_gt_boxes can be different for each entry in the list) + categories: a list of dictionaries representing all possible categories. 
Each dict in this list has the following keys:
+          'id': (required) an integer id uniquely identifying this category
+          'name': (required) string representing category name
+            e.g., 'cat', 'dog', 'pizza'
+          'supercategory': (optional) string representing the supercategory
+            e.g., 'animal', 'vehicle', 'food', etc
+    output_path: (optional) path for exporting result to JSON
+
+  Returns:
+    dictionary that can be read by COCO API
+
+  Raises:
+    ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have
+      the right lengths or (2) if each of the elements inside these lists do
+      not have the correct shapes or (3) if image_ids are not integers
+  """
+  category_id_set = set([cat['id'] for cat in categories])
+  groundtruth_export_list = []
+  image_export_list = []
+  if not len(image_ids) == len(groundtruth_boxes) == len(groundtruth_classes):
+    raise ValueError('Input lists must have the same length')
+
+  # For reasons internal to the COCO API, it is important that annotation ids
+  # are not equal to zero; we thus start counting from 1.
+  annotation_id = 1
+  for image_id, boxes, classes in zip(image_ids, groundtruth_boxes,
+                                      groundtruth_classes):
+    image_export_list.append({'id': image_id})
+    groundtruth_export_list.extend(ExportSingleImageGroundtruthToCoco(
+        image_id,
+        annotation_id,
+        category_id_set,
+        boxes,
+        classes))
+    num_boxes = classes.shape[0]
+    annotation_id += num_boxes
+
+  groundtruth_dict = {
+      'annotations': groundtruth_export_list,
+      'images': image_export_list,
+      'categories': categories
+  }
+  if output_path:
+    with tf.gfile.GFile(output_path, 'w') as fid:
+      json_utils.Dump(groundtruth_dict, fid, float_digits=4, indent=2)
+  return groundtruth_dict
+
+
+def ExportSingleImageDetectionBoxesToCoco(image_id,
+                                          category_id_set,
+                                          detection_boxes,
+                                          detection_scores,
+                                          detection_classes):
+  """Export detections of a single image to COCO format.
+
+  This function converts detections represented as numpy arrays to
+  dictionaries that can be ingested by the COCO evaluation API. Note that the
+  image_ids provided here must match the ones given to
+  ExportSingleImageGroundtruthToCoco. We assume that boxes and classes are in
+  correspondence - that is: boxes[i, :] and classes[i] are associated with
+  the same detection.
+
+  Args:
+    image_id: unique image identifier either of type integer or string.
+    category_id_set: A set of valid class ids. Detections with classes not in
+      category_id_set are dropped.
+    detection_boxes: float numpy array of shape [num_detections, 4] containing
+      detection boxes.
+    detection_scores: float numpy array of shape [num_detections] containing
+      scores for the detection boxes.
+    detection_classes: integer numpy array of shape [num_detections] containing
+      the classes for detection boxes.
+
+  Returns:
+    a list of detection annotations for a single image in the COCO format.
+
+  Raises:
+    ValueError: if (1) detection_boxes, detection_scores and detection_classes
+      do not have the right lengths or (2) if each of the elements inside these
+      lists do not have the correct shapes or (3) if image_ids are not
+      integers.
+  """
+
+  if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1:
+    raise ValueError('All entries in detection_classes and detection_scores '
+                     'expected to be of rank 1.')
+  if len(detection_boxes.shape) != 2:
+    raise ValueError('All entries in detection_boxes expected to be of '
+                     'rank 2.')
+  if detection_boxes.shape[1] != 4:
+    raise ValueError('All entries in detection_boxes should have '
+                     'shape[1] == 4.')
+  num_boxes = detection_classes.shape[0]
+  if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]:
+    raise ValueError('Corresponding entries in detection_classes, '
+                     'detection_scores and detection_boxes should have '
+                     'compatible shapes (i.e., agree on the 0th dimension). '
+                     'Classes shape: %d. Boxes shape: %d. '
+                     'Scores shape: %d' % (
+                         detection_classes.shape[0], detection_boxes.shape[0],
+                         detection_scores.shape[0]
+                     ))
+  detections_list = []
+  for i in range(num_boxes):
+    if detection_classes[i] in category_id_set:
+      detections_list.append({
+          'image_id': image_id,
+          'category_id': int(detection_classes[i]),
+          'bbox': list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])),
+          'score': float(detection_scores[i])
+      })
+  return detections_list
+
+
+def ExportSingleImageDetectionMasksToCoco(image_id,
+                                          category_id_set,
+                                          detection_masks,
+                                          detection_scores,
+                                          detection_classes):
+  """Export detection masks of a single image to COCO format.
+
+  This function converts detections represented as numpy arrays to
+  dictionaries that can be ingested by the COCO evaluation API. We assume that
+  detection_masks, detection_scores, and detection_classes are in
+  correspondence - that is: detection_masks[i, :], detection_classes[i] and
+  detection_scores[i] are associated with the same annotation.
+
+  Args:
+    image_id: unique image identifier either of type integer or string.
+    category_id_set: A set of valid class ids. Detections with classes not in
+      category_id_set are dropped.
+    detection_masks: uint8 numpy array of shape [num_detections, image_height,
+      image_width] containing the detection masks.
+    detection_scores: float numpy array of shape [num_detections] containing
+      scores for detection masks.
+    detection_classes: integer numpy array of shape [num_detections] containing
+      the classes for detection masks.
+
+  Returns:
+    a list of detection mask annotations for a single image in the COCO format.
+
+  Raises:
+    ValueError: if (1) detection_masks, detection_scores and detection_classes
+      do not have the right lengths or (2) if each of the elements inside these
+      lists do not have the correct shapes or (3) if image_ids are not
+      integers.
+  """
+
+  if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1:
+    raise ValueError('All entries in detection_classes and detection_scores '
+                     'expected to be of rank 1.')
+  num_boxes = detection_classes.shape[0]
+  if not num_boxes == len(detection_masks) == detection_scores.shape[0]:
+    raise ValueError('Corresponding entries in detection_classes, '
+                     'detection_scores and detection_masks should have '
+                     'compatible lengths and shapes. '
+                     'Classes length: %d. Masks length: %d.
' + 'Scores length: %d' % ( + detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0] + )) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': image_id, + 'category_id': int(detection_classes[i]), + 'segmentation': _RleCompress(detection_masks[i]), + 'score': float(detection_scores[i]) + }) + return detections_list + + +def ExportDetectionsToCOCO(image_ids, + detection_boxes, + detection_scores, + detection_classes, + categories, + output_path=None): + """Export detection annotations in numpy arrays to COCO API. + + This function converts a set of predicted detections represented + as numpy arrays to dictionaries that can be ingested by the COCO API. + Inputs to this function are lists, consisting of boxes, scores and + classes, respectively, corresponding to each image for which detections + have been produced. Note that the image_ids provided here must + match the ones given to the ExportGroundtruthToCOCO function in order + for evaluation to work properly. + + We assume that for each image, boxes, scores and classes are in + correspondence --- that is: detection_boxes[i, :], detection_scores[i] and + detection_classes[i] are associated with the same detection. + + Args: + image_ids: a list of unique image identifier either of type integer or + string. + detection_boxes: list of numpy arrays with shape [num_detection_boxes, 4] + detection_scores: list of numpy arrays (float) with shape + [num_detection_boxes]. Note that num_detection_boxes can be different + for each entry in the list. + detection_classes: list of numpy arrays (int) with shape + [num_detection_boxes]. Note that num_detection_boxes can be different + for each entry in the list. + categories: a list of dictionaries representing all possible categories. + Each dict in this list must have an integer 'id' key uniquely identifying + this category. + output_path: (optional) path for exporting result to JSON + + Returns: + list of dictionaries that can be read by COCO API, where each entry + corresponds to a single detection and has keys from: + ['image_id', 'category_id', 'bbox', 'score']. + Raises: + ValueError: if (1) detection_boxes and detection_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers. + """ + category_id_set = set([cat['id'] for cat in categories]) + detections_export_list = [] + if not (len(image_ids) == len(detection_boxes) == len(detection_scores) == + len(detection_classes)): + raise ValueError('Input lists must have the same length') + for image_id, boxes, scores, classes in zip(image_ids, detection_boxes, + detection_scores, + detection_classes): + detections_export_list.extend(ExportSingleImageDetectionBoxesToCoco( + image_id, + category_id_set, + boxes, + scores, + classes)) + if output_path: + with tf.gfile.GFile(output_path, 'w') as fid: + json_utils.Dump(detections_export_list, fid, float_digits=4, indent=2) + return detections_export_list + + +def ExportSegmentsToCOCO(image_ids, + detection_masks, + detection_scores, + detection_classes, + categories, + output_path=None): + """Export segmentation masks in numpy arrays to COCO API. + + This function converts a set of predicted instance masks represented + as numpy arrays to dictionaries that can be ingested by the COCO API. 
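# A small end-to-end use of the box-export path defined above, with toy
# values (the id, category name and score are illustrative only):
import numpy as np

detections = ExportDetectionsToCOCO(
    image_ids=['img1'],
    detection_boxes=[np.array([[10., 20., 50., 80.]], dtype=np.float32)],
    detection_scores=[np.array([.9], dtype=np.float32)],
    detection_classes=[np.array([1], dtype=np.int32)],
    categories=[{'id': 1, 'name': 'cat'}])
# detections == [{'image_id': 'img1', 'category_id': 1,
#                 'bbox': [20.0, 10.0, 60.0, 40.0], 'score': ~0.9}]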
+  Inputs to this function are lists, consisting of segments, scores and
+  classes, respectively, corresponding to each image for which detections
+  have been produced.
+
+  Note that this function is only recommended for small datasets. For large
+  datasets it should be combined with a merge step (e.g. in a map-reduce),
+  since otherwise the memory consumption becomes large.
+
+  We assume that for each image, masks, scores and classes are in
+  correspondence --- that is: detection_masks[i, :, :, :], detection_scores[i]
+  and detection_classes[i] are associated with the same detection.
+
+  Args:
+    image_ids: list of image ids (typically ints or strings)
+    detection_masks: list of numpy arrays with shape [num_detection, h, w, 1]
+      and type uint8. The height and width should match the shape of the
+      corresponding image.
+    detection_scores: list of numpy arrays (float) with shape
+      [num_detection]. Note that num_detection can be different
+      for each entry in the list.
+    detection_classes: list of numpy arrays (int) with shape
+      [num_detection]. Note that num_detection can be different
+      for each entry in the list.
+    categories: a list of dictionaries representing all possible categories.
+      Each dict in this list must have an integer 'id' key uniquely identifying
+      this category.
+    output_path: (optional) path for exporting result to JSON
+
+  Returns:
+    list of dictionaries that can be read by COCO API, where each entry
+    corresponds to a single detection and has keys from:
+    ['image_id', 'category_id', 'segmentation', 'score'].
+
+  Raises:
+    ValueError: if detection_masks and detection_classes do not have the
+      right lengths or if each of the elements inside these lists do not
+      have the correct shapes.
+  """
+  if not (len(image_ids) == len(detection_masks) == len(detection_scores) ==
+          len(detection_classes)):
+    raise ValueError('Input lists must have the same length')
+
+  # The category id set is loop-invariant, so compute it once up front.
+  category_id_set = set([cat['id'] for cat in categories])
+  segment_export_list = []
+  for image_id, masks, scores, classes in zip(image_ids, detection_masks,
+                                              detection_scores,
+                                              detection_classes):
+
+    if len(classes.shape) != 1 or len(scores.shape) != 1:
+      raise ValueError('All entries in detection_classes and detection_scores '
+                       'expected to be of rank 1.')
+    if len(masks.shape) != 4:
+      raise ValueError('All entries in masks expected to be of '
+                       'rank 4. Given {}'.format(masks.shape))
+
+    num_boxes = classes.shape[0]
+    if not num_boxes == masks.shape[0] == scores.shape[0]:
+      raise ValueError('Corresponding entries in detection_classes, '
+                       'detection_scores and detection_masks should have '
+                       'compatible shapes (i.e., agree on the 0th dimension).')
+
+    segment_export_list.extend(ExportSingleImageDetectionMasksToCoco(
+        image_id, category_id_set, np.squeeze(masks, axis=3), scores, classes))
+
+  if output_path:
+    with tf.gfile.GFile(output_path, 'w') as fid:
+      json_utils.Dump(segment_export_list, fid, float_digits=4, indent=2)
+  return segment_export_list
+
+
+def ExportKeypointsToCOCO(image_ids,
+                          detection_keypoints,
+                          detection_scores,
+                          detection_classes,
+                          categories,
+                          output_path=None):
+  """Exports keypoints in numpy arrays to COCO API.
+
+  This function converts a set of predicted keypoints represented
+  as numpy arrays to dictionaries that can be ingested by the COCO API.
+  Inputs to this function are lists, consisting of keypoints, scores and
+  classes, respectively, corresponding to each image for which detections
+  have been produced.
+ + We assume that for each image, keypoints, scores and classes are in + correspondence --- that is: detection_keypoints[i, :, :, :], + detection_scores[i] and detection_classes[i] are associated with the same + detection. + + Args: + image_ids: list of image ids (typically ints or strings) + detection_keypoints: list of numpy arrays with shape + [num_detection, num_keypoints, 2] and type float32 in absolute + x-y coordinates. + detection_scores: list of numpy arrays (float) with shape + [num_detection]. Note that num_detection can be different + for each entry in the list. + detection_classes: list of numpy arrays (int) with shape + [num_detection]. Note that num_detection can be different + for each entry in the list. + categories: a list of dictionaries representing all possible categories. + Each dict in this list must have an integer 'id' key uniquely identifying + this category and an integer 'num_keypoints' key specifying the number of + keypoints the category has. + output_path: (optional) path for exporting result to JSON + + Returns: + list of dictionaries that can be read by COCO API, where each entry + corresponds to a single detection and has keys from: + ['image_id', 'category_id', 'keypoints', 'score']. + + Raises: + ValueError: if detection_keypoints and detection_classes do not have the + right lengths or if each of the elements inside these lists do not + have the correct shapes. + """ + if not (len(image_ids) == len(detection_keypoints) == + len(detection_scores) == len(detection_classes)): + raise ValueError('Input lists must have the same length') + + keypoints_export_list = [] + for image_id, keypoints, scores, classes in zip( + image_ids, detection_keypoints, detection_scores, detection_classes): + + if len(classes.shape) != 1 or len(scores.shape) != 1: + raise ValueError('All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(keypoints.shape) != 3: + raise ValueError('All entries in keypoints expected to be of ' + 'rank 3. Given {}'.format(keypoints.shape)) + + num_boxes = classes.shape[0] + if not num_boxes == keypoints.shape[0] == scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_keypoints, and detection_scores should have ' + 'compatible shapes (i.e., agree on the 0th dimension).') + + category_id_set = set([cat['id'] for cat in categories]) + category_id_to_num_keypoints_map = { + cat['id']: cat['num_keypoints'] for cat in categories + if 'num_keypoints' in cat} + + for i in range(num_boxes): + if classes[i] not in category_id_set: + raise ValueError('class id should be in category_id_set\n') + + if classes[i] in category_id_to_num_keypoints_map: + num_keypoints = category_id_to_num_keypoints_map[classes[i]] + # Adds extra ones to indicate the visibility for each keypoint as is + # recommended by MSCOCO. 
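# Concretely (hypothetical values): keypoints [[100, 200], [300, 400],
# [500, 600]] flatten to [100, 200, 1, 300, 400, 1, 500, 600, 1] -- COCO
# keypoints are [x, y, visibility] triplets, with visibility set to 1 here.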
+ instance_keypoints = np.concatenate( + [keypoints[i, 0:num_keypoints, :], + np.expand_dims(np.ones(num_keypoints), axis=1)], + axis=1).astype(int) + + instance_keypoints = instance_keypoints.flatten().tolist() + keypoints_export_list.append({ + 'image_id': image_id, + 'category_id': int(classes[i]), + 'keypoints': instance_keypoints, + 'score': float(scores[i]) + }) + + if output_path: + with tf.gfile.GFile(output_path, 'w') as fid: + json_utils.Dump(keypoints_export_list, fid, float_digits=4, indent=2) + return keypoints_export_list diff --git a/metrics/coco_tools_test.py b/metrics/coco_tools_test.py new file mode 100644 index 0000000..cfb73d8 --- /dev/null +++ b/metrics/coco_tools_test.py @@ -0,0 +1,295 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for tensorflow_model.object_detection.metrics.coco_tools.""" +import json +import os +import re +import numpy as np + +from pycocotools import mask + +import tensorflow as tf + +from object_detection.metrics import coco_tools + + +class CocoToolsTest(tf.test.TestCase): + + def setUp(self): + groundtruth_annotations_list = [ + { + 'id': 1, + 'image_id': 'first', + 'category_id': 1, + 'bbox': [100., 100., 100., 100.], + 'area': 100.**2, + 'iscrowd': 0 + }, + { + 'id': 2, + 'image_id': 'second', + 'category_id': 1, + 'bbox': [50., 50., 50., 50.], + 'area': 50.**2, + 'iscrowd': 0 + }, + ] + image_list = [{'id': 'first'}, {'id': 'second'}] + category_list = [{'id': 0, 'name': 'person'}, + {'id': 1, 'name': 'cat'}, + {'id': 2, 'name': 'dog'}] + self._groundtruth_dict = { + 'annotations': groundtruth_annotations_list, + 'images': image_list, + 'categories': category_list + } + + self._detections_list = [ + { + 'image_id': 'first', + 'category_id': 1, + 'bbox': [100., 100., 100., 100.], + 'score': .8 + }, + { + 'image_id': 'second', + 'category_id': 1, + 'bbox': [50., 50., 50., 50.], + 'score': .7 + }, + ] + + def testCocoWrappers(self): + groundtruth = coco_tools.COCOWrapper(self._groundtruth_dict) + detections = groundtruth.LoadAnnotations(self._detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections) + summary_metrics, _ = evaluator.ComputeMetrics() + self.assertAlmostEqual(1.0, summary_metrics['Precision/mAP']) + + def testExportGroundtruthToCOCO(self): + image_ids = ['first', 'second'] + groundtruth_boxes = [np.array([[100, 100, 200, 200]], np.float), + np.array([[50, 50, 100, 100]], np.float)] + groundtruth_classes = [np.array([1], np.int32), np.array([1], np.int32)] + categories = [{'id': 0, 'name': 'person'}, + {'id': 1, 'name': 'cat'}, + {'id': 2, 'name': 'dog'}] + output_path = os.path.join(tf.test.get_temp_dir(), 'groundtruth.json') + result = coco_tools.ExportGroundtruthToCOCO( + image_ids, + groundtruth_boxes, + groundtruth_classes, + categories, + output_path=output_path) + self.assertDictEqual(result, self._groundtruth_dict) + with 
tf.gfile.GFile(output_path, 'r') as f: + written_result = f.read() + # The json output should have floats written to 4 digits of precision. + matcher = re.compile(r'"bbox":\s+\[\n\s+\d+.\d\d\d\d,', re.MULTILINE) + self.assertTrue(matcher.findall(written_result)) + written_result = json.loads(written_result) + self.assertAlmostEqual(result, written_result) + + def testExportDetectionsToCOCO(self): + image_ids = ['first', 'second'] + detections_boxes = [np.array([[100, 100, 200, 200]], np.float), + np.array([[50, 50, 100, 100]], np.float)] + detections_scores = [np.array([.8], np.float), np.array([.7], np.float)] + detections_classes = [np.array([1], np.int32), np.array([1], np.int32)] + categories = [{'id': 0, 'name': 'person'}, + {'id': 1, 'name': 'cat'}, + {'id': 2, 'name': 'dog'}] + output_path = os.path.join(tf.test.get_temp_dir(), 'detections.json') + result = coco_tools.ExportDetectionsToCOCO( + image_ids, + detections_boxes, + detections_scores, + detections_classes, + categories, + output_path=output_path) + self.assertListEqual(result, self._detections_list) + with tf.gfile.GFile(output_path, 'r') as f: + written_result = f.read() + # The json output should have floats written to 4 digits of precision. + matcher = re.compile(r'"bbox":\s+\[\n\s+\d+.\d\d\d\d,', re.MULTILINE) + self.assertTrue(matcher.findall(written_result)) + written_result = json.loads(written_result) + self.assertAlmostEqual(result, written_result) + + def testExportSegmentsToCOCO(self): + image_ids = ['first', 'second'] + detection_masks = [np.array( + [[[0, 1, 0, 1], [0, 1, 1, 0], [0, 0, 0, 1], [0, 1, 0, 1]]], + dtype=np.uint8), np.array( + [[[0, 1, 0, 1], [0, 1, 1, 0], [0, 0, 0, 1], [0, 1, 0, 1]]], + dtype=np.uint8)] + + for i, detection_mask in enumerate(detection_masks): + detection_masks[i] = detection_mask[:, :, :, None] + + detection_scores = [np.array([.8], np.float), np.array([.7], np.float)] + detection_classes = [np.array([1], np.int32), np.array([1], np.int32)] + + categories = [{'id': 0, 'name': 'person'}, + {'id': 1, 'name': 'cat'}, + {'id': 2, 'name': 'dog'}] + output_path = os.path.join(tf.test.get_temp_dir(), 'segments.json') + result = coco_tools.ExportSegmentsToCOCO( + image_ids, + detection_masks, + detection_scores, + detection_classes, + categories, + output_path=output_path) + with tf.gfile.GFile(output_path, 'r') as f: + written_result = f.read() + written_result = json.loads(written_result) + mask_load = mask.decode([written_result[0]['segmentation']]) + self.assertTrue(np.allclose(mask_load, detection_masks[0])) + self.assertAlmostEqual(result, written_result) + + def testExportKeypointsToCOCO(self): + image_ids = ['first', 'second'] + detection_keypoints = [ + np.array( + [[[100, 200], [300, 400], [500, 600]], + [[50, 150], [250, 350], [450, 550]]], dtype=np.int32), + np.array( + [[[110, 210], [310, 410], [510, 610]], + [[60, 160], [260, 360], [460, 560]]], dtype=np.int32)] + + detection_scores = [np.array([.8, 0.2], np.float), + np.array([.7, 0.3], np.float)] + detection_classes = [np.array([1, 1], np.int32), np.array([1, 1], np.int32)] + + categories = [{'id': 1, 'name': 'person', 'num_keypoints': 3}, + {'id': 2, 'name': 'cat'}, + {'id': 3, 'name': 'dog'}] + + output_path = os.path.join(tf.test.get_temp_dir(), 'keypoints.json') + result = coco_tools.ExportKeypointsToCOCO( + image_ids, + detection_keypoints, + detection_scores, + detection_classes, + categories, + output_path=output_path) + + with tf.gfile.GFile(output_path, 'r') as f: + written_result = f.read() + written_result = 
json.loads(written_result) + self.assertAlmostEqual(result, written_result) + + def testSingleImageDetectionBoxesExport(self): + boxes = np.array([[0, 0, 1, 1], + [0, 0, .5, .5], + [.5, .5, 1, 1]], dtype=np.float32) + classes = np.array([1, 2, 3], dtype=np.int32) + scores = np.array([0.8, 0.2, 0.7], dtype=np.float32) + coco_boxes = np.array([[0, 0, 1, 1], + [0, 0, .5, .5], + [.5, .5, .5, .5]], dtype=np.float32) + coco_annotations = coco_tools.ExportSingleImageDetectionBoxesToCoco( + image_id='first_image', + category_id_set=set([1, 2, 3]), + detection_boxes=boxes, + detection_classes=classes, + detection_scores=scores) + for i, annotation in enumerate(coco_annotations): + self.assertEqual(annotation['image_id'], 'first_image') + self.assertEqual(annotation['category_id'], classes[i]) + self.assertAlmostEqual(annotation['score'], scores[i]) + self.assertTrue(np.all(np.isclose(annotation['bbox'], coco_boxes[i]))) + + def testSingleImageDetectionMaskExport(self): + masks = np.array( + [[[1, 1,], [1, 1]], + [[0, 0], [0, 1]], + [[0, 0], [0, 0]]], dtype=np.uint8) + classes = np.array([1, 2, 3], dtype=np.int32) + scores = np.array([0.8, 0.2, 0.7], dtype=np.float32) + coco_annotations = coco_tools.ExportSingleImageDetectionMasksToCoco( + image_id='first_image', + category_id_set=set([1, 2, 3]), + detection_classes=classes, + detection_scores=scores, + detection_masks=masks) + expected_counts = ['04', '31', '4'] + for i, mask_annotation in enumerate(coco_annotations): + self.assertEqual(mask_annotation['segmentation']['counts'], + expected_counts[i]) + self.assertTrue(np.all(np.equal(mask.decode( + mask_annotation['segmentation']), masks[i]))) + self.assertEqual(mask_annotation['image_id'], 'first_image') + self.assertEqual(mask_annotation['category_id'], classes[i]) + self.assertAlmostEqual(mask_annotation['score'], scores[i]) + + def testSingleImageGroundtruthExport(self): + masks = np.array( + [[[1, 1,], [1, 1]], + [[0, 0], [0, 1]], + [[0, 0], [0, 0]]], dtype=np.uint8) + boxes = np.array([[0, 0, 1, 1], + [0, 0, .5, .5], + [.5, .5, 1, 1]], dtype=np.float32) + coco_boxes = np.array([[0, 0, 1, 1], + [0, 0, .5, .5], + [.5, .5, .5, .5]], dtype=np.float32) + classes = np.array([1, 2, 3], dtype=np.int32) + is_crowd = np.array([0, 1, 0], dtype=np.int32) + next_annotation_id = 1 + expected_counts = ['04', '31', '4'] + + # Tests exporting without passing in is_crowd (for backward compatibility). + coco_annotations = coco_tools.ExportSingleImageGroundtruthToCoco( + image_id='first_image', + category_id_set=set([1, 2, 3]), + next_annotation_id=next_annotation_id, + groundtruth_boxes=boxes, + groundtruth_classes=classes, + groundtruth_masks=masks) + for i, annotation in enumerate(coco_annotations): + self.assertEqual(annotation['segmentation']['counts'], + expected_counts[i]) + self.assertTrue(np.all(np.equal(mask.decode( + annotation['segmentation']), masks[i]))) + self.assertTrue(np.all(np.isclose(annotation['bbox'], coco_boxes[i]))) + self.assertEqual(annotation['image_id'], 'first_image') + self.assertEqual(annotation['category_id'], classes[i]) + self.assertEqual(annotation['id'], i + next_annotation_id) + + # Tests exporting with is_crowd. 
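+    # (COCO uses iscrowd=1 to mark annotations that cover groups of objects;
+    # the evaluation treats detections matched to crowd regions specially,
+    # ignoring them rather than counting them as false positives.)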
+ coco_annotations = coco_tools.ExportSingleImageGroundtruthToCoco( + image_id='first_image', + category_id_set=set([1, 2, 3]), + next_annotation_id=next_annotation_id, + groundtruth_boxes=boxes, + groundtruth_classes=classes, + groundtruth_masks=masks, + groundtruth_is_crowd=is_crowd) + for i, annotation in enumerate(coco_annotations): + self.assertEqual(annotation['segmentation']['counts'], + expected_counts[i]) + self.assertTrue(np.all(np.equal(mask.decode( + annotation['segmentation']), masks[i]))) + self.assertTrue(np.all(np.isclose(annotation['bbox'], coco_boxes[i]))) + self.assertEqual(annotation['image_id'], 'first_image') + self.assertEqual(annotation['category_id'], classes[i]) + self.assertEqual(annotation['iscrowd'], is_crowd[i]) + self.assertEqual(annotation['id'], i + next_annotation_id) + + +if __name__ == '__main__': + tf.test.main() diff --git a/metrics/io_utils.py b/metrics/io_utils.py new file mode 100644 index 0000000..900584d --- /dev/null +++ b/metrics/io_utils.py @@ -0,0 +1,34 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Common IO utils used in offline metric computation. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import csv + + +def write_csv(fid, metrics): + """Writes metrics key-value pairs to CSV file. + + Args: + fid: File identifier of an opened file. + metrics: A dictionary with metrics to be written. + """ + metrics_writer = csv.writer(fid, delimiter=',') + for metric_name, metric_value in metrics.items(): + metrics_writer.writerow([metric_name, str(metric_value)]) diff --git a/metrics/offline_eval_map_corloc.py b/metrics/offline_eval_map_corloc.py new file mode 100644 index 0000000..b5514be --- /dev/null +++ b/metrics/offline_eval_map_corloc.py @@ -0,0 +1,171 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +r"""Evaluation executable for detection data. + +This executable evaluates precomputed detections produced by a detection +model and writes the evaluation results into csv file metrics.csv, stored +in the directory, specified by --eval_dir. + +The evaluation metrics set is supplied in object_detection.protos.EvalConfig +in metrics_set field. 
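+For example, a minimal eval config file (text proto) may contain just the
+line
+
+  metrics_set: 'pascal_voc_metrics'
+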
+Currently two sets of metrics are supported:
+- pascal_voc_metrics: standard PASCAL VOC 2007 metric
+- open_images_detection_metrics: Open Images V2 metric
+All other fields of object_detection.protos.EvalConfig are ignored.
+
+Example usage:
+  ./compute_metrics \
+    --eval_dir=path/to/eval_dir \
+    --eval_config_path=path/to/evaluation/configuration/file \
+    --input_config_path=path/to/input/configuration/file
+"""
+import csv
+import os
+import re
+import tensorflow as tf
+
+from object_detection.core import standard_fields
+from object_detection.legacy import evaluator
+from object_detection.metrics import tf_example_parser
+from object_detection.utils import config_util
+from object_detection.utils import label_map_util
+
+flags = tf.app.flags
+tf.logging.set_verbosity(tf.logging.INFO)
+
+flags.DEFINE_string('eval_dir', None, 'Directory to write eval summaries to.')
+flags.DEFINE_string('eval_config_path', None,
+                    'Path to an eval_pb2.EvalConfig config file.')
+flags.DEFINE_string('input_config_path', None,
+                    'Path to an eval_pb2.InputConfig config file.')
+
+FLAGS = flags.FLAGS
+
+
+def _generate_sharded_filenames(filename):
+  """Expands a sharded filename spec such as 'name@N' into N shard names."""
+  m = re.search(r'@(\d{1,})', filename)
+  if m:
+    num_shards = int(m.group(1))
+    return [
+        re.sub(r'@(\d{1,})', '-%.5d-of-%.5d' % (i, num_shards), filename)
+        for i in range(num_shards)
+    ]
+  else:
+    return [filename]
+
+
+def _generate_filenames(filenames):
+  """Expands every (possibly sharded) filename spec into a flat list."""
+  result = []
+  for filename in filenames:
+    result += _generate_sharded_filenames(filename)
+  return result
+
+
+def read_data_and_evaluate(input_config, eval_config):
+  """Reads pre-computed object detections and groundtruth from tf_record.
+
+  Args:
+    input_config: input config proto of type
+      object_detection.protos.InputReader.
+    eval_config: evaluation config proto of type
+      object_detection.protos.EvalConfig.
+
+  Returns:
+    Evaluated detections metrics.
+
+  Raises:
+    ValueError: if input_reader type is not supported or metric type is
+      unknown.
+  """
+  if input_config.WhichOneof('input_reader') == 'tf_record_input_reader':
+    input_paths = input_config.tf_record_input_reader.input_path
+
+    categories = label_map_util.create_categories_from_labelmap(
+        input_config.label_map_path)
+
+    object_detection_evaluators = evaluator.get_evaluators(
+        eval_config, categories)
+    # Support a single evaluator.
+    object_detection_evaluator = object_detection_evaluators[0]
+
+    skipped_images = 0
+    processed_images = 0
+    for input_path in _generate_filenames(input_paths):
+      tf.logging.info('Processing file: {0}'.format(input_path))
+
+      record_iterator = tf.python_io.tf_record_iterator(path=input_path)
+      data_parser = tf_example_parser.TfExampleDetectionAndGTParser()
+
+      for string_record in record_iterator:
+        tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 1000,
+                               processed_images)
+        processed_images += 1
+
+        example = tf.train.Example()
+        example.ParseFromString(string_record)
+        decoded_dict = data_parser.parse(example)
+
+        if decoded_dict:
+          object_detection_evaluator.add_single_ground_truth_image_info(
+              decoded_dict[standard_fields.DetectionResultFields.key],
+              decoded_dict)
+          object_detection_evaluator.add_single_detected_image_info(
+              decoded_dict[standard_fields.DetectionResultFields.key],
+              decoded_dict)
+        else:
+          skipped_images += 1
+          tf.logging.info('Skipped images: {0}'.format(skipped_images))
+
+    return object_detection_evaluator.evaluate()
+
+  raise ValueError('Unsupported input_reader_config.')
+
+
+def write_metrics(metrics, output_dir):
+  """Writes metrics to a CSV file in the output directory.
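+
+  Each metric is written as a single 'name,value' row of
+  <output_dir>/metrics.csv.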
+ + Args: + metrics: A dictionary containing metric names and values. + output_dir: Directory to write metrics to. + """ + tf.logging.info('Writing metrics.') + + with open(os.path.join(output_dir, 'metrics.csv'), 'w') as csvfile: + metrics_writer = csv.writer(csvfile, delimiter=',') + for metric_name, metric_value in metrics.items(): + metrics_writer.writerow([metric_name, str(metric_value)]) + + +def main(argv): + del argv + required_flags = ['input_config_path', 'eval_config_path', 'eval_dir'] + for flag_name in required_flags: + if not getattr(FLAGS, flag_name): + raise ValueError('Flag --{} is required'.format(flag_name)) + + configs = config_util.get_configs_from_multiple_files( + eval_input_config_path=FLAGS.input_config_path, + eval_config_path=FLAGS.eval_config_path) + + eval_config = configs['eval_config'] + input_config = configs['eval_input_config'] + + metrics = read_data_and_evaluate(input_config, eval_config) + + # Save metrics + write_metrics(metrics, FLAGS.eval_dir) + + +if __name__ == '__main__': + tf.app.run(main) diff --git a/metrics/offline_eval_map_corloc_test.py b/metrics/offline_eval_map_corloc_test.py new file mode 100644 index 0000000..68ac389 --- /dev/null +++ b/metrics/offline_eval_map_corloc_test.py @@ -0,0 +1,58 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+"""Tests for utilities in offline_eval_map_corloc binary."""
+
+import tensorflow as tf
+
+from object_detection.metrics import offline_eval_map_corloc as offline_eval
+
+
+class OfflineEvalMapCorlocTest(tf.test.TestCase):
+
+  def test_generateShardedFilenames(self):
+    test_filename = '/path/to/file'
+    result = offline_eval._generate_sharded_filenames(test_filename)
+    self.assertEqual(result, [test_filename])
+
+    test_filename = '/path/to/file-00000-of-00050'
+    result = offline_eval._generate_sharded_filenames(test_filename)
+    self.assertEqual(result, [test_filename])
+
+    result = offline_eval._generate_sharded_filenames('/path/to/@3.record')
+    self.assertEqual(result, [
+        '/path/to/-00000-of-00003.record', '/path/to/-00001-of-00003.record',
+        '/path/to/-00002-of-00003.record'
+    ])
+
+    result = offline_eval._generate_sharded_filenames('/path/to/abc@3')
+    self.assertEqual(result, [
+        '/path/to/abc-00000-of-00003', '/path/to/abc-00001-of-00003',
+        '/path/to/abc-00002-of-00003'
+    ])
+
+    result = offline_eval._generate_sharded_filenames('/path/to/@1')
+    self.assertEqual(result, ['/path/to/-00000-of-00001'])
+
+  def test_generateFilenames(self):
+    test_filenames = ['/path/to/file', '/path/to/@3.record']
+    result = offline_eval._generate_filenames(test_filenames)
+    self.assertEqual(result, [
+        '/path/to/file', '/path/to/-00000-of-00003.record',
+        '/path/to/-00001-of-00003.record', '/path/to/-00002-of-00003.record'
+    ])
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/metrics/oid_challenge_evaluation.py b/metrics/oid_challenge_evaluation.py
new file mode 100644
index 0000000..25f553a
--- /dev/null
+++ b/metrics/oid_challenge_evaluation.py
@@ -0,0 +1,149 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+r"""Runs evaluation using OpenImages groundtruth and predictions.
+
+Uses the Open Images Challenge 2018 and 2019 metrics.
+
+Example usage:
+python models/research/object_detection/metrics/oid_challenge_evaluation.py \
+  --input_annotations_boxes=/path/to/input/annotations-human-bbox.csv \
+  --input_annotations_labels=/path/to/input/annotations-label.csv \
+  --input_class_labelmap=/path/to/input/class_labelmap.pbtxt \
+  --input_predictions=/path/to/input/predictions.csv \
+  --output_metrics=/path/to/output/metric.csv \
+  --input_annotations_segm=[/path/to/input/annotations-human-mask.csv]
+
+If the optional --input_annotations_segm flag is provided, a Mask column is
+also expected in the corresponding CSVs.
+
+CSVs with bounding box annotations, instance segmentations and image labels
+can be downloaded from the Open Images Challenge website:
+https://storage.googleapis.com/openimages/web/challenge.html
+The formats of the input CSVs and the metrics themselves are described on the
+challenge website as well.
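+
+In particular, the predictions CSV is expected to contain at least the
+columns consumed by oid_challenge_evaluation_utils: ImageID, LabelName and
+Score, plus either box coordinates (XMin, XMax, YMin, YMax) or, for instance
+segmentation, Mask together with ImageWidth and ImageHeight.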
+ + +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import logging + +from absl import app +from absl import flags +import pandas as pd +from google.protobuf import text_format + +from object_detection.metrics import io_utils +from object_detection.metrics import oid_challenge_evaluation_utils as utils +from object_detection.protos import string_int_label_map_pb2 +from object_detection.utils import object_detection_evaluation + +flags.DEFINE_string('input_annotations_boxes', None, + 'File with groundtruth boxes annotations.') +flags.DEFINE_string('input_annotations_labels', None, + 'File with groundtruth labels annotations.') +flags.DEFINE_string( + 'input_predictions', None, + """File with detection predictions; NOTE: no postprocessing is applied in the evaluation script.""" +) +flags.DEFINE_string('input_class_labelmap', None, + 'Open Images Challenge labelmap.') +flags.DEFINE_string('output_metrics', None, 'Output file with csv metrics.') +flags.DEFINE_string( + 'input_annotations_segm', None, + 'File with groundtruth instance segmentation annotations [OPTIONAL].') + +FLAGS = flags.FLAGS + + +def _load_labelmap(labelmap_path): + """Loads labelmap from the labelmap path. + + Args: + labelmap_path: Path to the labelmap. + + Returns: + A dictionary mapping class name to class numerical id + A list with dictionaries, one dictionary per category. + """ + + label_map = string_int_label_map_pb2.StringIntLabelMap() + with open(labelmap_path, 'r') as fid: + label_map_string = fid.read() + text_format.Merge(label_map_string, label_map) + labelmap_dict = {} + categories = [] + for item in label_map.item: + labelmap_dict[item.name] = item.id + categories.append({'id': item.id, 'name': item.name}) + return labelmap_dict, categories + + +def main(unused_argv): + flags.mark_flag_as_required('input_annotations_boxes') + flags.mark_flag_as_required('input_annotations_labels') + flags.mark_flag_as_required('input_predictions') + flags.mark_flag_as_required('input_class_labelmap') + flags.mark_flag_as_required('output_metrics') + + all_location_annotations = pd.read_csv(FLAGS.input_annotations_boxes) + all_label_annotations = pd.read_csv(FLAGS.input_annotations_labels) + all_label_annotations.rename( + columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True) + + is_instance_segmentation_eval = False + if FLAGS.input_annotations_segm: + is_instance_segmentation_eval = True + all_segm_annotations = pd.read_csv(FLAGS.input_annotations_segm) + # Note: this part is unstable as it requires the float point numbers in both + # csvs are exactly the same; + # Will be replaced by more stable solution: merge on LabelName and ImageID + # and filter down by IoU. 
+    all_location_annotations = utils.merge_boxes_and_masks(
+        all_location_annotations, all_segm_annotations)
+  all_annotations = pd.concat([all_location_annotations, all_label_annotations])
+
+  class_label_map, categories = _load_labelmap(FLAGS.input_class_labelmap)
+  challenge_evaluator = (
+      object_detection_evaluation.OpenImagesChallengeEvaluator(
+          categories, evaluate_masks=is_instance_segmentation_eval))
+
+  all_predictions = pd.read_csv(FLAGS.input_predictions)
+  images_processed = 0
+  for _, groundtruth in enumerate(all_annotations.groupby('ImageID')):
+    logging.info('Processing image %d', images_processed)
+    image_id, image_groundtruth = groundtruth
+    groundtruth_dictionary = utils.build_groundtruth_dictionary(
+        image_groundtruth, class_label_map)
+    challenge_evaluator.add_single_ground_truth_image_info(
+        image_id, groundtruth_dictionary)
+
+    prediction_dictionary = utils.build_predictions_dictionary(
+        all_predictions.loc[all_predictions['ImageID'] == image_id],
+        class_label_map)
+    challenge_evaluator.add_single_detected_image_info(image_id,
+                                                       prediction_dictionary)
+    images_processed += 1
+
+  metrics = challenge_evaluator.evaluate()
+
+  with open(FLAGS.output_metrics, 'w') as fid:
+    io_utils.write_csv(fid, metrics)
+
+
+if __name__ == '__main__':
+  app.run(main)
diff --git a/metrics/oid_challenge_evaluation_utils.py b/metrics/oid_challenge_evaluation_utils.py
new file mode 100644
index 0000000..64893db
--- /dev/null
+++ b/metrics/oid_challenge_evaluation_utils.py
@@ -0,0 +1,197 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+r"""Converts data from CSV to the OpenImagesDetectionChallengeEvaluator format."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import base64
+import zlib
+
+import numpy as np
+import pandas as pd
+from pycocotools import mask as coco_mask
+
+from object_detection.core import standard_fields
+
+
+def _to_normalized_box(mask_np):
+  """Computes the normalized enclosing box of a single binary mask.
+
+  Args:
+    mask_np: 2-D np.ndarray of size HxW containing one binary mask.
+
+  Returns:
+    a np.ndarray of size 4 with the normalized coordinates
+    [YMin, XMin, YMax, XMax] of the axis-parallel box enclosing the mask,
+    or all zeros if the mask is empty.
+  """
+  coord1, coord2 = np.nonzero(mask_np)
+  if coord1.size > 0:
+    ymin = float(min(coord1)) / mask_np.shape[0]
+    ymax = float(max(coord1) + 1) / mask_np.shape[0]
+    xmin = float(min(coord2)) / mask_np.shape[1]
+    xmax = float((max(coord2) + 1)) / mask_np.shape[1]
+
+    return np.array([ymin, xmin, ymax, xmax])
+  else:
+    return np.array([0.0, 0.0, 0.0, 0.0])
+
+
+def _decode_raw_data_into_masks_and_boxes(segments, image_widths,
+                                          image_heights):
+  """Decodes binary segmentation masks into np.arrays and boxes.
+
+  Args:
+    segments: pandas Series object containing either None entries, or strings
+      with base64-encoded, zlib-compressed, COCO RLE-encoded binary masks.
+      All masks are expected to be the same size.
+    image_widths: pandas Series of mask widths.
+    image_heights: pandas Series of mask heights.
+
+  Returns:
+    a tuple of a np.ndarray of size NxHxW holding the decoded masks, where H
+    and W are determined from the encoded masks, and a np.ndarray of size Nx4
+    holding the corresponding normalized enclosing boxes; for None entries,
+    zero masks and boxes are created. If the input contains only None values,
+    H=1, W=1.
+  """
+  segment_masks = []
+  segment_boxes = []
+  ind = segments.first_valid_index()
+  if ind is not None:
+    size = [int(image_heights[ind]), int(image_widths[ind])]
+  else:
+    # It does not matter which size we pick since no masks will ever be
+    # evaluated.
+    return np.zeros((segments.shape[0], 1, 1), dtype=np.uint8), np.zeros(
+        (segments.shape[0], 4), dtype=np.float32)
+
+  for segment, im_width, im_height in zip(segments, image_widths,
+                                          image_heights):
+    if pd.isnull(segment):
+      segment_masks.append(np.zeros([1, size[0], size[1]], dtype=np.uint8))
+      segment_boxes.append(np.expand_dims(np.array([0.0, 0.0, 0.0, 0.0]), 0))
+    else:
+      # Inverts the encoding steps: base64 decode, zlib decompress, then
+      # COCO RLE decode.
+      compressed_mask = base64.b64decode(segment)
+      rle_encoded_mask = zlib.decompress(compressed_mask)
+      decoding_dict = {
+          'size': [im_height, im_width],
+          'counts': rle_encoded_mask
+      }
+      mask_tensor = coco_mask.decode(decoding_dict)
+
+      segment_masks.append(np.expand_dims(mask_tensor, 0))
+      segment_boxes.append(np.expand_dims(_to_normalized_box(mask_tensor), 0))
+
+  return np.concatenate(
+      segment_masks, axis=0), np.concatenate(
+          segment_boxes, axis=0)
+
+
+def merge_boxes_and_masks(box_data, mask_data):
+  """Outer-joins box and mask DataFrames on image, label and box columns."""
+  return pd.merge(
+      box_data,
+      mask_data,
+      how='outer',
+      on=['LabelName', 'ImageID', 'XMin', 'XMax', 'YMin', 'YMax', 'IsGroupOf'])
+
+
+def build_groundtruth_dictionary(data, class_label_map):
+  """Builds a groundtruth dictionary from groundtruth data in CSV file.
+
+  Args:
+    data: Pandas DataFrame with the groundtruth data for a single image.
+    class_label_map: Class labelmap from string label name to an integer.
+
+  Returns:
+    A dictionary with keys suitable for passing to
+    OpenImagesDetectionChallengeEvaluator.add_single_ground_truth_image_info:
+      standard_fields.InputDataFields.groundtruth_boxes: float32 numpy array
+        of shape [num_boxes, 4] containing `num_boxes` groundtruth boxes of
+        the format [ymin, xmin, ymax, xmax] in absolute image coordinates.
+      standard_fields.InputDataFields.groundtruth_classes: integer numpy array
+        of shape [num_boxes] containing 1-indexed groundtruth classes for the
+        boxes.
+      standard_fields.InputDataFields.groundtruth_image_classes: integer 1D
+        numpy array containing all classes for which labels are verified.
+      standard_fields.InputDataFields.groundtruth_group_of: Optional length
+        M numpy boolean array denoting whether a groundtruth box contains a
+        group of instances.
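+      standard_fields.InputDataFields.groundtruth_instance_masks: Optional
+        uint8 numpy array of shape [num_boxes, H, W] with instance masks,
+        present only when the input data contains a 'Mask' column.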
+ """ + data_location = data[data.XMin.notnull()] + data_labels = data[data.ConfidenceImageLabel.notnull()] + + dictionary = { + standard_fields.InputDataFields.groundtruth_boxes: + data_location[['YMin', 'XMin', 'YMax', 'XMax']].as_matrix(), + standard_fields.InputDataFields.groundtruth_classes: + data_location['LabelName'].map(lambda x: class_label_map[x] + ).as_matrix(), + standard_fields.InputDataFields.groundtruth_group_of: + data_location['IsGroupOf'].as_matrix().astype(int), + standard_fields.InputDataFields.groundtruth_image_classes: + data_labels['LabelName'].map(lambda x: class_label_map[x] + ).as_matrix(), + } + + if 'Mask' in data_location: + segments, _ = _decode_raw_data_into_masks_and_boxes( + data_location['Mask'], data_location['ImageWidth'], + data_location['ImageHeight']) + dictionary[ + standard_fields.InputDataFields.groundtruth_instance_masks] = segments + + return dictionary + + +def build_predictions_dictionary(data, class_label_map): + """Builds a predictions dictionary from predictions data in CSV file. + + Args: + data: Pandas DataFrame with the predictions data for a single image. + class_label_map: Class labelmap from string label name to an integer. + + Returns: + Dictionary with keys suitable for passing to + OpenImagesDetectionChallengeEvaluator.add_single_detected_image_info: + standard_fields.DetectionResultFields.detection_boxes: float32 numpy + array of shape [num_boxes, 4] containing `num_boxes` detection boxes + of the format [ymin, xmin, ymax, xmax] in absolute image coordinates. + standard_fields.DetectionResultFields.detection_scores: float32 numpy + array of shape [num_boxes] containing detection scores for the boxes. + standard_fields.DetectionResultFields.detection_classes: integer numpy + array of shape [num_boxes] containing 1-indexed detection classes for + the boxes. + + """ + dictionary = { + standard_fields.DetectionResultFields.detection_classes: + data['LabelName'].map(lambda x: class_label_map[x]).as_matrix(), + standard_fields.DetectionResultFields.detection_scores: + data['Score'].as_matrix() + } + + if 'Mask' in data: + segments, boxes = _decode_raw_data_into_masks_and_boxes( + data['Mask'], data['ImageWidth'], data['ImageHeight']) + dictionary[standard_fields.DetectionResultFields.detection_masks] = segments + dictionary[standard_fields.DetectionResultFields.detection_boxes] = boxes + else: + dictionary[standard_fields.DetectionResultFields.detection_boxes] = data[[ + 'YMin', 'XMin', 'YMax', 'XMax' + ]].as_matrix() + + return dictionary diff --git a/metrics/oid_challenge_evaluation_utils_test.py b/metrics/oid_challenge_evaluation_utils_test.py new file mode 100644 index 0000000..f3894cb --- /dev/null +++ b/metrics/oid_challenge_evaluation_utils_test.py @@ -0,0 +1,306 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for oid_od_challenge_evaluation_util.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import base64 +import zlib + +import numpy as np +import pandas as pd +from pycocotools import mask as coco_mask +import tensorflow as tf + +from object_detection.core import standard_fields +from object_detection.metrics import oid_challenge_evaluation_utils as utils + + +def encode_mask(mask_to_encode): + """Encodes a binary mask into the Kaggle challenge text format. + + The encoding is done in three stages: + - COCO RLE-encoding, + - zlib compression, + - base64 encoding (to use as entry in csv file). + + Args: + mask_to_encode: binary np.ndarray of dtype bool and 2d shape. + + Returns: + A (base64) text string of the encoded mask. + """ + mask_to_encode = np.squeeze(mask_to_encode) + mask_to_encode = mask_to_encode.reshape(mask_to_encode.shape[0], + mask_to_encode.shape[1], 1) + mask_to_encode = mask_to_encode.astype(np.uint8) + mask_to_encode = np.asfortranarray(mask_to_encode) + encoded_mask = coco_mask.encode(mask_to_encode)[0]['counts'] + compressed_mask = zlib.compress(encoded_mask, zlib.Z_BEST_COMPRESSION) + base64_mask = base64.b64encode(compressed_mask) + return base64_mask + + +class OidUtilTest(tf.test.TestCase): + + def testMaskToNormalizedBox(self): + mask_np = np.array([[0, 0, 0, 0], [0, 1, 0, 0], [0, 1, 0, 0], [0, 0, 0, 0]]) + box = utils._to_normalized_box(mask_np) + self.assertAllEqual(np.array([0.25, 0.25, 0.75, 0.5]), box) + mask_np = np.array([[0, 0, 0, 0], [0, 1, 0, 1], [0, 1, 0, 1], [0, 1, 1, 1]]) + box = utils._to_normalized_box(mask_np) + self.assertAllEqual(np.array([0.25, 0.25, 1.0, 1.0]), box) + mask_np = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]) + box = utils._to_normalized_box(mask_np) + self.assertAllEqual(np.array([0.0, 0.0, 0.0, 0.0]), box) + + def testDecodeToTensors(self): + mask1 = np.array([[0, 0, 1, 1], [0, 0, 1, 1], [0, 0, 0, 0]], dtype=np.uint8) + mask2 = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], dtype=np.uint8) + + encoding1 = encode_mask(mask1) + encoding2 = encode_mask(mask2) + + vals = pd.Series([encoding1, encoding2]) + image_widths = pd.Series([mask1.shape[1], mask2.shape[1]]) + image_heights = pd.Series([mask1.shape[0], mask2.shape[0]]) + + segm, bbox = utils._decode_raw_data_into_masks_and_boxes( + vals, image_widths, image_heights) + expected_segm = np.concatenate( + [np.expand_dims(mask1, 0), + np.expand_dims(mask2, 0)], axis=0) + expected_bbox = np.array([[0.0, 0.5, 2.0 / 3.0, 1.0], [0, 0, 0, 0]]) + self.assertAllEqual(expected_segm, segm) + self.assertAllEqual(expected_bbox, bbox) + + def testDecodeToTensorsNoMasks(self): + vals = pd.Series([None, None]) + image_widths = pd.Series([None, None]) + image_heights = pd.Series([None, None]) + segm, bbox = utils._decode_raw_data_into_masks_and_boxes( + vals, image_widths, image_heights) + self.assertAllEqual(np.zeros((2, 1, 1), dtype=np.uint8), segm) + self.assertAllEqual(np.zeros((2, 4), dtype=np.float32), bbox) + + +class OidChallengeEvaluationUtilTest(tf.test.TestCase): + + def testBuildGroundtruthDictionaryBoxes(self): + np_data = pd.DataFrame( + [['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.3, 0.5, 0.6, 1, None], + ['fe58ec1b06db2bb7', '/m/02gy9n', 0.1, 0.2, 0.3, 0.4, 0, None], + ['fe58ec1b06db2bb7', '/m/04bcr3', None, None, None, None, None, 1], + ['fe58ec1b06db2bb7', '/m/083vt', None, None, None, None, None, 
0], + ['fe58ec1b06db2bb7', '/m/02gy9n', None, None, None, None, None, 1]], + columns=[ + 'ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax', 'IsGroupOf', + 'ConfidenceImageLabel' + ]) + class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3} + groundtruth_dictionary = utils.build_groundtruth_dictionary( + np_data, class_label_map) + + self.assertIn(standard_fields.InputDataFields.groundtruth_boxes, + groundtruth_dictionary) + self.assertIn(standard_fields.InputDataFields.groundtruth_classes, + groundtruth_dictionary) + self.assertIn(standard_fields.InputDataFields.groundtruth_group_of, + groundtruth_dictionary) + self.assertIn(standard_fields.InputDataFields.groundtruth_image_classes, + groundtruth_dictionary) + + self.assertAllEqual( + np.array([1, 3]), groundtruth_dictionary[ + standard_fields.InputDataFields.groundtruth_classes]) + self.assertAllEqual( + np.array([1, 0]), groundtruth_dictionary[ + standard_fields.InputDataFields.groundtruth_group_of]) + + expected_boxes_data = np.array([[0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2]]) + + self.assertNDArrayNear( + expected_boxes_data, groundtruth_dictionary[ + standard_fields.InputDataFields.groundtruth_boxes], 1e-5) + self.assertAllEqual( + np.array([1, 2, 3]), groundtruth_dictionary[ + standard_fields.InputDataFields.groundtruth_image_classes]) + + def testBuildPredictionDictionaryBoxes(self): + np_data = pd.DataFrame( + [['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.3, 0.5, 0.6, 0.1], + ['fe58ec1b06db2bb7', '/m/02gy9n', 0.1, 0.2, 0.3, 0.4, 0.2], + ['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.1, 0.2, 0.3, 0.3]], + columns=[ + 'ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax', 'Score' + ]) + class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3} + prediction_dictionary = utils.build_predictions_dictionary( + np_data, class_label_map) + + self.assertIn(standard_fields.DetectionResultFields.detection_boxes, + prediction_dictionary) + self.assertIn(standard_fields.DetectionResultFields.detection_classes, + prediction_dictionary) + self.assertIn(standard_fields.DetectionResultFields.detection_scores, + prediction_dictionary) + + self.assertAllEqual( + np.array([1, 3, 1]), prediction_dictionary[ + standard_fields.DetectionResultFields.detection_classes]) + expected_boxes_data = np.array([[0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2], + [0.2, 0.0, 0.3, 0.1]]) + self.assertNDArrayNear( + expected_boxes_data, prediction_dictionary[ + standard_fields.DetectionResultFields.detection_boxes], 1e-5) + self.assertNDArrayNear( + np.array([0.1, 0.2, 0.3]), prediction_dictionary[ + standard_fields.DetectionResultFields.detection_scores], 1e-5) + + def testBuildGroundtruthDictionaryMasks(self): + mask1 = np.array([[0, 0, 1, 1], [0, 0, 1, 1], [0, 0, 0, 0], [0, 0, 0, 0]], + dtype=np.uint8) + mask2 = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], + dtype=np.uint8) + + encoding1 = encode_mask(mask1) + encoding2 = encode_mask(mask2) + + np_data = pd.DataFrame( + [[ + 'fe58ec1b06db2bb7', mask1.shape[1], mask1.shape[0], '/m/04bcr3', + 0.0, 0.3, 0.5, 0.6, 0, None, encoding1 + ], + [ + 'fe58ec1b06db2bb7', None, None, '/m/02gy9n', 0.1, 0.2, 0.3, 0.4, 1, + None, None + ], + [ + 'fe58ec1b06db2bb7', mask2.shape[1], mask2.shape[0], '/m/02gy9n', + 0.5, 0.6, 0.8, 0.9, 0, None, encoding2 + ], + [ + 'fe58ec1b06db2bb7', None, None, '/m/04bcr3', None, None, None, + None, None, 1, None + ], + [ + 'fe58ec1b06db2bb7', None, None, '/m/083vt', None, None, None, None, + None, 0, None + ], + [ + 'fe58ec1b06db2bb7', None, None, '/m/02gy9n', None, 
None, None, + None, None, 1, None + ]], + columns=[ + 'ImageID', 'ImageWidth', 'ImageHeight', 'LabelName', 'XMin', 'XMax', + 'YMin', 'YMax', 'IsGroupOf', 'ConfidenceImageLabel', 'Mask' + ]) + class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3} + groundtruth_dictionary = utils.build_groundtruth_dictionary( + np_data, class_label_map) + self.assertIn(standard_fields.InputDataFields.groundtruth_boxes, + groundtruth_dictionary) + self.assertIn(standard_fields.InputDataFields.groundtruth_classes, + groundtruth_dictionary) + self.assertIn(standard_fields.InputDataFields.groundtruth_group_of, + groundtruth_dictionary) + self.assertIn(standard_fields.InputDataFields.groundtruth_image_classes, + groundtruth_dictionary) + self.assertIn(standard_fields.InputDataFields.groundtruth_instance_masks, + groundtruth_dictionary) + self.assertAllEqual( + np.array([1, 3, 3]), groundtruth_dictionary[ + standard_fields.InputDataFields.groundtruth_classes]) + self.assertAllEqual( + np.array([0, 1, 0]), groundtruth_dictionary[ + standard_fields.InputDataFields.groundtruth_group_of]) + + expected_boxes_data = np.array([[0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2], + [0.8, 0.5, 0.9, 0.6]]) + + self.assertNDArrayNear( + expected_boxes_data, groundtruth_dictionary[ + standard_fields.InputDataFields.groundtruth_boxes], 1e-5) + self.assertAllEqual( + np.array([1, 2, 3]), groundtruth_dictionary[ + standard_fields.InputDataFields.groundtruth_image_classes]) + + expected_segm = np.concatenate([ + np.expand_dims(mask1, 0), + np.zeros((1, 4, 4), dtype=np.uint8), + np.expand_dims(mask2, 0) + ], + axis=0) + self.assertAllEqual( + expected_segm, groundtruth_dictionary[ + standard_fields.InputDataFields.groundtruth_instance_masks]) + + def testBuildPredictionDictionaryMasks(self): + mask1 = np.array([[0, 0, 1, 1], [0, 0, 1, 1], [0, 0, 0, 0], [0, 0, 0, 0]], + dtype=np.uint8) + mask2 = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], + dtype=np.uint8) + + encoding1 = encode_mask(mask1) + encoding2 = encode_mask(mask2) + + np_data = pd.DataFrame([[ + 'fe58ec1b06db2bb7', mask1.shape[1], mask1.shape[0], '/m/04bcr3', + encoding1, 0.8 + ], + [ + 'fe58ec1b06db2bb7', mask2.shape[1], + mask2.shape[0], '/m/02gy9n', encoding2, 0.6 + ]], + columns=[ + 'ImageID', 'ImageWidth', 'ImageHeight', + 'LabelName', 'Mask', 'Score' + ]) + class_label_map = {'/m/04bcr3': 1, '/m/02gy9n': 3} + prediction_dictionary = utils.build_predictions_dictionary( + np_data, class_label_map) + + self.assertIn(standard_fields.DetectionResultFields.detection_boxes, + prediction_dictionary) + self.assertIn(standard_fields.DetectionResultFields.detection_classes, + prediction_dictionary) + self.assertIn(standard_fields.DetectionResultFields.detection_scores, + prediction_dictionary) + self.assertIn(standard_fields.DetectionResultFields.detection_masks, + prediction_dictionary) + + self.assertAllEqual( + np.array([1, 3]), prediction_dictionary[ + standard_fields.DetectionResultFields.detection_classes]) + + expected_boxes_data = np.array([[0.0, 0.5, 0.5, 1.0], [0, 0, 0, 0]]) + self.assertNDArrayNear( + expected_boxes_data, prediction_dictionary[ + standard_fields.DetectionResultFields.detection_boxes], 1e-5) + self.assertNDArrayNear( + np.array([0.8, 0.6]), prediction_dictionary[ + standard_fields.DetectionResultFields.detection_scores], 1e-5) + expected_segm = np.concatenate( + [np.expand_dims(mask1, 0), + np.expand_dims(mask2, 0)], axis=0) + self.assertAllEqual( + expected_segm, prediction_dictionary[ + 
+            standard_fields.DetectionResultFields.detection_masks])
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/metrics/oid_vrd_challenge_evaluation.py b/metrics/oid_vrd_challenge_evaluation.py
new file mode 100644
index 0000000..7a56c6b
--- /dev/null
+++ b/metrics/oid_vrd_challenge_evaluation.py
@@ -0,0 +1,154 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+r"""Runs evaluation using OpenImages groundtruth and predictions.
+
+Example usage:
+python \
+models/research/object_detection/metrics/oid_vrd_challenge_evaluation.py \
+  --input_annotations_vrd=/path/to/input/annotations-human-bbox.csv \
+  --input_annotations_labels=/path/to/input/annotations-label.csv \
+  --input_class_labelmap=/path/to/input/class_labelmap.pbtxt \
+  --input_relationship_labelmap=/path/to/input/relationship_labelmap.pbtxt \
+  --input_predictions=/path/to/input/predictions.csv \
+  --output_metrics=/path/to/output/metric.csv
+
+CSVs with bounding box annotations and image labels (including the image URLs)
+can be downloaded from the Open Images Challenge website:
+https://storage.googleapis.com/openimages/web/challenge.html
+The formats of the input CSVs and the metrics themselves are described on the
+challenge website.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import pandas as pd
+from google.protobuf import text_format
+
+from object_detection.metrics import io_utils
+from object_detection.metrics import oid_vrd_challenge_evaluation_utils as utils
+from object_detection.protos import string_int_label_map_pb2
+from object_detection.utils import vrd_evaluation
+
+
+def _load_labelmap(labelmap_path):
+  """Loads labelmap from the labelmap path.
+
+  Args:
+    labelmap_path: Path to the labelmap.
+
+  Returns:
+    A dictionary mapping class name to class numerical id.
+  """
+
+  label_map = string_int_label_map_pb2.StringIntLabelMap()
+  with open(labelmap_path, 'r') as fid:
+    label_map_string = fid.read()
+    text_format.Merge(label_map_string, label_map)
+  labelmap_dict = {}
+  for item in label_map.item:
+    labelmap_dict[item.name] = item.id
+  return labelmap_dict
+
+
+def _swap_labelmap_dict(labelmap_dict):
+  """Swaps keys and values in a labelmap dictionary.
+
+  Args:
+    labelmap_dict: Input dictionary.
+
+  Returns:
+    A dictionary mapping class numerical id back to class name (the inverse
+    of the input mapping).
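+
+  For example, {'/m/04bcr3': 1} becomes {1: '/m/04bcr3'}.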
+ """ + return dict((v, k) for k, v in labelmap_dict.iteritems()) + + +def main(parsed_args): + all_box_annotations = pd.read_csv(parsed_args.input_annotations_boxes) + all_label_annotations = pd.read_csv(parsed_args.input_annotations_labels) + all_annotations = pd.concat([all_box_annotations, all_label_annotations]) + + class_label_map = _load_labelmap(parsed_args.input_class_labelmap) + relationship_label_map = _load_labelmap( + parsed_args.input_relationship_labelmap) + + relation_evaluator = vrd_evaluation.VRDRelationDetectionEvaluator() + phrase_evaluator = vrd_evaluation.VRDPhraseDetectionEvaluator() + + for _, groundtruth in enumerate(all_annotations.groupby('ImageID')): + image_id, image_groundtruth = groundtruth + groundtruth_dictionary = utils.build_groundtruth_vrd_dictionary( + image_groundtruth, class_label_map, relationship_label_map) + + relation_evaluator.add_single_ground_truth_image_info( + image_id, groundtruth_dictionary) + phrase_evaluator.add_single_ground_truth_image_info(image_id, + groundtruth_dictionary) + + all_predictions = pd.read_csv(parsed_args.input_predictions) + for _, prediction_data in enumerate(all_predictions.groupby('ImageID')): + image_id, image_predictions = prediction_data + prediction_dictionary = utils.build_predictions_vrd_dictionary( + image_predictions, class_label_map, relationship_label_map) + + relation_evaluator.add_single_detected_image_info(image_id, + prediction_dictionary) + phrase_evaluator.add_single_detected_image_info(image_id, + prediction_dictionary) + + relation_metrics = relation_evaluator.evaluate( + relationships=_swap_labelmap_dict(relationship_label_map)) + phrase_metrics = phrase_evaluator.evaluate( + relationships=_swap_labelmap_dict(relationship_label_map)) + + with open(parsed_args.output_metrics, 'w') as fid: + io_utils.write_csv(fid, relation_metrics) + io_utils.write_csv(fid, phrase_metrics) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser( + description= + 'Evaluate Open Images Visual Relationship Detection predictions.') + parser.add_argument( + '--input_annotations_vrd', + required=True, + help='File with groundtruth vrd annotations.') + parser.add_argument( + '--input_annotations_labels', + required=True, + help='File with groundtruth labels annotations') + parser.add_argument( + '--input_predictions', + required=True, + help="""File with detection predictions; NOTE: no postprocessing is + applied in the evaluation script.""") + parser.add_argument( + '--input_class_labelmap', + required=True, + help="""OpenImages Challenge labelmap; note: it is expected to include + attributes.""") + parser.add_argument( + '--input_relationship_labelmap', + required=True, + help="""OpenImages Challenge relationship labelmap.""") + parser.add_argument( + '--output_metrics', required=True, help='Output file with csv metrics') + + args = parser.parse_args() + main(args) diff --git a/metrics/oid_vrd_challenge_evaluation_utils.py b/metrics/oid_vrd_challenge_evaluation_utils.py new file mode 100644 index 0000000..34be018 --- /dev/null +++ b/metrics/oid_vrd_challenge_evaluation_utils.py @@ -0,0 +1,125 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +r"""Converts data from CSV format to the VRDDetectionEvaluator format.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +from object_detection.core import standard_fields +from object_detection.utils import vrd_evaluation + + +def build_groundtruth_vrd_dictionary(data, class_label_map, + relationship_label_map): + """Builds a groundtruth dictionary from groundtruth data in CSV file. + + Args: + data: Pandas DataFrame with the groundtruth data for a single image. + class_label_map: Class labelmap from string label name to an integer. + relationship_label_map: Relationship type labelmap from string name to an + integer. + + Returns: + A dictionary with keys suitable for passing to + VRDDetectionEvaluator.add_single_ground_truth_image_info: + standard_fields.InputDataFields.groundtruth_boxes: A numpy array + of structures with the shape [M, 1], representing M tuples, each tuple + containing the same number of named bounding boxes. + Each box is of the format [y_min, x_min, y_max, x_max] (see + datatype vrd_box_data_type, single_box_data_type above). + standard_fields.InputDataFields.groundtruth_classes: A numpy array of + structures shape [M, 1], representing the class labels of the + corresponding bounding boxes and possibly additional classes (see + datatype label_data_type above). + standard_fields.InputDataFields.verified_labels: numpy array + of shape [K] containing verified labels. + """ + data_boxes = data[data.LabelName.isnull()] + data_labels = data[data.LabelName1.isnull()] + + boxes = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.vrd_box_data_type) + boxes['subject'] = data_boxes[['YMin1', 'XMin1', 'YMax1', + 'XMax1']].as_matrix() + boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].as_matrix() + + labels = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.label_data_type) + labels['subject'] = data_boxes['LabelName1'].map( + lambda x: class_label_map[x]).as_matrix() + labels['object'] = data_boxes['LabelName2'].map( + lambda x: class_label_map[x]).as_matrix() + labels['relation'] = data_boxes['RelationshipLabel'].map( + lambda x: relationship_label_map[x]).as_matrix() + + return { + standard_fields.InputDataFields.groundtruth_boxes: + boxes, + standard_fields.InputDataFields.groundtruth_classes: + labels, + standard_fields.InputDataFields.groundtruth_image_classes: + data_labels['LabelName'].map(lambda x: class_label_map[x]) + .as_matrix(), + } + + +def build_predictions_vrd_dictionary(data, class_label_map, + relationship_label_map): + """Builds a predictions dictionary from predictions data in CSV file. + + Args: + data: Pandas DataFrame with the predictions data for a single image. + class_label_map: Class labelmap from string label name to an integer. + relationship_label_map: Relationship type labelmap from string name to an + integer. 
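+
+  The DataFrame is expected to contain the columns consumed below: LabelName1
+  and LabelName2 for subject and object classes, the corresponding box
+  coordinates (XMin1, XMax1, YMin1, YMax1 and XMin2, XMax2, YMin2, YMax2),
+  RelationshipLabel and Score.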
+ + Returns: + Dictionary with keys suitable for passing to + VRDDetectionEvaluator.add_single_detected_image_info: + standard_fields.DetectionResultFields.detection_boxes: A numpy array of + structures with shape [N, 1], representing N tuples, each tuple + containing the same number of named bounding boxes. + Each box is of the format [y_min, x_min, y_max, x_max] (as an example + see datatype vrd_box_data_type, single_box_data_type above). + standard_fields.DetectionResultFields.detection_scores: float32 numpy + array of shape [N] containing detection scores for the boxes. + standard_fields.DetectionResultFields.detection_classes: A numpy array + of structures shape [N, 1], representing the class labels of the + corresponding bounding boxes and possibly additional classes (see + datatype label_data_type above). + """ + data_boxes = data + + boxes = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.vrd_box_data_type) + boxes['subject'] = data_boxes[['YMin1', 'XMin1', 'YMax1', + 'XMax1']].as_matrix() + boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].as_matrix() + + labels = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.label_data_type) + labels['subject'] = data_boxes['LabelName1'].map( + lambda x: class_label_map[x]).as_matrix() + labels['object'] = data_boxes['LabelName2'].map( + lambda x: class_label_map[x]).as_matrix() + labels['relation'] = data_boxes['RelationshipLabel'].map( + lambda x: relationship_label_map[x]).as_matrix() + + return { + standard_fields.DetectionResultFields.detection_boxes: + boxes, + standard_fields.DetectionResultFields.detection_classes: + labels, + standard_fields.DetectionResultFields.detection_scores: + data_boxes['Score'].as_matrix() + } diff --git a/metrics/oid_vrd_challenge_evaluation_utils_test.py b/metrics/oid_vrd_challenge_evaluation_utils_test.py new file mode 100644 index 0000000..7381828 --- /dev/null +++ b/metrics/oid_vrd_challenge_evaluation_utils_test.py @@ -0,0 +1,149 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for oid_vrd_challenge_evaluation_utils.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import pandas as pd +import tensorflow as tf +from object_detection.core import standard_fields +from object_detection.metrics import oid_vrd_challenge_evaluation_utils as utils +from object_detection.utils import vrd_evaluation + + +class OidVrdChallengeEvaluationUtilsTest(tf.test.TestCase): + + def testBuildGroundtruthDictionary(self): + np_data = pd.DataFrame( + [[ + 'fe58ec1b06db2bb7', '/m/04bcr3', '/m/083vt', 0.0, 0.3, 0.5, 0.6, + 0.0, 0.3, 0.5, 0.6, 'is', None, None + ], [ + 'fe58ec1b06db2bb7', '/m/04bcr3', '/m/02gy9n', 0.0, 0.3, 0.5, 0.6, + 0.1, 0.2, 0.3, 0.4, 'under', None, None + ], [ + 'fe58ec1b06db2bb7', '/m/04bcr3', '/m/083vt', 0.0, 0.1, 0.2, 0.3, + 0.0, 0.1, 0.2, 0.3, 'is', None, None + ], [ + 'fe58ec1b06db2bb7', '/m/083vt', '/m/04bcr3', 0.1, 0.2, 0.3, 0.4, + 0.5, 0.6, 0.7, 0.8, 'at', None, None + ], [ + 'fe58ec1b06db2bb7', None, None, None, None, None, None, None, None, + None, None, None, '/m/04bcr3', 1.0 + ], [ + 'fe58ec1b06db2bb7', None, None, None, None, None, None, None, None, + None, None, None, '/m/083vt', 0.0 + ], [ + 'fe58ec1b06db2bb7', None, None, None, None, None, None, None, None, + None, None, None, '/m/02gy9n', 0.0 + ]], + columns=[ + 'ImageID', 'LabelName1', 'LabelName2', 'XMin1', 'XMax1', 'YMin1', + 'YMax1', 'XMin2', 'XMax2', 'YMin2', 'YMax2', 'RelationshipLabel', + 'LabelName', 'Confidence' + ]) + class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3} + relationship_label_map = {'is': 1, 'under': 2, 'at': 3} + groundtruth_dictionary = utils.build_groundtruth_vrd_dictionary( + np_data, class_label_map, relationship_label_map) + + self.assertTrue(standard_fields.InputDataFields.groundtruth_boxes in + groundtruth_dictionary) + self.assertTrue(standard_fields.InputDataFields.groundtruth_classes in + groundtruth_dictionary) + self.assertTrue(standard_fields.InputDataFields.groundtruth_image_classes in + groundtruth_dictionary) + + self.assertAllEqual( + np.array( + [(1, 2, 1), (1, 3, 2), (1, 2, 1), (2, 1, 3)], + dtype=vrd_evaluation.label_data_type), groundtruth_dictionary[ + standard_fields.InputDataFields.groundtruth_classes]) + expected_vrd_data = np.array( + [ + ([0.5, 0.0, 0.6, 0.3], [0.5, 0.0, 0.6, 0.3]), + ([0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2]), + ([0.2, 0.0, 0.3, 0.1], [0.2, 0.0, 0.3, 0.1]), + ([0.3, 0.1, 0.4, 0.2], [0.7, 0.5, 0.8, 0.6]), + ], + dtype=vrd_evaluation.vrd_box_data_type) + for field in expected_vrd_data.dtype.fields: + self.assertNDArrayNear( + expected_vrd_data[field], groundtruth_dictionary[ + standard_fields.InputDataFields.groundtruth_boxes][field], 1e-5) + self.assertAllEqual( + np.array([1, 2, 3]), groundtruth_dictionary[ + standard_fields.InputDataFields.groundtruth_image_classes]) + + def testBuildPredictionDictionary(self): + np_data = pd.DataFrame( + [[ + 'fe58ec1b06db2bb7', '/m/04bcr3', '/m/083vt', 0.0, 0.3, 0.5, 0.6, + 0.0, 0.3, 0.5, 0.6, 'is', 0.1 + ], [ + 'fe58ec1b06db2bb7', '/m/04bcr3', '/m/02gy9n', 0.0, 0.3, 0.5, 0.6, + 0.1, 0.2, 0.3, 0.4, 'under', 0.2 + ], [ + 'fe58ec1b06db2bb7', '/m/04bcr3', '/m/083vt', 0.0, 0.1, 0.2, 0.3, + 0.0, 0.1, 0.2, 0.3, 'is', 0.3 + ], [ + 'fe58ec1b06db2bb7', '/m/083vt', '/m/04bcr3', 0.1, 0.2, 0.3, 0.4, + 0.5, 0.6, 0.7, 0.8, 'at', 0.4 + ]], + columns=[ + 'ImageID', 'LabelName1', 'LabelName2', 'XMin1', 'XMax1', 
'YMin1', + 'YMax1', 'XMin2', 'XMax2', 'YMin2', 'YMax2', 'RelationshipLabel', + 'Score' + ]) + class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3} + relationship_label_map = {'is': 1, 'under': 2, 'at': 3} + prediction_dictionary = utils.build_predictions_vrd_dictionary( + np_data, class_label_map, relationship_label_map) + + self.assertTrue(standard_fields.DetectionResultFields.detection_boxes in + prediction_dictionary) + self.assertTrue(standard_fields.DetectionResultFields.detection_classes in + prediction_dictionary) + self.assertTrue(standard_fields.DetectionResultFields.detection_scores in + prediction_dictionary) + + self.assertAllEqual( + np.array( + [(1, 2, 1), (1, 3, 2), (1, 2, 1), (2, 1, 3)], + dtype=vrd_evaluation.label_data_type), prediction_dictionary[ + standard_fields.DetectionResultFields.detection_classes]) + expected_vrd_data = np.array( + [ + ([0.5, 0.0, 0.6, 0.3], [0.5, 0.0, 0.6, 0.3]), + ([0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2]), + ([0.2, 0.0, 0.3, 0.1], [0.2, 0.0, 0.3, 0.1]), + ([0.3, 0.1, 0.4, 0.2], [0.7, 0.5, 0.8, 0.6]), + ], + dtype=vrd_evaluation.vrd_box_data_type) + for field in expected_vrd_data.dtype.fields: + self.assertNDArrayNear( + expected_vrd_data[field], prediction_dictionary[ + standard_fields.DetectionResultFields.detection_boxes][field], + 1e-5) + self.assertNDArrayNear( + np.array([0.1, 0.2, 0.3, 0.4]), prediction_dictionary[ + standard_fields.DetectionResultFields.detection_scores], 1e-5) + + +if __name__ == '__main__': + tf.test.main() diff --git a/metrics/tf_example_parser.py b/metrics/tf_example_parser.py new file mode 100644 index 0000000..9a5f130 --- /dev/null +++ b/metrics/tf_example_parser.py @@ -0,0 +1,159 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tensorflow Example proto parser for data loading. + +A parser to decode data containing serialized tensorflow.Example +protos into materialized tensors (numpy arrays). 
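+
+A minimal usage sketch (illustrative only; assumes `example` is a
+tf.train.Example, and "score" is a hypothetical float feature name):
+
+  from object_detection.metrics import tf_example_parser
+
+  parser = tf_example_parser.FloatParser('score')
+  scores = parser.parse(example)  # numpy array, or None if the field is absent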
+""" + +import numpy as np + +from object_detection.core import data_parser +from object_detection.core import standard_fields as fields + + +class FloatParser(data_parser.DataToNumpyParser): + """Tensorflow Example float parser.""" + + def __init__(self, field_name): + self.field_name = field_name + + def parse(self, tf_example): + return np.array( + tf_example.features.feature[self.field_name].float_list.value, + dtype=np.float).transpose() if tf_example.features.feature[ + self.field_name].HasField("float_list") else None + + +class StringParser(data_parser.DataToNumpyParser): + """Tensorflow Example string parser.""" + + def __init__(self, field_name): + self.field_name = field_name + + def parse(self, tf_example): + return "".join(tf_example.features.feature[self.field_name] + .bytes_list.value) if tf_example.features.feature[ + self.field_name].HasField("bytes_list") else None + + +class Int64Parser(data_parser.DataToNumpyParser): + """Tensorflow Example int64 parser.""" + + def __init__(self, field_name): + self.field_name = field_name + + def parse(self, tf_example): + return np.array( + tf_example.features.feature[self.field_name].int64_list.value, + dtype=np.int64).transpose() if tf_example.features.feature[ + self.field_name].HasField("int64_list") else None + + +class BoundingBoxParser(data_parser.DataToNumpyParser): + """Tensorflow Example bounding box parser.""" + + def __init__(self, xmin_field_name, ymin_field_name, xmax_field_name, + ymax_field_name): + self.field_names = [ + ymin_field_name, xmin_field_name, ymax_field_name, xmax_field_name + ] + + def parse(self, tf_example): + result = [] + parsed = True + for field_name in self.field_names: + result.append(tf_example.features.feature[field_name].float_list.value) + parsed &= ( + tf_example.features.feature[field_name].HasField("float_list")) + + return np.array(result).transpose() if parsed else None + + +class TfExampleDetectionAndGTParser(data_parser.DataToNumpyParser): + """Tensorflow Example proto parser.""" + + def __init__(self): + self.items_to_handlers = { + fields.DetectionResultFields.key: + StringParser(fields.TfExampleFields.source_id), + # Object ground truth boxes and classes. + fields.InputDataFields.groundtruth_boxes: (BoundingBoxParser( + fields.TfExampleFields.object_bbox_xmin, + fields.TfExampleFields.object_bbox_ymin, + fields.TfExampleFields.object_bbox_xmax, + fields.TfExampleFields.object_bbox_ymax)), + fields.InputDataFields.groundtruth_classes: ( + Int64Parser(fields.TfExampleFields.object_class_label)), + # Object detections. + fields.DetectionResultFields.detection_boxes: (BoundingBoxParser( + fields.TfExampleFields.detection_bbox_xmin, + fields.TfExampleFields.detection_bbox_ymin, + fields.TfExampleFields.detection_bbox_xmax, + fields.TfExampleFields.detection_bbox_ymax)), + fields.DetectionResultFields.detection_classes: ( + Int64Parser(fields.TfExampleFields.detection_class_label)), + fields.DetectionResultFields.detection_scores: ( + FloatParser(fields.TfExampleFields.detection_score)), + } + + self.optional_items_to_handlers = { + fields.InputDataFields.groundtruth_difficult: + Int64Parser(fields.TfExampleFields.object_difficult), + fields.InputDataFields.groundtruth_group_of: + Int64Parser(fields.TfExampleFields.object_group_of), + fields.InputDataFields.groundtruth_image_classes: + Int64Parser(fields.TfExampleFields.image_class_label), + } + + def parse(self, tf_example): + """Parses tensorflow example and returns a tensor dictionary. + + Args: + tf_example: a tf.Example object. 
+ + Returns: + A dictionary of the following numpy arrays: + fields.DetectionResultFields.source_id - string containing original image + id. + fields.InputDataFields.groundtruth_boxes - a numpy array containing + groundtruth boxes. + fields.InputDataFields.groundtruth_classes - a numpy array containing + groundtruth classes. + fields.InputDataFields.groundtruth_group_of - a numpy array containing + groundtruth group of flag (optional, None if not specified). + fields.InputDataFields.groundtruth_difficult - a numpy array containing + groundtruth difficult flag (optional, None if not specified). + fields.InputDataFields.groundtruth_image_classes - a numpy array + containing groundtruth image-level labels. + fields.DetectionResultFields.detection_boxes - a numpy array containing + detection boxes. + fields.DetectionResultFields.detection_classes - a numpy array containing + detection class labels. + fields.DetectionResultFields.detection_scores - a numpy array containing + detection scores. + Returns None if tf.Example was not parsed or non-optional fields were not + found. + """ + results_dict = {} + parsed = True + for key, parser in self.items_to_handlers.items(): + results_dict[key] = parser.parse(tf_example) + parsed &= (results_dict[key] is not None) + + for key, parser in self.optional_items_to_handlers.items(): + results_dict[key] = parser.parse(tf_example) + + return results_dict if parsed else None diff --git a/metrics/tf_example_parser_test.py b/metrics/tf_example_parser_test.py new file mode 100644 index 0000000..7d265cc --- /dev/null +++ b/metrics/tf_example_parser_test.py @@ -0,0 +1,197 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for object_detection.data_decoders.tf_example_parser.""" + +import numpy as np +import numpy.testing as np_testing +import tensorflow as tf + +from object_detection.core import standard_fields as fields +from object_detection.metrics import tf_example_parser + + +class TfExampleDecoderTest(tf.test.TestCase): + + def _Int64Feature(self, value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) + + def _FloatFeature(self, value): + return tf.train.Feature(float_list=tf.train.FloatList(value=value)) + + def _BytesFeature(self, value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + + def testParseDetectionsAndGT(self): + source_id = 'abc.jpg' + # y_min, x_min, y_max, x_max + object_bb = np.array([[0.0, 0.5, 0.3], [0.0, 0.1, 0.6], [1.0, 0.6, 0.8], + [1.0, 0.6, 0.7]]).transpose() + detection_bb = np.array([[0.1, 0.2], [0.0, 0.8], [1.0, 0.6], + [1.0, 0.85]]).transpose() + + object_class_label = [1, 1, 2] + object_difficult = [1, 0, 0] + object_group_of = [0, 0, 1] + verified_labels = [1, 2, 3, 4] + detection_class_label = [2, 1] + detection_score = [0.5, 0.3] + features = { + fields.TfExampleFields.source_id: + self._BytesFeature(source_id), + fields.TfExampleFields.object_bbox_ymin: + self._FloatFeature(object_bb[:, 0].tolist()), + fields.TfExampleFields.object_bbox_xmin: + self._FloatFeature(object_bb[:, 1].tolist()), + fields.TfExampleFields.object_bbox_ymax: + self._FloatFeature(object_bb[:, 2].tolist()), + fields.TfExampleFields.object_bbox_xmax: + self._FloatFeature(object_bb[:, 3].tolist()), + fields.TfExampleFields.detection_bbox_ymin: + self._FloatFeature(detection_bb[:, 0].tolist()), + fields.TfExampleFields.detection_bbox_xmin: + self._FloatFeature(detection_bb[:, 1].tolist()), + fields.TfExampleFields.detection_bbox_ymax: + self._FloatFeature(detection_bb[:, 2].tolist()), + fields.TfExampleFields.detection_bbox_xmax: + self._FloatFeature(detection_bb[:, 3].tolist()), + fields.TfExampleFields.detection_class_label: + self._Int64Feature(detection_class_label), + fields.TfExampleFields.detection_score: + self._FloatFeature(detection_score), + } + + example = tf.train.Example(features=tf.train.Features(feature=features)) + parser = tf_example_parser.TfExampleDetectionAndGTParser() + + results_dict = parser.parse(example) + self.assertIsNone(results_dict) + + features[fields.TfExampleFields.object_class_label] = ( + self._Int64Feature(object_class_label)) + features[fields.TfExampleFields.object_difficult] = ( + self._Int64Feature(object_difficult)) + + example = tf.train.Example(features=tf.train.Features(feature=features)) + results_dict = parser.parse(example) + + self.assertIsNotNone(results_dict) + self.assertEqual(source_id, results_dict[fields.DetectionResultFields.key]) + np_testing.assert_almost_equal( + object_bb, results_dict[fields.InputDataFields.groundtruth_boxes]) + np_testing.assert_almost_equal( + detection_bb, + results_dict[fields.DetectionResultFields.detection_boxes]) + np_testing.assert_almost_equal( + detection_score, + results_dict[fields.DetectionResultFields.detection_scores]) + np_testing.assert_almost_equal( + detection_class_label, + results_dict[fields.DetectionResultFields.detection_classes]) + np_testing.assert_almost_equal( + object_difficult, + results_dict[fields.InputDataFields.groundtruth_difficult]) + np_testing.assert_almost_equal( + object_class_label, + results_dict[fields.InputDataFields.groundtruth_classes]) + 
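+    # A fresh parser instance is not strictly required here; it just
+    # emphasizes that the optional group-of and image-class fields below are
+    # parsed independently of the assertions above.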
+ parser = tf_example_parser.TfExampleDetectionAndGTParser() + + features[fields.TfExampleFields.object_group_of] = ( + self._Int64Feature(object_group_of)) + + example = tf.train.Example(features=tf.train.Features(feature=features)) + results_dict = parser.parse(example) + self.assertIsNotNone(results_dict) + np_testing.assert_equal( + object_group_of, + results_dict[fields.InputDataFields.groundtruth_group_of]) + + features[fields.TfExampleFields.image_class_label] = ( + self._Int64Feature(verified_labels)) + + example = tf.train.Example(features=tf.train.Features(feature=features)) + results_dict = parser.parse(example) + self.assertIsNotNone(results_dict) + np_testing.assert_equal( + verified_labels, + results_dict[fields.InputDataFields.groundtruth_image_classes]) + + def testParseString(self): + string_val = 'abc' + features = {'string': self._BytesFeature(string_val)} + example = tf.train.Example(features=tf.train.Features(feature=features)) + + parser = tf_example_parser.StringParser('string') + result = parser.parse(example) + self.assertIsNotNone(result) + self.assertEqual(result, string_val) + + parser = tf_example_parser.StringParser('another_string') + result = parser.parse(example) + self.assertIsNone(result) + + def testParseFloat(self): + float_array_val = [1.5, 1.4, 2.0] + features = {'floats': self._FloatFeature(float_array_val)} + example = tf.train.Example(features=tf.train.Features(feature=features)) + + parser = tf_example_parser.FloatParser('floats') + result = parser.parse(example) + self.assertIsNotNone(result) + np_testing.assert_almost_equal(result, float_array_val) + + parser = tf_example_parser.StringParser('another_floats') + result = parser.parse(example) + self.assertIsNone(result) + + def testInt64Parser(self): + int_val = [1, 2, 3] + features = {'ints': self._Int64Feature(int_val)} + example = tf.train.Example(features=tf.train.Features(feature=features)) + + parser = tf_example_parser.Int64Parser('ints') + result = parser.parse(example) + self.assertIsNotNone(result) + np_testing.assert_almost_equal(result, int_val) + + parser = tf_example_parser.Int64Parser('another_ints') + result = parser.parse(example) + self.assertIsNone(result) + + def testBoundingBoxParser(self): + bounding_boxes = np.array([[0.0, 0.5, 0.3], [0.0, 0.1, 0.6], + [1.0, 0.6, 0.8], [1.0, 0.6, 0.7]]).transpose() + features = { + 'ymin': self._FloatFeature(bounding_boxes[:, 0]), + 'xmin': self._FloatFeature(bounding_boxes[:, 1]), + 'ymax': self._FloatFeature(bounding_boxes[:, 2]), + 'xmax': self._FloatFeature(bounding_boxes[:, 3]) + } + + example = tf.train.Example(features=tf.train.Features(feature=features)) + + parser = tf_example_parser.BoundingBoxParser('xmin', 'ymin', 'xmax', 'ymax') + result = parser.parse(example) + self.assertIsNotNone(result) + np_testing.assert_almost_equal(result, bounding_boxes) + + parser = tf_example_parser.BoundingBoxParser('xmin', 'ymin', 'xmax', + 'another_ymax') + result = parser.parse(example) + self.assertIsNone(result) + + +if __name__ == '__main__': + tf.test.main() diff --git a/model_hparams.py b/model_hparams.py new file mode 100644 index 0000000..b0d12fc --- /dev/null +++ b/model_hparams.py @@ -0,0 +1,44 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Hyperparameters for the object detection model in TF.learn. + +This file consolidates and documents the hyperparameters used by the model. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + + +def create_hparams(hparams_overrides=None): + """Returns hyperparameters, including any flag value overrides. + + Args: + hparams_overrides: Optional hparams overrides, represented as a + string containing comma-separated hparam_name=value pairs. + + Returns: + The hyperparameters as a tf.HParams object. + """ + hparams = tf.contrib.training.HParams( + # Whether a fine tuning checkpoint (provided in the pipeline config) + # should be loaded for training. + load_pretrained=True) + # Override any of the preceding hyperparameter values. + if hparams_overrides: + hparams = hparams.parse(hparams_overrides) + return hparams diff --git a/model_lib.py b/model_lib.py new file mode 100644 index 0000000..bd7c997 --- /dev/null +++ b/model_lib.py @@ -0,0 +1,879 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +r"""Constructs model, inputs, and training environment.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy +import functools +import os + +import tensorflow as tf + +from object_detection import eval_util +from object_detection import exporter as exporter_lib +from object_detection import inputs +from object_detection.builders import graph_rewriter_builder +from object_detection.builders import model_builder +from object_detection.builders import optimizer_builder +from object_detection.core import standard_fields as fields +from object_detection.utils import config_util +from object_detection.utils import label_map_util +from object_detection.utils import ops +from object_detection.utils import shape_utils +from object_detection.utils import variables_helper +from object_detection.utils import visualization_utils as vis_utils + +# A map of names to methods that help build the model. 
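+# Entries are looked up by name so that tests and alternative entry points
+# can swap in stub implementations (for example a fake
+# `detection_model_fn_base`) without patching the imported modules.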
+MODEL_BUILD_UTIL_MAP = {
+    'get_configs_from_pipeline_file':
+        config_util.get_configs_from_pipeline_file,
+    'create_pipeline_proto_from_configs':
+        config_util.create_pipeline_proto_from_configs,
+    'merge_external_params_with_configs':
+        config_util.merge_external_params_with_configs,
+    'create_train_input_fn':
+        inputs.create_train_input_fn,
+    'create_eval_input_fn':
+        inputs.create_eval_input_fn,
+    'create_predict_input_fn':
+        inputs.create_predict_input_fn,
+    'detection_model_fn_base': model_builder.build,
+}
+
+
+def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
+                                  max_number_of_boxes):
+  """Extracts groundtruth data from detection_model and prepares it for eval.
+
+  Args:
+    detection_model: A `DetectionModel` object.
+    class_agnostic: Whether the detections are class-agnostic.
+    max_number_of_boxes: Max number of groundtruth boxes.
+
+  Returns:
+    groundtruth: Dictionary with the following fields:
+      'groundtruth_boxes': [batch_size, num_boxes, 4] float32 tensor of boxes,
+        in normalized coordinates.
+      'groundtruth_classes': [batch_size, num_boxes] int64 tensor of 1-indexed
+        classes.
+      'groundtruth_masks': 4D float32 tensor of instance masks (if provided in
+        groundtruth).
+      'groundtruth_is_crowd': [batch_size, num_boxes] bool tensor indicating
+        is_crowd annotations (if provided in groundtruth).
+      'num_groundtruth_boxes': [batch_size] tensor containing the maximum
+        number of groundtruth boxes per image.
+  """
+  input_data_fields = fields.InputDataFields()
+  groundtruth_boxes = tf.stack(
+      detection_model.groundtruth_lists(fields.BoxListFields.boxes))
+  groundtruth_boxes_shape = tf.shape(groundtruth_boxes)
+  # For class-agnostic models, groundtruth one-hot encodings collapse to all
+  # ones.
+  if class_agnostic:
+    groundtruth_classes_one_hot = tf.ones(
+        [groundtruth_boxes_shape[0], groundtruth_boxes_shape[1], 1])
+  else:
+    groundtruth_classes_one_hot = tf.stack(
+        detection_model.groundtruth_lists(fields.BoxListFields.classes))
+  label_id_offset = 1  # Applying label id offset (b/63711816)
+  groundtruth_classes = (
+      tf.argmax(groundtruth_classes_one_hot, axis=2) + label_id_offset)
+  groundtruth = {
+      input_data_fields.groundtruth_boxes: groundtruth_boxes,
+      input_data_fields.groundtruth_classes: groundtruth_classes
+  }
+  if detection_model.groundtruth_has_field(fields.BoxListFields.masks):
+    groundtruth[input_data_fields.groundtruth_instance_masks] = tf.stack(
+        detection_model.groundtruth_lists(fields.BoxListFields.masks))
+
+  if detection_model.groundtruth_has_field(fields.BoxListFields.is_crowd):
+    groundtruth[input_data_fields.groundtruth_is_crowd] = tf.stack(
+        detection_model.groundtruth_lists(fields.BoxListFields.is_crowd))
+
+  groundtruth[input_data_fields.num_groundtruth_boxes] = (
+      tf.tile([max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]]))
+  return groundtruth
+
+
+def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
+  """Unstacks all tensors in `tensor_dict` along 0th dimension.
+
+  Unstacks tensor from the tensor dict along 0th dimension and returns a
+  tensor_dict containing values that are lists of unstacked, unpadded tensors.
+
+  Tensors in the `tensor_dict` are expected to be of one of the three shapes:
+  1. [batch_size]
+  2. [batch_size, height, width, channels]
+  3. [batch_size, num_boxes, d1, d2, ... dn]
+
+  When unpad_groundtruth_tensors is set to true, unstacked tensors of form 3
+  above are sliced along the `num_boxes` dimension using the value in tensor
+  fields.InputDataFields.num_groundtruth_boxes.
+
+  Note that this function has a static list of input data fields and has to be
+  kept in sync with the InputDataFields defined in core/standard_fields.py.
+
+  Args:
+    tensor_dict: A dictionary of batched groundtruth tensors.
+    unpad_groundtruth_tensors: Whether to remove padding along `num_boxes`
+      dimension of the groundtruth tensors.
+
+  Returns:
+    A dictionary where the keys are from fields.InputDataFields and values are
+    a list of unstacked (optionally unpadded) tensors.
+
+  Raises:
+    ValueError: If unpad_groundtruth_tensors is True and `tensor_dict` does
+      not contain a `num_groundtruth_boxes` tensor.
+  """
+  unbatched_tensor_dict = {
+      key: tf.unstack(tensor) for key, tensor in tensor_dict.items()
+  }
+  if unpad_groundtruth_tensors:
+    if (fields.InputDataFields.num_groundtruth_boxes not in
+        unbatched_tensor_dict):
+      raise ValueError('`num_groundtruth_boxes` not found in tensor_dict. '
+                       'Keys available: {}'.format(
+                           unbatched_tensor_dict.keys()))
+    unbatched_unpadded_tensor_dict = {}
+    unpad_keys = set([
+        # List of input data fields that are padded along the num_boxes
+        # dimension. This list has to be kept in sync with InputDataFields in
+        # standard_fields.py.
+        fields.InputDataFields.groundtruth_instance_masks,
+        fields.InputDataFields.groundtruth_classes,
+        fields.InputDataFields.groundtruth_boxes,
+        fields.InputDataFields.groundtruth_keypoints,
+        fields.InputDataFields.groundtruth_group_of,
+        fields.InputDataFields.groundtruth_difficult,
+        fields.InputDataFields.groundtruth_is_crowd,
+        fields.InputDataFields.groundtruth_area,
+        fields.InputDataFields.groundtruth_weights
+    ]).intersection(set(unbatched_tensor_dict.keys()))
+
+    for key in unpad_keys:
+      unpadded_tensor_list = []
+      for num_gt, padded_tensor in zip(
+          unbatched_tensor_dict[fields.InputDataFields.num_groundtruth_boxes],
+          unbatched_tensor_dict[key]):
+        tensor_shape = shape_utils.combined_static_and_dynamic_shape(
+            padded_tensor)
+        slice_begin = tf.zeros([len(tensor_shape)], dtype=tf.int32)
+        slice_size = tf.stack(
+            [num_gt] + [-1 if dim is None else dim for dim in tensor_shape[1:]])
+        unpadded_tensor = tf.slice(padded_tensor, slice_begin, slice_size)
+        unpadded_tensor_list.append(unpadded_tensor)
+      unbatched_unpadded_tensor_dict[key] = unpadded_tensor_list
+    unbatched_tensor_dict.update(unbatched_unpadded_tensor_dict)
+
+  return unbatched_tensor_dict
+
+
+def provide_groundtruth(model, labels):
+  """Provides the labels to a model as groundtruth.
+
+  This helper function extracts the corresponding boxes, classes,
+  keypoints, weights, masks, etc. from the labels, and provides them
+  as groundtruth to the model.
+
+  Args:
+    model: The detection model to provide groundtruth to.
+    labels: The labels for the training or evaluation inputs.
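+
+  Example (an illustrative sketch; assumes `batched_labels` came from the
+  input pipeline and `model` is a `DetectionModel`):
+    labels = unstack_batch(batched_labels)
+    provide_groundtruth(model, labels)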
+ """ + gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes] + gt_classes_list = labels[fields.InputDataFields.groundtruth_classes] + gt_masks_list = None + if fields.InputDataFields.groundtruth_instance_masks in labels: + gt_masks_list = labels[ + fields.InputDataFields.groundtruth_instance_masks] + gt_keypoints_list = None + if fields.InputDataFields.groundtruth_keypoints in labels: + gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints] + gt_weights_list = None + if fields.InputDataFields.groundtruth_weights in labels: + gt_weights_list = labels[fields.InputDataFields.groundtruth_weights] + gt_confidences_list = None + if fields.InputDataFields.groundtruth_confidences in labels: + gt_confidences_list = labels[ + fields.InputDataFields.groundtruth_confidences] + gt_is_crowd_list = None + if fields.InputDataFields.groundtruth_is_crowd in labels: + gt_is_crowd_list = labels[fields.InputDataFields.groundtruth_is_crowd] + model.provide_groundtruth( + groundtruth_boxes_list=gt_boxes_list, + groundtruth_classes_list=gt_classes_list, + groundtruth_confidences_list=gt_confidences_list, + groundtruth_masks_list=gt_masks_list, + groundtruth_keypoints_list=gt_keypoints_list, + groundtruth_weights_list=gt_weights_list, + groundtruth_is_crowd_list=gt_is_crowd_list) + + +def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False, + postprocess_on_cpu=False): + """Creates a model function for `Estimator`. + + Args: + detection_model_fn: Function that returns a `DetectionModel` instance. + configs: Dictionary of pipeline config objects. + hparams: `HParams` object. + use_tpu: Boolean indicating whether model should be constructed for + use on TPU. + postprocess_on_cpu: When use_tpu and postprocess_on_cpu is true, postprocess + is scheduled on the host cpu. + + Returns: + `model_fn` for `Estimator`. + """ + train_config = configs['train_config'] + eval_input_config = configs['eval_input_config'] + eval_config = configs['eval_config'] + + def model_fn(features, labels, mode, params=None): + """Constructs the object detection model. + + Args: + features: Dictionary of feature tensors, returned from `input_fn`. + labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL, + otherwise None. + mode: Mode key from tf.estimator.ModeKeys. + params: Parameter dictionary passed from the estimator. + + Returns: + An `EstimatorSpec` that encapsulates the model and its serving + configurations. + """ + params = params or {} + total_loss, train_op, detections, export_outputs = None, None, None, None + is_training = mode == tf.estimator.ModeKeys.TRAIN + + # Make sure to set the Keras learning phase. True during training, + # False for inference. + tf.keras.backend.set_learning_phase(is_training) + # Set policy for mixed-precision training with Keras-based models. + if use_tpu and train_config.use_bfloat16: + from tensorflow.python.keras.engine import base_layer_utils # pylint: disable=g-import-not-at-top + # Enable v2 behavior, as `mixed_bfloat16` is only supported in TF 2.0. 
+      base_layer_utils.enable_v2_dtype_behavior()
+      tf.compat.v2.keras.mixed_precision.experimental.set_policy(
+          'mixed_bfloat16')
+    detection_model = detection_model_fn(
+        is_training=is_training, add_summaries=(not use_tpu))
+    scaffold_fn = None
+
+    if mode == tf.estimator.ModeKeys.TRAIN:
+      labels = unstack_batch(
+          labels,
+          unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
+    elif mode == tf.estimator.ModeKeys.EVAL:
+      # When evaluating (including on training data), check whether the
+      # groundtruth tensors must be unpadded.
+      boxes_shape = (
+          labels[fields.InputDataFields.groundtruth_boxes].get_shape()
+          .as_list())
+      unpad_groundtruth_tensors = boxes_shape[1] is not None and not use_tpu
+      labels = unstack_batch(
+          labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)
+
+    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
+      provide_groundtruth(detection_model, labels)
+
+    preprocessed_images = features[fields.InputDataFields.image]
+    if use_tpu and train_config.use_bfloat16:
+      with tf.contrib.tpu.bfloat16_scope():
+        prediction_dict = detection_model.predict(
+            preprocessed_images,
+            features[fields.InputDataFields.true_image_shape])
+        prediction_dict = ops.bfloat16_to_float32_nested(prediction_dict)
+    else:
+      prediction_dict = detection_model.predict(
+          preprocessed_images,
+          features[fields.InputDataFields.true_image_shape])
+
+    def postprocess_wrapper(args):
+      return detection_model.postprocess(args[0], args[1])
+
+    if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
+      if use_tpu and postprocess_on_cpu:
+        detections = tf.contrib.tpu.outside_compilation(
+            postprocess_wrapper,
+            (prediction_dict,
+             features[fields.InputDataFields.true_image_shape]))
+      else:
+        detections = postprocess_wrapper((
+            prediction_dict,
+            features[fields.InputDataFields.true_image_shape]))
+
+    if mode == tf.estimator.ModeKeys.TRAIN:
+      load_pretrained = hparams.load_pretrained if hparams else False
+      if train_config.fine_tune_checkpoint and load_pretrained:
+        if not train_config.fine_tune_checkpoint_type:
+          # train_config.from_detection_checkpoint field is deprecated. For
+          # backward compatibility, set train_config.fine_tune_checkpoint_type
+          # based on train_config.from_detection_checkpoint.
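+          # 'detection' restores variables of the full detection model, while
+          # 'classification' restores only the feature-extractor backbone.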
+ if train_config.from_detection_checkpoint: + train_config.fine_tune_checkpoint_type = 'detection' + else: + train_config.fine_tune_checkpoint_type = 'classification' + asg_map = detection_model.restore_map( + fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type, + load_all_detection_checkpoint_vars=( + train_config.load_all_detection_checkpoint_vars)) + available_var_map = ( + variables_helper.get_variables_available_in_checkpoint( + asg_map, + train_config.fine_tune_checkpoint, + include_global_step=False)) + if use_tpu: + + def tpu_scaffold(): + tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint, + available_var_map) + return tf.train.Scaffold() + + scaffold_fn = tpu_scaffold + else: + tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint, + available_var_map) + + if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL): + losses_dict = detection_model.loss( + prediction_dict, features[fields.InputDataFields.true_image_shape]) + losses = [loss_tensor for loss_tensor in losses_dict.values()] + if train_config.add_regularization_loss: + regularization_losses = detection_model.regularization_losses() + if use_tpu and train_config.use_bfloat16: + regularization_losses = ops.bfloat16_to_float32_nested( + regularization_losses) + if regularization_losses: + regularization_loss = tf.add_n( + regularization_losses, name='regularization_loss') + losses.append(regularization_loss) + losses_dict['Loss/regularization_loss'] = regularization_loss + total_loss = tf.add_n(losses, name='total_loss') + losses_dict['Loss/total_loss'] = total_loss + + if 'graph_rewriter_config' in configs: + graph_rewriter_fn = graph_rewriter_builder.build( + configs['graph_rewriter_config'], is_training=is_training) + graph_rewriter_fn() + + # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we + # can write learning rate summaries on TPU without host calls. + global_step = tf.train.get_or_create_global_step() + training_optimizer, optimizer_summary_vars = optimizer_builder.build( + train_config.optimizer) + + if mode == tf.estimator.ModeKeys.TRAIN: + if use_tpu: + training_optimizer = tf.contrib.tpu.CrossShardOptimizer( + training_optimizer) + + # Optionally freeze some layers by setting their gradients to be zero. + trainable_variables = None + include_variables = ( + train_config.update_trainable_variables + if train_config.update_trainable_variables else None) + exclude_variables = ( + train_config.freeze_variables + if train_config.freeze_variables else None) + trainable_variables = tf.contrib.framework.filter_variables( + tf.trainable_variables(), + include_patterns=include_variables, + exclude_patterns=exclude_variables) + + clip_gradients_value = None + if train_config.gradient_clipping_by_norm > 0: + clip_gradients_value = train_config.gradient_clipping_by_norm + + if not use_tpu: + for var in optimizer_summary_vars: + tf.summary.scalar(var.op.name, var) + summaries = [] if use_tpu else None + if train_config.summarize_gradients: + summaries = ['gradients', 'gradient_norm', 'global_gradient_norm'] + train_op = tf.contrib.layers.optimize_loss( + loss=total_loss, + global_step=global_step, + learning_rate=None, + clip_gradients=clip_gradients_value, + optimizer=training_optimizer, + update_ops=detection_model.updates(), + variables=trainable_variables, + summaries=summaries, + name='') # Preventing scope prefix on all variables. 
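+      # `train_op` bundles gradient computation, optional clipping, the
+      # model's update ops (e.g. batch norm), and the global-step increment.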
+ + if mode == tf.estimator.ModeKeys.PREDICT: + exported_output = exporter_lib.add_output_tensor_nodes(detections) + export_outputs = { + tf.saved_model.signature_constants.PREDICT_METHOD_NAME: + tf.estimator.export.PredictOutput(exported_output) + } + + eval_metric_ops = None + scaffold = None + if mode == tf.estimator.ModeKeys.EVAL: + class_agnostic = ( + fields.DetectionResultFields.detection_classes not in detections) + groundtruth = _prepare_groundtruth_for_eval( + detection_model, class_agnostic, + eval_input_config.max_number_of_boxes) + use_original_images = fields.InputDataFields.original_image in features + if use_original_images: + eval_images = features[fields.InputDataFields.original_image] + true_image_shapes = tf.slice( + features[fields.InputDataFields.true_image_shape], [0, 0], [-1, 3]) + original_image_spatial_shapes = features[fields.InputDataFields + .original_image_spatial_shape] + else: + eval_images = features[fields.InputDataFields.image] + true_image_shapes = None + original_image_spatial_shapes = None + + eval_dict = eval_util.result_dict_for_batched_example( + eval_images, + features[inputs.HASH_KEY], + detections, + groundtruth, + class_agnostic=class_agnostic, + scale_to_absolute=True, + original_image_spatial_shapes=original_image_spatial_shapes, + true_image_shapes=true_image_shapes) + + if fields.InputDataFields.image_additional_channels in features: + eval_dict[fields.InputDataFields.image_additional_channels] = features[ + fields.InputDataFields.image_additional_channels] + + if class_agnostic: + category_index = label_map_util.create_class_agnostic_category_index() + else: + category_index = label_map_util.create_category_index_from_labelmap( + eval_input_config.label_map_path) + vis_metric_ops = None + if not use_tpu and use_original_images: + eval_metric_op_vis = vis_utils.VisualizeSingleFrameDetections( + category_index, + max_examples_to_draw=eval_config.num_visualizations, + max_boxes_to_draw=eval_config.max_num_boxes_to_visualize, + min_score_thresh=eval_config.min_score_threshold, + use_normalized_coordinates=False) + vis_metric_ops = eval_metric_op_vis.get_estimator_eval_metric_ops( + eval_dict) + + # Eval metrics on a single example. + eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators( + eval_config, list(category_index.values()), eval_dict) + for loss_key, loss_tensor in iter(losses_dict.items()): + eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor) + for var in optimizer_summary_vars: + eval_metric_ops[var.op.name] = (var, tf.no_op()) + if vis_metric_ops is not None: + eval_metric_ops.update(vis_metric_ops) + eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()} + + if eval_config.use_moving_averages: + variable_averages = tf.train.ExponentialMovingAverage(0.0) + variables_to_restore = variable_averages.variables_to_restore() + keep_checkpoint_every_n_hours = ( + train_config.keep_checkpoint_every_n_hours) + saver = tf.train.Saver( + variables_to_restore, + max_to_keep=10000, + keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours) + scaffold = tf.train.Scaffold(saver=saver) + + # EVAL executes on CPU, so use regular non-TPU EstimatorSpec. 
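+    # (TPUEstimatorSpec also takes `eval_metrics` as a (metric_fn, tensors)
+    # pair rather than the ready-made metric ops built above.)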
+ if use_tpu and mode != tf.estimator.ModeKeys.EVAL: + return tf.contrib.tpu.TPUEstimatorSpec( + mode=mode, + scaffold_fn=scaffold_fn, + predictions=detections, + loss=total_loss, + train_op=train_op, + eval_metrics=eval_metric_ops, + export_outputs=export_outputs) + else: + if scaffold is None: + keep_checkpoint_every_n_hours = ( + train_config.keep_checkpoint_every_n_hours) + saver = tf.train.Saver( + sharded=True, + max_to_keep=10000, + keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours, + save_relative_paths=True) + tf.add_to_collection(tf.GraphKeys.SAVERS, saver) + scaffold = tf.train.Scaffold(saver=saver) + return tf.estimator.EstimatorSpec( + mode=mode, + predictions=detections, + loss=total_loss, + train_op=train_op, + eval_metric_ops=eval_metric_ops, + export_outputs=export_outputs, + scaffold=scaffold) + + return model_fn + + +def create_estimator_and_inputs(run_config, + hparams, + pipeline_config_path, + config_override=None, + train_steps=None, + sample_1_of_n_eval_examples=None, + sample_1_of_n_eval_on_train_examples=1, + model_fn_creator=create_model_fn, + use_tpu_estimator=False, + use_tpu=False, + num_shards=1, + params=None, + override_eval_num_epochs=True, + save_final_config=False, + postprocess_on_cpu=False, + export_to_tpu=None, + **kwargs): + """Creates `Estimator`, input functions, and steps. + + Args: + run_config: A `RunConfig`. + hparams: A `HParams`. + pipeline_config_path: A path to a pipeline config file. + config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to + override the config from `pipeline_config_path`. + train_steps: Number of training steps. If None, the number of training steps + is set from the `TrainConfig` proto. + sample_1_of_n_eval_examples: Integer representing how often an eval example + should be sampled. If 1, will sample all examples. + sample_1_of_n_eval_on_train_examples: Similar to + `sample_1_of_n_eval_examples`, except controls the sampling of training + data for evaluation. + model_fn_creator: A function that creates a `model_fn` for `Estimator`. + Follows the signature: + + * Args: + * `detection_model_fn`: Function that returns `DetectionModel` instance. + * `configs`: Dictionary of pipeline config objects. + * `hparams`: `HParams` object. + * Returns: + `model_fn` for `Estimator`. + + use_tpu_estimator: Whether a `TPUEstimator` should be returned. If False, + an `Estimator` will be returned. + use_tpu: Boolean, whether training and evaluation should run on TPU. Only + used if `use_tpu_estimator` is True. + num_shards: Number of shards (TPU cores). Only used if `use_tpu_estimator` + is True. + params: Parameter dictionary passed from the estimator. Only used if + `use_tpu_estimator` is True. + override_eval_num_epochs: Whether to overwrite the number of epochs to 1 for + eval_input. + save_final_config: Whether to save final config (obtained after applying + overrides) to `estimator.model_dir`. + postprocess_on_cpu: When use_tpu and postprocess_on_cpu are true, + postprocess is scheduled on the host cpu. + export_to_tpu: When use_tpu and export_to_tpu are true, + `export_savedmodel()` exports a metagraph for serving on TPU besides the + one on CPU. + **kwargs: Additional keyword arguments for configuration override. + + Returns: + A dictionary with the following fields: + 'estimator': An `Estimator` or `TPUEstimator`. + 'train_input_fn': A training input function. + 'eval_input_fns': A list of all evaluation input functions. + 'eval_input_names': A list of names for each evaluation input. 
+ 'eval_on_train_input_fn': An evaluation-on-train input function. + 'predict_input_fn': A prediction input function. + 'train_steps': Number of training steps. Either directly from input or from + configuration. + """ + get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[ + 'get_configs_from_pipeline_file'] + merge_external_params_with_configs = MODEL_BUILD_UTIL_MAP[ + 'merge_external_params_with_configs'] + create_pipeline_proto_from_configs = MODEL_BUILD_UTIL_MAP[ + 'create_pipeline_proto_from_configs'] + create_train_input_fn = MODEL_BUILD_UTIL_MAP['create_train_input_fn'] + create_eval_input_fn = MODEL_BUILD_UTIL_MAP['create_eval_input_fn'] + create_predict_input_fn = MODEL_BUILD_UTIL_MAP['create_predict_input_fn'] + detection_model_fn_base = MODEL_BUILD_UTIL_MAP['detection_model_fn_base'] + + configs = get_configs_from_pipeline_file( + pipeline_config_path, config_override=config_override) + kwargs.update({ + 'train_steps': train_steps, + 'use_bfloat16': configs['train_config'].use_bfloat16 and use_tpu + }) + if sample_1_of_n_eval_examples >= 1: + kwargs.update({ + 'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples + }) + if override_eval_num_epochs: + kwargs.update({'eval_num_epochs': 1}) + tf.logging.warning( + 'Forced number of epochs for all eval validations to be 1.') + configs = merge_external_params_with_configs( + configs, hparams, kwargs_dict=kwargs) + model_config = configs['model'] + train_config = configs['train_config'] + train_input_config = configs['train_input_config'] + eval_config = configs['eval_config'] + eval_input_configs = configs['eval_input_configs'] + eval_on_train_input_config = copy.deepcopy(train_input_config) + eval_on_train_input_config.sample_1_of_n_examples = ( + sample_1_of_n_eval_on_train_examples) + if override_eval_num_epochs and eval_on_train_input_config.num_epochs != 1: + tf.logging.warning('Expected number of evaluation epochs is 1, but ' + 'instead encountered `eval_on_train_input_config' + '.num_epochs` = ' + '{}. Overwriting `num_epochs` to 1.'.format( + eval_on_train_input_config.num_epochs)) + eval_on_train_input_config.num_epochs = 1 + + # update train_steps from config but only when non-zero value is provided + if train_steps is None and train_config.num_steps != 0: + train_steps = train_config.num_steps + + detection_model_fn = functools.partial( + detection_model_fn_base, model_config=model_config) + + # Create the input functions for TRAIN/EVAL/PREDICT. + train_input_fn = create_train_input_fn( + train_config=train_config, + train_input_config=train_input_config, + model_config=model_config) + eval_input_fns = [ + create_eval_input_fn( + eval_config=eval_config, + eval_input_config=eval_input_config, + model_config=model_config) for eval_input_config in eval_input_configs + ] + eval_input_names = [ + eval_input_config.name for eval_input_config in eval_input_configs + ] + eval_on_train_input_fn = create_eval_input_fn( + eval_config=eval_config, + eval_input_config=eval_on_train_input_config, + model_config=model_config) + predict_input_fn = create_predict_input_fn( + model_config=model_config, predict_input_config=eval_input_configs[0]) + + # Read export_to_tpu from hparams if not passed. 
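+  # HParams.get falls back to the False default when `export_to_tpu` is not
+  # defined on the hparams object.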
+  if export_to_tpu is None:
+    export_to_tpu = hparams.get('export_to_tpu', False)
+  tf.logging.info('create_estimator_and_inputs: use_tpu %s, export_to_tpu %s',
+                  use_tpu, export_to_tpu)
+  model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu,
+                              postprocess_on_cpu)
+  if use_tpu_estimator:
+    estimator = tf.contrib.tpu.TPUEstimator(
+        model_fn=model_fn,
+        train_batch_size=train_config.batch_size,
+        # For each core, only batch size 1 is supported for eval.
+        eval_batch_size=num_shards * 1 if use_tpu else 1,
+        use_tpu=use_tpu,
+        config=run_config,
+        export_to_tpu=export_to_tpu,
+        eval_on_tpu=False,  # Eval runs on CPU, so disable eval on TPU.
+        params=params if params else {})
+  else:
+    estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)
+
+  # Write the as-run pipeline config to disk.
+  if run_config.is_chief and save_final_config:
+    pipeline_config_final = create_pipeline_proto_from_configs(configs)
+    config_util.save_pipeline_config(pipeline_config_final,
+                                     estimator.model_dir)
+
+  return dict(
+      estimator=estimator,
+      train_input_fn=train_input_fn,
+      eval_input_fns=eval_input_fns,
+      eval_input_names=eval_input_names,
+      eval_on_train_input_fn=eval_on_train_input_fn,
+      predict_input_fn=predict_input_fn,
+      train_steps=train_steps)
+
+
+def create_train_and_eval_specs(train_input_fn,
+                                eval_input_fns,
+                                eval_on_train_input_fn,
+                                predict_input_fn,
+                                train_steps,
+                                eval_on_train_data=False,
+                                final_exporter_name='Servo',
+                                eval_spec_names=None):
+  """Creates a `TrainSpec` and `EvalSpec`s.
+
+  Args:
+    train_input_fn: Function that produces features and labels on train data.
+    eval_input_fns: A list of functions that produce features and labels on
+      eval data.
+    eval_on_train_input_fn: Function that produces features and labels for
+      evaluation on train data.
+    predict_input_fn: Function that produces features for inference.
+    train_steps: Number of training steps.
+    eval_on_train_data: Whether to evaluate model on training data. Default is
+      False.
+    final_exporter_name: String name given to `FinalExporter`.
+    eval_spec_names: A list of string names for each `EvalSpec`.
+
+  Returns:
+    Tuple of `TrainSpec` and list of `EvalSpec`s. If `eval_on_train_data` is
+    True, the last `EvalSpec` in the list will correspond to training data;
+    the remaining `EvalSpec`s correspond to the evaluation datasets.
+  """
+  train_spec = tf.estimator.TrainSpec(
+      input_fn=train_input_fn, max_steps=train_steps)
+
+  if eval_spec_names is None:
+    eval_spec_names = [str(i) for i in range(len(eval_input_fns))]
+
+  eval_specs = []
+  for index, (eval_spec_name, eval_input_fn) in enumerate(
+      zip(eval_spec_names, eval_input_fns)):
+    # Uses final_exporter_name as exporter_name for the first eval spec for
+    # backward compatibility.
+    if index == 0:
+      exporter_name = final_exporter_name
+    else:
+      exporter_name = '{}_{}'.format(final_exporter_name, eval_spec_name)
+    exporter = tf.estimator.FinalExporter(
+        name=exporter_name, serving_input_receiver_fn=predict_input_fn)
+    eval_specs.append(
+        tf.estimator.EvalSpec(
+            name=eval_spec_name,
+            input_fn=eval_input_fn,
+            steps=None,
+            exporters=exporter))
+
+  if eval_on_train_data:
+    eval_specs.append(
+        tf.estimator.EvalSpec(
+            name='eval_on_train', input_fn=eval_on_train_input_fn, steps=None))
+
+  return train_spec, eval_specs
+
+
+def continuous_eval(estimator, model_dir, input_fn, train_steps, name):
+  """Performs continuous evaluation on checkpoints in a model directory.
+
+  Args:
+    estimator: Estimator object to use for evaluation.
+ model_dir: Model directory to read checkpoints for continuous evaluation. + input_fn: Input function to use for evaluation. + train_steps: Number of training steps. This is used to infer the last + checkpoint and stop evaluation loop. + name: Namescope for eval summary. + """ + + def terminate_eval(): + tf.logging.info('Terminating eval after 180 seconds of no checkpoints') + return True + + for ckpt in tf.contrib.training.checkpoints_iterator( + model_dir, min_interval_secs=180, timeout=None, + timeout_fn=terminate_eval): + + tf.logging.info('Starting Evaluation.') + try: + eval_results = estimator.evaluate( + input_fn=input_fn, steps=None, checkpoint_path=ckpt, name=name) + tf.logging.info('Eval results: %s' % eval_results) + + # Terminate eval job when final checkpoint is reached + current_step = int(os.path.basename(ckpt).split('-')[1]) + if current_step >= train_steps: + tf.logging.info( + 'Evaluation finished after training step %d' % current_step) + break + + except tf.errors.NotFoundError: + tf.logging.info( + 'Checkpoint %s no longer exists, skipping checkpoint' % ckpt) + + +def populate_experiment(run_config, + hparams, + pipeline_config_path, + train_steps=None, + eval_steps=None, + model_fn_creator=create_model_fn, + **kwargs): + """Populates an `Experiment` object. + + EXPERIMENT CLASS IS DEPRECATED. Please switch to + tf.estimator.train_and_evaluate. As an example, see model_main.py. + + Args: + run_config: A `RunConfig`. + hparams: A `HParams`. + pipeline_config_path: A path to a pipeline config file. + train_steps: Number of training steps. If None, the number of training steps + is set from the `TrainConfig` proto. + eval_steps: Number of evaluation steps per evaluation cycle. If None, the + number of evaluation steps is set from the `EvalConfig` proto. + model_fn_creator: A function that creates a `model_fn` for `Estimator`. + Follows the signature: + + * Args: + * `detection_model_fn`: Function that returns `DetectionModel` instance. + * `configs`: Dictionary of pipeline config objects. + * `hparams`: `HParams` object. + * Returns: + `model_fn` for `Estimator`. + + **kwargs: Additional keyword arguments for configuration override. + + Returns: + An `Experiment` that defines all aspects of training, evaluation, and + export. + """ + tf.logging.warning('Experiment is being deprecated. Please use ' + 'tf.estimator.train_and_evaluate(). See model_main.py for ' + 'an example.') + train_and_eval_dict = create_estimator_and_inputs( + run_config, + hparams, + pipeline_config_path, + train_steps=train_steps, + eval_steps=eval_steps, + model_fn_creator=model_fn_creator, + save_final_config=True, + **kwargs) + estimator = train_and_eval_dict['estimator'] + train_input_fn = train_and_eval_dict['train_input_fn'] + eval_input_fns = train_and_eval_dict['eval_input_fns'] + predict_input_fn = train_and_eval_dict['predict_input_fn'] + train_steps = train_and_eval_dict['train_steps'] + + export_strategies = [ + tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy( + serving_input_fn=predict_input_fn) + ] + + return tf.contrib.learn.Experiment( + estimator=estimator, + train_input_fn=train_input_fn, + eval_input_fn=eval_input_fns[0], + train_steps=train_steps, + eval_steps=None, + export_strategies=export_strategies, + eval_delay_secs=120, + ) diff --git a/model_lib_test.py b/model_lib_test.py new file mode 100644 index 0000000..c61fbb6 --- /dev/null +++ b/model_lib_test.py @@ -0,0 +1,430 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for object detection model library.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import functools +import os + +import numpy as np +import tensorflow as tf + +from tensorflow.contrib.tpu.python.tpu import tpu_config +from tensorflow.contrib.tpu.python.tpu import tpu_estimator + +from object_detection import inputs +from object_detection import model_hparams +from object_detection import model_lib +from object_detection.builders import model_builder +from object_detection.core import standard_fields as fields +from object_detection.utils import config_util + + +# Model for test. Options are: +# 'ssd_inception_v2_pets', 'faster_rcnn_resnet50_pets' +MODEL_NAME_FOR_TEST = 'ssd_inception_v2_pets' + + +def _get_data_path(): + """Returns an absolute path to TFRecord file.""" + return os.path.join(tf.resource_loader.get_data_files_path(), 'test_data', + 'pets_examples.record') + + +def get_pipeline_config_path(model_name): + """Returns path to the local pipeline config file.""" + return os.path.join(tf.resource_loader.get_data_files_path(), 'samples', + 'configs', model_name + '.config') + + +def _get_labelmap_path(): + """Returns an absolute path to label map file.""" + return os.path.join(tf.resource_loader.get_data_files_path(), 'data', + 'pet_label_map.pbtxt') + + +def _get_configs_for_model(model_name): + """Returns configurations for model.""" + filename = get_pipeline_config_path(model_name) + data_path = _get_data_path() + label_map_path = _get_labelmap_path() + configs = config_util.get_configs_from_pipeline_file(filename) + override_dict = { + 'train_input_path': data_path, + 'eval_input_path': data_path, + 'label_map_path': label_map_path + } + configs = config_util.merge_external_params_with_configs( + configs, kwargs_dict=override_dict) + return configs + + +def _make_initializable_iterator(dataset): + """Creates an iterator, and initializes tables. + + Args: + dataset: A `tf.data.Dataset` object. + + Returns: + A `tf.data.Iterator`. 
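+
+  Example (illustrative):
+    iterator = _make_initializable_iterator(dataset)
+    features, labels = iterator.get_next()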
+ """ + iterator = dataset.make_initializable_iterator() + tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, iterator.initializer) + return iterator + + +class ModelLibTest(tf.test.TestCase): + + @classmethod + def setUpClass(cls): + tf.reset_default_graph() + + def _assert_model_fn_for_train_eval(self, configs, mode, + class_agnostic=False): + model_config = configs['model'] + train_config = configs['train_config'] + with tf.Graph().as_default(): + if mode == 'train': + features, labels = _make_initializable_iterator( + inputs.create_train_input_fn(configs['train_config'], + configs['train_input_config'], + configs['model'])()).get_next() + model_mode = tf.estimator.ModeKeys.TRAIN + batch_size = train_config.batch_size + elif mode == 'eval': + features, labels = _make_initializable_iterator( + inputs.create_eval_input_fn(configs['eval_config'], + configs['eval_input_config'], + configs['model'])()).get_next() + model_mode = tf.estimator.ModeKeys.EVAL + batch_size = 1 + elif mode == 'eval_on_train': + features, labels = _make_initializable_iterator( + inputs.create_eval_input_fn(configs['eval_config'], + configs['train_input_config'], + configs['model'])()).get_next() + model_mode = tf.estimator.ModeKeys.EVAL + batch_size = 1 + + detection_model_fn = functools.partial( + model_builder.build, model_config=model_config, is_training=True) + + hparams = model_hparams.create_hparams( + hparams_overrides='load_pretrained=false') + + model_fn = model_lib.create_model_fn(detection_model_fn, configs, hparams) + estimator_spec = model_fn(features, labels, model_mode) + + self.assertIsNotNone(estimator_spec.loss) + self.assertIsNotNone(estimator_spec.predictions) + if mode == 'eval' or mode == 'eval_on_train': + if class_agnostic: + self.assertNotIn('detection_classes', estimator_spec.predictions) + else: + detection_classes = estimator_spec.predictions['detection_classes'] + self.assertEqual(batch_size, detection_classes.shape.as_list()[0]) + self.assertEqual(tf.float32, detection_classes.dtype) + detection_boxes = estimator_spec.predictions['detection_boxes'] + detection_scores = estimator_spec.predictions['detection_scores'] + num_detections = estimator_spec.predictions['num_detections'] + self.assertEqual(batch_size, detection_boxes.shape.as_list()[0]) + self.assertEqual(tf.float32, detection_boxes.dtype) + self.assertEqual(batch_size, detection_scores.shape.as_list()[0]) + self.assertEqual(tf.float32, detection_scores.dtype) + self.assertEqual(tf.float32, num_detections.dtype) + if mode == 'eval': + self.assertIn('Detections_Left_Groundtruth_Right/0', + estimator_spec.eval_metric_ops) + if model_mode == tf.estimator.ModeKeys.TRAIN: + self.assertIsNotNone(estimator_spec.train_op) + return estimator_spec + + def _assert_model_fn_for_predict(self, configs): + model_config = configs['model'] + + with tf.Graph().as_default(): + features, _ = _make_initializable_iterator( + inputs.create_eval_input_fn(configs['eval_config'], + configs['eval_input_config'], + configs['model'])()).get_next() + detection_model_fn = functools.partial( + model_builder.build, model_config=model_config, is_training=False) + + hparams = model_hparams.create_hparams( + hparams_overrides='load_pretrained=false') + + model_fn = model_lib.create_model_fn(detection_model_fn, configs, hparams) + estimator_spec = model_fn(features, None, tf.estimator.ModeKeys.PREDICT) + + self.assertIsNone(estimator_spec.loss) + self.assertIsNone(estimator_spec.train_op) + self.assertIsNotNone(estimator_spec.predictions) + 
self.assertIsNotNone(estimator_spec.export_outputs) + self.assertIn(tf.saved_model.signature_constants.PREDICT_METHOD_NAME, + estimator_spec.export_outputs) + + def test_model_fn_in_train_mode(self): + """Tests the model function in TRAIN mode.""" + configs = _get_configs_for_model(MODEL_NAME_FOR_TEST) + self._assert_model_fn_for_train_eval(configs, 'train') + + def test_model_fn_in_train_mode_freeze_all_variables(self): + """Tests model_fn TRAIN mode with all variables frozen.""" + configs = _get_configs_for_model(MODEL_NAME_FOR_TEST) + configs['train_config'].freeze_variables.append('.*') + with self.assertRaisesRegexp(ValueError, 'No variables to optimize'): + self._assert_model_fn_for_train_eval(configs, 'train') + + def test_model_fn_in_train_mode_freeze_all_included_variables(self): + """Tests model_fn TRAIN mode with all included variables frozen.""" + configs = _get_configs_for_model(MODEL_NAME_FOR_TEST) + train_config = configs['train_config'] + train_config.update_trainable_variables.append('FeatureExtractor') + train_config.freeze_variables.append('.*') + with self.assertRaisesRegexp(ValueError, 'No variables to optimize'): + self._assert_model_fn_for_train_eval(configs, 'train') + + def test_model_fn_in_train_mode_freeze_box_predictor(self): + """Tests model_fn TRAIN mode with FeatureExtractor variables frozen.""" + configs = _get_configs_for_model(MODEL_NAME_FOR_TEST) + train_config = configs['train_config'] + train_config.update_trainable_variables.append('FeatureExtractor') + train_config.update_trainable_variables.append('BoxPredictor') + train_config.freeze_variables.append('FeatureExtractor') + self._assert_model_fn_for_train_eval(configs, 'train') + + def test_model_fn_in_eval_mode(self): + """Tests the model function in EVAL mode.""" + configs = _get_configs_for_model(MODEL_NAME_FOR_TEST) + self._assert_model_fn_for_train_eval(configs, 'eval') + + def test_model_fn_in_eval_on_train_mode(self): + """Tests the model function in EVAL mode with train data.""" + configs = _get_configs_for_model(MODEL_NAME_FOR_TEST) + self._assert_model_fn_for_train_eval(configs, 'eval_on_train') + + def test_model_fn_in_predict_mode(self): + """Tests the model function in PREDICT mode.""" + configs = _get_configs_for_model(MODEL_NAME_FOR_TEST) + self._assert_model_fn_for_predict(configs) + + def test_create_estimator_and_inputs(self): + """Tests that Estimator and input function are constructed correctly.""" + run_config = tf.estimator.RunConfig() + hparams = model_hparams.create_hparams( + hparams_overrides='load_pretrained=false') + pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST) + train_steps = 20 + train_and_eval_dict = model_lib.create_estimator_and_inputs( + run_config, + hparams, + pipeline_config_path, + train_steps=train_steps) + estimator = train_and_eval_dict['estimator'] + train_steps = train_and_eval_dict['train_steps'] + self.assertIsInstance(estimator, tf.estimator.Estimator) + self.assertEqual(20, train_steps) + self.assertIn('train_input_fn', train_and_eval_dict) + self.assertIn('eval_input_fns', train_and_eval_dict) + self.assertIn('eval_on_train_input_fn', train_and_eval_dict) + + def test_create_estimator_with_default_train_eval_steps(self): + """Tests that number of train/eval defaults to config values.""" + run_config = tf.estimator.RunConfig() + hparams = model_hparams.create_hparams( + hparams_overrides='load_pretrained=false') + pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST) + configs = 
config_util.get_configs_from_pipeline_file(pipeline_config_path)
+    config_train_steps = configs['train_config'].num_steps
+    train_and_eval_dict = model_lib.create_estimator_and_inputs(
+        run_config, hparams, pipeline_config_path)
+    estimator = train_and_eval_dict['estimator']
+    train_steps = train_and_eval_dict['train_steps']
+
+    self.assertIsInstance(estimator, tf.estimator.Estimator)
+    self.assertEqual(config_train_steps, train_steps)
+
+  def test_create_tpu_estimator_and_inputs(self):
+    """Tests that a `TPUEstimator` is constructed correctly."""
+
+    run_config = tpu_config.RunConfig()
+    hparams = model_hparams.create_hparams(
+        hparams_overrides='load_pretrained=false')
+    pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
+    train_steps = 20
+    train_and_eval_dict = model_lib.create_estimator_and_inputs(
+        run_config,
+        hparams,
+        pipeline_config_path,
+        train_steps=train_steps,
+        use_tpu_estimator=True)
+    estimator = train_and_eval_dict['estimator']
+    train_steps = train_and_eval_dict['train_steps']
+
+    self.assertIsInstance(estimator, tpu_estimator.TPUEstimator)
+    self.assertEqual(20, train_steps)
+
+  def test_create_train_and_eval_specs(self):
+    """Tests that `TrainSpec` and `EvalSpec` are created correctly."""
+    run_config = tf.estimator.RunConfig()
+    hparams = model_hparams.create_hparams(
+        hparams_overrides='load_pretrained=false')
+    pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
+    train_steps = 20
+    train_and_eval_dict = model_lib.create_estimator_and_inputs(
+        run_config,
+        hparams,
+        pipeline_config_path,
+        train_steps=train_steps)
+    train_input_fn = train_and_eval_dict['train_input_fn']
+    eval_input_fns = train_and_eval_dict['eval_input_fns']
+    eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
+    predict_input_fn = train_and_eval_dict['predict_input_fn']
+    train_steps = train_and_eval_dict['train_steps']
+
+    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
+        train_input_fn,
+        eval_input_fns,
+        eval_on_train_input_fn,
+        predict_input_fn,
+        train_steps,
+        eval_on_train_data=True,
+        final_exporter_name='exporter',
+        eval_spec_names=['holdout'])
+    self.assertEqual(train_steps, train_spec.max_steps)
+    self.assertEqual(2, len(eval_specs))
+    self.assertEqual(None, eval_specs[0].steps)
+    self.assertEqual('holdout', eval_specs[0].name)
+    self.assertEqual('exporter', eval_specs[0].exporters[0].name)
+    self.assertEqual(None, eval_specs[1].steps)
+    self.assertEqual('eval_on_train', eval_specs[1].name)
+
+  def test_experiment(self):
+    """Tests that the `Experiment` object is constructed correctly."""
+    run_config = tf.estimator.RunConfig()
+    hparams = model_hparams.create_hparams(
+        hparams_overrides='load_pretrained=false')
+    pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
+    experiment = model_lib.populate_experiment(
+        run_config,
+        hparams,
+        pipeline_config_path,
+        train_steps=10,
+        eval_steps=20)
+    self.assertEqual(10, experiment.train_steps)
+    self.assertEqual(None, experiment.eval_steps)
+
+
+class UnbatchTensorsTest(tf.test.TestCase):
+
+  def test_unbatch_without_unpadding(self):
+    image_placeholder = tf.placeholder(tf.float32, [2, None, None, None])
+    groundtruth_boxes_placeholder = tf.placeholder(tf.float32, [2, None, None])
+    groundtruth_classes_placeholder = tf.placeholder(tf.float32,
+                                                     [2, None, None])
+    groundtruth_weights_placeholder = tf.placeholder(tf.float32, [2, None])
+
+    tensor_dict = {
+        fields.InputDataFields.image:
+            image_placeholder,
+
fields.InputDataFields.groundtruth_boxes: + groundtruth_boxes_placeholder, + fields.InputDataFields.groundtruth_classes: + groundtruth_classes_placeholder, + fields.InputDataFields.groundtruth_weights: + groundtruth_weights_placeholder + } + unbatched_tensor_dict = model_lib.unstack_batch( + tensor_dict, unpad_groundtruth_tensors=False) + + with self.test_session() as sess: + unbatched_tensor_dict_out = sess.run( + unbatched_tensor_dict, + feed_dict={ + image_placeholder: + np.random.rand(2, 4, 4, 3).astype(np.float32), + groundtruth_boxes_placeholder: + np.random.rand(2, 5, 4).astype(np.float32), + groundtruth_classes_placeholder: + np.random.rand(2, 5, 6).astype(np.float32), + groundtruth_weights_placeholder: + np.random.rand(2, 5).astype(np.float32) + }) + for image_out in unbatched_tensor_dict_out[fields.InputDataFields.image]: + self.assertAllEqual(image_out.shape, [4, 4, 3]) + for groundtruth_boxes_out in unbatched_tensor_dict_out[ + fields.InputDataFields.groundtruth_boxes]: + self.assertAllEqual(groundtruth_boxes_out.shape, [5, 4]) + for groundtruth_classes_out in unbatched_tensor_dict_out[ + fields.InputDataFields.groundtruth_classes]: + self.assertAllEqual(groundtruth_classes_out.shape, [5, 6]) + for groundtruth_weights_out in unbatched_tensor_dict_out[ + fields.InputDataFields.groundtruth_weights]: + self.assertAllEqual(groundtruth_weights_out.shape, [5]) + + def test_unbatch_and_unpad_groundtruth_tensors(self): + image_placeholder = tf.placeholder(tf.float32, [2, None, None, None]) + groundtruth_boxes_placeholder = tf.placeholder(tf.float32, [2, 5, None]) + groundtruth_classes_placeholder = tf.placeholder(tf.float32, [2, 5, None]) + groundtruth_weights_placeholder = tf.placeholder(tf.float32, [2, 5]) + num_groundtruth_placeholder = tf.placeholder(tf.int32, [2]) + + tensor_dict = { + fields.InputDataFields.image: + image_placeholder, + fields.InputDataFields.groundtruth_boxes: + groundtruth_boxes_placeholder, + fields.InputDataFields.groundtruth_classes: + groundtruth_classes_placeholder, + fields.InputDataFields.groundtruth_weights: + groundtruth_weights_placeholder, + fields.InputDataFields.num_groundtruth_boxes: + num_groundtruth_placeholder + } + unbatched_tensor_dict = model_lib.unstack_batch( + tensor_dict, unpad_groundtruth_tensors=True) + with self.test_session() as sess: + unbatched_tensor_dict_out = sess.run( + unbatched_tensor_dict, + feed_dict={ + image_placeholder: + np.random.rand(2, 4, 4, 3).astype(np.float32), + groundtruth_boxes_placeholder: + np.random.rand(2, 5, 4).astype(np.float32), + groundtruth_classes_placeholder: + np.random.rand(2, 5, 6).astype(np.float32), + groundtruth_weights_placeholder: + np.random.rand(2, 5).astype(np.float32), + num_groundtruth_placeholder: + np.array([3, 3], np.int32) + }) + for image_out in unbatched_tensor_dict_out[fields.InputDataFields.image]: + self.assertAllEqual(image_out.shape, [4, 4, 3]) + for groundtruth_boxes_out in unbatched_tensor_dict_out[ + fields.InputDataFields.groundtruth_boxes]: + self.assertAllEqual(groundtruth_boxes_out.shape, [3, 4]) + for groundtruth_classes_out in unbatched_tensor_dict_out[ + fields.InputDataFields.groundtruth_classes]: + self.assertAllEqual(groundtruth_classes_out.shape, [3, 6]) + for groundtruth_weights_out in unbatched_tensor_dict_out[ + fields.InputDataFields.groundtruth_weights]: + self.assertAllEqual(groundtruth_weights_out.shape, [3]) + + +if __name__ == '__main__': + tf.test.main() diff --git a/model_lib_v2.py b/model_lib_v2.py new file mode 100644 index 0000000..d433ebc --- 
/dev/null
+++ b/model_lib_v2.py
@@ -0,0 +1,803 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+r"""Constructs model, inputs, and training environment."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import copy
+import time
+
+import tensorflow as tf
+
+from object_detection import eval_util
+from object_detection import inputs
+from object_detection import model_lib
+from object_detection.builders import model_builder
+from object_detection.builders import optimizer_builder
+from object_detection.core import standard_fields as fields
+from object_detection.utils import config_util
+from object_detection.utils import label_map_util
+from object_detection.utils import ops
+from object_detection.utils import variables_helper
+
+MODEL_BUILD_UTIL_MAP = model_lib.MODEL_BUILD_UTIL_MAP
+
+### NOTE: This file is a work in progress.
+### TODO(kaftan): Explore adding unit tests for individual methods
+### TODO(kaftan): Add unit test that checks training on a single image w/
+#### groundtruth, and verify that loss goes to zero.
+#### Possibly have version that takes it as the whole train & eval dataset,
+#### & verify the loss output from the eval_loop method.
+### TODO(kaftan): Make sure the unit tests run in TAP presubmits or Kokoro
+
+
+def _compute_losses_and_predictions_dicts(
+    model, features, labels,
+    add_regularization_loss=True):
+  """Computes the losses dict and predictions dict for a model on inputs.
+
+  Args:
+    model: a DetectionModel (based on Keras).
+    features: Dictionary of feature tensors from the input dataset.
+      Should be in the format output by `inputs.train_input` and
+      `inputs.eval_input`.
+        features[fields.InputDataFields.image] is a [batch_size, H, W, C]
+          float32 tensor with preprocessed images.
+        features[HASH_KEY] is a [batch_size] int32 tensor representing unique
+          identifiers for the images.
+        features[fields.InputDataFields.true_image_shape] is a [batch_size, 3]
+          int32 tensor representing the true image shapes, as preprocessed
+          images could be padded.
+        features[fields.InputDataFields.original_image] (optional) is a
+          [batch_size, H, W, C] float32 tensor with original images.
+    labels: A dictionary of groundtruth tensors post-unstacking. The original
+      labels are of the form returned by `inputs.train_input` and
+      `inputs.eval_input`. The shapes may have been modified by unstacking with
+      `model_lib.unstack_batch`. However, the dictionary includes the following
+      fields.
+        labels[fields.InputDataFields.num_groundtruth_boxes] is an
+          int32 tensor indicating the number of valid groundtruth boxes
+          per image.
+        labels[fields.InputDataFields.groundtruth_boxes] is a float32 tensor
+          containing the corners of the groundtruth boxes.
+        labels[fields.InputDataFields.groundtruth_classes] is a float32
+          one-hot tensor of classes.
+        labels[fields.InputDataFields.groundtruth_weights] is a float32 tensor
+          containing groundtruth weights for the boxes.
+        -- Optional --
+        labels[fields.InputDataFields.groundtruth_instance_masks] is a
+          float32 tensor containing only binary values, which represent
+          instance masks for objects.
+        labels[fields.InputDataFields.groundtruth_keypoints] is a
+          float32 tensor containing keypoints for each box.
+    add_regularization_loss: Whether or not to include the model's
+      regularization loss in the losses dictionary.
+
+  Returns:
+    A tuple containing the losses dictionary (with the total loss under
+    the key 'Loss/total_loss'), and the predictions dictionary produced by
+    `model.predict`.
+
+  """
+  model_lib.provide_groundtruth(model, labels)
+  preprocessed_images = features[fields.InputDataFields.image]
+
+  prediction_dict = model.predict(
+      preprocessed_images,
+      features[fields.InputDataFields.true_image_shape])
+  prediction_dict = ops.bfloat16_to_float32_nested(prediction_dict)
+
+  losses_dict = model.loss(
+      prediction_dict, features[fields.InputDataFields.true_image_shape])
+  losses = [loss_tensor for loss_tensor in losses_dict.values()]
+  if add_regularization_loss:
+    # TODO(kaftan): As we figure out mixed precision & bfloat 16, we may
+    ## need to convert these regularization losses from bfloat16 to float32
+    ## as well.
+    regularization_losses = model.regularization_losses()
+    if regularization_losses:
+      regularization_losses = ops.bfloat16_to_float32_nested(
+          regularization_losses)
+      regularization_loss = tf.add_n(
+          regularization_losses, name='regularization_loss')
+      losses.append(regularization_loss)
+      losses_dict['Loss/regularization_loss'] = regularization_loss
+
+  total_loss = tf.add_n(losses, name='total_loss')
+  losses_dict['Loss/total_loss'] = total_loss
+
+  return losses_dict, prediction_dict
+
+
+# TODO(kaftan): Explore removing learning_rate from this method & returning
+## the full losses dict instead of just total_loss, then doing all summaries
+## saving in a utility method called by the outer training loop.
+# TODO(kaftan): Explore adding gradient summaries
+def eager_train_step(detection_model,
+                     features,
+                     labels,
+                     unpad_groundtruth_tensors,
+                     optimizer,
+                     learning_rate,
+                     add_regularization_loss=True,
+                     clip_gradients_value=None,
+                     global_step=None,
+                     num_replicas=1.0):
+  """Process a single training batch.
+
+  This method computes the loss for the model on a single training batch,
+  while tracking the gradients with a gradient tape. It then updates the
+  model variables with the optimizer, clipping the gradients if
+  clip_gradients_value is present.
+
+  This method can run eagerly or inside a tf.function.
+
+  Args:
+    detection_model: A DetectionModel (based on Keras) to train.
+    features: Dictionary of feature tensors from the input dataset.
+      Should be in the format output by `inputs.train_input`.
+        features[fields.InputDataFields.image] is a [batch_size, H, W, C]
+          float32 tensor with preprocessed images.
+        features[HASH_KEY] is a [batch_size] int32 tensor representing unique
+          identifiers for the images.
+        features[fields.InputDataFields.true_image_shape] is a [batch_size, 3]
+          int32 tensor representing the true image shapes, as preprocessed
+          images could be padded.
+        features[fields.InputDataFields.original_image] (optional, not used
+          during training) is a
+          [batch_size, H, W, C] float32 tensor with original images.
+    labels: A dictionary of groundtruth tensors. This method unstacks
+      these labels using model_lib.unstack_batch. The stacked labels are of
+      the form returned by `inputs.train_input` and `inputs.eval_input`.
+        labels[fields.InputDataFields.num_groundtruth_boxes] is a [batch_size]
+          int32 tensor indicating the number of valid groundtruth boxes
+          per image.
+        labels[fields.InputDataFields.groundtruth_boxes] is a
+          [batch_size, num_boxes, 4] float32 tensor containing the corners of
+          the groundtruth boxes.
+        labels[fields.InputDataFields.groundtruth_classes] is a
+          [batch_size, num_boxes, num_classes] float32 one-hot tensor of
+          classes. num_classes includes the background class.
+        labels[fields.InputDataFields.groundtruth_weights] is a
+          [batch_size, num_boxes] float32 tensor containing groundtruth weights
+          for the boxes.
+        -- Optional --
+        labels[fields.InputDataFields.groundtruth_instance_masks] is a
+          [batch_size, num_boxes, H, W] float32 tensor containing only binary
+          values, which represent instance masks for objects.
+        labels[fields.InputDataFields.groundtruth_keypoints] is a
+          [batch_size, num_boxes, num_keypoints, 2] float32 tensor containing
+          keypoints for each box.
+    unpad_groundtruth_tensors: A parameter passed to unstack_batch.
+    optimizer: The training optimizer that will update the variables.
+    learning_rate: The learning rate tensor for the current training step.
+      This is used only for TensorBoard logging purposes; it does not affect
+      model training.
+    add_regularization_loss: Whether or not to include the model's
+      regularization loss in the losses dictionary.
+    clip_gradients_value: If this is present, clip the gradients global norm
+      at this value using `tf.clip_by_global_norm`.
+    global_step: The current training step. Used for TensorBoard logging
+      purposes. This step is not updated by this function and must be
+      incremented separately.
+    num_replicas: The number of replicas in the current distribution strategy.
+      This is used to scale the total loss so that training in a distribution
+      strategy works correctly.
+
+  Returns:
+    The total loss observed at this training step.
+  """
+  is_training = True
+
+  detection_model._is_training = is_training  # pylint: disable=protected-access
+  tf.keras.backend.set_learning_phase(is_training)
+
+  labels = model_lib.unstack_batch(
+      labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)
+
+  with tf.GradientTape() as tape:
+    losses_dict, _ = _compute_losses_and_predictions_dicts(
+        detection_model, features, labels, add_regularization_loss)
+
+    total_loss = losses_dict['Loss/total_loss']
+
+    # Normalize loss for num replicas
+    total_loss = tf.math.divide(total_loss,
+                                tf.constant(num_replicas, dtype=tf.float32))
+    losses_dict['Loss/normalized_total_loss'] = total_loss
+
+  for loss_type in losses_dict:
+    tf.compat.v2.summary.scalar(
+        loss_type, losses_dict[loss_type], step=global_step)
+
+  trainable_variables = detection_model.trainable_variables
+
+  gradients = tape.gradient(total_loss, trainable_variables)
+
+  if clip_gradients_value:
+    gradients, _ = tf.clip_by_global_norm(gradients, clip_gradients_value)
+  optimizer.apply_gradients(zip(gradients, trainable_variables))
+  tf.compat.v2.summary.scalar('learning_rate', learning_rate, step=global_step)
+
+  return total_loss
+
+
+def load_fine_tune_checkpoint(
+    model, checkpoint_path, checkpoint_type,
+    load_all_detection_checkpoint_vars, input_dataset,
+    unpad_groundtruth_tensors):
+  """Load a fine-tuning classification or detection checkpoint.
+ + To make sure the model variables are all built, this method first executes + the model by computing a dummy loss. (Models might not have built their + variables before their first execution) + + It then loads a variable-name based classification or detection checkpoint + that comes from converted TF 1.x slim model checkpoints. + + This method updates the model in-place and does not return a value. + + Args: + model: A DetectionModel (based on Keras) to load a fine-tuning + checkpoint for. + checkpoint_path: Directory with checkpoints file or path to checkpoint. + checkpoint_type: Whether to restore from a full detection + checkpoint (with compatible variable names) or to restore from a + classification checkpoint for initialization prior to training. + Valid values: `detection`, `classification`. + load_all_detection_checkpoint_vars: whether to load all variables (when + `fine_tune_checkpoint_type` is `detection`). If False, only variables + within the feature extractor scopes are included. Default False. + input_dataset: The tf.data Dataset the model is being trained on. Needed + to get the shapes for the dummy loss computation. + unpad_groundtruth_tensors: A parameter passed to unstack_batch. + """ + features, labels = iter(input_dataset).next() + + def _dummy_computation_fn(features, labels): + model._is_training = False # pylint: disable=protected-access + tf.keras.backend.set_learning_phase(False) + + labels = model_lib.unstack_batch( + labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors) + + return _compute_losses_and_predictions_dicts( + model, + features, + labels) + + strategy = tf.compat.v2.distribute.get_strategy() + strategy.experimental_run_v2( + _dummy_computation_fn, args=( + features, + labels, + )) + var_map = model.restore_map( + fine_tune_checkpoint_type=checkpoint_type, + load_all_detection_checkpoint_vars=( + load_all_detection_checkpoint_vars)) + available_var_map = variables_helper.get_variables_available_in_checkpoint( + var_map, + checkpoint_path, + include_global_step=False) + tf.train.init_from_checkpoint(checkpoint_path, + available_var_map) + + +def train_loop( + hparams, + pipeline_config_path, + model_dir, + config_override=None, + train_steps=None, + use_tpu=False, + save_final_config=False, + export_to_tpu=None, + checkpoint_every_n=1000, **kwargs): + """Trains a model using eager + functions. + + This method: + 1. Processes the pipeline configs + 2. (Optionally) saves the as-run config + 3. Builds the model & optimizer + 4. Gets the training input data + 5. Loads a fine-tuning detection or classification checkpoint if requested + 6. Loops over the train data, executing distributed training steps inside + tf.functions. + 7. Checkpoints the model every `checkpoint_every_n` training steps. + 8. Logs the training metrics as TensorBoard summaries. + + Args: + hparams: A `HParams`. + pipeline_config_path: A path to a pipeline config file. + model_dir: + The directory to save checkpoints and summaries to. + config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to + override the config from `pipeline_config_path`. + train_steps: Number of training steps. If None, the number of training steps + is set from the `TrainConfig` proto. + use_tpu: Boolean, whether training and evaluation should run on TPU. + save_final_config: Whether to save final config (obtained after applying + overrides) to `model_dir`. 
+ export_to_tpu: When use_tpu and export_to_tpu are true, + `export_savedmodel()` exports a metagraph for serving on TPU besides the + one on CPU. If export_to_tpu is not provided, we will look for it in + hparams too. + checkpoint_every_n: + Checkpoint every n training steps. + **kwargs: Additional keyword arguments for configuration override. + """ + ## Parse the configs + get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[ + 'get_configs_from_pipeline_file'] + merge_external_params_with_configs = MODEL_BUILD_UTIL_MAP[ + 'merge_external_params_with_configs'] + create_pipeline_proto_from_configs = MODEL_BUILD_UTIL_MAP[ + 'create_pipeline_proto_from_configs'] + + configs = get_configs_from_pipeline_file( + pipeline_config_path, config_override=config_override) + kwargs.update({ + 'train_steps': train_steps, + 'use_bfloat16': configs['train_config'].use_bfloat16 and use_tpu + }) + configs = merge_external_params_with_configs( + configs, hparams, kwargs_dict=kwargs) + model_config = configs['model'] + train_config = configs['train_config'] + train_input_config = configs['train_input_config'] + + unpad_groundtruth_tensors = train_config.unpad_groundtruth_tensors + add_regularization_loss = train_config.add_regularization_loss + clip_gradients_value = None + if train_config.gradient_clipping_by_norm > 0: + clip_gradients_value = train_config.gradient_clipping_by_norm + + # update train_steps from config but only when non-zero value is provided + if train_steps is None and train_config.num_steps != 0: + train_steps = train_config.num_steps + + # Read export_to_tpu from hparams if not passed. + if export_to_tpu is None: + export_to_tpu = hparams.get('export_to_tpu', False) + tf.logging.info( + 'train_loop: use_tpu %s, export_to_tpu %s', use_tpu, + export_to_tpu) + + if kwargs['use_bfloat16']: + tf.compat.v2.keras.mixed_precision.experimental.set_policy('mixed_bfloat16') + + # Parse the checkpoint fine tuning configs + if hparams.load_pretrained: + fine_tune_checkpoint_path = train_config.fine_tune_checkpoint + else: + fine_tune_checkpoint_path = None + load_all_detection_checkpoint_vars = ( + train_config.load_all_detection_checkpoint_vars) + # TODO(kaftan) (or anyone else): move this piece of config munging to + ## utils/config_util.py + if not train_config.fine_tune_checkpoint_type: + # train_config.from_detection_checkpoint field is deprecated. For + # backward compatibility, set train_config.fine_tune_checkpoint_type + # based on train_config.from_detection_checkpoint. + if train_config.from_detection_checkpoint: + train_config.fine_tune_checkpoint_type = 'detection' + else: + train_config.fine_tune_checkpoint_type = 'classification' + fine_tune_checkpoint_type = train_config.fine_tune_checkpoint_type + + # Write the as-run pipeline config to disk. + if save_final_config: + pipeline_config_final = create_pipeline_proto_from_configs(configs) + config_util.save_pipeline_config(pipeline_config_final, model_dir) + + # Build the model, optimizer, and training input + strategy = tf.compat.v2.distribute.get_strategy() + with strategy.scope(): + detection_model = model_builder.build( + model_config=model_config, is_training=True) + + # Create the inputs. 
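+    # `inputs.train_input` builds a tf.data pipeline that yields
+    # (features, labels) tuples in the formats documented in
+    # `eager_train_step`; repeating and distributing it below lets each
+    # replica draw its own per-replica batch inside `_dist_train_step`.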
+    train_input = inputs.train_input(
+        train_config=train_config,
+        train_input_config=train_input_config,
+        model_config=model_config,
+        model=detection_model)
+
+    train_input = strategy.experimental_distribute_dataset(
+        train_input.repeat())
+
+    global_step = tf.compat.v2.Variable(
+        0, trainable=False, dtype=tf.compat.v2.dtypes.int64, name='global_step')
+    optimizer, (learning_rate,) = optimizer_builder.build(
+        train_config.optimizer, global_step=global_step)
+
+    if callable(learning_rate):
+      learning_rate_fn = learning_rate
+    else:
+      learning_rate_fn = lambda: learning_rate
+
+  ## Train the model
+  summary_writer = tf.compat.v2.summary.create_file_writer(model_dir + '/train')
+  with summary_writer.as_default():
+    with strategy.scope():
+      # Load a fine-tuning checkpoint.
+      if fine_tune_checkpoint_path:
+        load_fine_tune_checkpoint(detection_model, fine_tune_checkpoint_path,
+                                  fine_tune_checkpoint_type,
+                                  load_all_detection_checkpoint_vars,
+                                  train_input,
+                                  unpad_groundtruth_tensors)
+
+      ckpt = tf.compat.v2.train.Checkpoint(
+          step=global_step, model=detection_model, optimizer=optimizer)
+      manager = tf.compat.v2.train.CheckpointManager(
+          ckpt, model_dir, max_to_keep=7)
+      ckpt.restore(manager.latest_checkpoint)
+
+      def train_step_fn(features, labels):
+        return eager_train_step(
+            detection_model,
+            features,
+            labels,
+            unpad_groundtruth_tensors,
+            optimizer,
+            learning_rate=learning_rate_fn(),
+            add_regularization_loss=add_regularization_loss,
+            clip_gradients_value=clip_gradients_value,
+            global_step=global_step,
+            num_replicas=strategy.num_replicas_in_sync)
+
+      @tf.function
+      def _dist_train_step(data_iterator):
+        """A distributed train step."""
+        features, labels = data_iterator.next()
+        per_replica_losses = strategy.experimental_run_v2(
+            train_step_fn, args=(
+                features,
+                labels,
+            ))
+        # TODO(anjalisridhar): explore if it is safe to remove the
+        ## num_replicas scaling of the loss and switch this to a ReduceOp.Mean
+        mean_loss = strategy.reduce(
+            tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None)
+        return mean_loss
+
+      train_input_iter = iter(train_input)
+      for _ in range(train_steps - global_step.value()):
+        start_time = time.time()
+
+        loss = _dist_train_step(train_input_iter)
+        global_step.assign_add(1)
+        end_time = time.time()
+
+        tf.compat.v2.summary.scalar(
+            'steps_per_sec', 1.0 / (end_time - start_time),
+            step=global_step)
+        if (int(global_step.value()) % 100) == 0:
+          tf.logging.info(
+              'Step {} time taken {:.3f}s loss={:.3f}'.format(
+                  global_step.value(), end_time - start_time, loss))
+
+        if int(global_step.value()) % checkpoint_every_n == 0:
+          manager.save()
+
+
+def eager_eval_loop(
+    detection_model,
+    configs,
+    eval_dataset,
+    use_tpu=False,
+    postprocess_on_cpu=False,
+    global_step=None):
+  """Evaluate the model eagerly on the evaluation dataset.
+
+  This method will compute the evaluation metrics specified in the configs on
+  the entire evaluation dataset, then return the metrics. It will also log
+  the metrics to TensorBoard.
+
+  Args:
+    detection_model: A DetectionModel (based on Keras) to evaluate.
+    configs: Object detection configs that specify the evaluators that should
+      be used, as well as whether regularization loss should be included and
+      if bfloat16 should be used on TPUs.
+    eval_dataset: Dataset containing evaluation data.
+    use_tpu: Whether a TPU is being used to execute the model for evaluation.
+    postprocess_on_cpu: Whether model postprocessing should happen on
+      the CPU when using a TPU to execute the model.
+    global_step: A variable containing the training step this model was trained
+      to. Used for logging purposes.
+
+  Returns:
+    A dict of evaluation metrics representing the results of this evaluation.
+  """
+  train_config = configs['train_config']
+  eval_input_config = configs['eval_input_config']
+  eval_config = configs['eval_config']
+  add_regularization_loss = train_config.add_regularization_loss
+
+  is_training = False
+  detection_model._is_training = is_training  # pylint: disable=protected-access
+  tf.keras.backend.set_learning_phase(is_training)
+
+  evaluator_options = eval_util.evaluator_options_from_eval_config(
+      eval_config)
+
+  class_agnostic_category_index = (
+      label_map_util.create_class_agnostic_category_index())
+  class_agnostic_evaluators = eval_util.get_evaluators(
+      eval_config,
+      list(class_agnostic_category_index.values()),
+      evaluator_options)
+
+  class_aware_evaluators = None
+  if eval_input_config.label_map_path:
+    class_aware_category_index = (
+        label_map_util.create_category_index_from_labelmap(
+            eval_input_config.label_map_path))
+    class_aware_evaluators = eval_util.get_evaluators(
+        eval_config,
+        list(class_aware_category_index.values()),
+        evaluator_options)
+
+  evaluators = None
+  loss_metrics = {}
+
+  @tf.function
+  def compute_eval_dict(features, labels):
+    """Compute the evaluation result on an image."""
+    # For evaluating on train data, it is necessary to check whether
+    # groundtruth must be unpadded.
+    boxes_shape = (
+        labels[fields.InputDataFields.groundtruth_boxes].get_shape().as_list())
+    unpad_groundtruth_tensors = boxes_shape[1] is not None and not use_tpu
+    labels = model_lib.unstack_batch(
+        labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)
+
+    losses_dict, prediction_dict = _compute_losses_and_predictions_dicts(
+        detection_model, features, labels, add_regularization_loss)
+
+    def postprocess_wrapper(args):
+      return detection_model.postprocess(args[0], args[1])
+
+    # TODO(kaftan): Depending on how postprocessing will work for TPUs w/
+    ## TPUStrategy, may be good to move wrapping to a utility method
+    if use_tpu and postprocess_on_cpu:
+      detections = tf.contrib.tpu.outside_compilation(
+          postprocess_wrapper,
+          (prediction_dict, features[fields.InputDataFields.true_image_shape]))
+    else:
+      detections = postprocess_wrapper(
+          (prediction_dict, features[fields.InputDataFields.true_image_shape]))
+
+    class_agnostic = (
+        fields.DetectionResultFields.detection_classes not in detections)
+    # TODO(kaftan) (or anyone): move `_prepare_groundtruth_for_eval` to
+    ## eval_util and call this from there.
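+    # `_prepare_groundtruth_for_eval` pulls the groundtruth that was fed to
+    # the model through `model_lib.provide_groundtruth` (called inside
+    # `_compute_losses_and_predictions_dicts` above) and converts it into the
+    # dictionary format that `eval_util.result_dict_for_batched_example`
+    # expects.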
+    groundtruth = model_lib._prepare_groundtruth_for_eval(  # pylint: disable=protected-access
+        detection_model, class_agnostic, eval_input_config.max_number_of_boxes)
+    use_original_images = fields.InputDataFields.original_image in features
+    if use_original_images:
+      eval_images = features[fields.InputDataFields.original_image]
+      true_image_shapes = tf.slice(
+          features[fields.InputDataFields.true_image_shape], [0, 0], [-1, 3])
+      original_image_spatial_shapes = features[
+          fields.InputDataFields.original_image_spatial_shape]
+    else:
+      eval_images = features[fields.InputDataFields.image]
+      true_image_shapes = None
+      original_image_spatial_shapes = None
+
+    eval_dict = eval_util.result_dict_for_batched_example(
+        eval_images,
+        features[inputs.HASH_KEY],
+        detections,
+        groundtruth,
+        class_agnostic=class_agnostic,
+        scale_to_absolute=True,
+        original_image_spatial_shapes=original_image_spatial_shapes,
+        true_image_shapes=true_image_shapes)
+
+    return eval_dict, losses_dict, class_agnostic
+
+  for i, (features, labels) in enumerate(eval_dataset):
+    eval_dict, losses_dict, class_agnostic = compute_eval_dict(features, labels)
+
+    if i % 100 == 0:
+      tf.logging.info('Finished eval step %d', i)
+
+    if evaluators is None:
+      if class_agnostic:
+        evaluators = class_agnostic_evaluators
+      else:
+        evaluators = class_aware_evaluators
+
+    for evaluator in evaluators:
+      evaluator.add_eval_dict(eval_dict)
+
+    for loss_key, loss_tensor in iter(losses_dict.items()):
+      if loss_key not in loss_metrics:
+        loss_metrics[loss_key] = tf.keras.metrics.Mean()
+      loss_metrics[loss_key].update_state(loss_tensor)
+
+  eval_metrics = {}
+
+  for evaluator in evaluators:
+    eval_metrics.update(evaluator.evaluate())
+  for loss_key in loss_metrics:
+    eval_metrics[loss_key] = loss_metrics[loss_key].result()
+
+  eval_metrics = {str(k): v for k, v in eval_metrics.items()}
+  for k in eval_metrics:
+    tf.compat.v2.summary.scalar(k, eval_metrics[k], step=global_step)
+
+  return eval_metrics
+
+
+def eval_continuously(
+    hparams,
+    pipeline_config_path,
+    config_override=None,
+    train_steps=None,
+    sample_1_of_n_eval_examples=1,
+    sample_1_of_n_eval_on_train_examples=1,
+    use_tpu=False,
+    override_eval_num_epochs=True,
+    postprocess_on_cpu=False,
+    export_to_tpu=None,
+    model_dir=None,
+    checkpoint_dir=None,
+    wait_interval=180,
+    **kwargs):
+  """Run continuous evaluation of a detection model eagerly.
+
+  This method builds the model, and continuously restores it from the most
+  recent training checkpoint in the checkpoint directory and evaluates it
+  on the evaluation data.
+
+  Args:
+    hparams: A `HParams`.
+    pipeline_config_path: A path to a pipeline config file.
+    config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
+      override the config from `pipeline_config_path`.
+    train_steps: Number of training steps. If None, the number of training
+      steps is set from the `TrainConfig` proto.
+    sample_1_of_n_eval_examples: Integer representing how often an eval example
+      should be sampled. If 1, will sample all examples.
+    sample_1_of_n_eval_on_train_examples: Similar to
+      `sample_1_of_n_eval_examples`, except controls the sampling of training
+      data for evaluation.
+    use_tpu: Boolean, whether training and evaluation should run on TPU.
+    override_eval_num_epochs: Whether to overwrite the number of epochs to 1
+      for eval_input.
+    postprocess_on_cpu: When use_tpu and postprocess_on_cpu are true,
+      postprocessing is scheduled on the host CPU.
+    export_to_tpu: When use_tpu and export_to_tpu are true,
+      `export_savedmodel()` exports a metagraph for serving on TPU besides the
+      one on CPU. If export_to_tpu is not provided, we will look for it in
+      hparams too.
+    model_dir:
+      Directory to output resulting evaluation summaries to.
+    checkpoint_dir:
+      Directory that contains the training checkpoints.
+    wait_interval:
+      Terminate evaluation if no new checkpoints arrive within this wait
+      interval (in seconds).
+    **kwargs: Additional keyword arguments for configuration override.
+  """
+  get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[
+      'get_configs_from_pipeline_file']
+  merge_external_params_with_configs = MODEL_BUILD_UTIL_MAP[
+      'merge_external_params_with_configs']
+
+  configs = get_configs_from_pipeline_file(
+      pipeline_config_path, config_override=config_override)
+  kwargs.update({
+      'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples,
+      'use_bfloat16': configs['train_config'].use_bfloat16 and use_tpu
+  })
+  if train_steps is not None:
+    kwargs['train_steps'] = train_steps
+  if override_eval_num_epochs:
+    kwargs.update({'eval_num_epochs': 1})
+    tf.logging.warning(
+        'Forced number of epochs for all eval validations to be 1.')
+  configs = merge_external_params_with_configs(
+      configs, hparams, kwargs_dict=kwargs)
+  model_config = configs['model']
+  train_input_config = configs['train_input_config']
+  eval_config = configs['eval_config']
+  eval_input_configs = configs['eval_input_configs']
+  eval_on_train_input_config = copy.deepcopy(train_input_config)
+  eval_on_train_input_config.sample_1_of_n_examples = (
+      sample_1_of_n_eval_on_train_examples)
+  if override_eval_num_epochs and eval_on_train_input_config.num_epochs != 1:
+    tf.logging.warning('Expected number of evaluation epochs is 1, but '
+                       'instead encountered `eval_on_train_input_config'
+                       '.num_epochs` = '
+                       '{}. Overwriting `num_epochs` to 1.'.format(
+                           eval_on_train_input_config.num_epochs))
+    eval_on_train_input_config.num_epochs = 1
+
+  if kwargs['use_bfloat16']:
+    tf.compat.v2.keras.mixed_precision.experimental.set_policy('mixed_bfloat16')
+
+  detection_model = model_builder.build(
+      model_config=model_config, is_training=True)
+
+  # Create the inputs.
+  eval_inputs = []
+  for eval_input_config in eval_input_configs:
+    next_eval_input = inputs.eval_input(
+        eval_config=eval_config,
+        eval_input_config=eval_input_config,
+        model_config=model_config,
+        model=detection_model)
+    eval_inputs.append((eval_input_config.name, next_eval_input))
+
+  # Read export_to_tpu from hparams if not passed.
+  if export_to_tpu is None:
+    export_to_tpu = hparams.get('export_to_tpu', False)
+  tf.logging.info('eval_continuously: use_tpu %s, export_to_tpu %s',
+                  use_tpu, export_to_tpu)
+
+  global_step = tf.compat.v2.Variable(
+      0, trainable=False, dtype=tf.compat.v2.dtypes.int64)
+
+  prev_checkpoint = None
+  waiting = False
+  while True:
+    ckpt = tf.compat.v2.train.Checkpoint(
+        step=global_step, model=detection_model)
+    manager = tf.compat.v2.train.CheckpointManager(
+        ckpt, checkpoint_dir, max_to_keep=3)
+
+    latest_checkpoint = manager.latest_checkpoint
+    if prev_checkpoint == latest_checkpoint:
+      if prev_checkpoint is None:
+        tf.logging.info('No checkpoints found yet. Trying again in %s seconds.'
+                        % wait_interval)
+        time.sleep(wait_interval)
+      else:
+        if waiting:
+          tf.logging.info('Terminating eval after %s seconds of no new '
+                          'checkpoints.' % wait_interval)
+          break
+        else:
+          tf.logging.info('No new checkpoint found. 
Will try again ' + 'in %s seconds and terminate if no checkpoint ' + 'appears.' % wait_interval) + waiting = True + time.sleep(wait_interval) + else: + tf.logging.info('New checkpoint found. Starting evaluation.') + waiting = False + prev_checkpoint = latest_checkpoint + ckpt.restore(latest_checkpoint) + + for eval_name, eval_input in eval_inputs: + summary_writer = tf.compat.v2.summary.create_file_writer( + model_dir + '/eval' + eval_name) + with summary_writer.as_default(): + eager_eval_loop( + detection_model, + configs, + eval_input, + use_tpu=use_tpu, + postprocess_on_cpu=postprocess_on_cpu, + global_step=global_step) diff --git a/model_lib_v2_test.py b/model_lib_v2_test.py new file mode 100644 index 0000000..af650a3 --- /dev/null +++ b/model_lib_v2_test.py @@ -0,0 +1,104 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for object detection model library.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +import tensorflow as tf + +from object_detection import model_hparams +from object_detection import model_lib_v2 +from object_detection.utils import config_util + + +# Model for test. 
Current options are:
+# 'ssd_mobilenet_v2_pets_keras'
+MODEL_NAME_FOR_TEST = 'ssd_mobilenet_v2_pets_keras'
+
+
+def _get_data_path():
+  """Returns an absolute path to TFRecord file."""
+  return os.path.join(tf.resource_loader.get_data_files_path(), 'test_data',
+                      'pets_examples.record')
+
+
+def get_pipeline_config_path(model_name):
+  """Returns path to the local pipeline config file."""
+  return os.path.join(tf.resource_loader.get_data_files_path(), 'samples',
+                      'configs', model_name + '.config')
+
+
+def _get_labelmap_path():
+  """Returns an absolute path to label map file."""
+  return os.path.join(tf.resource_loader.get_data_files_path(), 'data',
+                      'pet_label_map.pbtxt')
+
+
+def _get_config_kwarg_overrides():
+  """Returns overrides to the configs that insert the correct local paths."""
+  data_path = _get_data_path()
+  label_map_path = _get_labelmap_path()
+  return {
+      'train_input_path': data_path,
+      'eval_input_path': data_path,
+      'label_map_path': label_map_path
+  }
+
+
+def _get_configs_for_model(model_name):
+  """Returns configurations for model."""
+  filename = get_pipeline_config_path(model_name)
+  configs = config_util.get_configs_from_pipeline_file(filename)
+  configs = config_util.merge_external_params_with_configs(
+      configs, kwargs_dict=_get_config_kwarg_overrides())
+  return configs
+
+
+class ModelLibTest(tf.test.TestCase):
+
+  @classmethod
+  def setUpClass(cls):
+    tf.keras.backend.clear_session()
+
+  def test_train_loop_then_eval_loop(self):
+    """Tests that a short training loop runs and its checkpoints evaluate."""
+    hparams = model_hparams.create_hparams(
+        hparams_overrides='load_pretrained=false')
+    pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
+    config_kwarg_overrides = _get_config_kwarg_overrides()
+    model_dir = tf.test.get_temp_dir()
+
+    train_steps = 2
+    model_lib_v2.train_loop(
+        hparams,
+        pipeline_config_path,
+        model_dir=model_dir,
+        train_steps=train_steps,
+        checkpoint_every_n=1,
+        **config_kwarg_overrides)
+
+    model_lib_v2.eval_continuously(
+        hparams,
+        pipeline_config_path,
+        model_dir=model_dir,
+        checkpoint_dir=model_dir,
+        train_steps=train_steps,
+        wait_interval=10,
+        **config_kwarg_overrides)
+
diff --git a/model_main.py b/model_main.py
new file mode 100644
index 0000000..7ab47f2
--- /dev/null
+++ b/model_main.py
@@ -0,0 +1,111 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Binary to run train and evaluation on object detection model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl import flags
+
+import tensorflow as tf
+
+from object_detection import model_hparams
+from object_detection import model_lib
+
+tf.logging.set_verbosity(tf.logging.INFO)
+
+flags.DEFINE_string(
+    'model_dir', None, 'Path to output model directory '
+    'where event and checkpoint files will be written.')
+flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
+                    'file.')
+flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
+flags.DEFINE_boolean('eval_training_data', False,
+                     'If training data should be evaluated for this job. Note '
+                     'that one can only use this in eval-only mode, and '
+                     '`checkpoint_dir` must be supplied.')
+flags.DEFINE_integer('sample_1_of_n_eval_examples', 1, 'Will sample one of '
+                     'every n eval input examples, where n is provided.')
+flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
+                     'one of every n train input examples for evaluation, '
+                     'where n is provided. This is only used if '
+                     '`eval_training_data` is True.')
+flags.DEFINE_string(
+    'hparams_overrides', None, 'Hyperparameter overrides, '
+    'represented as a string containing comma-separated '
+    'hparam_name=value pairs.')
+flags.DEFINE_string(
+    'checkpoint_dir', None, 'Path to directory holding a checkpoint. If '
+    '`checkpoint_dir` is provided, this binary operates in eval-only mode, '
+    'writing resulting metrics to `model_dir`.')
+flags.DEFINE_boolean(
+    'run_once', False, 'If running in eval-only mode, whether to run just '
+    'one round of eval vs running continuously (default).'
+)
+FLAGS = flags.FLAGS
+
+
+def main(unused_argv):
+  flags.mark_flag_as_required('model_dir')
+  flags.mark_flag_as_required('pipeline_config_path')
+  config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)
+
+  train_and_eval_dict = model_lib.create_estimator_and_inputs(
+      run_config=config,
+      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
+      pipeline_config_path=FLAGS.pipeline_config_path,
+      train_steps=FLAGS.num_train_steps,
+      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
+      sample_1_of_n_eval_on_train_examples=(
+          FLAGS.sample_1_of_n_eval_on_train_examples))
+  estimator = train_and_eval_dict['estimator']
+  train_input_fn = train_and_eval_dict['train_input_fn']
+  eval_input_fns = train_and_eval_dict['eval_input_fns']
+  eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
+  predict_input_fn = train_and_eval_dict['predict_input_fn']
+  train_steps = train_and_eval_dict['train_steps']
+
+  if FLAGS.checkpoint_dir:
+    if FLAGS.eval_training_data:
+      name = 'training_data'
+      input_fn = eval_on_train_input_fn
+    else:
+      name = 'validation_data'
+      # The first eval input will be evaluated.
+      input_fn = eval_input_fns[0]
+    if FLAGS.run_once:
+      estimator.evaluate(input_fn,
+                         steps=None,
+                         checkpoint_path=tf.train.latest_checkpoint(
+                             FLAGS.checkpoint_dir))
+    else:
+      model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
+                                train_steps, name)
+  else:
+    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
+        train_input_fn,
+        eval_input_fns,
+        eval_on_train_input_fn,
+        predict_input_fn,
+        train_steps,
+        eval_on_train_data=False)
+
+    # Currently only a single Eval Spec is allowed.
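+    # `tf.estimator.train_and_evaluate` trains with `train_spec` up to
+    # `train_spec.max_steps` and periodically runs the given eval spec
+    # whenever a new checkpoint is written.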
+ tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0]) + + +if __name__ == '__main__': + tf.app.run() diff --git a/models/faster_rcnn_inception_resnet_v2_feature_extractor.py b/models/faster_rcnn_inception_resnet_v2_feature_extractor.py new file mode 100644 index 0000000..7984735 --- /dev/null +++ b/models/faster_rcnn_inception_resnet_v2_feature_extractor.py @@ -0,0 +1,214 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Inception Resnet v2 Faster R-CNN implementation. + +See "Inception-v4, Inception-ResNet and the Impact of Residual Connections on +Learning" by Szegedy et al. (https://arxiv.org/abs/1602.07261) +as well as +"Speed/accuracy trade-offs for modern convolutional object detectors" by +Huang et al. (https://arxiv.org/abs/1611.10012) +""" + +import tensorflow as tf +from tensorflow.contrib import slim as contrib_slim + +from object_detection.meta_architectures import faster_rcnn_meta_arch +from object_detection.utils import variables_helper +from nets import inception_resnet_v2 + +slim = contrib_slim + + +class FasterRCNNInceptionResnetV2FeatureExtractor( + faster_rcnn_meta_arch.FasterRCNNFeatureExtractor): + """Faster R-CNN with Inception Resnet v2 feature extractor implementation.""" + + def __init__(self, + is_training, + first_stage_features_stride, + batch_norm_trainable=False, + reuse_weights=None, + weight_decay=0.0): + """Constructor. + + Args: + is_training: See base class. + first_stage_features_stride: See base class. + batch_norm_trainable: See base class. + reuse_weights: See base class. + weight_decay: See base class. + + Raises: + ValueError: If `first_stage_features_stride` is not 8 or 16. + """ + if first_stage_features_stride != 8 and first_stage_features_stride != 16: + raise ValueError('`first_stage_features_stride` must be 8 or 16.') + super(FasterRCNNInceptionResnetV2FeatureExtractor, self).__init__( + is_training, first_stage_features_stride, batch_norm_trainable, + reuse_weights, weight_decay) + + def preprocess(self, resized_inputs): + """Faster R-CNN with Inception Resnet v2 preprocessing. + + Maps pixel values to the range [-1, 1]. + + Args: + resized_inputs: A [batch, height_in, width_in, channels] float32 tensor + representing a batch of images with values between 0 and 255.0. + + Returns: + preprocessed_inputs: A [batch, height_out, width_out, channels] float32 + tensor representing a batch of images. + + """ + return (2.0 / 255.0) * resized_inputs - 1.0 + + def _extract_proposal_features(self, preprocessed_inputs, scope): + """Extracts first stage RPN features. + + Extracts features using the first half of the Inception Resnet v2 network. + We construct the network in `align_feature_maps=True` mode, which means + that all VALID paddings in the network are changed to SAME padding so that + the feature maps are aligned. 
+ + Args: + preprocessed_inputs: A [batch, height, width, channels] float32 tensor + representing a batch of images. + scope: A scope name. + + Returns: + rpn_feature_map: A tensor with shape [batch, height, width, depth] + Raises: + InvalidArgumentError: If the spatial size of `preprocessed_inputs` + (height or width) is less than 33. + ValueError: If the created network is missing the required activation. + """ + if len(preprocessed_inputs.get_shape().as_list()) != 4: + raise ValueError('`preprocessed_inputs` must be 4 dimensional, got a ' + 'tensor of shape %s' % preprocessed_inputs.get_shape()) + + with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope( + weight_decay=self._weight_decay)): + # Forces is_training to False to disable batch norm update. + with slim.arg_scope([slim.batch_norm], + is_training=self._train_batch_norm): + with tf.variable_scope('InceptionResnetV2', + reuse=self._reuse_weights) as scope: + return inception_resnet_v2.inception_resnet_v2_base( + preprocessed_inputs, final_endpoint='PreAuxLogits', + scope=scope, output_stride=self._first_stage_features_stride, + align_feature_maps=True) + + def _extract_box_classifier_features(self, proposal_feature_maps, scope): + """Extracts second stage box classifier features. + + This function reconstructs the "second half" of the Inception ResNet v2 + network after the part defined in `_extract_proposal_features`. + + Args: + proposal_feature_maps: A 4-D float tensor with shape + [batch_size * self.max_num_proposals, crop_height, crop_width, depth] + representing the feature map cropped to each proposal. + scope: A scope name. + + Returns: + proposal_classifier_features: A 4-D float tensor with shape + [batch_size * self.max_num_proposals, height, width, depth] + representing box classifier features for each proposal. + """ + with tf.variable_scope('InceptionResnetV2', reuse=self._reuse_weights): + with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope( + weight_decay=self._weight_decay)): + # Forces is_training to False to disable batch norm update. 
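+        # (`self._train_batch_norm` is derived by the base feature extractor
+        # from `is_training` and `batch_norm_trainable`, so batch norm runs
+        # in inference mode here unless both were set at construction time.)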
+ with slim.arg_scope([slim.batch_norm], + is_training=self._train_batch_norm): + with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], + stride=1, padding='SAME'): + with tf.variable_scope('Mixed_7a'): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(proposal_feature_maps, + 256, 1, scope='Conv2d_0a_1x1') + tower_conv_1 = slim.conv2d( + tower_conv, 384, 3, stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + tower_conv1 = slim.conv2d( + proposal_feature_maps, 256, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d( + tower_conv1, 288, 3, stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_2'): + tower_conv2 = slim.conv2d( + proposal_feature_maps, 256, 1, scope='Conv2d_0a_1x1') + tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3, + scope='Conv2d_0b_3x3') + tower_conv2_2 = slim.conv2d( + tower_conv2_1, 320, 3, stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_3'): + tower_pool = slim.max_pool2d( + proposal_feature_maps, 3, stride=2, padding='VALID', + scope='MaxPool_1a_3x3') + net = tf.concat( + [tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3) + net = slim.repeat(net, 9, inception_resnet_v2.block8, scale=0.20) + net = inception_resnet_v2.block8(net, activation_fn=None) + proposal_classifier_features = slim.conv2d( + net, 1536, 1, scope='Conv2d_7b_1x1') + return proposal_classifier_features + + def restore_from_classification_checkpoint_fn( + self, + first_stage_feature_extractor_scope, + second_stage_feature_extractor_scope): + """Returns a map of variables to load from a foreign checkpoint. + + Note that this overrides the default implementation in + faster_rcnn_meta_arch.FasterRCNNFeatureExtractor which does not work for + InceptionResnetV2 checkpoints. + + TODO(jonathanhuang,rathodv): revisit whether it's possible to force the + `Repeat` namescope as created in `_extract_box_classifier_features` to + start counting at 2 (e.g. `Repeat_2`) so that the default restore_fn can + be used. + + Args: + first_stage_feature_extractor_scope: A scope name for the first stage + feature extractor. + second_stage_feature_extractor_scope: A scope name for the second stage + feature extractor. + + Returns: + A dict mapping variable names (to load from a checkpoint) to variables in + the model graph. + """ + + variables_to_restore = {} + for variable in variables_helper.get_global_variables_safely(): + if variable.op.name.startswith( + first_stage_feature_extractor_scope): + var_name = variable.op.name.replace( + first_stage_feature_extractor_scope + '/', '') + variables_to_restore[var_name] = variable + if variable.op.name.startswith( + second_stage_feature_extractor_scope): + var_name = variable.op.name.replace( + second_stage_feature_extractor_scope + + '/InceptionResnetV2/Repeat', 'InceptionResnetV2/Repeat_2') + var_name = var_name.replace( + second_stage_feature_extractor_scope + '/', '') + variables_to_restore[var_name] = variable + return variables_to_restore diff --git a/models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py b/models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py new file mode 100644 index 0000000..1d9f088 --- /dev/null +++ b/models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py @@ -0,0 +1,109 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for models.faster_rcnn_inception_resnet_v2_feature_extractor.""" + +import tensorflow as tf + +from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res + + +class FasterRcnnInceptionResnetV2FeatureExtractorTest(tf.test.TestCase): + + def _build_feature_extractor(self, first_stage_features_stride): + return frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor( + is_training=False, + first_stage_features_stride=first_stage_features_stride, + batch_norm_trainable=False, + reuse_weights=None, + weight_decay=0.0) + + def test_extract_proposal_features_returns_expected_size(self): + feature_extractor = self._build_feature_extractor( + first_stage_features_stride=16) + preprocessed_inputs = tf.random_uniform( + [1, 299, 299, 3], maxval=255, dtype=tf.float32) + rpn_feature_map, _ = feature_extractor.extract_proposal_features( + preprocessed_inputs, scope='TestScope') + features_shape = tf.shape(rpn_feature_map) + + init_op = tf.global_variables_initializer() + with self.test_session() as sess: + sess.run(init_op) + features_shape_out = sess.run(features_shape) + self.assertAllEqual(features_shape_out, [1, 19, 19, 1088]) + + def test_extract_proposal_features_stride_eight(self): + feature_extractor = self._build_feature_extractor( + first_stage_features_stride=8) + preprocessed_inputs = tf.random_uniform( + [1, 224, 224, 3], maxval=255, dtype=tf.float32) + rpn_feature_map, _ = feature_extractor.extract_proposal_features( + preprocessed_inputs, scope='TestScope') + features_shape = tf.shape(rpn_feature_map) + + init_op = tf.global_variables_initializer() + with self.test_session() as sess: + sess.run(init_op) + features_shape_out = sess.run(features_shape) + self.assertAllEqual(features_shape_out, [1, 28, 28, 1088]) + + def test_extract_proposal_features_half_size_input(self): + feature_extractor = self._build_feature_extractor( + first_stage_features_stride=16) + preprocessed_inputs = tf.random_uniform( + [1, 112, 112, 3], maxval=255, dtype=tf.float32) + rpn_feature_map, _ = feature_extractor.extract_proposal_features( + preprocessed_inputs, scope='TestScope') + features_shape = tf.shape(rpn_feature_map) + + init_op = tf.global_variables_initializer() + with self.test_session() as sess: + sess.run(init_op) + features_shape_out = sess.run(features_shape) + self.assertAllEqual(features_shape_out, [1, 7, 7, 1088]) + + def test_extract_proposal_features_dies_on_invalid_stride(self): + with self.assertRaises(ValueError): + self._build_feature_extractor(first_stage_features_stride=99) + + def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self): + feature_extractor = self._build_feature_extractor( + first_stage_features_stride=16) + preprocessed_inputs = tf.random_uniform( + [224, 224, 3], maxval=255, dtype=tf.float32) + with self.assertRaises(ValueError): + feature_extractor.extract_proposal_features( + preprocessed_inputs, scope='TestScope') + + def test_extract_box_classifier_features_returns_expected_size(self): + feature_extractor = 
self._build_feature_extractor( + first_stage_features_stride=16) + proposal_feature_maps = tf.random_uniform( + [2, 17, 17, 1088], maxval=255, dtype=tf.float32) + proposal_classifier_features = ( + feature_extractor.extract_box_classifier_features( + proposal_feature_maps, scope='TestScope')) + features_shape = tf.shape(proposal_classifier_features) + + init_op = tf.global_variables_initializer() + with self.test_session() as sess: + sess.run(init_op) + features_shape_out = sess.run(features_shape) + self.assertAllEqual(features_shape_out, [2, 8, 8, 1536]) + + +if __name__ == '__main__': + tf.test.main() diff --git a/models/faster_rcnn_inception_resnet_v2_keras_feature_extractor_test.py b/models/faster_rcnn_inception_resnet_v2_keras_feature_extractor_test.py new file mode 100644 index 0000000..f4df2a0 --- /dev/null +++ b/models/faster_rcnn_inception_resnet_v2_keras_feature_extractor_test.py @@ -0,0 +1,109 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for models.faster_rcnn_inception_resnet_v2_keras_feature_extractor.""" + +import tensorflow as tf + +from object_detection.models import faster_rcnn_inception_resnet_v2_keras_feature_extractor as frcnn_inc_res + + +class FasterRcnnInceptionResnetV2KerasFeatureExtractorTest(tf.test.TestCase): + + def _build_feature_extractor(self, first_stage_features_stride): + return frcnn_inc_res.FasterRCNNInceptionResnetV2KerasFeatureExtractor( + is_training=False, + first_stage_features_stride=first_stage_features_stride, + batch_norm_trainable=False, + weight_decay=0.0) + + def test_extract_proposal_features_returns_expected_size(self): + feature_extractor = self._build_feature_extractor( + first_stage_features_stride=16) + preprocessed_inputs = tf.random_uniform( + [1, 299, 299, 3], maxval=255, dtype=tf.float32) + rpn_feature_map = feature_extractor.get_proposal_feature_extractor_model( + name='TestScope')(preprocessed_inputs) + features_shape = tf.shape(rpn_feature_map) + + init_op = tf.global_variables_initializer() + with self.test_session() as sess: + sess.run(init_op) + features_shape_out = sess.run(features_shape) + self.assertAllEqual(features_shape_out, [1, 19, 19, 1088]) + + def test_extract_proposal_features_stride_eight(self): + feature_extractor = self._build_feature_extractor( + first_stage_features_stride=8) + preprocessed_inputs = tf.random_uniform( + [1, 224, 224, 3], maxval=255, dtype=tf.float32) + rpn_feature_map = feature_extractor.get_proposal_feature_extractor_model( + name='TestScope')(preprocessed_inputs) + features_shape = tf.shape(rpn_feature_map) + + init_op = tf.global_variables_initializer() + with self.test_session() as sess: + sess.run(init_op) + features_shape_out = sess.run(features_shape) + self.assertAllEqual(features_shape_out, [1, 28, 28, 1088]) + + def test_extract_proposal_features_half_size_input(self): + feature_extractor = self._build_feature_extractor( + 
first_stage_features_stride=16) + preprocessed_inputs = tf.random_uniform( + [1, 112, 112, 3], maxval=255, dtype=tf.float32) + rpn_feature_map = feature_extractor.get_proposal_feature_extractor_model( + name='TestScope')(preprocessed_inputs) + features_shape = tf.shape(rpn_feature_map) + + init_op = tf.global_variables_initializer() + with self.test_session() as sess: + sess.run(init_op) + features_shape_out = sess.run(features_shape) + self.assertAllEqual(features_shape_out, [1, 7, 7, 1088]) + + def test_extract_proposal_features_dies_on_invalid_stride(self): + with self.assertRaises(ValueError): + self._build_feature_extractor(first_stage_features_stride=99) + + def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self): + feature_extractor = self._build_feature_extractor( + first_stage_features_stride=16) + preprocessed_inputs = tf.random_uniform( + [224, 224, 3], maxval=255, dtype=tf.float32) + with self.assertRaises(ValueError): + feature_extractor.get_proposal_feature_extractor_model( + name='TestScope')(preprocessed_inputs) + + def test_extract_box_classifier_features_returns_expected_size(self): + feature_extractor = self._build_feature_extractor( + first_stage_features_stride=16) + proposal_feature_maps = tf.random_uniform( + [2, 17, 17, 1088], maxval=255, dtype=tf.float32) + model = feature_extractor.get_box_classifier_feature_extractor_model( + name='TestScope') + proposal_classifier_features = ( + model(proposal_feature_maps)) + features_shape = tf.shape(proposal_classifier_features) + + init_op = tf.global_variables_initializer() + with self.test_session() as sess: + sess.run(init_op) + features_shape_out = sess.run(features_shape) + self.assertAllEqual(features_shape_out, [2, 8, 8, 1536]) + + +if __name__ == '__main__': + tf.test.main() diff --git a/models/faster_rcnn_inception_v2_feature_extractor.py b/models/faster_rcnn_inception_v2_feature_extractor.py new file mode 100644 index 0000000..3152c7a --- /dev/null +++ b/models/faster_rcnn_inception_v2_feature_extractor.py @@ -0,0 +1,255 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Inception V2 Faster R-CNN implementation. 
+ +See "Rethinking the Inception Architecture for Computer Vision" +https://arxiv.org/abs/1512.00567 +""" +import tensorflow as tf +from tensorflow.contrib import slim as contrib_slim + +from object_detection.meta_architectures import faster_rcnn_meta_arch +from nets import inception_v2 + +slim = contrib_slim + + +def _batch_norm_arg_scope(list_ops, + use_batch_norm=True, + batch_norm_decay=0.9997, + batch_norm_epsilon=0.001, + batch_norm_scale=False, + train_batch_norm=False): + """Slim arg scope for InceptionV2 batch norm.""" + if use_batch_norm: + batch_norm_params = { + 'is_training': train_batch_norm, + 'scale': batch_norm_scale, + 'decay': batch_norm_decay, + 'epsilon': batch_norm_epsilon + } + normalizer_fn = slim.batch_norm + else: + normalizer_fn = None + batch_norm_params = None + + return slim.arg_scope(list_ops, + normalizer_fn=normalizer_fn, + normalizer_params=batch_norm_params) + + +class FasterRCNNInceptionV2FeatureExtractor( + faster_rcnn_meta_arch.FasterRCNNFeatureExtractor): + """Faster R-CNN Inception V2 feature extractor implementation.""" + + def __init__(self, + is_training, + first_stage_features_stride, + batch_norm_trainable=False, + reuse_weights=None, + weight_decay=0.0, + depth_multiplier=1.0, + min_depth=16): + """Constructor. + + Args: + is_training: See base class. + first_stage_features_stride: See base class. + batch_norm_trainable: See base class. + reuse_weights: See base class. + weight_decay: See base class. + depth_multiplier: float depth multiplier for feature extractor. + min_depth: minimum feature extractor depth. + + Raises: + ValueError: If `first_stage_features_stride` is not 8 or 16. + """ + if first_stage_features_stride != 8 and first_stage_features_stride != 16: + raise ValueError('`first_stage_features_stride` must be 8 or 16.') + self._depth_multiplier = depth_multiplier + self._min_depth = min_depth + super(FasterRCNNInceptionV2FeatureExtractor, self).__init__( + is_training, first_stage_features_stride, batch_norm_trainable, + reuse_weights, weight_decay) + + def preprocess(self, resized_inputs): + """Faster R-CNN Inception V2 preprocessing. + + Maps pixel values to the range [-1, 1]. + + Args: + resized_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + + Returns: + preprocessed_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + """ + return (2.0 / 255.0) * resized_inputs - 1.0 + + def _extract_proposal_features(self, preprocessed_inputs, scope): + """Extracts first stage RPN features. + + Args: + preprocessed_inputs: A [batch, height, width, channels] float32 tensor + representing a batch of images. + scope: A scope name. + + Returns: + rpn_feature_map: A tensor with shape [batch, height, width, depth] + activations: A dictionary mapping feature extractor tensor names to + tensors + + Raises: + InvalidArgumentError: If the spatial size of `preprocessed_inputs` + (height or width) is less than 33. + ValueError: If the created network is missing the required activation. 
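+
+    As a shape illustration (inferred from the endpoint used below, not a
+    documented contract): Mixed_4e sits at network stride 16, so a
+    [1, 224, 224, 3] input yields a rpn_feature_map with spatial size
+    224 / 16 = 14, i.e. shape [1, 14, 14, depth].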
+    """
+
+    preprocessed_inputs.get_shape().assert_has_rank(4)
+    shape_assert = tf.Assert(
+        tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
+                       tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
+        ['image size must be at least 33 in both height and width.'])
+
+    with tf.control_dependencies([shape_assert]):
+      with tf.variable_scope('InceptionV2',
+                             reuse=self._reuse_weights) as scope:
+        with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
+                                   batch_norm_scale=True,
+                                   train_batch_norm=self._train_batch_norm):
+          _, activations = inception_v2.inception_v2_base(
+              preprocessed_inputs,
+              final_endpoint='Mixed_4e',
+              min_depth=self._min_depth,
+              depth_multiplier=self._depth_multiplier,
+              scope=scope)
+
+    return activations['Mixed_4e'], activations
+
+  def _extract_box_classifier_features(self, proposal_feature_maps, scope):
+    """Extracts second stage box classifier features.
+
+    Args:
+      proposal_feature_maps: A 4-D float tensor with shape
+        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
+        representing the feature map cropped to each proposal.
+      scope: A scope name (unused).
+
+    Returns:
+      proposal_classifier_features: A 4-D float tensor with shape
+        [batch_size * self.max_num_proposals, height, width, depth]
+        representing box classifier features for each proposal.
+    """
+    net = proposal_feature_maps
+
+    depth = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
+    trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
+
+    data_format = 'NHWC'
+    concat_dim = 3 if data_format == 'NHWC' else 1
+
+    with tf.variable_scope('InceptionV2', reuse=self._reuse_weights):
+      with slim.arg_scope(
+          [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
+          stride=1,
+          padding='SAME',
+          data_format=data_format):
+        with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
+                                   batch_norm_scale=True,
+                                   train_batch_norm=self._train_batch_norm):
+
+          with tf.variable_scope('Mixed_5a'):
+            with tf.variable_scope('Branch_0'):
+              branch_0 = slim.conv2d(
+                  net, depth(128), [1, 1],
+                  weights_initializer=trunc_normal(0.09),
+                  scope='Conv2d_0a_1x1')
+              branch_0 = slim.conv2d(branch_0, depth(192), [3, 3], stride=2,
+                                     scope='Conv2d_1a_3x3')
+            with tf.variable_scope('Branch_1'):
+              branch_1 = slim.conv2d(
+                  net, depth(192), [1, 1],
+                  weights_initializer=trunc_normal(0.09),
+                  scope='Conv2d_0a_1x1')
+              branch_1 = slim.conv2d(branch_1, depth(256), [3, 3],
+                                     scope='Conv2d_0b_3x3')
+              branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], stride=2,
+                                     scope='Conv2d_1a_3x3')
+            with tf.variable_scope('Branch_2'):
+              branch_2 = slim.max_pool2d(net, [3, 3], stride=2,
+                                         scope='MaxPool_1a_3x3')
+            net = tf.concat([branch_0, branch_1, branch_2], concat_dim)
+
+          with tf.variable_scope('Mixed_5b'):
+            with tf.variable_scope('Branch_0'):
+              branch_0 = slim.conv2d(net, depth(352), [1, 1],
+                                     scope='Conv2d_0a_1x1')
+            with tf.variable_scope('Branch_1'):
+              branch_1 = slim.conv2d(
+                  net, depth(192), [1, 1],
+                  weights_initializer=trunc_normal(0.09),
+                  scope='Conv2d_0a_1x1')
+              branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
+                                     scope='Conv2d_0b_3x3')
+            with tf.variable_scope('Branch_2'):
+              branch_2 = slim.conv2d(
+                  net, depth(160), [1, 1],
+                  weights_initializer=trunc_normal(0.09),
+                  scope='Conv2d_0a_1x1')
+              branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
+                                     scope='Conv2d_0b_3x3')
+              branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
+                                     scope='Conv2d_0c_3x3')
+            with tf.variable_scope('Branch_3'):
+              branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
+
branch_3 = slim.conv2d( + branch_3, depth(128), [1, 1], + weights_initializer=trunc_normal(0.1), + scope='Conv2d_0b_1x1') + net = tf.concat([branch_0, branch_1, branch_2, branch_3], + concat_dim) + + with tf.variable_scope('Mixed_5c'): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, depth(352), [1, 1], + scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d( + net, depth(192), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, depth(320), [3, 3], + scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d( + net, depth(192), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], + scope='Conv2d_0b_3x3') + branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], + scope='Conv2d_0c_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') + branch_3 = slim.conv2d( + branch_3, depth(128), [1, 1], + weights_initializer=trunc_normal(0.1), + scope='Conv2d_0b_1x1') + proposal_classifier_features = tf.concat( + [branch_0, branch_1, branch_2, branch_3], concat_dim) + + return proposal_classifier_features diff --git a/models/faster_rcnn_mobilenet_v1_feature_extractor.py b/models/faster_rcnn_mobilenet_v1_feature_extractor.py new file mode 100644 index 0000000..98e0f56 --- /dev/null +++ b/models/faster_rcnn_mobilenet_v1_feature_extractor.py @@ -0,0 +1,195 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Mobilenet v1 Faster R-CNN implementation.""" +import numpy as np + +import tensorflow as tf +from tensorflow.contrib import slim as contrib_slim + +from object_detection.meta_architectures import faster_rcnn_meta_arch +from object_detection.utils import shape_utils +from nets import mobilenet_v1 + +slim = contrib_slim + + +def _get_mobilenet_conv_no_last_stride_defs(conv_depth_ratio_in_percentage): + if conv_depth_ratio_in_percentage not in [25, 50, 75, 100]: + raise ValueError( + 'Only the following ratio percentages are supported: 25, 50, 75, 100') + conv_depth_ratio_in_percentage = float(conv_depth_ratio_in_percentage) / 100.0 + channels = np.array([ + 32, 64, 128, 128, 256, 256, 512, 512, 512, 512, 512, 512, 1024, 1024 + ], dtype=np.float32) + channels = (channels * conv_depth_ratio_in_percentage).astype(np.int32) + return [ + mobilenet_v1.Conv(kernel=[3, 3], stride=2, depth=channels[0]), + mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[1]), + mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=channels[2]), + mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[3]), + mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=channels[4]), + mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[5]), + mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=channels[6]), + mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[7]), + mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[8]), + mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[9]), + mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[10]), + mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[11]), + mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[12]), + mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[13]) + ] + + +class FasterRCNNMobilenetV1FeatureExtractor( + faster_rcnn_meta_arch.FasterRCNNFeatureExtractor): + """Faster R-CNN Mobilenet V1 feature extractor implementation.""" + + def __init__(self, + is_training, + first_stage_features_stride, + batch_norm_trainable=False, + reuse_weights=None, + weight_decay=0.0, + depth_multiplier=1.0, + min_depth=16, + skip_last_stride=False, + conv_depth_ratio_in_percentage=100): + """Constructor. + + Args: + is_training: See base class. + first_stage_features_stride: See base class. + batch_norm_trainable: See base class. + reuse_weights: See base class. + weight_decay: See base class. + depth_multiplier: float depth multiplier for feature extractor. + min_depth: minimum feature extractor depth. + skip_last_stride: Skip the last stride if True. + conv_depth_ratio_in_percentage: Conv depth ratio in percentage. Only + applied if skip_last_stride is True. + + Raises: + ValueError: If `first_stage_features_stride` is not 8 or 16. + """ + if first_stage_features_stride != 8 and first_stage_features_stride != 16: + raise ValueError('`first_stage_features_stride` must be 8 or 16.') + self._depth_multiplier = depth_multiplier + self._min_depth = min_depth + self._skip_last_stride = skip_last_stride + self._conv_depth_ratio_in_percentage = conv_depth_ratio_in_percentage + super(FasterRCNNMobilenetV1FeatureExtractor, self).__init__( + is_training, first_stage_features_stride, batch_norm_trainable, + reuse_weights, weight_decay) + + def preprocess(self, resized_inputs): + """Faster R-CNN Mobilenet V1 preprocessing. 
+ + Maps pixel values to the range [-1, 1]. + + Args: + resized_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + + Returns: + preprocessed_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + """ + return (2.0 / 255.0) * resized_inputs - 1.0 + + def _extract_proposal_features(self, preprocessed_inputs, scope): + """Extracts first stage RPN features. + + Args: + preprocessed_inputs: A [batch, height, width, channels] float32 tensor + representing a batch of images. + scope: A scope name. + + Returns: + rpn_feature_map: A tensor with shape [batch, height, width, depth] + activations: A dictionary mapping feature extractor tensor names to + tensors + + Raises: + InvalidArgumentError: If the spatial size of `preprocessed_inputs` + (height or width) is less than 33. + ValueError: If the created network is missing the required activation. + """ + + preprocessed_inputs.get_shape().assert_has_rank(4) + preprocessed_inputs = shape_utils.check_min_image_dim( + min_dim=33, image_tensor=preprocessed_inputs) + + with slim.arg_scope( + mobilenet_v1.mobilenet_v1_arg_scope( + is_training=self._train_batch_norm, + weight_decay=self._weight_decay)): + with tf.variable_scope('MobilenetV1', + reuse=self._reuse_weights) as scope: + params = {} + if self._skip_last_stride: + params['conv_defs'] = _get_mobilenet_conv_no_last_stride_defs( + conv_depth_ratio_in_percentage=self. + _conv_depth_ratio_in_percentage) + _, activations = mobilenet_v1.mobilenet_v1_base( + preprocessed_inputs, + final_endpoint='Conv2d_11_pointwise', + min_depth=self._min_depth, + depth_multiplier=self._depth_multiplier, + scope=scope, + **params) + return activations['Conv2d_11_pointwise'], activations + + def _extract_box_classifier_features(self, proposal_feature_maps, scope): + """Extracts second stage box classifier features. + + Args: + proposal_feature_maps: A 4-D float tensor with shape + [batch_size * self.max_num_proposals, crop_height, crop_width, depth] + representing the feature map cropped to each proposal. + scope: A scope name (unused). + + Returns: + proposal_classifier_features: A 4-D float tensor with shape + [batch_size * self.max_num_proposals, height, width, depth] + representing box classifier features for each proposal. + """ + net = proposal_feature_maps + + conv_depth = 1024 + if self._skip_last_stride: + conv_depth_ratio = float(self._conv_depth_ratio_in_percentage) / 100.0 + conv_depth = int(float(conv_depth) * conv_depth_ratio) + + depth = lambda d: max(int(d * 1.0), 16) + with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights): + with slim.arg_scope( + mobilenet_v1.mobilenet_v1_arg_scope( + is_training=self._train_batch_norm, + weight_decay=self._weight_decay)): + with slim.arg_scope( + [slim.conv2d, slim.separable_conv2d], padding='SAME'): + net = slim.separable_conv2d( + net, + depth(conv_depth), [3, 3], + depth_multiplier=1, + stride=2, + scope='Conv2d_12_pointwise') + return slim.separable_conv2d( + net, + depth(conv_depth), [3, 3], + depth_multiplier=1, + stride=1, + scope='Conv2d_13_pointwise') diff --git a/models/faster_rcnn_mobilenet_v1_feature_extractor_test.py b/models/faster_rcnn_mobilenet_v1_feature_extractor_test.py new file mode 100644 index 0000000..fcefe61 --- /dev/null +++ b/models/faster_rcnn_mobilenet_v1_feature_extractor_test.py @@ -0,0 +1,126 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for faster_rcnn_mobilenet_v1_feature_extractor.""" + +import numpy as np +import tensorflow as tf + +from object_detection.models import faster_rcnn_mobilenet_v1_feature_extractor as faster_rcnn_mobilenet_v1 + + +class FasterRcnnMobilenetV1FeatureExtractorTest(tf.test.TestCase): + + def _build_feature_extractor(self, first_stage_features_stride): + return faster_rcnn_mobilenet_v1.FasterRCNNMobilenetV1FeatureExtractor( + is_training=False, + first_stage_features_stride=first_stage_features_stride, + batch_norm_trainable=False, + reuse_weights=None, + weight_decay=0.0) + + def test_extract_proposal_features_returns_expected_size(self): + feature_extractor = self._build_feature_extractor( + first_stage_features_stride=16) + preprocessed_inputs = tf.random_uniform( + [4, 224, 224, 3], maxval=255, dtype=tf.float32) + rpn_feature_map, _ = feature_extractor.extract_proposal_features( + preprocessed_inputs, scope='TestScope') + features_shape = tf.shape(rpn_feature_map) + + init_op = tf.global_variables_initializer() + with self.test_session() as sess: + sess.run(init_op) + features_shape_out = sess.run(features_shape) + self.assertAllEqual(features_shape_out, [4, 14, 14, 512]) + + def test_extract_proposal_features_stride_eight(self): + feature_extractor = self._build_feature_extractor( + first_stage_features_stride=8) + preprocessed_inputs = tf.random_uniform( + [4, 224, 224, 3], maxval=255, dtype=tf.float32) + rpn_feature_map, _ = feature_extractor.extract_proposal_features( + preprocessed_inputs, scope='TestScope') + features_shape = tf.shape(rpn_feature_map) + + init_op = tf.global_variables_initializer() + with self.test_session() as sess: + sess.run(init_op) + features_shape_out = sess.run(features_shape) + self.assertAllEqual(features_shape_out, [4, 14, 14, 512]) + + def test_extract_proposal_features_half_size_input(self): + feature_extractor = self._build_feature_extractor( + first_stage_features_stride=16) + preprocessed_inputs = tf.random_uniform( + [1, 112, 112, 3], maxval=255, dtype=tf.float32) + rpn_feature_map, _ = feature_extractor.extract_proposal_features( + preprocessed_inputs, scope='TestScope') + features_shape = tf.shape(rpn_feature_map) + + init_op = tf.global_variables_initializer() + with self.test_session() as sess: + sess.run(init_op) + features_shape_out = sess.run(features_shape) + self.assertAllEqual(features_shape_out, [1, 7, 7, 512]) + + def test_extract_proposal_features_dies_on_invalid_stride(self): + with self.assertRaises(ValueError): + self._build_feature_extractor(first_stage_features_stride=99) + + def test_extract_proposal_features_dies_on_very_small_images(self): + feature_extractor = self._build_feature_extractor( + first_stage_features_stride=16) + preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3)) + rpn_feature_map, _ = feature_extractor.extract_proposal_features( + preprocessed_inputs, 
scope='TestScope') + features_shape = tf.shape(rpn_feature_map) + + init_op = tf.global_variables_initializer() + with self.test_session() as sess: + sess.run(init_op) + with self.assertRaises(tf.errors.InvalidArgumentError): + sess.run( + features_shape, + feed_dict={preprocessed_inputs: np.random.rand(4, 32, 32, 3)}) + + def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self): + feature_extractor = self._build_feature_extractor( + first_stage_features_stride=16) + preprocessed_inputs = tf.random_uniform( + [224, 224, 3], maxval=255, dtype=tf.float32) + with self.assertRaises(ValueError): + feature_extractor.extract_proposal_features( + preprocessed_inputs, scope='TestScope') + + def test_extract_box_classifier_features_returns_expected_size(self): + feature_extractor = self._build_feature_extractor( + first_stage_features_stride=16) + proposal_feature_maps = tf.random_uniform( + [3, 14, 14, 576], maxval=255, dtype=tf.float32) + proposal_classifier_features = ( + feature_extractor.extract_box_classifier_features( + proposal_feature_maps, scope='TestScope')) + features_shape = tf.shape(proposal_classifier_features) + + init_op = tf.global_variables_initializer() + with self.test_session() as sess: + sess.run(init_op) + features_shape_out = sess.run(features_shape) + self.assertAllEqual(features_shape_out, [3, 7, 7, 1024]) + + +if __name__ == '__main__': + tf.test.main() diff --git a/models/faster_rcnn_resnet_v1_feature_extractor_test.py b/models/faster_rcnn_resnet_v1_feature_extractor_test.py new file mode 100644 index 0000000..876235e --- /dev/null +++ b/models/faster_rcnn_resnet_v1_feature_extractor_test.py @@ -0,0 +1,165 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for object_detection.models.faster_rcnn_resnet_v1_feature_extractor.""" + +import numpy as np +import tensorflow as tf + +from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as faster_rcnn_resnet_v1 + + +class FasterRcnnResnetV1FeatureExtractorTest(tf.test.TestCase): + + def _build_feature_extractor(self, + first_stage_features_stride, + activation_fn=tf.nn.relu, + architecture='resnet_v1_101'): + feature_extractor_map = { + 'resnet_v1_50': + faster_rcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor, + 'resnet_v1_101': + faster_rcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor, + 'resnet_v1_152': + faster_rcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor + } + return feature_extractor_map[architecture]( + is_training=False, + first_stage_features_stride=first_stage_features_stride, + activation_fn=activation_fn, + batch_norm_trainable=False, + reuse_weights=None, + weight_decay=0.0) + + def test_extract_proposal_features_returns_expected_size(self): + for architecture in ['resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152']: + feature_extractor = self._build_feature_extractor( + first_stage_features_stride=16, architecture=architecture) + preprocessed_inputs = tf.random_uniform( + [4, 224, 224, 3], maxval=255, dtype=tf.float32) + rpn_feature_map, _ = feature_extractor.extract_proposal_features( + preprocessed_inputs, scope='TestScope') + features_shape = tf.shape(rpn_feature_map) + + init_op = tf.global_variables_initializer() + with self.test_session() as sess: + sess.run(init_op) + features_shape_out = sess.run(features_shape) + self.assertAllEqual(features_shape_out, [4, 14, 14, 1024]) + + def test_extract_proposal_features_stride_eight(self): + feature_extractor = self._build_feature_extractor( + first_stage_features_stride=8) + preprocessed_inputs = tf.random_uniform( + [4, 224, 224, 3], maxval=255, dtype=tf.float32) + rpn_feature_map, _ = feature_extractor.extract_proposal_features( + preprocessed_inputs, scope='TestScope') + features_shape = tf.shape(rpn_feature_map) + + init_op = tf.global_variables_initializer() + with self.test_session() as sess: + sess.run(init_op) + features_shape_out = sess.run(features_shape) + self.assertAllEqual(features_shape_out, [4, 28, 28, 1024]) + + def test_extract_proposal_features_half_size_input(self): + feature_extractor = self._build_feature_extractor( + first_stage_features_stride=16) + preprocessed_inputs = tf.random_uniform( + [1, 112, 112, 3], maxval=255, dtype=tf.float32) + rpn_feature_map, _ = feature_extractor.extract_proposal_features( + preprocessed_inputs, scope='TestScope') + features_shape = tf.shape(rpn_feature_map) + + init_op = tf.global_variables_initializer() + with self.test_session() as sess: + sess.run(init_op) + features_shape_out = sess.run(features_shape) + self.assertAllEqual(features_shape_out, [1, 7, 7, 1024]) + + def test_extract_proposal_features_dies_on_invalid_stride(self): + with self.assertRaises(ValueError): + self._build_feature_extractor(first_stage_features_stride=99) + + def test_extract_proposal_features_dies_on_very_small_images(self): + feature_extractor = self._build_feature_extractor( + first_stage_features_stride=16) + preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3)) + rpn_feature_map, _ = feature_extractor.extract_proposal_features( + preprocessed_inputs, scope='TestScope') + features_shape = tf.shape(rpn_feature_map) + + init_op = tf.global_variables_initializer() + 
with self.test_session() as sess:
+      sess.run(init_op)
+      with self.assertRaises(tf.errors.InvalidArgumentError):
+        sess.run(
+            features_shape,
+            feed_dict={preprocessed_inputs: np.random.rand(4, 32, 32, 3)})
+
+  def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self):
+    feature_extractor = self._build_feature_extractor(
+        first_stage_features_stride=16)
+    preprocessed_inputs = tf.random_uniform(
+        [224, 224, 3], maxval=255, dtype=tf.float32)
+    with self.assertRaises(ValueError):
+      feature_extractor.extract_proposal_features(
+          preprocessed_inputs, scope='TestScope')
+
+  def test_extract_box_classifier_features_returns_expected_size(self):
+    feature_extractor = self._build_feature_extractor(
+        first_stage_features_stride=16)
+    proposal_feature_maps = tf.random_uniform(
+        [3, 7, 7, 1024], maxval=255, dtype=tf.float32)
+    proposal_classifier_features = (
+        feature_extractor.extract_box_classifier_features(
+            proposal_feature_maps, scope='TestScope'))
+    features_shape = tf.shape(proposal_classifier_features)
+
+    init_op = tf.global_variables_initializer()
+    with self.test_session() as sess:
+      sess.run(init_op)
+      features_shape_out = sess.run(features_shape)
+      self.assertAllEqual(features_shape_out, [3, 7, 7, 2048])
+
+  def test_overwriting_activation_fn(self):
+    for architecture in ['resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152']:
+      feature_extractor = self._build_feature_extractor(
+          first_stage_features_stride=16,
+          architecture=architecture,
+          activation_fn=tf.nn.relu6)
+      preprocessed_inputs = tf.random_uniform([4, 224, 224, 3],
+                                              maxval=255,
+                                              dtype=tf.float32)
+      rpn_feature_map, _ = feature_extractor.extract_proposal_features(
+          preprocessed_inputs, scope='TestStage1Scope')
+      _ = feature_extractor.extract_box_classifier_features(
+          rpn_feature_map, scope='TestStage2Scope')
+      conv_ops = [
+          op for op in tf.get_default_graph().get_operations()
+          if op.type == 'Relu6'
+      ]
+      op_names = [op.name for op in conv_ops]
+
+      self.assertIsNotNone(conv_ops)
+      self.assertIn('TestStage1Scope/resnet_v1_50/resnet_v1_50/conv1/Relu6',
+                    op_names)
+      self.assertIn(
+          'TestStage2Scope/resnet_v1_50/block4/unit_1/bottleneck_v1/conv1/Relu6',
+          op_names)
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/models/feature_map_generators.py b/models/feature_map_generators.py
new file mode 100644
index 0000000..f75932c
--- /dev/null
+++ b/models/feature_map_generators.py
@@ -0,0 +1,823 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Functions to generate a list of feature maps based on image features.
+
+Provides several feature map generators that can be used to build object
+detection feature extractors.
+
+Object detection feature extractors are usually built by stacking two
+components: a base feature extractor such as Inception V3 and a feature map
+generator.
+Feature map generators build on the base feature extractors and produce a list +of final feature maps. +""" +import collections +import functools +import tensorflow as tf +from tensorflow.contrib import slim as contrib_slim +from object_detection.utils import ops +from object_detection.utils import shape_utils +slim = contrib_slim + +# Activation bound used for TPU v1. Activations will be clipped to +# [-ACTIVATION_BOUND, ACTIVATION_BOUND] when training with +# use_bounded_activations enabled. +ACTIVATION_BOUND = 6.0 + + +def get_depth_fn(depth_multiplier, min_depth): + """Builds a callable to compute depth (output channels) of conv filters. + + Args: + depth_multiplier: a multiplier for the nominal depth. + min_depth: a lower bound on the depth of filters. + + Returns: + A callable that takes in a nominal depth and returns the depth to use. + """ + def multiply_depth(depth): + new_depth = int(depth * depth_multiplier) + return max(new_depth, min_depth) + return multiply_depth + + +def create_conv_block( + use_depthwise, kernel_size, padding, stride, layer_name, conv_hyperparams, + is_training, freeze_batchnorm, depth): + """Create Keras layers for depthwise & non-depthwise convolutions. + + Args: + use_depthwise: Whether to use depthwise separable conv instead of regular + conv. + kernel_size: A list of length 2: [kernel_height, kernel_width] of the + filters. Can be an int if both values are the same. + padding: One of 'VALID' or 'SAME'. + stride: A list of length 2: [stride_height, stride_width], specifying the + convolution stride. Can be an int if both strides are the same. + layer_name: String. The name of the layer. + conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object + containing hyperparameters for convolution ops. + is_training: Indicates whether the feature generator is in training mode. + freeze_batchnorm: Bool. Whether to freeze batch norm parameters during + training or not. When training with a small batch size (e.g. 1), it is + desirable to freeze batch norm update and use pretrained batch norm + params. + depth: Depth of output feature maps. + + Returns: + A list of conv layers. + """ + layers = [] + if use_depthwise: + kwargs = conv_hyperparams.params() + # Both the regularizer and initializer apply to the depthwise layer, + # so we remap the kernel_* to depthwise_* here. + kwargs['depthwise_regularizer'] = kwargs['kernel_regularizer'] + kwargs['depthwise_initializer'] = kwargs['kernel_initializer'] + layers.append( + tf.keras.layers.SeparableConv2D( + depth, [kernel_size, kernel_size], + depth_multiplier=1, + padding=padding, + strides=stride, + name=layer_name + '_depthwise_conv', + **kwargs)) + else: + layers.append(tf.keras.layers.Conv2D( + depth, + [kernel_size, kernel_size], + padding=padding, + strides=stride, + name=layer_name + '_conv', + **conv_hyperparams.params())) + layers.append( + conv_hyperparams.build_batch_norm( + training=(is_training and not freeze_batchnorm), + name=layer_name + '_batchnorm')) + layers.append( + conv_hyperparams.build_activation_layer( + name=layer_name)) + return layers + + +class KerasMultiResolutionFeatureMaps(tf.keras.Model): + """Generates multi resolution feature maps from input image features. + + A Keras model that generates multi-scale feature maps for detection as in the + SSD papers by Liu et al: https://arxiv.org/pdf/1512.02325v2.pdf, See Sec 2.1. 
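+
+  The depth of each newly constructed layer follows `get_depth_fn` above: for
+  example, with depth_multiplier=0.5 and min_depth=16, a nominal layer_depth
+  of 256 becomes max(int(256 * 0.5), 16) = 128.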
+
+  More specifically, when called on inputs it performs the following two tasks:
+  1) If a layer name is provided in the configuration, returns that layer as a
+     feature map.
+  2) If a layer name is left as an empty string, constructs a new feature map
+     based on the spatial shape and depth configuration. Note that the current
+     implementation only supports generating new layers using convolution of
+     stride 2 resulting in a spatial resolution reduction by a factor of 2.
+     By default convolution kernel size is set to 3, and it can be customized
+     by caller.
+
+  An example of the configuration for Inception V3:
+  {
+    'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
+    'layer_depth': [-1, -1, -1, 512, 256, 128]
+  }
+
+  When this feature generator object is called on input image_features:
+    Args:
+      image_features: A dictionary of handles to activation tensors from the
+        base feature extractor.
+
+    Returns:
+      feature_maps: an OrderedDict mapping keys (feature map names) to
+        tensors where each tensor has shape [batch, height_i, width_i, depth_i].
+  """
+
+  def __init__(self,
+               feature_map_layout,
+               depth_multiplier,
+               min_depth,
+               insert_1x1_conv,
+               is_training,
+               conv_hyperparams,
+               freeze_batchnorm,
+               name=None):
+    """Constructor.
+
+    Args:
+      feature_map_layout: Dictionary of specifications for the feature map
+        layouts in the following format (Inception V2/V3 respectively):
+        {
+          'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
+          'layer_depth': [-1, -1, -1, 512, 256, 128]
+        }
+        or
+        {
+          'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
+          'layer_depth': [-1, -1, -1, 512, 256, 128]
+        }
+        If 'from_layer' is specified, the specified feature map is directly
+        used as a box predictor layer, and the layer_depth is directly
+        inferred from the feature map (instead of using the provided
+        'layer_depth' parameter). In this case, our convention is to set
+        'layer_depth' to -1 for clarity. Otherwise, if 'from_layer' is an
+        empty string, then the box predictor layer will be built from the
+        previous layer using convolution operations. Note that the current
+        implementation only supports generating new layers using convolutions
+        of stride 2 (resulting in a spatial resolution reduction by a factor
+        of 2), and will be extended to a more flexible design. Convolution
+        kernel size is set to 3 by default, and can be customized by
+        'conv_kernel_size' parameter (similarly, 'conv_kernel_size' should be
+        set to -1 if 'from_layer' is specified). The created convolution
+        operation will be a normal 2D convolution by default, and a depthwise
+        convolution followed by 1x1 convolution if 'use_depthwise' is set to
+        True.
+      depth_multiplier: Depth multiplier for convolutional layers.
+      min_depth: Minimum depth for convolutional layers.
+      insert_1x1_conv: A boolean indicating whether an additional 1x1
+        convolution should be inserted before shrinking the feature map.
+      is_training: Indicates whether the feature generator is in training mode.
+      conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
+        containing hyperparameters for convolution ops.
+      freeze_batchnorm: Bool. Whether to freeze batch norm parameters during
+        training or not. When training with a small batch size (e.g. 1), it is
+        desirable to freeze batch norm update and use pretrained batch norm
+        params.
+      name: A string name scope to assign to the model. If 'None', Keras
+        will auto-generate one from the class name.
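+
+    A minimal construction sketch (illustrative only; `conv_hyperparams` is
+    assumed to be a `hyperparams_builder.KerasLayerHyperparams` instance and
+    `image_features` a dict produced by a base feature extractor):
+
+      feature_map_generator = KerasMultiResolutionFeatureMaps(
+          feature_map_layout={
+              'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
+              'layer_depth': [-1, -1, -1, 512, 256, 128]},
+          depth_multiplier=1.0,
+          min_depth=16,
+          insert_1x1_conv=True,
+          is_training=False,
+          conv_hyperparams=conv_hyperparams,
+          freeze_batchnorm=False)
+      feature_maps = feature_map_generator(image_features)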
+ """ + super(KerasMultiResolutionFeatureMaps, self).__init__(name=name) + + self.feature_map_layout = feature_map_layout + self.convolutions = [] + + depth_fn = get_depth_fn(depth_multiplier, min_depth) + + base_from_layer = '' + use_explicit_padding = False + if 'use_explicit_padding' in feature_map_layout: + use_explicit_padding = feature_map_layout['use_explicit_padding'] + use_depthwise = False + if 'use_depthwise' in feature_map_layout: + use_depthwise = feature_map_layout['use_depthwise'] + for index, from_layer in enumerate(feature_map_layout['from_layer']): + net = [] + layer_depth = feature_map_layout['layer_depth'][index] + conv_kernel_size = 3 + if 'conv_kernel_size' in feature_map_layout: + conv_kernel_size = feature_map_layout['conv_kernel_size'][index] + if from_layer: + base_from_layer = from_layer + else: + if insert_1x1_conv: + layer_name = '{}_1_Conv2d_{}_1x1_{}'.format( + base_from_layer, index, depth_fn(layer_depth / 2)) + net.append(tf.keras.layers.Conv2D(depth_fn(layer_depth / 2), + [1, 1], + padding='SAME', + strides=1, + name=layer_name + '_conv', + **conv_hyperparams.params())) + net.append( + conv_hyperparams.build_batch_norm( + training=(is_training and not freeze_batchnorm), + name=layer_name + '_batchnorm')) + net.append( + conv_hyperparams.build_activation_layer( + name=layer_name)) + + layer_name = '{}_2_Conv2d_{}_{}x{}_s2_{}'.format( + base_from_layer, index, conv_kernel_size, conv_kernel_size, + depth_fn(layer_depth)) + stride = 2 + padding = 'SAME' + if use_explicit_padding: + padding = 'VALID' + # We define this function here while capturing the value of + # conv_kernel_size, to avoid holding a reference to the loop variable + # conv_kernel_size inside of a lambda function + def fixed_padding(features, kernel_size=conv_kernel_size): + return ops.fixed_padding(features, kernel_size) + net.append(tf.keras.layers.Lambda(fixed_padding)) + # TODO(rathodv): Add some utilities to simplify the creation of + # Depthwise & non-depthwise convolutions w/ normalization & activations + if use_depthwise: + net.append(tf.keras.layers.DepthwiseConv2D( + [conv_kernel_size, conv_kernel_size], + depth_multiplier=1, + padding=padding, + strides=stride, + name=layer_name + '_depthwise_conv', + **conv_hyperparams.params())) + net.append( + conv_hyperparams.build_batch_norm( + training=(is_training and not freeze_batchnorm), + name=layer_name + '_depthwise_batchnorm')) + net.append( + conv_hyperparams.build_activation_layer( + name=layer_name + '_depthwise')) + + net.append(tf.keras.layers.Conv2D(depth_fn(layer_depth), [1, 1], + padding='SAME', + strides=1, + name=layer_name + '_conv', + **conv_hyperparams.params())) + net.append( + conv_hyperparams.build_batch_norm( + training=(is_training and not freeze_batchnorm), + name=layer_name + '_batchnorm')) + net.append( + conv_hyperparams.build_activation_layer( + name=layer_name)) + + else: + net.append(tf.keras.layers.Conv2D( + depth_fn(layer_depth), + [conv_kernel_size, conv_kernel_size], + padding=padding, + strides=stride, + name=layer_name + '_conv', + **conv_hyperparams.params())) + net.append( + conv_hyperparams.build_batch_norm( + training=(is_training and not freeze_batchnorm), + name=layer_name + '_batchnorm')) + net.append( + conv_hyperparams.build_activation_layer( + name=layer_name)) + + # Until certain bugs are fixed in checkpointable lists, + # this net must be appended only once it's been filled with layers + self.convolutions.append(net) + + def call(self, image_features): + """Generate the multi-resolution feature 
maps.
+
+    Executed when calling the `.__call__` method on input.
+
+    Args:
+      image_features: A dictionary of handles to activation tensors from the
+        base feature extractor.
+
+    Returns:
+      feature_maps: an OrderedDict mapping keys (feature map names) to
+        tensors where each tensor has shape [batch, height_i, width_i, depth_i].
+    """
+    feature_maps = []
+    feature_map_keys = []
+
+    for index, from_layer in enumerate(self.feature_map_layout['from_layer']):
+      if from_layer:
+        feature_map = image_features[from_layer]
+        feature_map_keys.append(from_layer)
+      else:
+        feature_map = feature_maps[-1]
+        for layer in self.convolutions[index]:
+          feature_map = layer(feature_map)
+        layer_name = self.convolutions[index][-1].name
+        feature_map_keys.append(layer_name)
+      feature_maps.append(feature_map)
+    return collections.OrderedDict(
+        [(x, y) for (x, y) in zip(feature_map_keys, feature_maps)])
+
+
+def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
+                                  min_depth, insert_1x1_conv, image_features,
+                                  pool_residual=False):
+  """Generates multi resolution feature maps from input image features.
+
+  Generates multi-scale feature maps for detection as in the SSD papers by
+  Liu et al: https://arxiv.org/pdf/1512.02325v2.pdf, See Sec 2.1.
+
+  More specifically, it performs the following two tasks:
+  1) If a layer name is provided in the configuration, returns that layer as a
+     feature map.
+  2) If a layer name is left as an empty string, constructs a new feature map
+     based on the spatial shape and depth configuration. Note that the current
+     implementation only supports generating new layers using convolution of
+     stride 2 resulting in a spatial resolution reduction by a factor of 2.
+     By default convolution kernel size is set to 3, and it can be customized
+     by caller.
+
+  An example of the configuration for Inception V3:
+  {
+    'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
+    'layer_depth': [-1, -1, -1, 512, 256, 128]
+  }
+
+  Args:
+    feature_map_layout: Dictionary of specifications for the feature map
+      layouts in the following format (Inception V2/V3 respectively):
+      {
+        'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
+        'layer_depth': [-1, -1, -1, 512, 256, 128]
+      }
+      or
+      {
+        'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
+        'layer_depth': [-1, -1, -1, 512, 256, 128]
+      }
+      If 'from_layer' is specified, the specified feature map is directly used
+      as a box predictor layer, and the layer_depth is directly inferred from
+      the feature map (instead of using the provided 'layer_depth' parameter).
+      In this case, our convention is to set 'layer_depth' to -1 for clarity.
+      Otherwise, if 'from_layer' is an empty string, then the box predictor
+      layer will be built from the previous layer using convolution operations.
+      Note that the current implementation only supports generating new layers
+      using convolutions of stride 2 (resulting in a spatial resolution
+      reduction by a factor of 2), and will be extended to a more flexible
+      design. Convolution kernel size is set to 3 by default, and can be
+      customized by 'conv_kernel_size' parameter (similarly, 'conv_kernel_size'
+      should be set to -1 if 'from_layer' is specified). The created
+      convolution operation will be a normal 2D convolution by default, and a
+      depthwise convolution followed by 1x1 convolution if 'use_depthwise' is
+      set to True.
+    depth_multiplier: Depth multiplier for convolutional layers.
+    min_depth: Minimum depth for convolutional layers.
+    insert_1x1_conv: A boolean indicating whether an additional 1x1 convolution
+      should be inserted before shrinking the feature map.
+    image_features: A dictionary of handles to activation tensors from the
+      base feature extractor.
+    pool_residual: Whether to add an average pooling layer followed by a
+      residual connection between subsequent feature maps when the channel
+      depths match. For example, with option 'layer_depth': [-1, 512, 256,
+      256], a pooling and residual layer is added between the third and
+      fourth feature map. This option is better used with Weight Shared
+      Convolution Box Predictor when all feature maps have the same channel
+      depth to encourage more consistent features across multi-scale feature
+      maps.
+
+  Returns:
+    feature_maps: an OrderedDict mapping keys (feature map names) to
+      tensors where each tensor has shape [batch, height_i, width_i, depth_i].
+
+  Raises:
+    ValueError: if the number of entries in 'from_layer' and
+      'layer_depth' do not match.
+    ValueError: if the generated layer does not have the same resolution
+      as specified.
+  """
+  depth_fn = get_depth_fn(depth_multiplier, min_depth)
+
+  feature_map_keys = []
+  feature_maps = []
+  base_from_layer = ''
+  use_explicit_padding = False
+  if 'use_explicit_padding' in feature_map_layout:
+    use_explicit_padding = feature_map_layout['use_explicit_padding']
+  use_depthwise = False
+  if 'use_depthwise' in feature_map_layout:
+    use_depthwise = feature_map_layout['use_depthwise']
+  for index, from_layer in enumerate(feature_map_layout['from_layer']):
+    layer_depth = feature_map_layout['layer_depth'][index]
+    conv_kernel_size = 3
+    if 'conv_kernel_size' in feature_map_layout:
+      conv_kernel_size = feature_map_layout['conv_kernel_size'][index]
+    if from_layer:
+      feature_map = image_features[from_layer]
+      base_from_layer = from_layer
+      feature_map_keys.append(from_layer)
+    else:
+      pre_layer = feature_maps[-1]
+      pre_layer_depth = pre_layer.get_shape().as_list()[3]
+      intermediate_layer = pre_layer
+      if insert_1x1_conv:
+        layer_name = '{}_1_Conv2d_{}_1x1_{}'.format(
+            base_from_layer, index, depth_fn(layer_depth / 2))
+        intermediate_layer = slim.conv2d(
+            pre_layer,
+            depth_fn(layer_depth / 2), [1, 1],
+            padding='SAME',
+            stride=1,
+            scope=layer_name)
+      layer_name = '{}_2_Conv2d_{}_{}x{}_s2_{}'.format(
+          base_from_layer, index, conv_kernel_size, conv_kernel_size,
+          depth_fn(layer_depth))
+      stride = 2
+      padding = 'SAME'
+      if use_explicit_padding:
+        padding = 'VALID'
+        intermediate_layer = ops.fixed_padding(
+            intermediate_layer, conv_kernel_size)
+      if use_depthwise:
+        feature_map = slim.separable_conv2d(
+            intermediate_layer,
+            None, [conv_kernel_size, conv_kernel_size],
+            depth_multiplier=1,
+            padding=padding,
+            stride=stride,
+            scope=layer_name + '_depthwise')
+        feature_map = slim.conv2d(
+            feature_map,
+            depth_fn(layer_depth), [1, 1],
+            padding='SAME',
+            stride=1,
+            scope=layer_name)
+        if pool_residual and pre_layer_depth == depth_fn(layer_depth):
+          feature_map += slim.avg_pool2d(
+              pre_layer, [3, 3],
+              padding='SAME',
+              stride=2,
+              scope=layer_name + '_pool')
+      else:
+        feature_map = slim.conv2d(
+            intermediate_layer,
+            depth_fn(layer_depth), [conv_kernel_size, conv_kernel_size],
+            padding=padding,
+            stride=stride,
+            scope=layer_name)
+      feature_map_keys.append(layer_name)
+    feature_maps.append(feature_map)
+  return collections.OrderedDict(
+      [(x, y) for (x, y) in zip(feature_map_keys, feature_maps)])
+
+
+class KerasFpnTopDownFeatureMaps(tf.keras.Model):
+  """Generates Keras based `top-down` feature maps for Feature Pyramid
Networks. + + See https://arxiv.org/abs/1612.03144 for details. + """ + + def __init__(self, + num_levels, + depth, + is_training, + conv_hyperparams, + freeze_batchnorm, + use_depthwise=False, + use_explicit_padding=False, + use_bounded_activations=False, + use_native_resize_op=False, + scope=None, + name=None): + """Constructor. + + Args: + num_levels: the number of image features. + depth: depth of output feature maps. + is_training: Indicates whether the feature generator is in training mode. + conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object + containing hyperparameters for convolution ops. + freeze_batchnorm: Bool. Whether to freeze batch norm parameters during + training or not. When training with a small batch size (e.g. 1), it is + desirable to freeze batch norm update and use pretrained batch norm + params. + use_depthwise: whether to use depthwise separable conv instead of regular + conv. + use_explicit_padding: whether to use explicit padding. + use_bounded_activations: Whether or not to clip activations to range + [-ACTIVATION_BOUND, ACTIVATION_BOUND]. Bounded activations better lend + themselves to quantized inference. + use_native_resize_op: If True, uses tf.image.resize_nearest_neighbor op + for the upsampling process instead of reshape and broadcasting + implementation. + scope: A scope name to wrap this op under. + name: A string name scope to assign to the model. If 'None', Keras + will auto-generate one from the class name. + """ + super(KerasFpnTopDownFeatureMaps, self).__init__(name=name) + + self.scope = scope if scope else 'top_down' + self.top_layers = [] + self.residual_blocks = [] + self.top_down_blocks = [] + self.reshape_blocks = [] + self.conv_layers = [] + + padding = 'VALID' if use_explicit_padding else 'SAME' + stride = 1 + kernel_size = 3 + def clip_by_value(features): + return tf.clip_by_value(features, -ACTIVATION_BOUND, ACTIVATION_BOUND) + + # top layers + self.top_layers.append(tf.keras.layers.Conv2D( + depth, [1, 1], strides=stride, padding=padding, + name='projection_%d' % num_levels, + **conv_hyperparams.params(use_bias=True))) + if use_bounded_activations: + self.top_layers.append(tf.keras.layers.Lambda( + clip_by_value, name='clip_by_value')) + + for level in reversed(range(num_levels - 1)): + # to generate residual from image features + residual_net = [] + # to preprocess top_down (the image feature map from last layer) + top_down_net = [] + # to reshape top_down according to residual if necessary + reshaped_residual = [] + # to apply convolution layers to feature map + conv_net = [] + + # residual block + residual_net.append(tf.keras.layers.Conv2D( + depth, [1, 1], padding=padding, strides=1, + name='projection_%d' % (level + 1), + **conv_hyperparams.params(use_bias=True))) + if use_bounded_activations: + residual_net.append(tf.keras.layers.Lambda( + clip_by_value, name='clip_by_value')) + + # top-down block + # TODO (b/128922690): clean-up of ops.nearest_neighbor_upsampling + if use_native_resize_op: + def resize_nearest_neighbor(image): + image_shape = shape_utils.combined_static_and_dynamic_shape(image) + return tf.image.resize_nearest_neighbor( + image, [image_shape[1] * 2, image_shape[2] * 2]) + top_down_net.append(tf.keras.layers.Lambda( + resize_nearest_neighbor, name='nearest_neighbor_upsampling')) + else: + def nearest_neighbor_upsampling(image): + return ops.nearest_neighbor_upsampling(image, scale=2) + top_down_net.append(tf.keras.layers.Lambda( + nearest_neighbor_upsampling, name='nearest_neighbor_upsampling')) 
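+      # Note: both branches above upsample the previous top-down map by
+      # exactly 2x, so that it can be merged with the next (higher
+      # resolution) residual feature map.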
+
+      # reshape block
+      if use_explicit_padding:
+        def reshape(inputs):
+          residual_shape = tf.shape(inputs[0])
+          return inputs[1][:, :residual_shape[1], :residual_shape[2], :]
+        reshaped_residual.append(
+            tf.keras.layers.Lambda(reshape, name='reshape'))
+
+      # down layers
+      if use_bounded_activations:
+        conv_net.append(tf.keras.layers.Lambda(
+            clip_by_value, name='clip_by_value'))
+
+      if use_explicit_padding:
+        def fixed_padding(features, kernel_size=kernel_size):
+          return ops.fixed_padding(features, kernel_size)
+        conv_net.append(tf.keras.layers.Lambda(
+            fixed_padding, name='fixed_padding'))
+
+      layer_name = 'smoothing_%d' % (level + 1)
+      conv_block = create_conv_block(
+          use_depthwise, kernel_size, padding, stride, layer_name,
+          conv_hyperparams, is_training, freeze_batchnorm, depth)
+      conv_net.extend(conv_block)
+
+      self.residual_blocks.append(residual_net)
+      self.top_down_blocks.append(top_down_net)
+      self.reshape_blocks.append(reshaped_residual)
+      self.conv_layers.append(conv_net)
+
+  def call(self, image_features):
+    """Generate the multi-resolution feature maps.
+
+    Executed when calling the `.__call__` method on input.
+
+    Args:
+      image_features: list of tuples of (tensor_name, image_feature_tensor).
+        Spatial resolutions of successive tensors must reduce exactly by a
+        factor of 2.
+
+    Returns:
+      feature_maps: an OrderedDict mapping keys (feature map names) to
+        tensors where each tensor has shape [batch, height_i, width_i,
+        depth_i].
+    """
+    output_feature_maps_list = []
+    output_feature_map_keys = []
+
+    with tf.name_scope(self.scope):
+      top_down = image_features[-1][1]
+      for layer in self.top_layers:
+        top_down = layer(top_down)
+      output_feature_maps_list.append(top_down)
+      output_feature_map_keys.append('top_down_%s' % image_features[-1][0])
+
+      num_levels = len(image_features)
+      for index, level in enumerate(reversed(range(num_levels - 1))):
+        residual = image_features[level][1]
+        top_down = output_feature_maps_list[-1]
+        for layer in self.residual_blocks[index]:
+          residual = layer(residual)
+        for layer in self.top_down_blocks[index]:
+          top_down = layer(top_down)
+        for layer in self.reshape_blocks[index]:
+          top_down = layer([residual, top_down])
+        top_down += residual
+        for layer in self.conv_layers[index]:
+          top_down = layer(top_down)
+        output_feature_maps_list.append(top_down)
+        output_feature_map_keys.append(
+            'top_down_%s' % image_features[level][0])
+    return collections.OrderedDict(reversed(
+        list(zip(output_feature_map_keys, output_feature_maps_list))))
+
+
+def fpn_top_down_feature_maps(image_features,
+                              depth,
+                              use_depthwise=False,
+                              use_explicit_padding=False,
+                              use_bounded_activations=False,
+                              scope=None,
+                              use_native_resize_op=False):
+  """Generates `top-down` feature maps for Feature Pyramid Networks.
+
+  See https://arxiv.org/abs/1612.03144 for details.
+
+  Args:
+    image_features: list of tuples of (tensor_name, image_feature_tensor).
+      Spatial resolutions of successive tensors must reduce exactly by a
+      factor of 2.
+    depth: depth of output feature maps.
+    use_depthwise: whether to use depthwise separable conv instead of regular
+      conv.
+    use_explicit_padding: whether to use explicit padding.
+    use_bounded_activations: Whether or not to clip activations to range
+      [-ACTIVATION_BOUND, ACTIVATION_BOUND]. Bounded activations better lend
+      themselves to quantized inference.
+    scope: A scope name to wrap this op under.
+    use_native_resize_op: If True, uses the tf.image.resize_nearest_neighbor
+      op for the upsampling process instead of the reshape-and-broadcast
+      implementation.
+
+  Returns:
+    feature_maps: an OrderedDict mapping keys (feature map names) to
+      tensors where each tensor has shape [batch, height_i, width_i, depth_i].
+  """
+  with tf.name_scope(scope, 'top_down'):
+    num_levels = len(image_features)
+    output_feature_maps_list = []
+    output_feature_map_keys = []
+    padding = 'VALID' if use_explicit_padding else 'SAME'
+    kernel_size = 3
+    with slim.arg_scope(
+        [slim.conv2d, slim.separable_conv2d], padding=padding, stride=1):
+      top_down = slim.conv2d(
+          image_features[-1][1],
+          depth, [1, 1], activation_fn=None, normalizer_fn=None,
+          scope='projection_%d' % num_levels)
+      if use_bounded_activations:
+        top_down = tf.clip_by_value(top_down, -ACTIVATION_BOUND,
+                                    ACTIVATION_BOUND)
+      output_feature_maps_list.append(top_down)
+      output_feature_map_keys.append(
+          'top_down_%s' % image_features[-1][0])
+
+      for level in reversed(range(num_levels - 1)):
+        if use_native_resize_op:
+          with tf.name_scope('nearest_neighbor_upsampling'):
+            top_down_shape = shape_utils.combined_static_and_dynamic_shape(
+                top_down)
+            top_down = tf.image.resize_nearest_neighbor(
+                top_down, [top_down_shape[1] * 2, top_down_shape[2] * 2])
+        else:
+          top_down = ops.nearest_neighbor_upsampling(top_down, scale=2)
+        residual = slim.conv2d(
+            image_features[level][1], depth, [1, 1],
+            activation_fn=None, normalizer_fn=None,
+            scope='projection_%d' % (level + 1))
+        if use_bounded_activations:
+          residual = tf.clip_by_value(residual, -ACTIVATION_BOUND,
+                                      ACTIVATION_BOUND)
+        if use_explicit_padding:
+          # slice top_down to the same shape as residual
+          residual_shape = tf.shape(residual)
+          top_down = top_down[:, :residual_shape[1], :residual_shape[2], :]
+        top_down += residual
+        if use_bounded_activations:
+          top_down = tf.clip_by_value(top_down, -ACTIVATION_BOUND,
+                                      ACTIVATION_BOUND)
+        if use_depthwise:
+          conv_op = functools.partial(slim.separable_conv2d,
+                                      depth_multiplier=1)
+        else:
+          conv_op = slim.conv2d
+        if use_explicit_padding:
+          top_down = ops.fixed_padding(top_down, kernel_size)
+        output_feature_maps_list.append(conv_op(
+            top_down,
+            depth, [kernel_size, kernel_size],
+            scope='smoothing_%d' % (level + 1)))
+        output_feature_map_keys.append(
+            'top_down_%s' % image_features[level][0])
+      return collections.OrderedDict(reversed(
+          list(zip(output_feature_map_keys, output_feature_maps_list))))
+
+
+def pooling_pyramid_feature_maps(base_feature_map_depth, num_layers,
+                                 image_features, replace_pool_with_conv=False):
+  """Generates pooling pyramid feature maps.
+
+  The pooling pyramid feature maps are motivated by
+  multi_resolution_feature_maps. The main difference is that this generator is
+  simpler and reduces the number of free parameters.
+
+  More specifically:
+    - Instead of using convolutions to shrink the feature map, it uses max
+      pooling, which entirely eliminates the convolution parameters.
+    - By pooling features from the larger map up to a single cell, it
+      generates features in the same feature space.
+    - Instead of independently making box predictions from individual maps, it
+      shares the same classifier across different feature maps, which reduces
+      the "mis-calibration" across different scales.
+
+  See go/ppn-detection for more details.
+
+  Args:
+    base_feature_map_depth: Depth of the base feature before the max pooling.
+    num_layers: Number of layers used to make predictions. They are pooled
+      from the base feature.
+    image_features: A dictionary of handles to activation tensors from the
+      feature extractor.
+    replace_pool_with_conv: Whether or not to replace pooling operations with
+      convolutions in the PPN. Default is False.
+
+  Returns:
+    feature_maps: an OrderedDict mapping keys (feature map names) to
+      tensors where each tensor has shape [batch, height_i, width_i, depth_i].
+
+  Raises:
+    ValueError: image_features does not contain exactly one entry.
+  """
+  if len(image_features) != 1:
+    raise ValueError('image_features should be a dictionary of length 1.')
+  image_features = image_features[list(image_features.keys())[0]]
+
+  feature_map_keys = []
+  feature_maps = []
+  feature_map_key = 'Base_Conv2d_1x1_%d' % base_feature_map_depth
+  if base_feature_map_depth > 0:
+    image_features = slim.conv2d(
+        image_features,
+        base_feature_map_depth,
+        [1, 1],  # kernel size
+        padding='SAME', stride=1, scope=feature_map_key)
+    # Add a 1x1 max-pooling node (a no-op node) immediately after the conv2d
+    # for TPU v1 compatibility. Without the following dummy op, the TPU
+    # runtime compiler will combine the convolution with one max-pooling below
+    # into a single cycle, so getting the conv2d feature becomes impossible.
+    image_features = slim.max_pool2d(
+        image_features, [1, 1], padding='SAME', stride=1,
+        scope=feature_map_key)
+  feature_map_keys.append(feature_map_key)
+  feature_maps.append(image_features)
+  feature_map = image_features
+  if replace_pool_with_conv:
+    with slim.arg_scope([slim.conv2d], padding='SAME', stride=2):
+      for i in range(num_layers - 1):
+        feature_map_key = 'Conv2d_{}_3x3_s2_{}'.format(i,
+                                                       base_feature_map_depth)
+        feature_map = slim.conv2d(
+            feature_map, base_feature_map_depth, [3, 3],
+            scope=feature_map_key)
+        feature_map_keys.append(feature_map_key)
+        feature_maps.append(feature_map)
+  else:
+    with slim.arg_scope([slim.max_pool2d], padding='SAME', stride=2):
+      for i in range(num_layers - 1):
+        feature_map_key = 'MaxPool2d_%d_2x2' % i
+        feature_map = slim.max_pool2d(
+            feature_map, [2, 2], padding='SAME', scope=feature_map_key)
+        feature_map_keys.append(feature_map_key)
+        feature_maps.append(feature_map)
+  return collections.OrderedDict(
+      [(x, y) for (x, y) in zip(feature_map_keys, feature_maps)])
diff --git a/models/keras_models/base_models/original_mobilenet_v2.py b/models/keras_models/base_models/original_mobilenet_v2.py
new file mode 100644
index 0000000..28e5b69
--- /dev/null
+++ b/models/keras_models/base_models/original_mobilenet_v2.py
@@ -0,0 +1,479 @@
+
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""MobileNet v2 models for Keras.
+
+MobileNetV2 is a general architecture and can be used for multiple use cases.
+Depending on the use case, it can use different input layer size and
+different width factors. This allows different width models to reduce
+the number of multiply-adds and thereby
+reduce inference cost on mobile devices.
+
+MobileNetV2 is very similar to the original MobileNet,
+except that it uses inverted residual blocks with
+bottlenecking features. It has a drastically lower
+parameter count than the original MobileNet.
+MobileNets support any input size greater
+than 32 x 32, with larger image sizes
+offering better performance.
+
+The number of parameters and number of multiply-adds
+can be modified by using the `alpha` parameter,
+which increases/decreases the number of filters in each layer.
+By altering the image size and `alpha` parameter,
+all 22 models from the paper can be built, with ImageNet weights provided.
+
+The paper demonstrates the performance of MobileNets using `alpha` values of
+0.35, 0.5, 0.75, 1.0 (also called 100% MobileNet), 1.3, and 1.4.
+
+For each of these `alpha` values, weights for 5 different input image sizes
+are provided (224, 192, 160, 128, and 96).
+
+The following table describes the performance of
+MobileNet on various input sizes (MACs stands for Multiply-Adds):
+
+ Classification Checkpoint | MACs (M) | Parameters (M) | Top 1 Acc | Top 5 Acc
+---------------------------|----------|----------------|-----------|----------
+ [mobilenet_v2_1.4_224]    | 582      | 6.06           | 75.0      | 92.5
+ [mobilenet_v2_1.3_224]    | 509      | 5.34           | 74.4      | 92.1
+ [mobilenet_v2_1.0_224]    | 300      | 3.47           | 71.8      | 91.0
+ [mobilenet_v2_1.0_192]    | 221      | 3.47           | 70.7      | 90.1
+ [mobilenet_v2_1.0_160]    | 154      | 3.47           | 68.8      | 89.0
+ [mobilenet_v2_1.0_128]    | 99       | 3.47           | 65.3      | 86.9
+ [mobilenet_v2_1.0_96]     | 56       | 3.47           | 60.3      | 83.2
+ [mobilenet_v2_0.75_224]   | 209      | 2.61           | 69.8      | 89.6
+ [mobilenet_v2_0.75_192]   | 153      | 2.61           | 68.7      | 88.9
+ [mobilenet_v2_0.75_160]   | 107      | 2.61           | 66.4      | 87.3
+ [mobilenet_v2_0.75_128]   | 69       | 2.61           | 63.2      | 85.3
+ [mobilenet_v2_0.75_96]    | 39       | 2.61           | 58.8      | 81.6
+ [mobilenet_v2_0.5_224]    | 97       | 1.95           | 65.4      | 86.4
+ [mobilenet_v2_0.5_192]    | 71       | 1.95           | 63.9      | 85.4
+ [mobilenet_v2_0.5_160]    | 50       | 1.95           | 61.0      | 83.2
+ [mobilenet_v2_0.5_128]    | 32       | 1.95           | 57.7      | 80.8
+ [mobilenet_v2_0.5_96]     | 18       | 1.95           | 51.2      | 75.8
+ [mobilenet_v2_0.35_224]   | 59       | 1.66           | 60.3      | 82.9
+ [mobilenet_v2_0.35_192]   | 43       | 1.66           | 58.2      | 81.2
+ [mobilenet_v2_0.35_160]   | 30       | 1.66           | 55.7      | 79.1
+ [mobilenet_v2_0.35_128]   | 20       | 1.66           | 50.8      | 75.0
+ [mobilenet_v2_0.35_96]    | 11       | 1.66           | 45.5      | 70.4
+
+The weights for all of these models are obtained and translated from the
+TensorFlow checkpoints found at
+https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/README.md
+
+# Reference
+This file contains building code for MobileNetV2, based on
+[MobileNetV2: Inverted Residuals and Linear Bottlenecks]
+(https://arxiv.org/abs/1801.04381)
+
+Tests comparing this model to the existing TensorFlow model can be
+found at
+[mobilenet_v2_keras](https://github.com/JonathanCMitchell/mobilenet_v2_keras)
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import warnings
+import numpy as np
+import tensorflow as tf
+
+Model = tf.keras.Model
+Input = tf.keras.layers.Input
+Activation = tf.keras.layers.Activation
+BatchNormalization = tf.keras.layers.BatchNormalization
+Conv2D = tf.keras.layers.Conv2D
+DepthwiseConv2D = tf.keras.layers.DepthwiseConv2D
+GlobalAveragePooling2D = tf.keras.layers.GlobalAveragePooling2D
+Add = tf.keras.layers.Add
+Dense = tf.keras.layers.Dense
+K = tf.keras.backend
+
+
+def relu6(x):
+  return K.relu(x, max_value=6)
+
+
+def _obtain_input_shape(
+    input_shape,
+    default_size,
+    min_size,
+    data_format,
+    require_flatten):
+  """Internal utility to compute/validate an ImageNet model's input shape.
+
+  Arguments:
+    input_shape: either None (will return the default network input shape),
+      or a user-provided shape to be validated.
+    default_size: default input width/height for the model.
+    min_size: minimum input width/height accepted by the model.
+    data_format: image data format to use.
+    require_flatten: whether the model is expected to
+      be linked to a classifier via a Flatten layer.
+
+  Returns:
+    An integer shape tuple (may include None entries).
+
+  Raises:
+    ValueError: in case of invalid argument values.
+  """
+  if input_shape and len(input_shape) == 3:
+    if data_format == 'channels_first':
+      if input_shape[0] not in {1, 3}:
+        warnings.warn(
+            'This model usually expects 1 or 3 input channels. '
+            'However, it was passed an input_shape with ' +
+            str(input_shape[0]) + ' input channels.')
+      default_shape = (input_shape[0], default_size, default_size)
+    else:
+      if input_shape[-1] not in {1, 3}:
+        warnings.warn(
+            'This model usually expects 1 or 3 input channels. '
+            'However, it was passed an input_shape with ' +
+            str(input_shape[-1]) + ' input channels.')
+      default_shape = (default_size, default_size, input_shape[-1])
+  else:
+    if data_format == 'channels_first':
+      default_shape = (3, default_size, default_size)
+    else:
+      default_shape = (default_size, default_size, 3)
+  if input_shape:
+    if data_format == 'channels_first':
+      if input_shape is not None:
+        if len(input_shape) != 3:
+          raise ValueError(
+              '`input_shape` must be a tuple of three integers.')
+        if ((input_shape[1] is not None and input_shape[1] < min_size) or
+            (input_shape[2] is not None and input_shape[2] < min_size)):
+          raise ValueError('Input size must be at least ' +
+                           str(min_size) + 'x' + str(min_size) +
+                           '; got `input_shape=' +
+                           str(input_shape) + '`')
+    else:
+      if input_shape is not None:
+        if len(input_shape) != 3:
+          raise ValueError(
+              '`input_shape` must be a tuple of three integers.')
+        if ((input_shape[0] is not None and input_shape[0] < min_size) or
+            (input_shape[1] is not None and input_shape[1] < min_size)):
+          raise ValueError('Input size must be at least ' +
+                           str(min_size) + 'x' + str(min_size) +
+                           '; got `input_shape=' +
+                           str(input_shape) + '`')
+  else:
+    if require_flatten:
+      input_shape = default_shape
+    else:
+      if data_format == 'channels_first':
+        input_shape = (3, None, None)
+      else:
+        input_shape = (None, None, 3)
+  if require_flatten:
+    if None in input_shape:
+      raise ValueError('If `include_top` is True, '
+                       'you should specify a static `input_shape`. '
+                       'Got `input_shape=' + str(input_shape) + '`')
+  return input_shape
+
+
+def preprocess_input(x):
+  """Preprocesses a numpy array encoding a batch of images.
+
+  This function applies the "Inception" preprocessing which converts
+  the RGB values from [0, 255] to [-1, 1]. Note that this preprocessing
+  function is different from `imagenet_utils.preprocess_input()`.
+
+  Arguments:
+    x: a 4D numpy array consisting of RGB values within [0, 255].
+
+  Returns:
+    Preprocessed array.
+  """
+  x /= 128.
+  x -= 1.
+  return x.astype(np.float32)
+
+
+# This function is taken from the original tf repo.
+# It ensures that all layers have a channel number that is divisible by 8 +# It can be seen here: +# https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + + +def _make_divisible(v, divisor, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +def mobilenet_v2(input_shape=None, + alpha=1.0, + include_top=True, + classes=1000): + """Instantiates the MobileNetV2 architecture. + + To load a MobileNetV2 model via `load_model`, import the custom + objects `relu6` and pass them to the `custom_objects` parameter. + E.g. + model = load_model('mobilenet.h5', custom_objects={ + 'relu6': mobilenet.relu6}) + + Arguments: + input_shape: optional shape tuple, to be specified if you would + like to use a model with an input img resolution that is not + (224, 224, 3). + It should have exactly 3 inputs channels (224, 224, 3). + You can also omit this option if you would like + to infer input_shape from an input_tensor. + If you choose to include both input_tensor and input_shape then + input_shape will be used if they match, if the shapes + do not match then we will throw an error. + E.g. `(160, 160, 3)` would be one valid value. + alpha: controls the width of the network. This is known as the + width multiplier in the MobileNetV2 paper. + - If `alpha` < 1.0, proportionally decreases the number + of filters in each layer. + - If `alpha` > 1.0, proportionally increases the number + of filters in each layer. + - If `alpha` = 1, default number of filters from the paper + are used at each layer. + include_top: whether to include the fully-connected + layer at the top of the network. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. + + Returns: + A Keras model instance. + + Raises: + ValueError: in case of invalid argument for `weights`, + or invalid input shape or invalid depth_multiplier, alpha, + rows when weights='imagenet' + """ + + # Determine proper input shape and default size. + # If input_shape is None and no input_tensor + if input_shape is None: + default_size = 224 + + # If input_shape is not None, assume default size + else: + if K.image_data_format() == 'channels_first': + rows = input_shape[1] + cols = input_shape[2] + else: + rows = input_shape[0] + cols = input_shape[1] + + if rows == cols and rows in [96, 128, 160, 192, 224]: + default_size = rows + else: + default_size = 224 + + input_shape = _obtain_input_shape(input_shape, + default_size=default_size, + min_size=32, + data_format=K.image_data_format(), + require_flatten=include_top) + + if K.image_data_format() == 'channels_last': + row_axis, col_axis = (0, 1) + else: + row_axis, col_axis = (1, 2) + rows = input_shape[row_axis] + cols = input_shape[col_axis] + + if K.image_data_format() != 'channels_last': + warnings.warn('The MobileNet family of models is only available ' + 'for the input data format "channels_last" ' + '(width, height, channels). ' + 'However your settings specify the default ' + 'data format "channels_first" (channels, width, height).' + ' You should set `image_data_format="channels_last"` ' + 'in your Keras config located at ~/.keras/keras.json. 
' + 'The model being returned right now will expect inputs ' + 'to follow the "channels_last" data format.') + K.set_image_data_format('channels_last') + old_data_format = 'channels_first' + else: + old_data_format = None + + img_input = Input(shape=input_shape) + + first_block_filters = _make_divisible(32 * alpha, 8) + x = Conv2D(first_block_filters, + kernel_size=3, + strides=(2, 2), padding='same', + use_bias=False, name='Conv1')(img_input) + x = BatchNormalization(epsilon=1e-3, momentum=0.999, name='bn_Conv1')(x) + x = Activation(relu6, name='Conv1_relu')(x) + + x = _first_inverted_res_block(x, + filters=16, + alpha=alpha, + stride=1, + block_id=0) + + x = _inverted_res_block(x, filters=24, alpha=alpha, stride=2, + expansion=6, block_id=1) + x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1, + expansion=6, block_id=2) + + x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2, + expansion=6, block_id=3) + x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, + expansion=6, block_id=4) + x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, + expansion=6, block_id=5) + + x = _inverted_res_block(x, filters=64, alpha=alpha, stride=2, + expansion=6, block_id=6) + x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, + expansion=6, block_id=7) + x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, + expansion=6, block_id=8) + x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, + expansion=6, block_id=9) + + x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, + expansion=6, block_id=10) + x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, + expansion=6, block_id=11) + x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, + expansion=6, block_id=12) + + x = _inverted_res_block(x, filters=160, alpha=alpha, stride=2, + expansion=6, block_id=13) + x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, + expansion=6, block_id=14) + x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, + expansion=6, block_id=15) + + x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, + expansion=6, block_id=16) + + # no alpha applied to last conv as stated in the paper: + # if the width multiplier is greater than 1 we + # increase the number of output channels + if alpha > 1.0: + last_block_filters = _make_divisible(1280 * alpha, 8) + else: + last_block_filters = 1280 + + x = Conv2D(last_block_filters, + kernel_size=1, + use_bias=False, + name='Conv_1')(x) + x = BatchNormalization(epsilon=1e-3, momentum=0.999, name='Conv_1_bn')(x) + x = Activation(relu6, name='out_relu')(x) + + if include_top: + x = GlobalAveragePooling2D()(x) + x = Dense(classes, activation='softmax', + use_bias=True, name='Logits')(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + inputs = img_input + + # Create model. 
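+  # The name encodes the width multiplier and the input rows, e.g. a model
+  # built with alpha=1.0 on 224x224 inputs is named 'mobilenetv2_1.00_224'.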
+ model = Model(inputs, x, name='mobilenetv2_%0.2f_%s' % (alpha, rows)) + + if old_data_format: + K.set_image_data_format(old_data_format) + return model + + +def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id): + """Build an inverted res block.""" + in_channels = int(inputs.shape[-1]) + pointwise_conv_filters = int(filters * alpha) + pointwise_filters = _make_divisible(pointwise_conv_filters, 8) + # Expand + + x = Conv2D(expansion * in_channels, kernel_size=1, padding='same', + use_bias=False, activation=None, + name='mobl%d_conv_expand' % block_id)(inputs) + x = BatchNormalization(epsilon=1e-3, momentum=0.999, + name='bn%d_conv_bn_expand' % + block_id)(x) + x = Activation(relu6, name='conv_%d_relu' % block_id)(x) + + # Depthwise + x = DepthwiseConv2D(kernel_size=3, strides=stride, activation=None, + use_bias=False, padding='same', + name='mobl%d_conv_depthwise' % block_id)(x) + x = BatchNormalization(epsilon=1e-3, momentum=0.999, + name='bn%d_conv_depthwise' % block_id)(x) + + x = Activation(relu6, name='conv_dw_%d_relu' % block_id)(x) + + # Project + x = Conv2D(pointwise_filters, + kernel_size=1, padding='same', use_bias=False, activation=None, + name='mobl%d_conv_project' % block_id)(x) + x = BatchNormalization(epsilon=1e-3, momentum=0.999, + name='bn%d_conv_bn_project' % block_id)(x) + + if in_channels == pointwise_filters and stride == 1: + return Add(name='res_connect_' + str(block_id))([inputs, x]) + + return x + + +def _first_inverted_res_block(inputs, + stride, + alpha, filters, block_id): + """Build the first inverted res block.""" + in_channels = int(inputs.shape[-1]) + pointwise_conv_filters = int(filters * alpha) + pointwise_filters = _make_divisible(pointwise_conv_filters, 8) + + # Depthwise + x = DepthwiseConv2D(kernel_size=3, + strides=stride, activation=None, + use_bias=False, padding='same', + name='mobl%d_conv_depthwise' % + block_id)(inputs) + x = BatchNormalization(epsilon=1e-3, momentum=0.999, + name='bn%d_conv_depthwise' % + block_id)(x) + x = Activation(relu6, name='conv_dw_%d_relu' % block_id)(x) + + # Project + x = Conv2D(pointwise_filters, + kernel_size=1, + padding='same', + use_bias=False, + activation=None, + name='mobl%d_conv_project' % + block_id)(x) + x = BatchNormalization(epsilon=1e-3, momentum=0.999, + name='bn%d_conv_project' % + block_id)(x) + + if in_channels == pointwise_filters and stride == 1: + return Add(name='res_connect_' + str(block_id))([inputs, x]) + + return x diff --git a/models/keras_models/inception_resnet_v2.py b/models/keras_models/inception_resnet_v2.py new file mode 100644 index 0000000..ec99e3e --- /dev/null +++ b/models/keras_models/inception_resnet_v2.py @@ -0,0 +1,244 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""A wrapper around the Keras InceptionResnetV2 models for object detection.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from object_detection.core import freezable_batch_norm + + +class _LayersOverride(object): + """Alternative Keras layers interface for the Keras InceptionResNetV2.""" + + def __init__(self, + batchnorm_training, + output_stride=16, + align_feature_maps=False, + batchnorm_scale=False, + default_batchnorm_momentum=0.999, + default_batchnorm_epsilon=1e-3, + weight_decay=0.00004): + """Alternative tf.keras.layers interface, for use by InceptionResNetV2. + + It is used by the Keras applications kwargs injection API to + modify the Inception Resnet V2 Keras application with changes required by + the Object Detection API. + + These injected interfaces make the following changes to the network: + + - Supports freezing batch norm layers + - Adds support for feature map alignment (like in the Slim model) + - Adds support for changing the output stride (like in the Slim model) + - Adds support for overriding various batch norm hyperparameters + + Because the Keras inception resnet v2 application does not assign explicit + names to most individual layers, the injection of output stride support + works by identifying convolution layers according to their filter counts + and pre-feature-map-alignment padding arguments. + + Args: + batchnorm_training: Bool. Assigned to Batch norm layer `training` param + when constructing `freezable_batch_norm.FreezableBatchNorm` layers. + output_stride: A scalar that specifies the requested ratio of input to + output spatial resolution. Only supports 8 and 16. + align_feature_maps: When true, changes all the VALID paddings in the + network to SAME padding so that the feature maps are aligned. + batchnorm_scale: If True, uses an explicit `gamma` multiplier to scale the + activations in the batch normalization layer. + default_batchnorm_momentum: Float. Batch norm layers will be constructed + using this value as the momentum. + default_batchnorm_epsilon: small float added to variance to avoid + dividing by zero. + weight_decay: the l2 regularization weight decay for weights variables. + (gets multiplied by 0.5 to map from slim l2 regularization weight to + Keras l2 regularization weight). + """ + self._use_atrous = output_stride == 8 + self._align_feature_maps = align_feature_maps + self._batchnorm_training = batchnorm_training + self._batchnorm_scale = batchnorm_scale + self._default_batchnorm_momentum = default_batchnorm_momentum + self._default_batchnorm_epsilon = default_batchnorm_epsilon + self.regularizer = tf.keras.regularizers.l2(weight_decay * 0.5) + + def Conv2D(self, filters, kernel_size, **kwargs): + """Builds a Conv2D layer according to the current Object Detection config. + + Overrides the Keras InceptionResnetV2 application's convolutions with ones + that follow the spec specified by the Object Detection hyperparameters. + + If feature map alignment is enabled, the padding will be forced to 'same'. + If output_stride is 8, some conv2d layers will be matched according to + their name or filter counts or pre-alignment padding parameters, and will + have the correct 'dilation rate' or 'strides' set. + + Args: + filters: The number of filters to use for the convolution. 
+ kernel_size: The kernel size to specify the height and width of the 2D + convolution window. + **kwargs: Keyword args specified by the Keras application for + constructing the convolution. + + Returns: + A Keras Conv2D layer specified by the Object Detection hyperparameter + configurations. + """ + kwargs['kernel_regularizer'] = self.regularizer + kwargs['bias_regularizer'] = self.regularizer + + # Because the Keras application does not set explicit names for most layers, + # (instead allowing names to auto-increment), we must match individual + # layers in the model according to their filter count, name, or + # pre-alignment mapping. This means we can only align the feature maps + # after we have applied our updates in cases where output_stride=8. + if self._use_atrous and (filters == 384): + kwargs['strides'] = 1 + + name = kwargs.get('name') + if self._use_atrous and ( + (name and 'block17' in name) or + (filters == 128 or filters == 160 or + (filters == 192 and kwargs.get('padding', '').lower() != 'valid'))): + kwargs['dilation_rate'] = 2 + + if self._align_feature_maps: + kwargs['padding'] = 'same' + + return tf.keras.layers.Conv2D(filters, kernel_size, **kwargs) + + def MaxPooling2D(self, pool_size, strides, **kwargs): + """Builds a pooling layer according to the current Object Detection config. + + Overrides the Keras InceptionResnetV2 application's MaxPooling2D layers with + ones that follow the spec specified by the Object Detection hyperparameters. + + If feature map alignment is enabled, the padding will be forced to 'same'. + If output_stride is 8, some pooling layers will be matched according to + their pre-alignment padding parameters, and will have their 'strides' + argument overridden. + + Args: + pool_size: The pool size specified by the Keras application. + strides: The strides specified by the unwrapped Keras application. + **kwargs: Keyword args specified by the Keras application for + constructing the max pooling layer. + + Returns: + A MaxPool2D layer specified by the Object Detection hyperparameter + configurations. + """ + if self._use_atrous and kwargs.get('padding', '').lower() == 'valid': + strides = 1 + + if self._align_feature_maps: + kwargs['padding'] = 'same' + + return tf.keras.layers.MaxPool2D(pool_size, strides=strides, **kwargs) + + # We alias MaxPool2D because Keras has that alias + MaxPool2D = MaxPooling2D # pylint: disable=invalid-name + + def BatchNormalization(self, **kwargs): + """Builds a normalization layer. + + Overrides the Keras application batch norm with the norm specified by the + Object Detection configuration. + + Args: + **kwargs: Keyword arguments from the `layers.BatchNormalization` calls in + the Keras application. + + Returns: + A normalization layer specified by the Object Detection hyperparameter + configurations. + """ + kwargs['scale'] = self._batchnorm_scale + return freezable_batch_norm.FreezableBatchNorm( + training=self._batchnorm_training, + epsilon=self._default_batchnorm_epsilon, + momentum=self._default_batchnorm_momentum, + **kwargs) + + # Forward all non-overridden methods to the keras layers + def __getattr__(self, item): + return getattr(tf.keras.layers, item) + + +# pylint: disable=invalid-name +def inception_resnet_v2( + batchnorm_training, + output_stride=16, + align_feature_maps=False, + batchnorm_scale=False, + weight_decay=0.00004, + default_batchnorm_momentum=0.9997, + default_batchnorm_epsilon=0.001, + **kwargs): + """Instantiates the InceptionResnetV2 architecture. 
+ + (Modified for object detection) + + This wraps the InceptionResnetV2 tensorflow Keras application, but uses the + Keras application's kwargs-based monkey-patching API to override the Keras + architecture with the following changes: + + - Supports freezing batch norm layers with FreezableBatchNorms + - Adds support for feature map alignment (like in the Slim model) + - Adds support for changing the output stride (like in the Slim model) + - Changes the default batchnorm momentum to 0.9997 + - Adds support for overriding various batchnorm hyperparameters + + Args: + batchnorm_training: Bool. Assigned to Batch norm layer `training` param + when constructing `freezable_batch_norm.FreezableBatchNorm` layers. + output_stride: A scalar that specifies the requested ratio of input to + output spatial resolution. Only supports 8 and 16. + align_feature_maps: When true, changes all the VALID paddings in the + network to SAME padding so that the feature maps are aligned. + batchnorm_scale: If True, uses an explicit `gamma` multiplier to scale the + activations in the batch normalization layer. + weight_decay: the l2 regularization weight decay for weights variables. + (gets multiplied by 0.5 to map from slim l2 regularization weight to + Keras l2 regularization weight). + default_batchnorm_momentum: Float. Batch norm layers will be constructed + using this value as the momentum. + default_batchnorm_epsilon: small float added to variance to avoid + dividing by zero. + **kwargs: Keyword arguments forwarded directly to the + `tf.keras.applications.InceptionResNetV2` method that constructs the + Keras model. + + Returns: + A Keras model instance. + """ + if output_stride != 8 and output_stride != 16: + raise ValueError('output_stride must be 8 or 16.') + + layers_override = _LayersOverride( + batchnorm_training, + output_stride, + align_feature_maps=align_feature_maps, + batchnorm_scale=batchnorm_scale, + default_batchnorm_momentum=default_batchnorm_momentum, + default_batchnorm_epsilon=default_batchnorm_epsilon, + weight_decay=weight_decay) + return tf.keras.applications.InceptionResNetV2( + layers=layers_override, **kwargs) +# pylint: enable=invalid-name diff --git a/models/keras_models/inception_resnet_v2_test.py b/models/keras_models/inception_resnet_v2_test.py new file mode 100644 index 0000000..18801b3 --- /dev/null +++ b/models/keras_models/inception_resnet_v2_test.py @@ -0,0 +1,223 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for inception_resnet_v2.py. + +This test mainly focuses on comparing slim inception resnet v2 and Keras +inception resnet v2 for object detection. To verify the consistency of the two +models, we compare: + 1. Output shape of each layer given different inputs + 2. 
Number of global variables + +We also visualize the model structure via Tensorboard, and compare the model +layout and the parameters of each Op to make sure the two implementations are +consistent. +""" + +import itertools +import numpy as np +import tensorflow as tf + +from object_detection.models.keras_models import inception_resnet_v2 +from object_detection.utils import test_case + +_KERAS_TO_SLIM_ENDPOINT_NAMES = { + 'activation': 'Conv2d_1a_3x3', + 'activation_1': 'Conv2d_2a_3x3', + 'activation_2': 'Conv2d_2b_3x3', + 'activation_3': 'Conv2d_3b_1x1', + 'activation_4': 'Conv2d_4a_3x3', + 'max_pooling2d': 'MaxPool_3a_3x3', + 'max_pooling2d_1': 'MaxPool_5a_3x3', + 'mixed_5b': 'Mixed_5b', + 'mixed_6a': 'Mixed_6a', + 'block17_20_ac': 'PreAuxLogits', + 'mixed_7a': 'Mixed_7a', + 'conv_7b_ac': 'Conv2d_7b_1x1', +} + +_SLIM_ENDPOINT_SHAPES_128 = { + 'Conv2d_1a_3x3': (2, 64, 64, 32), + 'Conv2d_2a_3x3': (2, 64, 64, 32), + 'Conv2d_2b_3x3': (2, 64, 64, 64), + 'Conv2d_3b_1x1': (2, 32, 32, 80), + 'Conv2d_4a_3x3': (2, 32, 32, 192), + 'Conv2d_7b_1x1': (2, 4, 4, 1536), + 'MaxPool_3a_3x3': (2, 32, 32, 64), + 'MaxPool_5a_3x3': (2, 16, 16, 192), + 'Mixed_5b': (2, 16, 16, 320), + 'Mixed_6a': (2, 8, 8, 1088), + 'Mixed_7a': (2, 4, 4, 2080), + 'PreAuxLogits': (2, 8, 8, 1088)} +_SLIM_ENDPOINT_SHAPES_128_STRIDE_8 = { + 'Conv2d_1a_3x3': (2, 64, 64, 32), + 'Conv2d_2a_3x3': (2, 64, 64, 32), + 'Conv2d_2b_3x3': (2, 64, 64, 64), + 'Conv2d_3b_1x1': (2, 32, 32, 80), + 'Conv2d_4a_3x3': (2, 32, 32, 192), + 'MaxPool_3a_3x3': (2, 32, 32, 64), + 'MaxPool_5a_3x3': (2, 16, 16, 192), + 'Mixed_5b': (2, 16, 16, 320), + 'Mixed_6a': (2, 16, 16, 1088), + 'PreAuxLogits': (2, 16, 16, 1088)} +_SLIM_ENDPOINT_SHAPES_128_ALIGN_FEATURE_MAPS_FALSE = { + 'Conv2d_1a_3x3': (2, 63, 63, 32), + 'Conv2d_2a_3x3': (2, 61, 61, 32), + 'Conv2d_2b_3x3': (2, 61, 61, 64), + 'Conv2d_3b_1x1': (2, 30, 30, 80), + 'Conv2d_4a_3x3': (2, 28, 28, 192), + 'Conv2d_7b_1x1': (2, 2, 2, 1536), + 'MaxPool_3a_3x3': (2, 30, 30, 64), + 'MaxPool_5a_3x3': (2, 13, 13, 192), + 'Mixed_5b': (2, 13, 13, 320), + 'Mixed_6a': (2, 6, 6, 1088), + 'Mixed_7a': (2, 2, 2, 2080), + 'PreAuxLogits': (2, 6, 6, 1088)} +_SLIM_ENDPOINT_SHAPES_299 = {} +_SLIM_ENDPOINT_SHAPES_299_STRIDE_8 = {} +_SLIM_ENDPOINT_SHAPES_299_ALIGN_FEATURE_MAPS_FALSE = {} + +_KERAS_LAYERS_TO_CHECK = list(_KERAS_TO_SLIM_ENDPOINT_NAMES.keys()) + +_NUM_CHANNELS = 3 +_BATCH_SIZE = 2 + + +class InceptionResnetV2Test(test_case.TestCase): + + def _create_application_with_layer_outputs( + self, layer_names, batchnorm_training, + output_stride=16, + align_feature_maps=False, + batchnorm_scale=False, + weight_decay=0.00004, + default_batchnorm_momentum=0.9997, + default_batchnorm_epsilon=0.001,): + """Constructs Keras inception_resnet_v2 that extracts layer outputs.""" + # Have to clear the Keras backend to ensure isolation in layer naming + tf.keras.backend.clear_session() + if not layer_names: + layer_names = _KERAS_LAYERS_TO_CHECK + full_model = inception_resnet_v2.inception_resnet_v2( + batchnorm_training=batchnorm_training, + output_stride=output_stride, + align_feature_maps=align_feature_maps, + weights=None, + batchnorm_scale=batchnorm_scale, + weight_decay=weight_decay, + default_batchnorm_momentum=default_batchnorm_momentum, + default_batchnorm_epsilon=default_batchnorm_epsilon, + include_top=False) + layer_outputs = [full_model.get_layer(name=layer).output + for layer in layer_names] + return tf.keras.Model( + inputs=full_model.inputs, + outputs=layer_outputs) + + def _check_returns_correct_shape( + self, image_height, 
image_width,
+      expected_feature_map_shape, layer_names=None, batchnorm_training=True,
+      output_stride=16,
+      align_feature_maps=False,
+      batchnorm_scale=False,
+      weight_decay=0.00004,
+      default_batchnorm_momentum=0.9997,
+      default_batchnorm_epsilon=0.001,):
+    if not layer_names:
+      layer_names = _KERAS_LAYERS_TO_CHECK
+    model = self._create_application_with_layer_outputs(
+        layer_names=layer_names,
+        batchnorm_training=batchnorm_training,
+        output_stride=output_stride,
+        align_feature_maps=align_feature_maps,
+        batchnorm_scale=batchnorm_scale,
+        weight_decay=weight_decay,
+        default_batchnorm_momentum=default_batchnorm_momentum,
+        default_batchnorm_epsilon=default_batchnorm_epsilon)
+
+    image_tensor = np.random.rand(_BATCH_SIZE, image_height, image_width,
+                                  _NUM_CHANNELS).astype(np.float32)
+    feature_maps = model(image_tensor)
+
+    for feature_map, layer_name in zip(feature_maps, layer_names):
+      endpoint_name = _KERAS_TO_SLIM_ENDPOINT_NAMES[layer_name]
+      expected_shape = expected_feature_map_shape[endpoint_name]
+      self.assertAllEqual(feature_map.shape, expected_shape)
+
+  def _get_variables(self, layer_names=None):
+    tf.keras.backend.clear_session()
+    model = self._create_application_with_layer_outputs(
+        layer_names=layer_names,
+        batchnorm_training=False)
+    preprocessed_inputs = tf.placeholder(
+        tf.float32, (4, None, None, _NUM_CHANNELS))
+    model(preprocessed_inputs)
+    return model.variables
+
+  def test_returns_correct_shapes_128(self):
+    image_height = 128
+    image_width = 128
+    expected_feature_map_shape = (
+        _SLIM_ENDPOINT_SHAPES_128)
+    self._check_returns_correct_shape(
+        image_height, image_width, expected_feature_map_shape,
+        align_feature_maps=True)
+
+  def test_returns_correct_shapes_128_output_stride_8(self):
+    image_height = 128
+    image_width = 128
+    expected_feature_map_shape = (
+        _SLIM_ENDPOINT_SHAPES_128_STRIDE_8)
+
+    # Output stride of 8 is not defined beyond 'block17_20_ac', which is
+    # PreAuxLogits in slim. So we exclude those layers in our Keras vs slim
+    # comparison.
+    excluded_layers = {'mixed_7a', 'conv_7b_ac'}
+    layer_names = [l for l in _KERAS_LAYERS_TO_CHECK
+                   if l not in excluded_layers]
+    self._check_returns_correct_shape(
+        image_height, image_width, expected_feature_map_shape,
+        layer_names=layer_names, output_stride=8, align_feature_maps=True)
+
+  def test_returns_correct_shapes_128_align_feature_maps_false(self):
+    image_height = 128
+    image_width = 128
+    expected_feature_map_shape = (
+        _SLIM_ENDPOINT_SHAPES_128_ALIGN_FEATURE_MAPS_FALSE)
+    self._check_returns_correct_shape(
+        image_height, image_width, expected_feature_map_shape,
+        align_feature_maps=False)
+
+  def test_hyperparam_override(self):
+    model = inception_resnet_v2.inception_resnet_v2(
+        batchnorm_training=True,
+        default_batchnorm_momentum=0.2,
+        default_batchnorm_epsilon=0.1,
+        weights=None,
+        include_top=False)
+    bn_layer = model.get_layer(name='freezable_batch_norm')
+    self.assertAllClose(bn_layer.momentum, 0.2)
+    self.assertAllClose(bn_layer.epsilon, 0.1)
+
+  def test_variable_count(self):
+    variables = self._get_variables()
+    # 896 is the number of variables in the slim inception resnet v2 model.
+    self.assertEqual(len(variables), 896)
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/models/keras_models/mobilenet_v1.py b/models/keras_models/mobilenet_v1.py
new file mode 100644
index 0000000..e1bfb32
--- /dev/null
+++ b/models/keras_models/mobilenet_v1.py
@@ -0,0 +1,347 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""A wrapper around the Keras MobilenetV1 models for object detection.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from object_detection.core import freezable_batch_norm +from object_detection.models.keras_models import model_utils + + +def _fixed_padding(inputs, kernel_size, rate=1): # pylint: disable=invalid-name + """Pads the input along the spatial dimensions independently of input size. + + Pads the input such that if it was used in a convolution with 'VALID' padding, + the output would have the same dimensions as if the unpadded input was used + in a convolution with 'SAME' padding. + + Args: + inputs: A tensor of size [batch, height_in, width_in, channels]. + kernel_size: The kernel to be used in the conv2d or max_pool2d operation. + rate: An integer, rate for atrous convolution. + + Returns: + output: A tensor of size [batch, height_out, width_out, channels] with the + input, either intact (if kernel_size == 1) or padded (if kernel_size > 1). + """ + kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1), + kernel_size[0] + (kernel_size[0] - 1) * (rate - 1)] + pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1] + pad_beg = [pad_total[0] // 2, pad_total[1] // 2] + pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]] + padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]], + [pad_beg[1], pad_end[1]], [0, 0]]) + return padded_inputs + + +class _LayersOverride(object): + """Alternative Keras layers interface for the Keras MobileNetV1.""" + + def __init__(self, + batchnorm_training, + default_batchnorm_momentum=0.999, + conv_hyperparams=None, + use_explicit_padding=False, + alpha=1.0, + min_depth=None, + conv_defs=None): + """Alternative tf.keras.layers interface, for use by the Keras MobileNetV1. + + It is used by the Keras applications kwargs injection API to + modify the MobilenetV1 Keras application with changes required by + the Object Detection API. + + These injected interfaces make the following changes to the network: + + - Applies the Object Detection hyperparameter configuration + - Supports FreezableBatchNorms + - Adds support for a min number of filters for each layer + - Makes the `alpha` parameter affect the final convolution block even if it + is less than 1.0 + - Adds support for explicit padding of convolutions + + Args: + batchnorm_training: Bool. Assigned to Batch norm layer `training` param + when constructing `freezable_batch_norm.FreezableBatchNorm` layers. + default_batchnorm_momentum: Float. When 'conv_hyperparams' is None, + batch norm layers will be constructed using this value as the momentum. + conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object + containing hyperparameters for convolution ops. 
Optionally set to `None` + to use default mobilenet_v1 layer builders. + use_explicit_padding: If True, use 'valid' padding for convolutions, + but explicitly pre-pads inputs so that the output dimensions are the + same as if 'same' padding were used. Off by default. + alpha: The width multiplier referenced in the MobileNetV1 paper. It + modifies the number of filters in each convolutional layer. It's called + depth multiplier in Keras application MobilenetV1. + min_depth: Minimum number of filters in the convolutional layers. + conv_defs: Network layout to specify the mobilenet_v1 body. Default is + `None` to use the default mobilenet_v1 network layout. + """ + self._alpha = alpha + self._batchnorm_training = batchnorm_training + self._default_batchnorm_momentum = default_batchnorm_momentum + self._conv_hyperparams = conv_hyperparams + self._use_explicit_padding = use_explicit_padding + self._min_depth = min_depth + self._conv_defs = conv_defs + self.regularizer = tf.keras.regularizers.l2(0.00004 * 0.5) + self.initializer = tf.truncated_normal_initializer(stddev=0.09) + + def _FixedPaddingLayer(self, kernel_size, rate=1): + return tf.keras.layers.Lambda( + lambda x: _fixed_padding(x, kernel_size, rate)) + + def Conv2D(self, filters, kernel_size, **kwargs): + """Builds a Conv2D layer according to the current Object Detection config. + + Overrides the Keras MobileNetV1 application's convolutions with ones that + follow the spec specified by the Object Detection hyperparameters. + + Args: + filters: The number of filters to use for the convolution. + kernel_size: The kernel size to specify the height and width of the 2D + convolution window. + **kwargs: Keyword args specified by the Keras application for + constructing the convolution. + + Returns: + A one-arg callable that will either directly apply a Keras Conv2D layer to + the input argument, or that will first pad the input then apply a Conv2D + layer. + """ + layer_name = kwargs['name'] + if self._conv_defs: + conv_filters = model_utils.get_conv_def(self._conv_defs, layer_name) + if conv_filters: + filters = conv_filters + # Apply the width multiplier and the minimum depth to the convolution layers + filters = int(filters * self._alpha) + if self._min_depth and filters < self._min_depth: + filters = self._min_depth + + if self._conv_hyperparams: + kwargs = self._conv_hyperparams.params(**kwargs) + else: + kwargs['kernel_regularizer'] = self.regularizer + kwargs['kernel_initializer'] = self.initializer + + kwargs['padding'] = 'same' + if self._use_explicit_padding and kernel_size > 1: + kwargs['padding'] = 'valid' + def padded_conv(features): # pylint: disable=invalid-name + padded_features = self._FixedPaddingLayer(kernel_size)(features) + return tf.keras.layers.Conv2D( + filters, kernel_size, **kwargs)(padded_features) + return padded_conv + else: + return tf.keras.layers.Conv2D(filters, kernel_size, **kwargs) + + def DepthwiseConv2D(self, kernel_size, **kwargs): + """Builds a DepthwiseConv2D according to the Object Detection config. + + Overrides the Keras MobileNetV2 application's convolutions with ones that + follow the spec specified by the Object Detection hyperparameters. + + Args: + kernel_size: The kernel size to specify the height and width of the 2D + convolution window. + **kwargs: Keyword args specified by the Keras application for + constructing the convolution. 
+
+    Returns:
+      A one-arg callable that will either directly apply a Keras
+      DepthwiseConv2D layer to the input argument, or that will first pad the
+      input and then apply the depthwise convolution.
+    """
+    if self._conv_hyperparams:
+      kwargs = self._conv_hyperparams.params(**kwargs)
+      # Both the regularizer and the initializer also apply to the depthwise
+      # layer in MobilenetV1, so we remap kernel_* to depthwise_* here.
+      kwargs['depthwise_regularizer'] = kwargs['kernel_regularizer']
+      kwargs['depthwise_initializer'] = kwargs['kernel_initializer']
+    else:
+      kwargs['depthwise_regularizer'] = self.regularizer
+      kwargs['depthwise_initializer'] = self.initializer
+
+    kwargs['padding'] = 'same'
+    if self._use_explicit_padding:
+      kwargs['padding'] = 'valid'
+      def padded_depthwise_conv(features):  # pylint: disable=invalid-name
+        padded_features = self._FixedPaddingLayer(kernel_size)(features)
+        return tf.keras.layers.DepthwiseConv2D(
+            kernel_size, **kwargs)(padded_features)
+      return padded_depthwise_conv
+    else:
+      return tf.keras.layers.DepthwiseConv2D(kernel_size, **kwargs)
+
+  def BatchNormalization(self, **kwargs):
+    """Builds a normalization layer.
+
+    Overrides the Keras application batch norm with the norm specified by the
+    Object Detection configuration.
+
+    Args:
+      **kwargs: Only the name is used; all other params are ignored.
+        Required for matching `layers.BatchNormalization` calls in the Keras
+        application.
+
+    Returns:
+      A normalization layer specified by the Object Detection hyperparameter
+      configurations.
+    """
+    name = kwargs.get('name')
+    if self._conv_hyperparams:
+      return self._conv_hyperparams.build_batch_norm(
+          training=self._batchnorm_training,
+          name=name)
+    else:
+      return freezable_batch_norm.FreezableBatchNorm(
+          training=self._batchnorm_training,
+          epsilon=1e-3,
+          momentum=self._default_batchnorm_momentum,
+          name=name)
+
+  def Input(self, shape):
+    """Builds an Input layer.
+
+    Overrides the Keras application Input layer with one that uses a
+    tf.placeholder_with_default instead of a tf.placeholder. This is necessary
+    to ensure the application works when run on a TPU.
+
+    Args:
+      shape: The shape for the input layer to use. (Does not include a
+        dimension for the batch size.)
+
+    Returns:
+      An input layer for the specified shape that internally uses a
+      placeholder_with_default.
+    """
+    default_size = 224
+    default_batch_size = 1
+    shape = list(shape)
+    default_shape = [default_size if dim is None else dim for dim in shape]
+
+    input_tensor = tf.constant(0.0, shape=[default_batch_size] + default_shape)
+
+    placeholder_with_default = tf.placeholder_with_default(
+        input=input_tensor, shape=[None] + shape)
+    return model_utils.input_layer(shape, placeholder_with_default)
+
+  # pylint: disable=unused-argument
+  def ReLU(self, *args, **kwargs):
+    """Builds an activation layer.
+
+    Overrides the Keras application ReLU with the activation specified by the
+    Object Detection configuration.
+
+    Args:
+      *args: Ignored, required to match the `tf.keras.ReLU` interface.
+      **kwargs: Only the name is used,
+        required to match the `tf.keras.ReLU` interface.
+
+    Returns:
+      An activation layer specified by the Object Detection hyperparameter
+      configurations.
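+
+    When no `conv_hyperparams` object is configured, this defaults to ReLU6
+    (a Lambda layer wrapping `tf.nn.relu6`), the activation used by the slim
+    MobilenetV1.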
+ """ + name = kwargs.get('name') + if self._conv_hyperparams: + return self._conv_hyperparams.build_activation_layer(name=name) + else: + return tf.keras.layers.Lambda(tf.nn.relu6, name=name) + # pylint: enable=unused-argument + + # pylint: disable=unused-argument + def ZeroPadding2D(self, padding, **kwargs): + """Replaces explicit padding in the Keras application with a no-op. + + Args: + padding: The padding values for image height and width. + **kwargs: Ignored, required to match the Keras applications usage. + + Returns: + A no-op identity lambda. + """ + return lambda x: x + # pylint: enable=unused-argument + + # Forward all non-overridden methods to the keras layers + def __getattr__(self, item): + return getattr(tf.keras.layers, item) + + +# pylint: disable=invalid-name +def mobilenet_v1(batchnorm_training, + default_batchnorm_momentum=0.9997, + conv_hyperparams=None, + use_explicit_padding=False, + alpha=1.0, + min_depth=None, + conv_defs=None, + **kwargs): + """Instantiates the MobileNetV1 architecture, modified for object detection. + + This wraps the MobileNetV1 tensorflow Keras application, but uses the + Keras application's kwargs-based monkey-patching API to override the Keras + architecture with the following changes: + + - Changes the default batchnorm momentum to 0.9997 + - Applies the Object Detection hyperparameter configuration + - Supports FreezableBatchNorms + - Adds support for a min number of filters for each layer + - Makes the `alpha` parameter affect the final convolution block even if it + is less than 1.0 + - Adds support for explicit padding of convolutions + - Makes the Input layer use a tf.placeholder_with_default instead of a + tf.placeholder, to work on TPUs. + + Args: + batchnorm_training: Bool. Assigned to Batch norm layer `training` param + when constructing `freezable_batch_norm.FreezableBatchNorm` layers. + default_batchnorm_momentum: Float. When 'conv_hyperparams' is None, + batch norm layers will be constructed using this value as the momentum. + conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object + containing hyperparameters for convolution ops. Optionally set to `None` + to use default mobilenet_v1 layer builders. + use_explicit_padding: If True, use 'valid' padding for convolutions, + but explicitly pre-pads inputs so that the output dimensions are the + same as if 'same' padding were used. Off by default. + alpha: The width multiplier referenced in the MobileNetV1 paper. It + modifies the number of filters in each convolutional layer. + min_depth: Minimum number of filters in the convolutional layers. + conv_defs: Network layout to specify the mobilenet_v1 body. Default is + `None` to use the default mobilenet_v1 network layout. + **kwargs: Keyword arguments forwarded directly to the + `tf.keras.applications.Mobilenet` method that constructs the Keras + model. + + Returns: + A Keras model instance. 
+ """ + layers_override = _LayersOverride( + batchnorm_training, + default_batchnorm_momentum=default_batchnorm_momentum, + conv_hyperparams=conv_hyperparams, + use_explicit_padding=use_explicit_padding, + min_depth=min_depth, + alpha=alpha, + conv_defs=conv_defs) + return tf.keras.applications.MobileNet( + alpha=alpha, layers=layers_override, **kwargs) +# pylint: enable=invalid-name diff --git a/models/keras_models/mobilenet_v1_test.py b/models/keras_models/mobilenet_v1_test.py new file mode 100644 index 0000000..9e1d349 --- /dev/null +++ b/models/keras_models/mobilenet_v1_test.py @@ -0,0 +1,258 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for mobilenet_v1.py. + +This test mainly focuses on comparing slim MobilenetV1 and Keras MobilenetV1 for +object detection. To verify the consistency of the two models, we compare: + 1. Output shape of each layer given different inputs + 2. Number of global variables + +We also visualize the model structure via Tensorboard, and compare the model +layout and the parameters of each Op to make sure the two implementations are +consistent. +""" + +import itertools +import numpy as np +import tensorflow as tf + +from google.protobuf import text_format + +from object_detection.builders import hyperparams_builder +from object_detection.models.keras_models import mobilenet_v1 +from object_detection.models.keras_models import model_utils +from object_detection.models.keras_models import test_utils +from object_detection.protos import hyperparams_pb2 +from object_detection.utils import test_case + +_KERAS_LAYERS_TO_CHECK = [ + 'conv1_relu', + 'conv_dw_1_relu', 'conv_pw_1_relu', + 'conv_dw_2_relu', 'conv_pw_2_relu', + 'conv_dw_3_relu', 'conv_pw_3_relu', + 'conv_dw_4_relu', 'conv_pw_4_relu', + 'conv_dw_5_relu', 'conv_pw_5_relu', + 'conv_dw_6_relu', 'conv_pw_6_relu', + 'conv_dw_7_relu', 'conv_pw_7_relu', + 'conv_dw_8_relu', 'conv_pw_8_relu', + 'conv_dw_9_relu', 'conv_pw_9_relu', + 'conv_dw_10_relu', 'conv_pw_10_relu', + 'conv_dw_11_relu', 'conv_pw_11_relu', + 'conv_dw_12_relu', 'conv_pw_12_relu', + 'conv_dw_13_relu', 'conv_pw_13_relu', +] + +_NUM_CHANNELS = 3 +_BATCH_SIZE = 2 + + +class MobilenetV1Test(test_case.TestCase): + + def _build_conv_hyperparams(self): + conv_hyperparams = hyperparams_pb2.Hyperparams() + conv_hyperparams_text_proto = """ + activation: RELU_6 + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + batch_norm { + train: true, + scale: false, + center: true, + decay: 0.2, + epsilon: 0.1, + } + """ + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) + return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams) + + def _create_application_with_layer_outputs( + self, layer_names, batchnorm_training, + conv_hyperparams=None, + use_explicit_padding=False, + alpha=1.0, + min_depth=None, + conv_defs=None): + """Constructs Keras 
MobilenetV1 that extracts intermediate layer outputs."""
+    if not layer_names:
+      layer_names = _KERAS_LAYERS_TO_CHECK
+    full_model = mobilenet_v1.mobilenet_v1(
+        batchnorm_training=batchnorm_training,
+        conv_hyperparams=conv_hyperparams,
+        weights=None,
+        use_explicit_padding=use_explicit_padding,
+        alpha=alpha,
+        min_depth=min_depth,
+        conv_defs=conv_defs,
+        include_top=False)
+    layer_outputs = [full_model.get_layer(name=layer).output
+                     for layer in layer_names]
+    return tf.keras.Model(
+        inputs=full_model.inputs,
+        outputs=layer_outputs)
+
+  def _check_returns_correct_shape(
+      self, image_height, image_width, depth_multiplier,
+      expected_feature_map_shape, use_explicit_padding=False, min_depth=8,
+      layer_names=None, conv_defs=None):
+    def graph_fn(image_tensor):
+      model = self._create_application_with_layer_outputs(
+          layer_names=layer_names,
+          batchnorm_training=False,
+          use_explicit_padding=use_explicit_padding,
+          min_depth=min_depth,
+          alpha=depth_multiplier,
+          conv_defs=conv_defs)
+      return model(image_tensor)
+
+    image_tensor = np.random.rand(_BATCH_SIZE, image_height, image_width,
+                                  _NUM_CHANNELS).astype(np.float32)
+    feature_maps = self.execute(graph_fn, [image_tensor])
+
+    # Use the builtin zip (itertools.izip does not exist in Python 3).
+    for feature_map, expected_shape in zip(
+        feature_maps, expected_feature_map_shape):
+      self.assertAllEqual(feature_map.shape, expected_shape)
+
+  def _check_returns_correct_shapes_with_dynamic_inputs(
+      self, image_height, image_width, depth_multiplier,
+      expected_feature_map_shape, use_explicit_padding=False, min_depth=8,
+      layer_names=None):
+    def graph_fn(image_height, image_width):
+      image_tensor = tf.random_uniform([_BATCH_SIZE, image_height, image_width,
+                                        _NUM_CHANNELS], dtype=tf.float32)
+      model = self._create_application_with_layer_outputs(
+          layer_names=layer_names,
+          batchnorm_training=False,
+          use_explicit_padding=use_explicit_padding,
+          alpha=depth_multiplier)
+      return model(image_tensor)
+
+    feature_maps = self.execute_cpu(graph_fn, [
+        np.array(image_height, dtype=np.int32),
+        np.array(image_width, dtype=np.int32)
+    ])
+
+    for feature_map, expected_shape in zip(
+        feature_maps, expected_feature_map_shape):
+      self.assertAllEqual(feature_map.shape, expected_shape)
+
+  def _get_variables(self, depth_multiplier, layer_names=None):
+    g = tf.Graph()
+    with g.as_default():
+      preprocessed_inputs = tf.placeholder(
+          tf.float32, (4, None, None, _NUM_CHANNELS))
+      model = self._create_application_with_layer_outputs(
+          layer_names=layer_names,
+          batchnorm_training=False, use_explicit_padding=False,
+          alpha=depth_multiplier)
+      model(preprocessed_inputs)
+      return g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
+
+  def test_returns_correct_shapes_128(self):
+    image_height = 128
+    image_width = 128
+    depth_multiplier = 1.0
+    expected_feature_map_shape = (
+        test_utils.moblenet_v1_expected_feature_map_shape_128)
+    self._check_returns_correct_shape(
+        image_height, image_width, depth_multiplier, expected_feature_map_shape)
+
+  def test_returns_correct_shapes_128_explicit_padding(
+      self):
+    image_height = 128
+    image_width = 128
+    depth_multiplier = 1.0
+    expected_feature_map_shape = (
+        test_utils.moblenet_v1_expected_feature_map_shape_128_explicit_padding)
+    self._check_returns_correct_shape(
+        image_height, image_width, depth_multiplier, expected_feature_map_shape,
+        use_explicit_padding=True)
+
+  def test_returns_correct_shapes_with_dynamic_inputs(
+      self):
+    image_height = 128
+    image_width = 128
+    depth_multiplier = 1.0
+    expected_feature_map_shape = (
test_utils.mobilenet_v1_expected_feature_map_shape_with_dynamic_inputs) + self._check_returns_correct_shapes_with_dynamic_inputs( + image_height, image_width, depth_multiplier, expected_feature_map_shape) + + def test_returns_correct_shapes_299(self): + image_height = 299 + image_width = 299 + depth_multiplier = 1.0 + expected_feature_map_shape = ( + test_utils.moblenet_v1_expected_feature_map_shape_299) + self._check_returns_correct_shape( + image_height, image_width, depth_multiplier, expected_feature_map_shape) + + def test_returns_correct_shapes_enforcing_min_depth( + self): + image_height = 299 + image_width = 299 + depth_multiplier = 0.5**12 + expected_feature_map_shape = ( + test_utils.moblenet_v1_expected_feature_map_shape_enforcing_min_depth) + self._check_returns_correct_shape( + image_height, image_width, depth_multiplier, expected_feature_map_shape) + + def test_returns_correct_shapes_with_conv_defs( + self): + image_height = 299 + image_width = 299 + depth_multiplier = 1.0 + conv_def_block_12 = model_utils.ConvDefs( + conv_name='conv_pw_12', filters=512) + conv_def_block_13 = model_utils.ConvDefs( + conv_name='conv_pw_13', filters=256) + conv_defs = [conv_def_block_12, conv_def_block_13] + + expected_feature_map_shape = ( + test_utils.moblenet_v1_expected_feature_map_shape_with_conv_defs) + self._check_returns_correct_shape( + image_height, image_width, depth_multiplier, expected_feature_map_shape, + conv_defs=conv_defs) + + def test_hyperparam_override(self): + hyperparams = self._build_conv_hyperparams() + model = mobilenet_v1.mobilenet_v1( + batchnorm_training=True, + conv_hyperparams=hyperparams, + weights=None, + use_explicit_padding=False, + alpha=1.0, + min_depth=32, + include_top=False) + hyperparams.params() + bn_layer = model.get_layer(name='conv_pw_5_bn') + self.assertAllClose(bn_layer.momentum, 0.2) + self.assertAllClose(bn_layer.epsilon, 0.1) + + def test_variable_count(self): + depth_multiplier = 1 + variables = self._get_variables(depth_multiplier) + # 135 is the number of variables from slim MobilenetV1 model. + self.assertEqual(len(variables), 135) + + +if __name__ == '__main__': + tf.test.main() diff --git a/models/keras_models/mobilenet_v2.py b/models/keras_models/mobilenet_v2.py new file mode 100644 index 0000000..7f8ed50 --- /dev/null +++ b/models/keras_models/mobilenet_v2.py @@ -0,0 +1,334 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""A wrapper around the MobileNet v2 models for Keras, for object detection.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from object_detection.core import freezable_batch_norm +from object_detection.models.keras_models import model_utils +from object_detection.utils import ops + + +# pylint: disable=invalid-name +# This method copied from the slim mobilenet base network code (same license) +def _make_divisible(v, divisor, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class _LayersOverride(object): + """Alternative Keras layers interface for the Keras MobileNetV2.""" + + def __init__(self, + batchnorm_training, + default_batchnorm_momentum=0.999, + conv_hyperparams=None, + use_explicit_padding=False, + alpha=1.0, + min_depth=None, + conv_defs=None): + """Alternative tf.keras.layers interface, for use by the Keras MobileNetV2. + + It is used by the Keras applications kwargs injection API to + modify the Mobilenet v2 Keras application with changes required by + the Object Detection API. + + These injected interfaces make the following changes to the network: + + - Applies the Object Detection hyperparameter configuration + - Supports FreezableBatchNorms + - Adds support for a min number of filters for each layer + - Makes the `alpha` parameter affect the final convolution block even if it + is less than 1.0 + - Adds support for explicit padding of convolutions + + Args: + batchnorm_training: Bool. Assigned to Batch norm layer `training` param + when constructing `freezable_batch_norm.FreezableBatchNorm` layers. + default_batchnorm_momentum: Float. When 'conv_hyperparams' is None, + batch norm layers will be constructed using this value as the momentum. + conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object + containing hyperparameters for convolution ops. Optionally set to `None` + to use default mobilenet_v2 layer builders. + use_explicit_padding: If True, use 'valid' padding for convolutions, + but explicitly pre-pads inputs so that the output dimensions are the + same as if 'same' padding were used. Off by default. + alpha: The width multiplier referenced in the MobileNetV2 paper. It + modifies the number of filters in each convolutional layer. + min_depth: Minimum number of filters in the convolutional layers. + conv_defs: Network layout to specify the mobilenet_v2 body. Default is + `None` to use the default mobilenet_v2 network layout. + """ + self._alpha = alpha + self._batchnorm_training = batchnorm_training + self._default_batchnorm_momentum = default_batchnorm_momentum + self._conv_hyperparams = conv_hyperparams + self._use_explicit_padding = use_explicit_padding + self._min_depth = min_depth + self._conv_defs = conv_defs + self.regularizer = tf.keras.regularizers.l2(0.00004 * 0.5) + self.initializer = tf.truncated_normal_initializer(stddev=0.09) + + def _FixedPaddingLayer(self, kernel_size): + return tf.keras.layers.Lambda(lambda x: ops.fixed_padding(x, kernel_size)) + + def Conv2D(self, filters, **kwargs): + """Builds a Conv2D layer according to the current Object Detection config. 
+
+    Overrides the Keras MobileNetV2 application's convolutions with ones that
+    follow the spec specified by the Object Detection hyperparameters.
+
+    Args:
+      filters: The number of filters to use for the convolution.
+      **kwargs: Keyword args specified by the Keras application for
+        constructing the convolution.
+
+    Returns:
+      A one-arg callable that will either directly apply a Keras Conv2D layer
+      to the input argument, or that will first pad the input then apply a
+      Conv2D layer.
+    """
+    # Make sure 'alpha' is always applied to the last convolution block's size
+    # (This overrides the Keras application's functionality)
+    layer_name = kwargs.get('name')
+    if layer_name == 'Conv_1':
+      if self._conv_defs:
+        filters = model_utils.get_conv_def(self._conv_defs, 'Conv_1')
+      else:
+        filters = 1280
+      if self._alpha < 1.0:
+        filters = _make_divisible(filters * self._alpha, 8)
+
+    # Apply the minimum depth to the convolution layers
+    if (self._min_depth and (filters < self._min_depth)
+        and not kwargs.get('name').endswith('expand')):
+      filters = self._min_depth
+
+    if self._conv_hyperparams:
+      kwargs = self._conv_hyperparams.params(**kwargs)
+    else:
+      kwargs['kernel_regularizer'] = self.regularizer
+      kwargs['kernel_initializer'] = self.initializer
+
+    kwargs['padding'] = 'same'
+    kernel_size = kwargs.get('kernel_size')
+    if self._use_explicit_padding and kernel_size > 1:
+      kwargs['padding'] = 'valid'
+      def padded_conv(features):
+        padded_features = self._FixedPaddingLayer(kernel_size)(features)
+        return tf.keras.layers.Conv2D(filters, **kwargs)(padded_features)
+
+      return padded_conv
+    else:
+      return tf.keras.layers.Conv2D(filters, **kwargs)
+
+  def DepthwiseConv2D(self, **kwargs):
+    """Builds a DepthwiseConv2D according to the Object Detection config.
+
+    Overrides the Keras MobileNetV2 application's convolutions with ones that
+    follow the spec specified by the Object Detection hyperparameters.
+
+    Args:
+      **kwargs: Keyword args specified by the Keras application for
+        constructing the convolution.
+
+    Returns:
+      A one-arg callable that will either directly apply a Keras
+      DepthwiseConv2D layer to the input argument, or that will first pad the
+      input then apply the depthwise convolution.
+    """
+    if self._conv_hyperparams:
+      kwargs = self._conv_hyperparams.params(**kwargs)
+      # Both the regularizer and the initializer apply to the depthwise layer
+      # in MobilenetV2, so we remap the kernel_* to depthwise_* here.
+      kwargs['depthwise_regularizer'] = kwargs['kernel_regularizer']
+      kwargs['depthwise_initializer'] = kwargs['kernel_initializer']
+    else:
+      kwargs['depthwise_regularizer'] = self.regularizer
+      kwargs['depthwise_initializer'] = self.initializer
+
+    kwargs['padding'] = 'same'
+    kernel_size = kwargs.get('kernel_size')
+    if self._use_explicit_padding and kernel_size > 1:
+      kwargs['padding'] = 'valid'
+      def padded_depthwise_conv(features):
+        padded_features = self._FixedPaddingLayer(kernel_size)(features)
+        return tf.keras.layers.DepthwiseConv2D(**kwargs)(padded_features)
+
+      return padded_depthwise_conv
+    else:
+      return tf.keras.layers.DepthwiseConv2D(**kwargs)
+
+  def BatchNormalization(self, **kwargs):
+    """Builds a normalization layer.
+
+    Overrides the Keras application batch norm with the norm specified by the
+    Object Detection configuration.
+
+    Args:
+      **kwargs: Only the name is used, all other params ignored.
+        Required for matching `layers.BatchNormalization` calls in the Keras
+        application.
+ + Returns: + A normalization layer specified by the Object Detection hyperparameter + configurations. + """ + name = kwargs.get('name') + if self._conv_hyperparams: + return self._conv_hyperparams.build_batch_norm( + training=self._batchnorm_training, + name=name) + else: + return freezable_batch_norm.FreezableBatchNorm( + training=self._batchnorm_training, + epsilon=1e-3, + momentum=self._default_batchnorm_momentum, + name=name) + + def Input(self, shape): + """Builds an Input layer. + + Overrides the Keras application Input layer with one that uses a + tf.placeholder_with_default instead of a tf.placeholder. This is necessary + to ensure the application works when run on a TPU. + + Args: + shape: The shape for the input layer to use. (Does not include a dimension + for the batch size). + Returns: + An input layer for the specified shape that internally uses a + placeholder_with_default. + """ + default_size = 224 + default_batch_size = 1 + shape = list(shape) + default_shape = [default_size if dim is None else dim for dim in shape] + + input_tensor = tf.constant(0.0, shape=[default_batch_size] + default_shape) + + placeholder_with_default = tf.placeholder_with_default( + input=input_tensor, shape=[None] + shape) + return model_utils.input_layer(shape, placeholder_with_default) + + # pylint: disable=unused-argument + def ReLU(self, *args, **kwargs): + """Builds an activation layer. + + Overrides the Keras application ReLU with the activation specified by the + Object Detection configuration. + + Args: + *args: Ignored, required to match the `tf.keras.ReLU` interface + **kwargs: Only the name is used, + required to match `tf.keras.ReLU` interface + + Returns: + An activation layer specified by the Object Detection hyperparameter + configurations. + """ + name = kwargs.get('name') + if self._conv_hyperparams: + return self._conv_hyperparams.build_activation_layer(name=name) + else: + return tf.keras.layers.Lambda(tf.nn.relu6, name=name) + # pylint: enable=unused-argument + + # pylint: disable=unused-argument + def ZeroPadding2D(self, **kwargs): + """Replaces explicit padding in the Keras application with a no-op. + + Args: + **kwargs: Ignored, required to match the Keras applications usage. + + Returns: + A no-op identity lambda. + """ + return lambda x: x + # pylint: enable=unused-argument + + # Forward all non-overridden methods to the keras layers + def __getattr__(self, item): + return getattr(tf.keras.layers, item) + + +def mobilenet_v2(batchnorm_training, + default_batchnorm_momentum=0.9997, + conv_hyperparams=None, + use_explicit_padding=False, + alpha=1.0, + min_depth=None, + conv_defs=None, + **kwargs): + """Instantiates the MobileNetV2 architecture, modified for object detection. + + This wraps the MobileNetV2 tensorflow Keras application, but uses the + Keras application's kwargs-based monkey-patching API to override the Keras + architecture with the following changes: + + - Changes the default batchnorm momentum to 0.9997 + - Applies the Object Detection hyperparameter configuration + - Supports FreezableBatchNorms + - Adds support for a min number of filters for each layer + - Makes the `alpha` parameter affect the final convolution block even if it + is less than 1.0 + - Adds support for explicit padding of convolutions + - Makes the Input layer use a tf.placeholder_with_default instead of a + tf.placeholder, to work on TPUs. + + Args: + batchnorm_training: Bool. 
Assigned to Batch norm layer `training` param
+      when constructing `freezable_batch_norm.FreezableBatchNorm` layers.
+    default_batchnorm_momentum: Float. When 'conv_hyperparams' is None,
+      batch norm layers will be constructed using this value as the momentum.
+    conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
+      containing hyperparameters for convolution ops. Optionally set to `None`
+      to use default mobilenet_v2 layer builders.
+    use_explicit_padding: If True, use 'valid' padding for convolutions,
+      but explicitly pre-pads inputs so that the output dimensions are the
+      same as if 'same' padding were used. Off by default.
+    alpha: The width multiplier referenced in the MobileNetV2 paper. It
+      modifies the number of filters in each convolutional layer.
+    min_depth: Minimum number of filters in the convolutional layers.
+    conv_defs: Network layout to specify the mobilenet_v2 body. Default is
+      `None` to use the default mobilenet_v2 network layout.
+    **kwargs: Keyword arguments forwarded directly to the
+      `tf.keras.applications.MobileNetV2` function that constructs the Keras
+      model.
+
+  Returns:
+    A Keras model instance.
+  """
+  layers_override = _LayersOverride(
+      batchnorm_training,
+      default_batchnorm_momentum=default_batchnorm_momentum,
+      conv_hyperparams=conv_hyperparams,
+      use_explicit_padding=use_explicit_padding,
+      min_depth=min_depth,
+      alpha=alpha,
+      conv_defs=conv_defs)
+  return tf.keras.applications.MobileNetV2(alpha=alpha,
+                                           layers=layers_override,
+                                           **kwargs)
+# pylint: enable=invalid-name
diff --git a/models/keras_models/mobilenet_v2_test.py b/models/keras_models/mobilenet_v2_test.py
new file mode 100644
index 0000000..5ec8aae
--- /dev/null
+++ b/models/keras_models/mobilenet_v2_test.py
@@ -0,0 +1,247 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================== + +"""Tests for mobilenet_v2.""" +import itertools +import numpy as np +import tensorflow as tf + +from google.protobuf import text_format + +from object_detection.builders import hyperparams_builder +from object_detection.models.keras_models import mobilenet_v2 +from object_detection.models.keras_models import model_utils +from object_detection.models.keras_models import test_utils +from object_detection.protos import hyperparams_pb2 +from object_detection.utils import test_case + +_layers_to_check = [ + 'Conv1_relu', + 'block_1_expand_relu', 'block_1_depthwise_relu', 'block_1_project_BN', + 'block_2_expand_relu', 'block_2_depthwise_relu', 'block_2_project_BN', + 'block_3_expand_relu', 'block_3_depthwise_relu', 'block_3_project_BN', + 'block_4_expand_relu', 'block_4_depthwise_relu', 'block_4_project_BN', + 'block_5_expand_relu', 'block_5_depthwise_relu', 'block_5_project_BN', + 'block_6_expand_relu', 'block_6_depthwise_relu', 'block_6_project_BN', + 'block_7_expand_relu', 'block_7_depthwise_relu', 'block_7_project_BN', + 'block_8_expand_relu', 'block_8_depthwise_relu', 'block_8_project_BN', + 'block_9_expand_relu', 'block_9_depthwise_relu', 'block_9_project_BN', + 'block_10_expand_relu', 'block_10_depthwise_relu', 'block_10_project_BN', + 'block_11_expand_relu', 'block_11_depthwise_relu', 'block_11_project_BN', + 'block_12_expand_relu', 'block_12_depthwise_relu', 'block_12_project_BN', + 'block_13_expand_relu', 'block_13_depthwise_relu', 'block_13_project_BN', + 'block_14_expand_relu', 'block_14_depthwise_relu', 'block_14_project_BN', + 'block_15_expand_relu', 'block_15_depthwise_relu', 'block_15_project_BN', + 'block_16_expand_relu', 'block_16_depthwise_relu', 'block_16_project_BN', + 'out_relu'] + + +class MobilenetV2Test(test_case.TestCase): + + def _build_conv_hyperparams(self): + conv_hyperparams = hyperparams_pb2.Hyperparams() + conv_hyperparams_text_proto = """ + activation: RELU_6 + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + batch_norm { + train: true, + scale: false, + center: true, + decay: 0.2, + epsilon: 0.1, + } + """ + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) + return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams) + + def _create_application_with_layer_outputs( + self, layer_names, batchnorm_training, + conv_hyperparams=None, + use_explicit_padding=False, + alpha=1.0, + min_depth=None, + conv_defs=None): + """Constructs Keras mobilenetv2 that extracts intermediate layer outputs.""" + if not layer_names: + layer_names = _layers_to_check + full_model = mobilenet_v2.mobilenet_v2( + batchnorm_training=batchnorm_training, + conv_hyperparams=conv_hyperparams, + weights=None, + use_explicit_padding=use_explicit_padding, + alpha=alpha, + min_depth=min_depth, + include_top=False, + conv_defs=conv_defs) + layer_outputs = [full_model.get_layer(name=layer).output + for layer in layer_names] + return tf.keras.Model( + inputs=full_model.inputs, + outputs=layer_outputs) + + def _check_returns_correct_shape( + self, batch_size, image_height, image_width, depth_multiplier, + expected_feature_map_shapes, use_explicit_padding=False, min_depth=None, + layer_names=None, conv_defs=None): + def graph_fn(image_tensor): + model = self._create_application_with_layer_outputs( + layer_names=layer_names, + batchnorm_training=False, + use_explicit_padding=use_explicit_padding, + min_depth=min_depth, + alpha=depth_multiplier, + 
conv_defs=conv_defs)
+      return model(image_tensor)
+
+    image_tensor = np.random.rand(batch_size, image_height, image_width,
+                                  3).astype(np.float32)
+    feature_maps = self.execute(graph_fn, [image_tensor])
+
+    # Use the builtin zip (itertools.izip does not exist in Python 3).
+    for feature_map, expected_shape in zip(
+        feature_maps, expected_feature_map_shapes):
+      self.assertAllEqual(feature_map.shape, expected_shape)
+
+  def _check_returns_correct_shapes_with_dynamic_inputs(
+      self, batch_size, image_height, image_width, depth_multiplier,
+      expected_feature_map_shapes, use_explicit_padding=False,
+      layer_names=None):
+    def graph_fn(image_height, image_width):
+      image_tensor = tf.random_uniform([batch_size, image_height, image_width,
+                                        3], dtype=tf.float32)
+      model = self._create_application_with_layer_outputs(
+          layer_names=layer_names,
+          batchnorm_training=False, use_explicit_padding=use_explicit_padding,
+          alpha=depth_multiplier)
+      return model(image_tensor)
+
+    feature_maps = self.execute_cpu(graph_fn, [
+        np.array(image_height, dtype=np.int32),
+        np.array(image_width, dtype=np.int32)
+    ])
+
+    for feature_map, expected_shape in zip(
+        feature_maps, expected_feature_map_shapes):
+      self.assertAllEqual(feature_map.shape, expected_shape)
+
+  def _get_variables(self, depth_multiplier, layer_names=None):
+    g = tf.Graph()
+    with g.as_default():
+      preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
+      model = self._create_application_with_layer_outputs(
+          layer_names=layer_names,
+          batchnorm_training=False, use_explicit_padding=False,
+          alpha=depth_multiplier)
+      model(preprocessed_inputs)
+      return g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
+
+  def test_returns_correct_shapes_128(self):
+    image_height = 128
+    image_width = 128
+    depth_multiplier = 1.0
+    expected_feature_map_shape = (
+        test_utils.moblenet_v2_expected_feature_map_shape_128)
+
+    self._check_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier,
+        expected_feature_map_shape)
+
+  def test_returns_correct_shapes_128_explicit_padding(
+      self):
+    image_height = 128
+    image_width = 128
+    depth_multiplier = 1.0
+    expected_feature_map_shape = (
+        test_utils.moblenet_v2_expected_feature_map_shape_128_explicit_padding)
+    self._check_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier,
+        expected_feature_map_shape, use_explicit_padding=True)
+
+  def test_returns_correct_shapes_with_dynamic_inputs(
+      self):
+    image_height = 128
+    image_width = 128
+    depth_multiplier = 1.0
+    expected_feature_map_shape = (
+        test_utils.mobilenet_v2_expected_feature_map_shape_with_dynamic_inputs)
+    self._check_returns_correct_shapes_with_dynamic_inputs(
+        2, image_height, image_width, depth_multiplier,
+        expected_feature_map_shape)
+
+  def test_returns_correct_shapes_299(self):
+    image_height = 299
+    image_width = 299
+    depth_multiplier = 1.0
+    expected_feature_map_shape = (
+        test_utils.moblenet_v2_expected_feature_map_shape_299)
+    self._check_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier,
+        expected_feature_map_shape)
+
+  def test_returns_correct_shapes_enforcing_min_depth(
+      self):
+    image_height = 299
+    image_width = 299
+    depth_multiplier = 0.5**12
+    expected_feature_map_shape = (
+        test_utils.moblenet_v2_expected_feature_map_shape_enforcing_min_depth)
+    self._check_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier,
+        expected_feature_map_shape, min_depth=32)
+
+  def test_returns_correct_shapes_with_conv_defs(
+      self):
+    image_height = 299
+    image_width = 299
+    depth_multiplier = 1.0
+    conv_1 = model_utils.ConvDefs(conv_name='Conv_1', filters=256)
+    conv_defs = [conv_1]
+
+    expected_feature_map_shape = (
+        test_utils.moblenet_v2_expected_feature_map_shape_with_conv_defs)
+    self._check_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier,
+        expected_feature_map_shape, conv_defs=conv_defs)
+
+  def test_hyperparam_override(self):
+    hyperparams = self._build_conv_hyperparams()
+    model = mobilenet_v2.mobilenet_v2(
+        batchnorm_training=True,
+        conv_hyperparams=hyperparams,
+        weights=None,
+        use_explicit_padding=False,
+        alpha=1.0,
+        min_depth=32,
+        include_top=False)
+    hyperparams.params()
+    bn_layer = model.get_layer(name='block_5_project_BN')
+    self.assertAllClose(bn_layer.momentum, 0.2)
+    self.assertAllClose(bn_layer.epsilon, 0.1)
+
+  def test_variable_count(self):
+    depth_multiplier = 1
+    variables = self._get_variables(depth_multiplier)
+    self.assertEqual(len(variables), 260)
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/models/keras_models/model_utils.py b/models/keras_models/model_utils.py
new file mode 100644
index 0000000..1576fe9
--- /dev/null
+++ b/models/keras_models/model_utils.py
@@ -0,0 +1,53 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Utils for Keras models."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import tensorflow as tf
+
+# This is to specify the custom config of model structures. For example,
+# ConvDefs(conv_name='conv_pw_12', filters=512) for Mobilenet V1 specifies
+# the filters of the conv layer with name 'conv_pw_12' as 512.
+ConvDefs = collections.namedtuple('ConvDefs', ['conv_name', 'filters'])
+
+
+def get_conv_def(conv_defs, layer_name):
+  """Gets the custom config for a layer of the model structure.
+
+  Args:
+    conv_defs: A list of `ConvDefs` named tuples specifying the custom
+      configs of the model network. See `ConvDefs` for details.
+    layer_name: A string, the name of the layer to be customized.
+
+  Returns:
+    The number of filters for the layer, or `None` if there is no custom
+    config for the requested layer.
+  """
+  for conv_def in conv_defs:
+    if layer_name == conv_def.conv_name:
+      return conv_def.filters
+  return None
+
+
+def input_layer(shape, placeholder_with_default):
+  if tf.executing_eagerly():
+    return tf.keras.layers.Input(shape=shape)
+  else:
+    return tf.keras.layers.Input(tensor=placeholder_with_default)
diff --git a/models/keras_models/resnet_v1.py b/models/keras_models/resnet_v1.py
new file mode 100644
index 0000000..12b7112
--- /dev/null
+++ b/models/keras_models/resnet_v1.py
@@ -0,0 +1,397 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""A wrapper around the Keras Resnet V1 models for object detection.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from object_detection.core import freezable_batch_norm +from object_detection.models.keras_models import model_utils + + +def _fixed_padding(inputs, kernel_size, rate=1): # pylint: disable=invalid-name + """Pads the input along the spatial dimensions independently of input size. + + Pads the input such that if it was used in a convolution with 'VALID' padding, + the output would have the same dimensions as if the unpadded input was used + in a convolution with 'SAME' padding. + + Args: + inputs: A tensor of size [batch, height_in, width_in, channels]. + kernel_size: The kernel to be used in the conv2d or max_pool2d operation. + rate: An integer, rate for atrous convolution. + + Returns: + output: A tensor of size [batch, height_out, width_out, channels] with the + input, either intact (if kernel_size == 1) or padded (if kernel_size > 1). + """ + kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1) + pad_total = kernel_size_effective - 1 + pad_beg = pad_total // 2 + pad_end = pad_total - pad_beg + padded_inputs = tf.pad( + inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]) + return padded_inputs + + +class _LayersOverride(object): + """Alternative Keras layers interface for the Keras Resnet V1.""" + + def __init__(self, + batchnorm_training, + batchnorm_scale=True, + default_batchnorm_momentum=0.997, + default_batchnorm_epsilon=1e-5, + weight_decay=0.0001, + conv_hyperparams=None, + min_depth=8, + depth_multiplier=1): + """Alternative tf.keras.layers interface, for use by the Keras Resnet V1. + + The class is used by the Keras applications kwargs injection API to + modify the Resnet V1 Keras application with changes required by + the Object Detection API. + + Args: + batchnorm_training: Bool. Assigned to Batch norm layer `training` param + when constructing `freezable_batch_norm.FreezableBatchNorm` layers. + batchnorm_scale: If True, uses an explicit `gamma` multiplier to scale + the activations in the batch normalization layer. + default_batchnorm_momentum: Float. When 'conv_hyperparams' is None, + batch norm layers will be constructed using this value as the momentum. + default_batchnorm_epsilon: Float. When 'conv_hyperparams' is None, + batch norm layers will be constructed using this value as the epsilon. + weight_decay: The weight decay to use for regularizing the model. + conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object + containing hyperparameters for convolution ops. Optionally set to `None` + to use default resnet_v1 layer builders. + min_depth: Minimum number of filters in the convolutional layers. + depth_multiplier: The depth multiplier to modify the number of filters + in the convolutional layers. 
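+
+    For example, with `depth_multiplier=0.5` and `min_depth=8`, a convolution
+    declared with 64 filters is built with max(int(64 * 0.5), 8) = 32 filters.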
+ """ + self._batchnorm_training = batchnorm_training + self._batchnorm_scale = batchnorm_scale + self._default_batchnorm_momentum = default_batchnorm_momentum + self._default_batchnorm_epsilon = default_batchnorm_epsilon + self._conv_hyperparams = conv_hyperparams + self._min_depth = min_depth + self._depth_multiplier = depth_multiplier + self.regularizer = tf.keras.regularizers.l2(weight_decay) + self.initializer = tf.variance_scaling_initializer() + + def _FixedPaddingLayer(self, kernel_size, rate=1): + return tf.keras.layers.Lambda( + lambda x: _fixed_padding(x, kernel_size, rate)) + + def Conv2D(self, filters, kernel_size, **kwargs): + """Builds a Conv2D layer according to the current Object Detection config. + + Overrides the Keras Resnet application's convolutions with ones that + follow the spec specified by the Object Detection hyperparameters. + + Args: + filters: The number of filters to use for the convolution. + kernel_size: The kernel size to specify the height and width of the 2D + convolution window. + **kwargs: Keyword args specified by the Keras application for + constructing the convolution. + + Returns: + A one-arg callable that will either directly apply a Keras Conv2D layer to + the input argument, or that will first pad the input then apply a Conv2D + layer. + """ + # Apply the minimum depth to the convolution layers. + filters = max(int(filters * self._depth_multiplier), self._min_depth) + + if self._conv_hyperparams: + kwargs = self._conv_hyperparams.params(**kwargs) + else: + kwargs['kernel_regularizer'] = self.regularizer + kwargs['kernel_initializer'] = self.initializer + + # Set use_bias as false to keep it consistent with Slim Resnet model. + kwargs['use_bias'] = False + + kwargs['padding'] = 'same' + stride = kwargs.get('strides') + if stride and kernel_size and stride > 1 and kernel_size > 1: + kwargs['padding'] = 'valid' + def padded_conv(features): # pylint: disable=invalid-name + padded_features = self._FixedPaddingLayer(kernel_size)(features) + return tf.keras.layers.Conv2D( + filters, kernel_size, **kwargs)(padded_features) + return padded_conv + else: + return tf.keras.layers.Conv2D(filters, kernel_size, **kwargs) + + def Activation(self, *args, **kwargs): # pylint: disable=unused-argument + """Builds an activation layer. + + Overrides the Keras application Activation layer specified by the + Object Detection configuration. + + Args: + *args: Ignored, + required to match the `tf.keras.layers.Activation` interface. + **kwargs: Only the name is used, + required to match `tf.keras.layers.Activation` interface. + + Returns: + An activation layer specified by the Object Detection hyperparameter + configurations. + """ + name = kwargs.get('name') + if self._conv_hyperparams: + return self._conv_hyperparams.build_activation_layer(name=name) + else: + return tf.keras.layers.Lambda(tf.nn.relu, name=name) + + def BatchNormalization(self, **kwargs): + """Builds a normalization layer. + + Overrides the Keras application batch norm with the norm specified by the + Object Detection configuration. + + Args: + **kwargs: Only the name is used, all other params ignored. + Required for matching `layers.BatchNormalization` calls in the Keras + application. + + Returns: + A normalization layer specified by the Object Detection hyperparameter + configurations. 
+ """ + name = kwargs.get('name') + if self._conv_hyperparams: + return self._conv_hyperparams.build_batch_norm( + training=self._batchnorm_training, + name=name) + else: + kwargs['scale'] = self._batchnorm_scale + kwargs['epsilon'] = self._default_batchnorm_epsilon + return freezable_batch_norm.FreezableBatchNorm( + training=self._batchnorm_training, + momentum=self._default_batchnorm_momentum, + **kwargs) + + def Input(self, shape): + """Builds an Input layer. + + Overrides the Keras application Input layer with one that uses a + tf.placeholder_with_default instead of a tf.placeholder. This is necessary + to ensure the application works when run on a TPU. + + Args: + shape: A tuple of integers representing the shape of the input, which + includes both spatial share and channels, but not the batch size. + Elements of this tuple can be None; 'None' elements represent dimensions + where the shape is not known. + + Returns: + An input layer for the specified shape that internally uses a + placeholder_with_default. + """ + default_size = 224 + default_batch_size = 1 + shape = list(shape) + default_shape = [default_size if dim is None else dim for dim in shape] + + input_tensor = tf.constant(0.0, shape=[default_batch_size] + default_shape) + + placeholder_with_default = tf.placeholder_with_default( + input=input_tensor, shape=[None] + shape) + return model_utils.input_layer(shape, placeholder_with_default) + + def MaxPooling2D(self, pool_size, **kwargs): + """Builds a MaxPooling2D layer with default padding as 'SAME'. + + This is specified by the default resnet arg_scope in slim. + + Args: + pool_size: The pool size specified by the Keras application. + **kwargs: Ignored, required to match the Keras applications usage. + + Returns: + A MaxPooling2D layer with default padding as 'SAME'. + """ + kwargs['padding'] = 'same' + return tf.keras.layers.MaxPooling2D(pool_size, **kwargs) + + # Add alias as Keras also has it. + MaxPool2D = MaxPooling2D # pylint: disable=invalid-name + + def ZeroPadding2D(self, padding, **kwargs): # pylint: disable=unused-argument + """Replaces explicit padding in the Keras application with a no-op. + + Args: + padding: The padding values for image height and width. + **kwargs: Ignored, required to match the Keras applications usage. + + Returns: + A no-op identity lambda. + """ + return lambda x: x + + # Forward all non-overridden methods to the keras layers + def __getattr__(self, item): + return getattr(tf.keras.layers, item) + + +# pylint: disable=invalid-name +def resnet_v1_50(batchnorm_training, + batchnorm_scale=True, + default_batchnorm_momentum=0.997, + default_batchnorm_epsilon=1e-5, + weight_decay=0.0001, + conv_hyperparams=None, + min_depth=8, + depth_multiplier=1, + **kwargs): + """Instantiates the Resnet50 architecture, modified for object detection. + + Args: + batchnorm_training: Bool. Assigned to Batch norm layer `training` param + when constructing `freezable_batch_norm.FreezableBatchNorm` layers. + batchnorm_scale: If True, uses an explicit `gamma` multiplier to scale + the activations in the batch normalization layer. + default_batchnorm_momentum: Float. When 'conv_hyperparams' is None, + batch norm layers will be constructed using this value as the momentum. + default_batchnorm_epsilon: Float. When 'conv_hyperparams' is None, + batch norm layers will be constructed using this value as the epsilon. + weight_decay: The weight decay to use for regularizing the model. 
+    conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
+      containing hyperparameters for convolution ops. Optionally set to `None`
+      to use default resnet_v1 layer builders.
+    min_depth: Minimum number of filters in the convolutional layers.
+    depth_multiplier: The depth multiplier to modify the number of filters
+      in the convolutional layers.
+    **kwargs: Keyword arguments forwarded directly to the
+      `tf.keras.applications.resnet.ResNet50` function that constructs the
+      Keras model.
+
+  Returns:
+    A Keras ResnetV1-50 model instance.
+  """
+  layers_override = _LayersOverride(
+      batchnorm_training,
+      batchnorm_scale=batchnorm_scale,
+      default_batchnorm_momentum=default_batchnorm_momentum,
+      default_batchnorm_epsilon=default_batchnorm_epsilon,
+      conv_hyperparams=conv_hyperparams,
+      weight_decay=weight_decay,
+      min_depth=min_depth,
+      depth_multiplier=depth_multiplier)
+  return tf.keras.applications.resnet.ResNet50(
+      layers=layers_override, **kwargs)
+
+
+def resnet_v1_101(batchnorm_training,
+                  batchnorm_scale=True,
+                  default_batchnorm_momentum=0.997,
+                  default_batchnorm_epsilon=1e-5,
+                  weight_decay=0.0001,
+                  conv_hyperparams=None,
+                  min_depth=8,
+                  depth_multiplier=1,
+                  **kwargs):
+  """Instantiates the Resnet101 architecture, modified for object detection.
+
+  Args:
+    batchnorm_training: Bool. Assigned to Batch norm layer `training` param
+      when constructing `freezable_batch_norm.FreezableBatchNorm` layers.
+    batchnorm_scale: If True, uses an explicit `gamma` multiplier to scale
+      the activations in the batch normalization layer.
+    default_batchnorm_momentum: Float. When 'conv_hyperparams' is None,
+      batch norm layers will be constructed using this value as the momentum.
+    default_batchnorm_epsilon: Float. When 'conv_hyperparams' is None,
+      batch norm layers will be constructed using this value as the epsilon.
+    weight_decay: The weight decay to use for regularizing the model.
+    conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
+      containing hyperparameters for convolution ops. Optionally set to `None`
+      to use default resnet_v1 layer builders.
+    min_depth: Minimum number of filters in the convolutional layers.
+    depth_multiplier: The depth multiplier to modify the number of filters
+      in the convolutional layers.
+    **kwargs: Keyword arguments forwarded directly to the
+      `tf.keras.applications.resnet.ResNet101` function that constructs the
+      Keras model.
+
+  Returns:
+    A Keras ResnetV1-101 model instance.
+  """
+  layers_override = _LayersOverride(
+      batchnorm_training,
+      batchnorm_scale=batchnorm_scale,
+      default_batchnorm_momentum=default_batchnorm_momentum,
+      default_batchnorm_epsilon=default_batchnorm_epsilon,
+      conv_hyperparams=conv_hyperparams,
+      weight_decay=weight_decay,
+      min_depth=min_depth,
+      depth_multiplier=depth_multiplier)
+  return tf.keras.applications.resnet.ResNet101(
+      layers=layers_override, **kwargs)
+
+
+def resnet_v1_152(batchnorm_training,
+                  batchnorm_scale=True,
+                  default_batchnorm_momentum=0.997,
+                  default_batchnorm_epsilon=1e-5,
+                  weight_decay=0.0001,
+                  conv_hyperparams=None,
+                  min_depth=8,
+                  depth_multiplier=1,
+                  **kwargs):
+  """Instantiates the Resnet152 architecture, modified for object detection.
+
+  Args:
+    batchnorm_training: Bool. Assigned to Batch norm layer `training` param
+      when constructing `freezable_batch_norm.FreezableBatchNorm` layers.
+    batchnorm_scale: If True, uses an explicit `gamma` multiplier to scale
+      the activations in the batch normalization layer.
+    default_batchnorm_momentum: Float. When 'conv_hyperparams' is None,
+      batch norm layers will be constructed using this value as the momentum.
+    default_batchnorm_epsilon: Float. When 'conv_hyperparams' is None,
+      batch norm layers will be constructed using this value as the epsilon.
+    weight_decay: The weight decay to use for regularizing the model.
+    conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
+      containing hyperparameters for convolution ops. Optionally set to `None`
+      to use default resnet_v1 layer builders.
+    min_depth: Minimum number of filters in the convolutional layers.
+    depth_multiplier: The depth multiplier to modify the number of filters
+      in the convolutional layers.
+    **kwargs: Keyword arguments forwarded directly to the
+      `tf.keras.applications.resnet.ResNet152` function that constructs the
+      Keras model.
+
+  Returns:
+    A Keras ResnetV1-152 model instance.
+  """
+  layers_override = _LayersOverride(
+      batchnorm_training,
+      batchnorm_scale=batchnorm_scale,
+      default_batchnorm_momentum=default_batchnorm_momentum,
+      default_batchnorm_epsilon=default_batchnorm_epsilon,
+      conv_hyperparams=conv_hyperparams,
+      weight_decay=weight_decay,
+      min_depth=min_depth,
+      depth_multiplier=depth_multiplier)
+  return tf.keras.applications.resnet.ResNet152(
+      layers=layers_override, **kwargs)
+# pylint: enable=invalid-name
diff --git a/models/keras_models/resnet_v1_test.py b/models/keras_models/resnet_v1_test.py
new file mode 100644
index 0000000..43df415
--- /dev/null
+++ b/models/keras_models/resnet_v1_test.py
@@ -0,0 +1,183 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for resnet_v1.py.
+
+This test mainly focuses on comparing slim resnet v1 and Keras resnet v1 for
+object detection. To verify the consistency of the two models, we compare:
+  1. Output shape of each layer given different inputs.
+  2. Number of global variables.
+""" + +import numpy as np +from six.moves import zip +import tensorflow as tf + +from google.protobuf import text_format + +from object_detection.builders import hyperparams_builder +from object_detection.models.keras_models import resnet_v1 +from object_detection.protos import hyperparams_pb2 +from object_detection.utils import test_case + +_EXPECTED_SHAPES_224_RESNET50 = { + 'conv2_block3_out': (4, 56, 56, 256), + 'conv3_block4_out': (4, 28, 28, 512), + 'conv4_block6_out': (4, 14, 14, 1024), + 'conv5_block3_out': (4, 7, 7, 2048), +} + +_EXPECTED_SHAPES_224_RESNET101 = { + 'conv2_block3_out': (4, 56, 56, 256), + 'conv3_block4_out': (4, 28, 28, 512), + 'conv4_block23_out': (4, 14, 14, 1024), + 'conv5_block3_out': (4, 7, 7, 2048), +} + +_EXPECTED_SHAPES_224_RESNET152 = { + 'conv2_block3_out': (4, 56, 56, 256), + 'conv3_block8_out': (4, 28, 28, 512), + 'conv4_block36_out': (4, 14, 14, 1024), + 'conv5_block3_out': (4, 7, 7, 2048), +} + +_RESNET_NAMES = ['resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152'] +_RESNET_MODELS = [ + resnet_v1.resnet_v1_50, resnet_v1.resnet_v1_101, resnet_v1.resnet_v1_152 +] +_RESNET_SHAPES = [ + _EXPECTED_SHAPES_224_RESNET50, _EXPECTED_SHAPES_224_RESNET101, + _EXPECTED_SHAPES_224_RESNET152 +] + +_NUM_CHANNELS = 3 +_BATCH_SIZE = 4 + + +class ResnetV1Test(test_case.TestCase): + + def _build_conv_hyperparams(self): + conv_hyperparams = hyperparams_pb2.Hyperparams() + conv_hyperparams_text_proto = """ + activation: RELU_6, + regularizer { + l2_regularizer { + weight: 0.0004 + } + } + initializer { + truncated_normal_initializer { + stddev: 0.03 + mean: 0.0 + } + } + batch_norm { + scale: true, + decay: 0.997, + epsilon: 0.001, + } + """ + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) + return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams) + + def _create_application_with_layer_outputs(self, + model_index, + batchnorm_training, + batchnorm_scale=True, + weight_decay=0.0001, + default_batchnorm_momentum=0.997, + default_batchnorm_epsilon=1e-5): + """Constructs Keras resnet_v1 that extracts layer outputs.""" + # Have to clear the Keras backend to ensure isolation in layer naming + tf.keras.backend.clear_session() + layer_names = _RESNET_SHAPES[model_index].keys() + full_model = _RESNET_MODELS[model_index]( + batchnorm_training=batchnorm_training, + weights=None, + batchnorm_scale=batchnorm_scale, + weight_decay=weight_decay, + default_batchnorm_momentum=default_batchnorm_momentum, + default_batchnorm_epsilon=default_batchnorm_epsilon, + include_top=False) + + layer_outputs = [ + full_model.get_layer(name=layer).output for layer in layer_names + ] + return tf.keras.Model(inputs=full_model.inputs, outputs=layer_outputs) + + def _check_returns_correct_shape(self, + image_height, + image_width, + model_index, + expected_feature_map_shape, + batchnorm_training=True, + batchnorm_scale=True, + weight_decay=0.0001, + default_batchnorm_momentum=0.997, + default_batchnorm_epsilon=1e-5): + model = self._create_application_with_layer_outputs( + model_index=model_index, + batchnorm_training=batchnorm_training, + batchnorm_scale=batchnorm_scale, + weight_decay=weight_decay, + default_batchnorm_momentum=default_batchnorm_momentum, + default_batchnorm_epsilon=default_batchnorm_epsilon) + + image_tensor = np.random.rand(_BATCH_SIZE, image_height, image_width, + _NUM_CHANNELS).astype(np.float32) + feature_maps = model(image_tensor) + layer_names = _RESNET_SHAPES[model_index].keys() + for feature_map, layer_name in zip(feature_maps, layer_names): + 
expected_shape = _RESNET_SHAPES[model_index][layer_name] + self.assertAllEqual(feature_map.shape, expected_shape) + + def _get_variables(self, model_index): + tf.keras.backend.clear_session() + model = self._create_application_with_layer_outputs( + model_index, batchnorm_training=False) + preprocessed_inputs = tf.placeholder(tf.float32, + (4, None, None, _NUM_CHANNELS)) + model(preprocessed_inputs) + return model.variables + + def test_returns_correct_shapes_224(self): + image_height = 224 + image_width = 224 + for model_index, _ in enumerate(_RESNET_NAMES): + expected_feature_map_shape = _RESNET_SHAPES[model_index] + self._check_returns_correct_shape(image_height, image_width, model_index, + expected_feature_map_shape) + + def test_hyperparam_override(self): + for model_name in _RESNET_MODELS: + model = model_name( + batchnorm_training=True, + default_batchnorm_momentum=0.2, + default_batchnorm_epsilon=0.1, + weights=None, + include_top=False) + bn_layer = model.get_layer(name='conv1_bn') + self.assertAllClose(bn_layer.momentum, 0.2) + self.assertAllClose(bn_layer.epsilon, 0.1) + + def test_variable_count(self): + # The number of variables from slim resnetv1-* model. + variable_nums = [265, 520, 775] + for model_index, var_num in enumerate(variable_nums): + variables = self._get_variables(model_index) + self.assertEqual(len(variables), var_num) + + +if __name__ == '__main__': + tf.test.main() diff --git a/models/keras_models/test_utils.py b/models/keras_models/test_utils.py new file mode 100644 index 0000000..0669b6c --- /dev/null +++ b/models/keras_models/test_utils.py @@ -0,0 +1,214 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Test utils for other test files.""" + +# import tensorflow as tf +# +# from nets import mobilenet_v1 +# +# slim = tf.contrib.slim +# +# # Layer names of Slim to map Keras layer names in MobilenetV1 +# _MOBLIENET_V1_SLIM_ENDPOINTS = [ +# 'Conv2d_0', +# 'Conv2d_1_depthwise', 'Conv2d_1_pointwise', +# 'Conv2d_2_depthwise', 'Conv2d_2_pointwise', +# 'Conv2d_3_depthwise', 'Conv2d_3_pointwise', +# 'Conv2d_4_depthwise', 'Conv2d_4_pointwise', +# 'Conv2d_5_depthwise', 'Conv2d_5_pointwise', +# 'Conv2d_6_depthwise', 'Conv2d_6_pointwise', +# 'Conv2d_7_depthwise', 'Conv2d_7_pointwise', +# 'Conv2d_8_depthwise', 'Conv2d_8_pointwise', +# 'Conv2d_9_depthwise', 'Conv2d_9_pointwise', +# 'Conv2d_10_depthwise', 'Conv2d_10_pointwise', +# 'Conv2d_11_depthwise', 'Conv2d_11_pointwise', +# 'Conv2d_12_depthwise', 'Conv2d_12_pointwise', +# 'Conv2d_13_depthwise', 'Conv2d_13_pointwise' +# ] +# +# +# # Function to get the output shape of each layer in Slim. It's used to +# # generate the following constant expected_feature_map_shape for MobilenetV1. +# # Similarly, this can also apply to MobilenetV2. 
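+#
+# (The snippet below depends on the slim `nets` package, which this code does
+# not otherwise import; it is kept commented out as a reference for
+# regenerating the expected-shape constants that follow.)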
+# def _get_slim_endpoint_shapes(inputs, depth_multiplier=1.0, min_depth=8, +# use_explicit_padding=False): +# with slim.arg_scope([slim.conv2d, slim.separable_conv2d], +# normalizer_fn=slim.batch_norm): +# _, end_points = mobilenet_v1.mobilenet_v1_base( +# inputs, final_endpoint='Conv2d_13_pointwise', +# depth_multiplier=depth_multiplier, min_depth=min_depth, +# use_explicit_padding=use_explicit_padding) +# return [end_points[endpoint_name].get_shape() +# for endpoint_name in _MOBLIENET_V1_SLIM_ENDPOINTS] + + +# For Mobilenet V1 +moblenet_v1_expected_feature_map_shape_128 = [ + (2, 64, 64, 32), (2, 64, 64, 32), (2, 64, 64, 64), (2, 32, 32, 64), + (2, 32, 32, 128), (2, 32, 32, 128), (2, 32, 32, 128), (2, 16, 16, 128), + (2, 16, 16, 256), (2, 16, 16, 256), (2, 16, 16, 256), (2, 8, 8, 256), + (2, 8, 8, 512), (2, 8, 8, 512), (2, 8, 8, 512), (2, 8, 8, 512), + (2, 8, 8, 512), (2, 8, 8, 512), (2, 8, 8, 512), (2, 8, 8, 512), + (2, 8, 8, 512), (2, 8, 8, 512), (2, 8, 8, 512), (2, 4, 4, 512), + (2, 4, 4, 1024), (2, 4, 4, 1024), (2, 4, 4, 1024), +] + +moblenet_v1_expected_feature_map_shape_128_explicit_padding = [ + (2, 64, 64, 32), (2, 64, 64, 32), (2, 64, 64, 64), (2, 32, 32, 64), + (2, 32, 32, 128), (2, 32, 32, 128), (2, 32, 32, 128), (2, 16, 16, 128), + (2, 16, 16, 256), (2, 16, 16, 256), (2, 16, 16, 256), (2, 8, 8, 256), + (2, 8, 8, 512), (2, 8, 8, 512), (2, 8, 8, 512), (2, 8, 8, 512), + (2, 8, 8, 512), (2, 8, 8, 512), (2, 8, 8, 512), (2, 8, 8, 512), + (2, 8, 8, 512), (2, 8, 8, 512), (2, 8, 8, 512), (2, 4, 4, 512), + (2, 4, 4, 1024), (2, 4, 4, 1024), (2, 4, 4, 1024), +] + +mobilenet_v1_expected_feature_map_shape_with_dynamic_inputs = [ + (2, 64, 64, 32), (2, 64, 64, 32), (2, 64, 64, 64), (2, 32, 32, 64), + (2, 32, 32, 128), (2, 32, 32, 128), (2, 32, 32, 128), (2, 16, 16, 128), + (2, 16, 16, 256), (2, 16, 16, 256), (2, 16, 16, 256), (2, 8, 8, 256), + (2, 8, 8, 512), (2, 8, 8, 512), (2, 8, 8, 512), (2, 8, 8, 512), + (2, 8, 8, 512), (2, 8, 8, 512), (2, 8, 8, 512), (2, 8, 8, 512), + (2, 8, 8, 512), (2, 8, 8, 512), (2, 8, 8, 512), (2, 4, 4, 512), + (2, 4, 4, 1024), (2, 4, 4, 1024), (2, 4, 4, 1024), +] + +moblenet_v1_expected_feature_map_shape_299 = [ + (2, 150, 150, 32), (2, 150, 150, 32), (2, 150, 150, 64), (2, 75, 75, 64), + (2, 75, 75, 128), (2, 75, 75, 128), (2, 75, 75, 128), (2, 38, 38, 128), + (2, 38, 38, 256), (2, 38, 38, 256), (2, 38, 38, 256), (2, 19, 19, 256), + (2, 19, 19, 512), (2, 19, 19, 512), (2, 19, 19, 512), (2, 19, 19, 512), + (2, 19, 19, 512), (2, 19, 19, 512), (2, 19, 19, 512), (2, 19, 19, 512), + (2, 19, 19, 512), (2, 19, 19, 512), (2, 19, 19, 512), (2, 10, 10, 512), + (2, 10, 10, 1024), (2, 10, 10, 1024), (2, 10, 10, 1024), +] + +moblenet_v1_expected_feature_map_shape_enforcing_min_depth = [ + (2, 150, 150, 8), (2, 150, 150, 8), (2, 150, 150, 8), (2, 75, 75, 8), + (2, 75, 75, 8), (2, 75, 75, 8), (2, 75, 75, 8), (2, 38, 38, 8), + (2, 38, 38, 8), (2, 38, 38, 8), (2, 38, 38, 8), (2, 19, 19, 8), + (2, 19, 19, 8), (2, 19, 19, 8), (2, 19, 19, 8), (2, 19, 19, 8), + (2, 19, 19, 8), (2, 19, 19, 8), (2, 19, 19, 8), (2, 19, 19, 8), + (2, 19, 19, 8), (2, 19, 19, 8), (2, 19, 19, 8), (2, 10, 10, 8), + (2, 10, 10, 8), (2, 10, 10, 8), (2, 10, 10, 8), +] + +moblenet_v1_expected_feature_map_shape_with_conv_defs = [ + (2, 150, 150, 32), (2, 150, 150, 32), (2, 150, 150, 64), (2, 75, 75, 64), + (2, 75, 75, 128), (2, 75, 75, 128), (2, 75, 75, 128), (2, 38, 38, 128), + (2, 38, 38, 256), (2, 38, 38, 256), (2, 38, 38, 256), (2, 19, 19, 256), + (2, 19, 19, 512), (2, 19, 19, 512), (2, 19, 19, 512), (2, 
19, 19, 512), + (2, 19, 19, 512), (2, 19, 19, 512), (2, 19, 19, 512), (2, 19, 19, 512), + (2, 19, 19, 512), (2, 19, 19, 512), (2, 19, 19, 512), (2, 10, 10, 512), + (2, 10, 10, 512), (2, 10, 10, 512), (2, 10, 10, 256), +] + +# For Mobilenet V2 +moblenet_v2_expected_feature_map_shape_128 = [ + (2, 64, 64, 32), (2, 64, 64, 96), (2, 32, 32, 96), (2, 32, 32, 24), + (2, 32, 32, 144), (2, 32, 32, 144), (2, 32, 32, 24), (2, 32, 32, 144), + (2, 16, 16, 144), (2, 16, 16, 32), (2, 16, 16, 192), (2, 16, 16, 192), + (2, 16, 16, 32), (2, 16, 16, 192), (2, 16, 16, 192), (2, 16, 16, 32), + (2, 16, 16, 192), (2, 8, 8, 192), (2, 8, 8, 64), (2, 8, 8, 384), + (2, 8, 8, 384), (2, 8, 8, 64), (2, 8, 8, 384), (2, 8, 8, 384), + (2, 8, 8, 64), (2, 8, 8, 384), (2, 8, 8, 384), (2, 8, 8, 64), + (2, 8, 8, 384), (2, 8, 8, 384), (2, 8, 8, 96), (2, 8, 8, 576), + (2, 8, 8, 576), (2, 8, 8, 96), (2, 8, 8, 576), (2, 8, 8, 576), + (2, 8, 8, 96), (2, 8, 8, 576), (2, 4, 4, 576), (2, 4, 4, 160), + (2, 4, 4, 960), (2, 4, 4, 960), (2, 4, 4, 160), (2, 4, 4, 960), + (2, 4, 4, 960), (2, 4, 4, 160), (2, 4, 4, 960), (2, 4, 4, 960), + (2, 4, 4, 320), (2, 4, 4, 1280) +] + +moblenet_v2_expected_feature_map_shape_128_explicit_padding = [ + (2, 64, 64, 32), (2, 64, 64, 96), (2, 32, 32, 96), (2, 32, 32, 24), + (2, 32, 32, 144), (2, 32, 32, 144), (2, 32, 32, 24), (2, 32, 32, 144), + (2, 16, 16, 144), (2, 16, 16, 32), (2, 16, 16, 192), (2, 16, 16, 192), + (2, 16, 16, 32), (2, 16, 16, 192), (2, 16, 16, 192), (2, 16, 16, 32), + (2, 16, 16, 192), (2, 8, 8, 192), (2, 8, 8, 64), (2, 8, 8, 384), + (2, 8, 8, 384), (2, 8, 8, 64), (2, 8, 8, 384), (2, 8, 8, 384), + (2, 8, 8, 64), (2, 8, 8, 384), (2, 8, 8, 384), (2, 8, 8, 64), + (2, 8, 8, 384), (2, 8, 8, 384), (2, 8, 8, 96), (2, 8, 8, 576), + (2, 8, 8, 576), (2, 8, 8, 96), (2, 8, 8, 576), (2, 8, 8, 576), + (2, 8, 8, 96), (2, 8, 8, 576), (2, 4, 4, 576), (2, 4, 4, 160), + (2, 4, 4, 960), (2, 4, 4, 960), (2, 4, 4, 160), (2, 4, 4, 960), + (2, 4, 4, 960), (2, 4, 4, 160), (2, 4, 4, 960), (2, 4, 4, 960), + (2, 4, 4, 320), (2, 4, 4, 1280) +] + +mobilenet_v2_expected_feature_map_shape_with_dynamic_inputs = [ + (2, 64, 64, 32), (2, 64, 64, 96), (2, 32, 32, 96), (2, 32, 32, 24), + (2, 32, 32, 144), (2, 32, 32, 144), (2, 32, 32, 24), (2, 32, 32, 144), + (2, 16, 16, 144), (2, 16, 16, 32), (2, 16, 16, 192), (2, 16, 16, 192), + (2, 16, 16, 32), (2, 16, 16, 192), (2, 16, 16, 192), (2, 16, 16, 32), + (2, 16, 16, 192), (2, 8, 8, 192), (2, 8, 8, 64), (2, 8, 8, 384), + (2, 8, 8, 384), (2, 8, 8, 64), (2, 8, 8, 384), (2, 8, 8, 384), + (2, 8, 8, 64), (2, 8, 8, 384), (2, 8, 8, 384), (2, 8, 8, 64), + (2, 8, 8, 384), (2, 8, 8, 384), (2, 8, 8, 96), (2, 8, 8, 576), + (2, 8, 8, 576), (2, 8, 8, 96), (2, 8, 8, 576), (2, 8, 8, 576), + (2, 8, 8, 96), (2, 8, 8, 576), (2, 4, 4, 576), (2, 4, 4, 160), + (2, 4, 4, 960), (2, 4, 4, 960), (2, 4, 4, 160), (2, 4, 4, 960), + (2, 4, 4, 960), (2, 4, 4, 160), (2, 4, 4, 960), (2, 4, 4, 960), + (2, 4, 4, 320), (2, 4, 4, 1280) +] + +moblenet_v2_expected_feature_map_shape_299 = [ + (2, 150, 150, 32), (2, 150, 150, 96), (2, 75, 75, 96), (2, 75, 75, 24), + (2, 75, 75, 144), (2, 75, 75, 144), (2, 75, 75, 24), (2, 75, 75, 144), + (2, 38, 38, 144), (2, 38, 38, 32), (2, 38, 38, 192), (2, 38, 38, 192), + (2, 38, 38, 32), (2, 38, 38, 192), (2, 38, 38, 192), (2, 38, 38, 32), + (2, 38, 38, 192), (2, 19, 19, 192), (2, 19, 19, 64), (2, 19, 19, 384), + (2, 19, 19, 384), (2, 19, 19, 64), (2, 19, 19, 384), (2, 19, 19, 384), + (2, 19, 19, 64), (2, 19, 19, 384), (2, 19, 19, 384), (2, 19, 19, 64), + (2, 19, 19, 384), (2, 
19, 19, 384), (2, 19, 19, 96), (2, 19, 19, 576), + (2, 19, 19, 576), (2, 19, 19, 96), (2, 19, 19, 576), (2, 19, 19, 576), + (2, 19, 19, 96), (2, 19, 19, 576), (2, 10, 10, 576), (2, 10, 10, 160), + (2, 10, 10, 960), (2, 10, 10, 960), (2, 10, 10, 160), (2, 10, 10, 960), + (2, 10, 10, 960), (2, 10, 10, 160), (2, 10, 10, 960), (2, 10, 10, 960), + (2, 10, 10, 320), (2, 10, 10, 1280) +] + +moblenet_v2_expected_feature_map_shape_enforcing_min_depth = [ + (2, 150, 150, 32), (2, 150, 150, 192), (2, 75, 75, 192), (2, 75, 75, 32), + (2, 75, 75, 192), (2, 75, 75, 192), (2, 75, 75, 32), (2, 75, 75, 192), + (2, 38, 38, 192), (2, 38, 38, 32), (2, 38, 38, 192), (2, 38, 38, 192), + (2, 38, 38, 32), (2, 38, 38, 192), (2, 38, 38, 192), (2, 38, 38, 32), + (2, 38, 38, 192), (2, 19, 19, 192), (2, 19, 19, 32), (2, 19, 19, 192), + (2, 19, 19, 192), (2, 19, 19, 32), (2, 19, 19, 192), (2, 19, 19, 192), + (2, 19, 19, 32), (2, 19, 19, 192), (2, 19, 19, 192), (2, 19, 19, 32), + (2, 19, 19, 192), (2, 19, 19, 192), (2, 19, 19, 32), (2, 19, 19, 192), + (2, 19, 19, 192), (2, 19, 19, 32), (2, 19, 19, 192), (2, 19, 19, 192), + (2, 19, 19, 32), (2, 19, 19, 192), (2, 10, 10, 192), (2, 10, 10, 32), + (2, 10, 10, 192), (2, 10, 10, 192), (2, 10, 10, 32), (2, 10, 10, 192), + (2, 10, 10, 192), (2, 10, 10, 32), (2, 10, 10, 192), (2, 10, 10, 192), + (2, 10, 10, 32), (2, 10, 10, 32) +] + +moblenet_v2_expected_feature_map_shape_with_conv_defs = [ + (2, 150, 150, 32), (2, 150, 150, 96), (2, 75, 75, 96), (2, 75, 75, 24), + (2, 75, 75, 144), (2, 75, 75, 144), (2, 75, 75, 24), (2, 75, 75, 144), + (2, 38, 38, 144), (2, 38, 38, 32), (2, 38, 38, 192), (2, 38, 38, 192), + (2, 38, 38, 32), (2, 38, 38, 192), (2, 38, 38, 192), (2, 38, 38, 32), + (2, 38, 38, 192), (2, 19, 19, 192), (2, 19, 19, 64), (2, 19, 19, 384), + (2, 19, 19, 384), (2, 19, 19, 64), (2, 19, 19, 384), (2, 19, 19, 384), + (2, 19, 19, 64), (2, 19, 19, 384), (2, 19, 19, 384), (2, 19, 19, 64), + (2, 19, 19, 384), (2, 19, 19, 384), (2, 19, 19, 96), (2, 19, 19, 576), + (2, 19, 19, 576), (2, 19, 19, 96), (2, 19, 19, 576), (2, 19, 19, 576), + (2, 19, 19, 96), (2, 19, 19, 576), (2, 10, 10, 576), (2, 10, 10, 160), + (2, 10, 10, 960), (2, 10, 10, 960), (2, 10, 10, 160), (2, 10, 10, 960), + (2, 10, 10, 960), (2, 10, 10, 160), (2, 10, 10, 960), (2, 10, 10, 960), + (2, 10, 10, 320), (2, 10, 10, 256) +] diff --git a/models/ssd_feature_extractor_test.py b/models/ssd_feature_extractor_test.py new file mode 100644 index 0000000..10b2819 --- /dev/null +++ b/models/ssd_feature_extractor_test.py @@ -0,0 +1,226 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+
+"""Base test class for SSD feature extractors."""
+
+from abc import abstractmethod
+
+import numpy as np
+from six.moves import zip
+import tensorflow as tf
+
+from google.protobuf import text_format
+from tensorflow.contrib import slim as contrib_slim
+from object_detection.builders import hyperparams_builder
+from object_detection.protos import hyperparams_pb2
+from object_detection.utils import test_case
+
+
+class SsdFeatureExtractorTestBase(test_case.TestCase):
+
+  def _build_conv_hyperparams(self, add_batch_norm=True):
+    conv_hyperparams = hyperparams_pb2.Hyperparams()
+    conv_hyperparams_text_proto = """
+      activation: RELU_6
+      regularizer {
+        l2_regularizer {
+        }
+      }
+      initializer {
+        truncated_normal_initializer {
+        }
+      }
+    """
+    if add_batch_norm:
+      batch_norm_proto = """
+        batch_norm {
+          scale: false
+        }
+      """
+      conv_hyperparams_text_proto += batch_norm_proto
+    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
+    return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
+
+  def conv_hyperparams_fn(self):
+    with contrib_slim.arg_scope([]) as sc:
+      return sc
+
+  @abstractmethod
+  def _create_feature_extractor(self,
+                                depth_multiplier,
+                                pad_to_multiple,
+                                use_explicit_padding=False,
+                                num_layers=6,
+                                use_keras=False,
+                                use_depthwise=False):
+    """Constructs a new feature extractor.
+
+    Args:
+      depth_multiplier: float depth multiplier for feature extractor.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
+        inputs so that the output dimensions are the same as if 'SAME' padding
+        were used.
+      num_layers: number of SSD layers.
+      use_keras: if True builds a keras-based feature extractor, if False
+        builds a slim-based one.
+      use_depthwise: Whether to use depthwise convolutions.
+
+    Returns:
+      an ssd_meta_arch.SSDFeatureExtractor or an
+      ssd_meta_arch.SSDKerasFeatureExtractor object.
+ """ + pass + + def _extract_features(self, + image_tensor, + depth_multiplier, + pad_to_multiple, + use_explicit_padding=False, + use_depthwise=False, + num_layers=6, + use_keras=False): + kwargs = {} + if use_explicit_padding: + kwargs.update({'use_explicit_padding': use_explicit_padding}) + if use_depthwise: + kwargs.update({'use_depthwise': use_depthwise}) + if num_layers != 6: + kwargs.update({'num_layers': num_layers}) + if use_keras: + kwargs.update({'use_keras': use_keras}) + feature_extractor = self._create_feature_extractor( + depth_multiplier, + pad_to_multiple, + **kwargs) + if use_keras: + feature_maps = feature_extractor(image_tensor) + else: + feature_maps = feature_extractor.extract_features(image_tensor) + return feature_maps + + def check_extract_features_returns_correct_shape(self, + batch_size, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shapes, + use_explicit_padding=False, + num_layers=6, + use_keras=False, + use_depthwise=False): + + def graph_fn(image_tensor): + return self._extract_features( + image_tensor, + depth_multiplier, + pad_to_multiple, + use_explicit_padding=use_explicit_padding, + num_layers=num_layers, + use_keras=use_keras, + use_depthwise=use_depthwise) + + image_tensor = np.random.rand(batch_size, image_height, image_width, + 3).astype(np.float32) + feature_maps = self.execute(graph_fn, [image_tensor]) + for feature_map, expected_shape in zip( + feature_maps, expected_feature_map_shapes): + self.assertAllEqual(feature_map.shape, expected_shape) + + def check_extract_features_returns_correct_shapes_with_dynamic_inputs( + self, + batch_size, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shapes, + use_explicit_padding=False, + num_layers=6, + use_keras=False, + use_depthwise=False): + + def graph_fn(image_height, image_width): + image_tensor = tf.random_uniform([batch_size, image_height, image_width, + 3], dtype=tf.float32) + return self._extract_features( + image_tensor, + depth_multiplier, + pad_to_multiple, + use_explicit_padding=use_explicit_padding, + num_layers=num_layers, + use_keras=use_keras, + use_depthwise=use_depthwise) + + feature_maps = self.execute_cpu(graph_fn, [ + np.array(image_height, dtype=np.int32), + np.array(image_width, dtype=np.int32) + ]) + for feature_map, expected_shape in zip( + feature_maps, expected_feature_map_shapes): + self.assertAllEqual(feature_map.shape, expected_shape) + + def check_extract_features_raises_error_with_invalid_image_size( + self, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + use_keras=False, + use_depthwise=False): + preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3)) + feature_maps = self._extract_features( + preprocessed_inputs, + depth_multiplier, + pad_to_multiple, + use_keras=use_keras, + use_depthwise=use_depthwise) + test_preprocessed_image = np.random.rand(4, image_height, image_width, 3) + with self.test_session() as sess: + sess.run(tf.global_variables_initializer()) + with self.assertRaises(tf.errors.InvalidArgumentError): + sess.run(feature_maps, + feed_dict={preprocessed_inputs: test_preprocessed_image}) + + def check_feature_extractor_variables_under_scope(self, + depth_multiplier, + pad_to_multiple, + scope_name, + use_keras=False, + use_depthwise=False): + variables = self.get_feature_extractor_variables( + depth_multiplier, + pad_to_multiple, + use_keras=use_keras, + use_depthwise=use_depthwise) + for variable in variables: + 
self.assertTrue(variable.name.startswith(scope_name)) + + def get_feature_extractor_variables(self, + depth_multiplier, + pad_to_multiple, + use_keras=False, + use_depthwise=False): + g = tf.Graph() + with g.as_default(): + preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3)) + self._extract_features( + preprocessed_inputs, + depth_multiplier, + pad_to_multiple, + use_keras=use_keras, + use_depthwise=use_depthwise) + return g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) diff --git a/models/ssd_inception_v2_feature_extractor.py b/models/ssd_inception_v2_feature_extractor.py new file mode 100644 index 0000000..d40da53 --- /dev/null +++ b/models/ssd_inception_v2_feature_extractor.py @@ -0,0 +1,138 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""SSDFeatureExtractor for InceptionV2 features.""" +import tensorflow as tf +from tensorflow.contrib import slim as contrib_slim + +from object_detection.meta_architectures import ssd_meta_arch +from object_detection.models import feature_map_generators +from object_detection.utils import ops +from object_detection.utils import shape_utils +from nets import inception_v2 + +slim = contrib_slim + + +class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): + """SSD Feature Extractor using InceptionV2 features.""" + + def __init__(self, + is_training, + depth_multiplier, + min_depth, + pad_to_multiple, + conv_hyperparams_fn, + reuse_weights=None, + use_explicit_padding=False, + use_depthwise=False, + num_layers=6, + override_base_feature_extractor_hyperparams=False): + """InceptionV2 Feature Extractor for SSD Models. + + Args: + is_training: whether the network is in training mode. + depth_multiplier: float depth multiplier for feature extractor. + min_depth: minimum feature extractor depth. + pad_to_multiple: the nearest multiple to zero pad the input height and + width dimensions to. + conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d + and separable_conv2d ops in the layers that are added on top of the + base feature extractor. + reuse_weights: Whether to reuse variables. Default is None. + use_explicit_padding: Whether to use explicit padding when extracting + features. Default is False. + use_depthwise: Whether to use depthwise convolutions. Default is False. + num_layers: Number of SSD layers. + override_base_feature_extractor_hyperparams: Whether to override + hyperparameters of the base feature extractor with the one from + `conv_hyperparams_fn`. + + Raises: + ValueError: If `override_base_feature_extractor_hyperparams` is False. 
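+
+    Example (a usage sketch; `conv_hyperparams_fn` is assumed to come from
+    hyperparams_builder, as in the unit tests):
+      feature_extractor = SSDInceptionV2FeatureExtractor(
+          is_training=True, depth_multiplier=1.0, min_depth=32,
+          pad_to_multiple=1, conv_hyperparams_fn=conv_hyperparams_fn,
+          override_base_feature_extractor_hyperparams=True)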
+ """ + super(SSDInceptionV2FeatureExtractor, self).__init__( + is_training=is_training, + depth_multiplier=depth_multiplier, + min_depth=min_depth, + pad_to_multiple=pad_to_multiple, + conv_hyperparams_fn=conv_hyperparams_fn, + reuse_weights=reuse_weights, + use_explicit_padding=use_explicit_padding, + use_depthwise=use_depthwise, + num_layers=num_layers, + override_base_feature_extractor_hyperparams= + override_base_feature_extractor_hyperparams) + if not self._override_base_feature_extractor_hyperparams: + raise ValueError('SSD Inception V2 feature extractor always uses' + 'scope returned by `conv_hyperparams_fn` for both the ' + 'base feature extractor and the additional layers ' + 'added since there is no arg_scope defined for the base ' + 'feature extractor.') + + def preprocess(self, resized_inputs): + """SSD preprocessing. + + Maps pixel values to the range [-1, 1]. + + Args: + resized_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + + Returns: + preprocessed_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + """ + return (2.0 / 255.0) * resized_inputs - 1.0 + + def extract_features(self, preprocessed_inputs): + """Extract features from preprocessed inputs. + + Args: + preprocessed_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + + Returns: + feature_maps: a list of tensors where the ith tensor has shape + [batch, height_i, width_i, depth_i] + """ + preprocessed_inputs = shape_utils.check_min_image_dim( + 33, preprocessed_inputs) + + feature_map_layout = { + 'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', '' + ][:self._num_layers], + 'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers], + 'use_explicit_padding': self._use_explicit_padding, + 'use_depthwise': self._use_depthwise, + } + + with slim.arg_scope(self._conv_hyperparams_fn()): + with tf.variable_scope('InceptionV2', + reuse=self._reuse_weights) as scope: + _, image_features = inception_v2.inception_v2_base( + ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), + final_endpoint='Mixed_5c', + min_depth=self._min_depth, + depth_multiplier=self._depth_multiplier, + scope=scope) + feature_maps = feature_map_generators.multi_resolution_feature_maps( + feature_map_layout=feature_map_layout, + depth_multiplier=self._depth_multiplier, + min_depth=self._min_depth, + insert_1x1_conv=True, + image_features=image_features) + + return feature_maps.values() diff --git a/models/ssd_inception_v2_feature_extractor_test.py b/models/ssd_inception_v2_feature_extractor_test.py new file mode 100644 index 0000000..4eb32e4 --- /dev/null +++ b/models/ssd_inception_v2_feature_extractor_test.py @@ -0,0 +1,157 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for object_detection.models.ssd_inception_v2_feature_extractor.""" +import numpy as np +import tensorflow as tf + +from object_detection.models import ssd_feature_extractor_test +from object_detection.models import ssd_inception_v2_feature_extractor + + +class SsdInceptionV2FeatureExtractorTest( + ssd_feature_extractor_test.SsdFeatureExtractorTestBase): + + def _create_feature_extractor(self, + depth_multiplier, + pad_to_multiple, + use_explicit_padding=False, + num_layers=6, + is_training=True): + """Constructs a SsdInceptionV2FeatureExtractor. + + Args: + depth_multiplier: float depth multiplier for feature extractor + pad_to_multiple: the nearest multiple to zero pad the input height and + width dimensions to. + use_explicit_padding: Use 'VALID' padding for convolutions, but prepad + inputs so that the output dimensions are the same as if 'SAME' padding + were used. + num_layers: number of SSD layers. + is_training: whether the network is in training mode. + + Returns: + an ssd_inception_v2_feature_extractor.SsdInceptionV2FeatureExtractor. + """ + min_depth = 32 + return ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor( + is_training, + depth_multiplier, + min_depth, + pad_to_multiple, + self.conv_hyperparams_fn, + num_layers=num_layers, + override_base_feature_extractor_hyperparams=True) + + def test_extract_features_returns_correct_shapes_128(self): + image_height = 128 + image_width = 128 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1024), + (2, 2, 2, 512), (2, 1, 1, 256), + (2, 1, 1, 256), (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape) + + def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self): + image_height = 128 + image_width = 128 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1024), + (2, 2, 2, 512), (2, 1, 1, 256), + (2, 1, 1, 256), (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shapes_with_dynamic_inputs( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape) + + def test_extract_features_returns_correct_shapes_299(self): + image_height = 299 + image_width = 299 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 19, 19, 576), (2, 10, 10, 1024), + (2, 5, 5, 512), (2, 3, 3, 256), + (2, 2, 2, 256), (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape) + + def test_extract_features_returns_correct_shapes_enforcing_min_depth(self): + image_height = 299 + image_width = 299 + depth_multiplier = 0.5**12 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 19, 19, 128), (2, 10, 10, 128), + (2, 5, 5, 32), (2, 3, 3, 32), + (2, 2, 2, 32), (2, 1, 1, 32)] + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape) + + def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self): + image_height = 299 + image_width = 299 + depth_multiplier = 1.0 + pad_to_multiple = 32 + expected_feature_map_shape = [(2, 20, 20, 576), (2, 10, 10, 1024), + (2, 5, 5, 512), (2, 3, 3, 256), + (2, 2, 2, 256), (2, 1, 1, 128)] + 
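+    # With pad_to_multiple=32, the 299x299 inputs are zero-padded to 320x320,
+    # so the stride-16 Mixed_4c endpoint yields 20x20 rather than 19x19.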
self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape) + + def test_extract_features_raises_error_with_invalid_image_size(self): + image_height = 32 + image_width = 32 + depth_multiplier = 1.0 + pad_to_multiple = 1 + self.check_extract_features_raises_error_with_invalid_image_size( + image_height, image_width, depth_multiplier, pad_to_multiple) + + def test_preprocess_returns_correct_value_range(self): + image_height = 128 + image_width = 128 + depth_multiplier = 1 + pad_to_multiple = 1 + test_image = np.random.rand(4, image_height, image_width, 3) + feature_extractor = self._create_feature_extractor(depth_multiplier, + pad_to_multiple) + preprocessed_image = feature_extractor.preprocess(test_image) + self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0))) + + def test_variables_only_created_in_scope(self): + depth_multiplier = 1 + pad_to_multiple = 1 + scope_name = 'InceptionV2' + self.check_feature_extractor_variables_under_scope( + depth_multiplier, pad_to_multiple, scope_name) + + def test_extract_features_with_fewer_layers(self): + image_height = 128 + image_width = 128 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1024), + (2, 2, 2, 512), (2, 1, 1, 256)] + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape, num_layers=4) + + +if __name__ == '__main__': + tf.test.main() diff --git a/models/ssd_inception_v3_feature_extractor_test.py b/models/ssd_inception_v3_feature_extractor_test.py new file mode 100644 index 0000000..6927314 --- /dev/null +++ b/models/ssd_inception_v3_feature_extractor_test.py @@ -0,0 +1,157 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for object_detection.models.ssd_inception_v3_feature_extractor.""" +import numpy as np +import tensorflow as tf + +from object_detection.models import ssd_feature_extractor_test +from object_detection.models import ssd_inception_v3_feature_extractor + + +class SsdInceptionV3FeatureExtractorTest( + ssd_feature_extractor_test.SsdFeatureExtractorTestBase): + + def _create_feature_extractor(self, + depth_multiplier, + pad_to_multiple, + use_explicit_padding=False, + num_layers=6, + is_training=True): + """Constructs a SsdInceptionV3FeatureExtractor. + + Args: + depth_multiplier: float depth multiplier for feature extractor + pad_to_multiple: the nearest multiple to zero pad the input height and + width dimensions to. + use_explicit_padding: Use 'VALID' padding for convolutions, but prepad + inputs so that the output dimensions are the same as if 'SAME' padding + were used. + num_layers: number of SSD layers. + is_training: whether the network is in training mode. 
+ + Returns: + an ssd_inception_v3_feature_extractor.SsdInceptionV3FeatureExtractor. + """ + min_depth = 32 + return ssd_inception_v3_feature_extractor.SSDInceptionV3FeatureExtractor( + is_training, + depth_multiplier, + min_depth, + pad_to_multiple, + self.conv_hyperparams_fn, + num_layers=num_layers, + override_base_feature_extractor_hyperparams=True) + + def test_extract_features_returns_correct_shapes_128(self): + image_height = 128 + image_width = 128 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 13, 13, 288), (2, 6, 6, 768), + (2, 2, 2, 2048), (2, 1, 1, 512), + (2, 1, 1, 256), (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape) + + def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self): + image_height = 128 + image_width = 128 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 13, 13, 288), (2, 6, 6, 768), + (2, 2, 2, 2048), (2, 1, 1, 512), + (2, 1, 1, 256), (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shapes_with_dynamic_inputs( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape) + + def test_extract_features_returns_correct_shapes_299(self): + image_height = 299 + image_width = 299 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 35, 35, 288), (2, 17, 17, 768), + (2, 8, 8, 2048), (2, 4, 4, 512), + (2, 2, 2, 256), (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape) + + def test_extract_features_returns_correct_shapes_enforcing_min_depth(self): + image_height = 299 + image_width = 299 + depth_multiplier = 0.5**12 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 35, 35, 128), (2, 17, 17, 128), + (2, 8, 8, 192), (2, 4, 4, 32), + (2, 2, 2, 32), (2, 1, 1, 32)] + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape) + + def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self): + image_height = 299 + image_width = 299 + depth_multiplier = 1.0 + pad_to_multiple = 32 + expected_feature_map_shape = [(2, 37, 37, 288), (2, 18, 18, 768), + (2, 8, 8, 2048), (2, 4, 4, 512), + (2, 2, 2, 256), (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape) + + def test_extract_features_raises_error_with_invalid_image_size(self): + image_height = 32 + image_width = 32 + depth_multiplier = 1.0 + pad_to_multiple = 1 + self.check_extract_features_raises_error_with_invalid_image_size( + image_height, image_width, depth_multiplier, pad_to_multiple) + + def test_preprocess_returns_correct_value_range(self): + image_height = 128 + image_width = 128 + depth_multiplier = 1 + pad_to_multiple = 1 + test_image = np.random.rand(4, image_height, image_width, 3) + feature_extractor = self._create_feature_extractor(depth_multiplier, + pad_to_multiple) + preprocessed_image = feature_extractor.preprocess(test_image) + self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0))) + + def test_variables_only_created_in_scope(self): + depth_multiplier = 1 + pad_to_multiple = 1 + scope_name = 'InceptionV3' + self.check_feature_extractor_variables_under_scope( + depth_multiplier, 
pad_to_multiple, scope_name) + + def test_extract_features_with_fewer_layers(self): + image_height = 128 + image_width = 128 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 13, 13, 288), (2, 6, 6, 768), + (2, 2, 2, 2048), (2, 1, 1, 512)] + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape, num_layers=4) + + +if __name__ == '__main__': + tf.test.main() diff --git a/models/ssd_mobilenet_v1_feature_extractor.py b/models/ssd_mobilenet_v1_feature_extractor.py new file mode 100644 index 0000000..810af5b --- /dev/null +++ b/models/ssd_mobilenet_v1_feature_extractor.py @@ -0,0 +1,139 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""SSDFeatureExtractor for MobilenetV1 features.""" + +import tensorflow as tf +from tensorflow.contrib import slim as contrib_slim + +from object_detection.meta_architectures import ssd_meta_arch +from object_detection.models import feature_map_generators +from object_detection.utils import context_manager +from object_detection.utils import ops +from object_detection.utils import shape_utils +from nets import mobilenet_v1 + +slim = contrib_slim + + +class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): + """SSD Feature Extractor using MobilenetV1 features.""" + + def __init__(self, + is_training, + depth_multiplier, + min_depth, + pad_to_multiple, + conv_hyperparams_fn, + reuse_weights=None, + use_explicit_padding=False, + use_depthwise=False, + num_layers=6, + override_base_feature_extractor_hyperparams=False): + """MobileNetV1 Feature Extractor for SSD Models. + + Args: + is_training: whether the network is in training mode. + depth_multiplier: float depth multiplier for feature extractor. + min_depth: minimum feature extractor depth. + pad_to_multiple: the nearest multiple to zero pad the input height and + width dimensions to. + conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d + and separable_conv2d ops in the layers that are added on top of the + base feature extractor. + reuse_weights: Whether to reuse variables. Default is None. + use_explicit_padding: Use 'VALID' padding for convolutions, but prepad + inputs so that the output dimensions are the same as if 'SAME' padding + were used. + use_depthwise: Whether to use depthwise convolutions. Default is False. + num_layers: Number of SSD layers. + override_base_feature_extractor_hyperparams: Whether to override + hyperparameters of the base feature extractor with the one from + `conv_hyperparams_fn`. 
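+
+    Note: unlike the InceptionV2 extractor, MobilenetV1 defines its own
+    arg_scope, so overriding the base feature extractor hyperparameters is
+    optional here (see the arg_scope handling in extract_features below).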
+ """ + super(SSDMobileNetV1FeatureExtractor, self).__init__( + is_training=is_training, + depth_multiplier=depth_multiplier, + min_depth=min_depth, + pad_to_multiple=pad_to_multiple, + conv_hyperparams_fn=conv_hyperparams_fn, + reuse_weights=reuse_weights, + use_explicit_padding=use_explicit_padding, + use_depthwise=use_depthwise, + num_layers=num_layers, + override_base_feature_extractor_hyperparams= + override_base_feature_extractor_hyperparams) + + def preprocess(self, resized_inputs): + """SSD preprocessing. + + Maps pixel values to the range [-1, 1]. + + Args: + resized_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + + Returns: + preprocessed_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + """ + return (2.0 / 255.0) * resized_inputs - 1.0 + + def extract_features(self, preprocessed_inputs): + """Extract features from preprocessed inputs. + + Args: + preprocessed_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + + Returns: + feature_maps: a list of tensors where the ith tensor has shape + [batch, height_i, width_i, depth_i] + """ + preprocessed_inputs = shape_utils.check_min_image_dim( + 33, preprocessed_inputs) + + feature_map_layout = { + 'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', + '', ''][:self._num_layers], + 'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers], + 'use_explicit_padding': self._use_explicit_padding, + 'use_depthwise': self._use_depthwise, + } + + with tf.variable_scope('MobilenetV1', + reuse=self._reuse_weights) as scope: + with slim.arg_scope( + mobilenet_v1.mobilenet_v1_arg_scope( + is_training=None, regularize_depthwise=True)): + with (slim.arg_scope(self._conv_hyperparams_fn()) + if self._override_base_feature_extractor_hyperparams + else context_manager.IdentityContextManager()): + _, image_features = mobilenet_v1.mobilenet_v1_base( + ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), + final_endpoint='Conv2d_13_pointwise', + min_depth=self._min_depth, + depth_multiplier=self._depth_multiplier, + use_explicit_padding=self._use_explicit_padding, + scope=scope) + with slim.arg_scope(self._conv_hyperparams_fn()): + feature_maps = feature_map_generators.multi_resolution_feature_maps( + feature_map_layout=feature_map_layout, + depth_multiplier=self._depth_multiplier, + min_depth=self._min_depth, + insert_1x1_conv=True, + image_features=image_features) + + return feature_maps.values() diff --git a/models/ssd_mobilenet_v1_feature_extractor_test.py b/models/ssd_mobilenet_v1_feature_extractor_test.py new file mode 100644 index 0000000..11f32e4 --- /dev/null +++ b/models/ssd_mobilenet_v1_feature_extractor_test.py @@ -0,0 +1,296 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for SSD Mobilenet V1 feature extractors. 
+ +By using parameterized test decorator, this test serves for both Slim-based and +Keras-based Mobilenet V1 feature extractors in SSD. +""" +from absl.testing import parameterized + +import numpy as np +import tensorflow as tf +from tensorflow.contrib import slim as contrib_slim + +from object_detection.models import ssd_feature_extractor_test +from object_detection.models import ssd_mobilenet_v1_feature_extractor +from object_detection.models import ssd_mobilenet_v1_keras_feature_extractor + +slim = contrib_slim + + +@parameterized.parameters( + {'use_keras': False}, + {'use_keras': True}, +) +class SsdMobilenetV1FeatureExtractorTest( + ssd_feature_extractor_test.SsdFeatureExtractorTestBase): + + def _create_feature_extractor(self, + depth_multiplier, + pad_to_multiple, + use_explicit_padding=False, + num_layers=6, + is_training=False, + use_keras=False): + """Constructs a new feature extractor. + + Args: + depth_multiplier: float depth multiplier for feature extractor + pad_to_multiple: the nearest multiple to zero pad the input height and + width dimensions to. + use_explicit_padding: Use 'VALID' padding for convolutions, but prepad + inputs so that the output dimensions are the same as if 'SAME' padding + were used. + num_layers: number of SSD layers. + is_training: whether the network is in training mode. + use_keras: if True builds a keras-based feature extractor, if False builds + a slim-based one. + + Returns: + an ssd_meta_arch.SSDFeatureExtractor object. + """ + min_depth = 32 + if use_keras: + return (ssd_mobilenet_v1_keras_feature_extractor + .SSDMobileNetV1KerasFeatureExtractor( + is_training=is_training, + depth_multiplier=depth_multiplier, + min_depth=min_depth, + pad_to_multiple=pad_to_multiple, + conv_hyperparams=self._build_conv_hyperparams( + add_batch_norm=False), + freeze_batchnorm=False, + inplace_batchnorm_update=False, + use_explicit_padding=use_explicit_padding, + num_layers=num_layers, + name='MobilenetV1')) + else: + return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor( + is_training, + depth_multiplier, + min_depth, + pad_to_multiple, + self.conv_hyperparams_fn, + use_explicit_padding=use_explicit_padding, + num_layers=num_layers) + + def test_extract_features_returns_correct_shapes_128(self, use_keras): + image_height = 128 + image_width = 128 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 8, 8, 512), (2, 4, 4, 1024), + (2, 2, 2, 512), (2, 1, 1, 256), + (2, 1, 1, 256), (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shape( + 2, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_explicit_padding=False, + use_keras=use_keras) + self.check_extract_features_returns_correct_shape( + 2, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_explicit_padding=True, + use_keras=use_keras) + + def test_extract_features_returns_correct_shapes_299(self, use_keras): + image_height = 299 + image_width = 299 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 19, 19, 512), (2, 10, 10, 1024), + (2, 5, 5, 512), (2, 3, 3, 256), + (2, 2, 2, 256), (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shape( + 2, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_explicit_padding=False, + use_keras=use_keras) + self.check_extract_features_returns_correct_shape( + 2, + image_height, + image_width, + depth_multiplier, 
+ pad_to_multiple, + expected_feature_map_shape, + use_explicit_padding=True, + use_keras=use_keras) + + def test_extract_features_with_dynamic_image_shape(self, use_keras): + image_height = 128 + image_width = 128 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 8, 8, 512), (2, 4, 4, 1024), + (2, 2, 2, 512), (2, 1, 1, 256), + (2, 1, 1, 256), (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shapes_with_dynamic_inputs( + 2, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_explicit_padding=False, + use_keras=use_keras) + self.check_extract_features_returns_correct_shape( + 2, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_explicit_padding=True, + use_keras=use_keras) + + def test_extract_features_returns_correct_shapes_enforcing_min_depth( + self, use_keras): + image_height = 299 + image_width = 299 + depth_multiplier = 0.5**12 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 19, 19, 32), (2, 10, 10, 32), + (2, 5, 5, 32), (2, 3, 3, 32), (2, 2, 2, 32), + (2, 1, 1, 32)] + self.check_extract_features_returns_correct_shape( + 2, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_explicit_padding=False, + use_keras=use_keras) + self.check_extract_features_returns_correct_shape( + 2, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_explicit_padding=True, + use_keras=use_keras) + + def test_extract_features_returns_correct_shapes_with_pad_to_multiple( + self, use_keras): + image_height = 299 + image_width = 299 + depth_multiplier = 1.0 + pad_to_multiple = 32 + expected_feature_map_shape = [(2, 20, 20, 512), (2, 10, 10, 1024), + (2, 5, 5, 512), (2, 3, 3, 256), + (2, 2, 2, 256), (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shape( + 2, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_explicit_padding=False, + use_keras=use_keras) + self.check_extract_features_returns_correct_shape( + 2, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_explicit_padding=True, + use_keras=use_keras) + + def test_extract_features_raises_error_with_invalid_image_size( + self, use_keras): + image_height = 32 + image_width = 32 + depth_multiplier = 1.0 + pad_to_multiple = 1 + self.check_extract_features_raises_error_with_invalid_image_size( + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + use_keras=use_keras) + + def test_preprocess_returns_correct_value_range(self, use_keras): + image_height = 128 + image_width = 128 + depth_multiplier = 1 + pad_to_multiple = 1 + test_image = np.random.rand(2, image_height, image_width, 3) + feature_extractor = self._create_feature_extractor( + depth_multiplier, pad_to_multiple, use_keras=use_keras) + preprocessed_image = feature_extractor.preprocess(test_image) + self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0))) + + def test_variables_only_created_in_scope(self, use_keras): + depth_multiplier = 1 + pad_to_multiple = 1 + scope_name = 'MobilenetV1' + self.check_feature_extractor_variables_under_scope( + depth_multiplier, pad_to_multiple, scope_name, use_keras=use_keras) + + def test_variable_count(self, use_keras): + depth_multiplier = 1 + pad_to_multiple = 1 + variables = self.get_feature_extractor_variables( + depth_multiplier, 
pad_to_multiple, use_keras=use_keras) + self.assertEqual(len(variables), 151) + + def test_has_fused_batchnorm(self, use_keras): + image_height = 40 + image_width = 40 + depth_multiplier = 1 + pad_to_multiple = 1 + image_placeholder = tf.placeholder(tf.float32, + [1, image_height, image_width, 3]) + feature_extractor = self._create_feature_extractor( + depth_multiplier, pad_to_multiple, use_keras=use_keras) + preprocessed_image = feature_extractor.preprocess(image_placeholder) + if use_keras: + _ = feature_extractor(preprocessed_image) + else: + _ = feature_extractor.extract_features(preprocessed_image) + self.assertTrue( + any('FusedBatchNorm' in op.type + for op in tf.get_default_graph().get_operations())) + + def test_extract_features_with_fewer_layers(self, use_keras): + image_height = 128 + image_width = 128 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 8, 8, 512), (2, 4, 4, 1024), + (2, 2, 2, 512), (2, 1, 1, 256)] + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape, use_explicit_padding=False, num_layers=4, + use_keras=use_keras) + + +if __name__ == '__main__': + tf.test.main() diff --git a/models/ssd_mobilenet_v1_fpn_feature_extractor.py b/models/ssd_mobilenet_v1_fpn_feature_extractor.py new file mode 100644 index 0000000..8d269de --- /dev/null +++ b/models/ssd_mobilenet_v1_fpn_feature_extractor.py @@ -0,0 +1,198 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+
+"""SSD MobilenetV1 FPN Feature Extractor."""
+
+import copy
+import functools
+import tensorflow as tf
+from tensorflow.contrib import slim as contrib_slim
+
+from object_detection.meta_architectures import ssd_meta_arch
+from object_detection.models import feature_map_generators
+from object_detection.utils import context_manager
+from object_detection.utils import ops
+from object_detection.utils import shape_utils
+from nets import mobilenet_v1
+
+slim = contrib_slim
+
+
+# A modified config of mobilenet v1 that makes it more detection friendly.
+def _create_modified_mobilenet_config():
+  conv_defs = copy.deepcopy(mobilenet_v1.MOBILENETV1_CONV_DEFS)
+  conv_defs[-2] = mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=512)
+  conv_defs[-1] = mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=256)
+  return conv_defs
+
+
+class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
+  """SSD Feature Extractor using MobilenetV1 FPN features."""
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams_fn,
+               fpn_min_level=3,
+               fpn_max_level=7,
+               additional_layer_depth=256,
+               reuse_weights=None,
+               use_explicit_padding=False,
+               use_depthwise=False,
+               use_native_resize_op=False,
+               override_base_feature_extractor_hyperparams=False):
+    """SSD FPN feature extractor based on the Mobilenet v1 architecture.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: float depth multiplier for feature extractor.
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
+        and separable_conv2d ops in the layers that are added on top of the
+        base feature extractor.
+      fpn_min_level: the highest resolution feature map to use in FPN. The
+        valid values are {2, 3, 4, 5} which map to MobileNet v1 layers
+        {Conv2d_3_pointwise, Conv2d_5_pointwise, Conv2d_11_pointwise,
+        Conv2d_13_pointwise}, respectively.
+      fpn_max_level: the smallest resolution feature map to construct or use in
+        FPN. FPN construction uses feature maps starting from fpn_min_level up
+        to fpn_max_level. If the backbone network does not provide enough
+        feature maps, additional maps are created by applying stride-2
+        convolutions until the desired number of FPN levels is reached.
+      additional_layer_depth: additional feature map layer channel depth.
+      reuse_weights: whether to reuse variables. Default is None.
+      use_explicit_padding: Whether to use explicit padding when extracting
+        features. Default is False.
+      use_depthwise: Whether to use depthwise convolutions. Default is False.
+      use_native_resize_op: Whether to use tf.image.resize_nearest_neighbor to
+        do upsampling in FPN. Default is False.
+      override_base_feature_extractor_hyperparams: Whether to override
+        hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams_fn`.
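+
+    For example, with the defaults fpn_min_level=3 and fpn_max_level=7, FPN
+    levels 3-5 are built from Conv2d_5_pointwise, Conv2d_11_pointwise and
+    Conv2d_13_pointwise, while levels 6-7 are created by two extra stride-2
+    convolutions (bottom_up_Conv2d_14 and bottom_up_Conv2d_15) on top of the
+    coarsest backbone map; see extract_features below.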
+ """ + super(SSDMobileNetV1FpnFeatureExtractor, self).__init__( + is_training=is_training, + depth_multiplier=depth_multiplier, + min_depth=min_depth, + pad_to_multiple=pad_to_multiple, + conv_hyperparams_fn=conv_hyperparams_fn, + reuse_weights=reuse_weights, + use_explicit_padding=use_explicit_padding, + use_depthwise=use_depthwise, + override_base_feature_extractor_hyperparams= + override_base_feature_extractor_hyperparams) + self._fpn_min_level = fpn_min_level + self._fpn_max_level = fpn_max_level + self._additional_layer_depth = additional_layer_depth + self._conv_defs = None + if self._use_depthwise: + self._conv_defs = _create_modified_mobilenet_config() + self._use_native_resize_op = use_native_resize_op + + def preprocess(self, resized_inputs): + """SSD preprocessing. + + Maps pixel values to the range [-1, 1]. + + Args: + resized_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + + Returns: + preprocessed_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + """ + return (2.0 / 255.0) * resized_inputs - 1.0 + + def extract_features(self, preprocessed_inputs): + """Extract features from preprocessed inputs. + + Args: + preprocessed_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + + Returns: + feature_maps: a list of tensors where the ith tensor has shape + [batch, height_i, width_i, depth_i] + """ + preprocessed_inputs = shape_utils.check_min_image_dim( + 33, preprocessed_inputs) + + with tf.variable_scope('MobilenetV1', + reuse=self._reuse_weights) as scope: + with slim.arg_scope( + mobilenet_v1.mobilenet_v1_arg_scope( + is_training=None, regularize_depthwise=True)): + with (slim.arg_scope(self._conv_hyperparams_fn()) + if self._override_base_feature_extractor_hyperparams + else context_manager.IdentityContextManager()): + _, image_features = mobilenet_v1.mobilenet_v1_base( + ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), + final_endpoint='Conv2d_13_pointwise', + min_depth=self._min_depth, + depth_multiplier=self._depth_multiplier, + conv_defs=self._conv_defs, + use_explicit_padding=self._use_explicit_padding, + scope=scope) + + depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth) + with slim.arg_scope(self._conv_hyperparams_fn()): + with tf.variable_scope('fpn', reuse=self._reuse_weights): + feature_blocks = [ + 'Conv2d_3_pointwise', 'Conv2d_5_pointwise', 'Conv2d_11_pointwise', + 'Conv2d_13_pointwise' + ] + base_fpn_max_level = min(self._fpn_max_level, 5) + feature_block_list = [] + for level in range(self._fpn_min_level, base_fpn_max_level + 1): + feature_block_list.append(feature_blocks[level - 2]) + fpn_features = feature_map_generators.fpn_top_down_feature_maps( + [(key, image_features[key]) for key in feature_block_list], + depth=depth_fn(self._additional_layer_depth), + use_depthwise=self._use_depthwise, + use_explicit_padding=self._use_explicit_padding, + use_native_resize_op=self._use_native_resize_op) + feature_maps = [] + for level in range(self._fpn_min_level, base_fpn_max_level + 1): + feature_maps.append(fpn_features['top_down_{}'.format( + feature_blocks[level - 2])]) + last_feature_map = fpn_features['top_down_{}'.format( + feature_blocks[base_fpn_max_level - 2])] + # Construct coarse features + padding = 'VALID' if self._use_explicit_padding else 'SAME' + kernel_size = 3 + for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1): + if self._use_depthwise: + conv_op = functools.partial( + 
slim.separable_conv2d, depth_multiplier=1) + else: + conv_op = slim.conv2d + if self._use_explicit_padding: + last_feature_map = ops.fixed_padding( + last_feature_map, kernel_size) + last_feature_map = conv_op( + last_feature_map, + num_outputs=depth_fn(self._additional_layer_depth), + kernel_size=[kernel_size, kernel_size], + stride=2, + padding=padding, + scope='bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 13)) + feature_maps.append(last_feature_map) + return feature_maps diff --git a/models/ssd_mobilenet_v1_ppn_feature_extractor.py b/models/ssd_mobilenet_v1_ppn_feature_extractor.py new file mode 100644 index 0000000..42ef132 --- /dev/null +++ b/models/ssd_mobilenet_v1_ppn_feature_extractor.py @@ -0,0 +1,85 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""SSDFeatureExtractor for MobilenetV1 PPN features.""" + +import tensorflow as tf +from tensorflow.contrib import slim as contrib_slim + +from object_detection.meta_architectures import ssd_meta_arch +from object_detection.models import feature_map_generators +from object_detection.utils import context_manager +from object_detection.utils import ops +from object_detection.utils import shape_utils +from nets import mobilenet_v1 + +slim = contrib_slim + + +class SSDMobileNetV1PpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): + """SSD Feature Extractor using MobilenetV1 PPN features.""" + + def preprocess(self, resized_inputs): + """SSD preprocessing. + + Maps pixel values to the range [-1, 1]. + + Args: + resized_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + + Returns: + preprocessed_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + """ + return (2.0 / 255.0) * resized_inputs - 1.0 + + def extract_features(self, preprocessed_inputs): + """Extract features from preprocessed inputs. + + Args: + preprocessed_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. 
+ + Returns: + feature_maps: a list of tensors where the ith tensor has shape + [batch, height_i, width_i, depth_i] + """ + preprocessed_inputs = shape_utils.check_min_image_dim( + 33, preprocessed_inputs) + + with tf.variable_scope('MobilenetV1', + reuse=self._reuse_weights) as scope: + with slim.arg_scope( + mobilenet_v1.mobilenet_v1_arg_scope( + is_training=None, regularize_depthwise=True)): + with (slim.arg_scope(self._conv_hyperparams_fn()) + if self._override_base_feature_extractor_hyperparams + else context_manager.IdentityContextManager()): + _, image_features = mobilenet_v1.mobilenet_v1_base( + ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), + final_endpoint='Conv2d_13_pointwise', + min_depth=self._min_depth, + depth_multiplier=self._depth_multiplier, + use_explicit_padding=self._use_explicit_padding, + scope=scope) + with slim.arg_scope(self._conv_hyperparams_fn()): + feature_maps = feature_map_generators.pooling_pyramid_feature_maps( + base_feature_map_depth=0, + num_layers=6, + image_features={ + 'image_features': image_features['Conv2d_11_pointwise'] + }) + return feature_maps.values() diff --git a/models/ssd_mobilenet_v1_ppn_feature_extractor_test.py b/models/ssd_mobilenet_v1_ppn_feature_extractor_test.py new file mode 100644 index 0000000..9ca7a28 --- /dev/null +++ b/models/ssd_mobilenet_v1_ppn_feature_extractor_test.py @@ -0,0 +1,186 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for ssd_mobilenet_v1_ppn_feature_extractor.""" +import numpy as np +import tensorflow as tf +from tensorflow.contrib import slim as contrib_slim + +from object_detection.models import ssd_feature_extractor_test +from object_detection.models import ssd_mobilenet_v1_ppn_feature_extractor + +slim = contrib_slim + + +class SsdMobilenetV1PpnFeatureExtractorTest( + ssd_feature_extractor_test.SsdFeatureExtractorTestBase): + + def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, + is_training=True, use_explicit_padding=False): + """Constructs a new feature extractor. + + Args: + depth_multiplier: float depth multiplier for feature extractor + pad_to_multiple: the nearest multiple to zero pad the input height and + width dimensions to. + is_training: whether the network is in training mode. + use_explicit_padding: Use 'VALID' padding for convolutions, but prepad + inputs so that the output dimensions are the same as if 'SAME' padding + were used. + Returns: + an ssd_meta_arch.SSDFeatureExtractor object. + """ + min_depth = 32 + return (ssd_mobilenet_v1_ppn_feature_extractor. 
+            SSDMobileNetV1PpnFeatureExtractor(
+                is_training,
+                depth_multiplier,
+                min_depth,
+                pad_to_multiple,
+                self.conv_hyperparams_fn,
+                use_explicit_padding=use_explicit_padding))
+
+  def test_extract_features_returns_correct_shapes_320(self):
+    image_height = 320
+    image_width = 320
+    depth_multiplier = 1.0
+    pad_to_multiple = 1
+    expected_feature_map_shape = [(2, 20, 20, 512), (2, 10, 10, 512),
+                                  (2, 5, 5, 512), (2, 3, 3, 512),
+                                  (2, 2, 2, 512), (2, 1, 1, 512)]
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=False)
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=True)
+
+  def test_extract_features_returns_correct_shapes_300(self):
+    image_height = 300
+    image_width = 300
+    depth_multiplier = 1.0
+    pad_to_multiple = 1
+    expected_feature_map_shape = [(2, 19, 19, 512), (2, 10, 10, 512),
+                                  (2, 5, 5, 512), (2, 3, 3, 512),
+                                  (2, 2, 2, 512), (2, 1, 1, 512)]
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=False)
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=True)
+
+  def test_extract_features_returns_correct_shapes_640(self):
+    image_height = 640
+    image_width = 640
+    depth_multiplier = 1.0
+    pad_to_multiple = 1
+    expected_feature_map_shape = [(2, 40, 40, 512), (2, 20, 20, 512),
+                                  (2, 10, 10, 512), (2, 5, 5, 512),
+                                  (2, 3, 3, 512), (2, 2, 2, 512)]
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=False)
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=True)
+
+  def test_extract_features_with_dynamic_image_shape(self):
+    image_height = 320
+    image_width = 320
+    depth_multiplier = 1.0
+    pad_to_multiple = 1
+    expected_feature_map_shape = [(2, 20, 20, 512), (2, 10, 10, 512),
+                                  (2, 5, 5, 512), (2, 3, 3, 512),
+                                  (2, 2, 2, 512), (2, 1, 1, 512)]
+    self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=False)
+    self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=True)
+
+  def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
+    image_height = 299
+    image_width = 299
+    depth_multiplier = 1.0
+    pad_to_multiple = 32
+    expected_feature_map_shape = [(2, 20, 20, 512), (2, 10, 10, 512),
+                                  (2, 5, 5, 512), (2, 3, 3, 512),
+                                  (2, 2, 2, 512)]
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=False)
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=True)
+
+  def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
+    image_height = 256
+    image_width = 256
+    depth_multiplier = 0.5**12
+    pad_to_multiple = 1
+    expected_feature_map_shape = [(2, 16, 16, 32), (2, 8, 8, 32),
+                                  (2, 4, 4, 32), (2, 2, 2, 32),
+                                  (2, 1, 1, 32)]
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=False)
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=True)
+
+  def test_extract_features_raises_error_with_invalid_image_size(self):
+    image_height = 32
+    image_width = 32
+    depth_multiplier = 1.0
+    pad_to_multiple = 1
+    self.check_extract_features_raises_error_with_invalid_image_size(
+        image_height, image_width, depth_multiplier, pad_to_multiple)
+
+  def test_preprocess_returns_correct_value_range(self):
+    image_height = 128
+    image_width = 128
+    depth_multiplier = 1
+    pad_to_multiple = 1
+    test_image = np.random.rand(2, image_height, image_width, 3)
+    feature_extractor = self._create_feature_extractor(depth_multiplier,
+                                                       pad_to_multiple)
+    preprocessed_image = feature_extractor.preprocess(test_image)
+    self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
+
+  def test_variables_only_created_in_scope(self):
+    depth_multiplier = 1
+    pad_to_multiple = 1
+    scope_name = 'MobilenetV1'
+    self.check_feature_extractor_variables_under_scope(
+        depth_multiplier, pad_to_multiple, scope_name)
+
+  def test_has_fused_batchnorm(self):
+    image_height = 320
+    image_width = 320
+    depth_multiplier = 1
+    pad_to_multiple = 1
+    image_placeholder = tf.placeholder(tf.float32,
+                                       [1, image_height, image_width, 3])
+    feature_extractor = self._create_feature_extractor(depth_multiplier,
+                                                       pad_to_multiple)
+    preprocessed_image = feature_extractor.preprocess(image_placeholder)
+    _ = feature_extractor.extract_features(preprocessed_image)
+    self.assertTrue(any('FusedBatchNorm' in op.type
+                        for op in tf.get_default_graph().get_operations()))
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/models/ssd_mobilenet_v2_feature_extractor.py b/models/ssd_mobilenet_v2_feature_extractor.py
new file mode 100644
index 0000000..74725e0
--- /dev/null
+++ b/models/ssd_mobilenet_v2_feature_extractor.py
@@ -0,0 +1,141 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""SSDFeatureExtractor for MobilenetV2 features."""
+
+import tensorflow as tf
+from tensorflow.contrib import slim as contrib_slim
+
+from object_detection.meta_architectures import ssd_meta_arch
+from object_detection.models import feature_map_generators
+from object_detection.utils import context_manager
+from object_detection.utils import ops
+from object_detection.utils import shape_utils
+from nets.mobilenet import mobilenet
+from nets.mobilenet import mobilenet_v2
+
+slim = contrib_slim
+
+
+class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
+  """SSD Feature Extractor using MobilenetV2 features."""
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams_fn,
+               reuse_weights=None,
+               use_explicit_padding=False,
+               use_depthwise=False,
+               num_layers=6,
+               override_base_feature_extractor_hyperparams=False):
+    """MobileNetV2 Feature Extractor for SSD Models.
+
+    Mobilenet v2, as described in "MobileNetV2: Inverted Residuals and Linear
+    Bottlenecks" (Sandler et al.): https://arxiv.org/abs/1801.04381
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: float depth multiplier for feature extractor.
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
+        and separable_conv2d ops in the layers that are added on top of the
+        base feature extractor.
+      reuse_weights: Whether to reuse variables. Default is None.
+      use_explicit_padding: Whether to use explicit padding when extracting
+        features. Default is False.
+      use_depthwise: Whether to use depthwise convolutions. Default is False.
+      num_layers: Number of SSD layers.
+      override_base_feature_extractor_hyperparams: Whether to override
+        hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams_fn`.
+    """
+    super(SSDMobileNetV2FeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams_fn=conv_hyperparams_fn,
+        reuse_weights=reuse_weights,
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        num_layers=num_layers,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams)
+
+  def preprocess(self, resized_inputs):
+    """SSD preprocessing.
+
+    Maps pixel values to the range [-1, 1].
+
+    Args:
+      resized_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+
+    Returns:
+      preprocessed_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+    """
+    return (2.0 / 255.0) * resized_inputs - 1.0
+
+  def extract_features(self, preprocessed_inputs):
+    """Extract features from preprocessed inputs.
+
+    Args:
+      preprocessed_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+ + Returns: + feature_maps: a list of tensors where the ith tensor has shape + [batch, height_i, width_i, depth_i] + """ + preprocessed_inputs = shape_utils.check_min_image_dim( + 33, preprocessed_inputs) + + feature_map_layout = { + 'from_layer': ['layer_15/expansion_output', 'layer_19', '', '', '', '' + ][:self._num_layers], + 'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers], + 'use_depthwise': self._use_depthwise, + 'use_explicit_padding': self._use_explicit_padding, + } + + with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope: + with slim.arg_scope( + mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \ + slim.arg_scope( + [mobilenet.depth_multiplier], min_depth=self._min_depth): + with (slim.arg_scope(self._conv_hyperparams_fn()) + if self._override_base_feature_extractor_hyperparams else + context_manager.IdentityContextManager()): + _, image_features = mobilenet_v2.mobilenet_base( + ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), + final_endpoint='layer_19', + depth_multiplier=self._depth_multiplier, + use_explicit_padding=self._use_explicit_padding, + scope=scope) + with slim.arg_scope(self._conv_hyperparams_fn()): + feature_maps = feature_map_generators.multi_resolution_feature_maps( + feature_map_layout=feature_map_layout, + depth_multiplier=self._depth_multiplier, + min_depth=self._min_depth, + insert_1x1_conv=True, + image_features=image_features) + + return feature_maps.values() diff --git a/models/ssd_mobilenet_v2_feature_extractor_test.py b/models/ssd_mobilenet_v2_feature_extractor_test.py new file mode 100644 index 0000000..8cd1de9 --- /dev/null +++ b/models/ssd_mobilenet_v2_feature_extractor_test.py @@ -0,0 +1,227 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for ssd_mobilenet_v2_feature_extractor.""" +from absl.testing import parameterized + +import numpy as np +import tensorflow as tf +from tensorflow.contrib import slim as contrib_slim + +from object_detection.models import ssd_feature_extractor_test +from object_detection.models import ssd_mobilenet_v2_feature_extractor +from object_detection.models import ssd_mobilenet_v2_keras_feature_extractor + +slim = contrib_slim + + +@parameterized.parameters( + {'use_keras': False}, + {'use_keras': True}, +) +class SsdMobilenetV2FeatureExtractorTest( + ssd_feature_extractor_test.SsdFeatureExtractorTestBase): + + def _create_feature_extractor(self, + depth_multiplier, + pad_to_multiple, + use_explicit_padding=False, + num_layers=6, + use_keras=False): + """Constructs a new feature extractor. + + Args: + depth_multiplier: float depth multiplier for feature extractor + pad_to_multiple: the nearest multiple to zero pad the input height and + width dimensions to. 
+ use_explicit_padding: use 'VALID' padding for convolutions, but prepad + inputs so that the output dimensions are the same as if 'SAME' padding + were used. + num_layers: number of SSD layers. + use_keras: if True builds a keras-based feature extractor, if False builds + a slim-based one. + Returns: + an ssd_meta_arch.SSDFeatureExtractor object. + """ + min_depth = 32 + if use_keras: + return (ssd_mobilenet_v2_keras_feature_extractor. + SSDMobileNetV2KerasFeatureExtractor( + is_training=False, + depth_multiplier=depth_multiplier, + min_depth=min_depth, + pad_to_multiple=pad_to_multiple, + conv_hyperparams=self._build_conv_hyperparams(), + freeze_batchnorm=False, + inplace_batchnorm_update=False, + use_explicit_padding=use_explicit_padding, + num_layers=num_layers, + name='MobilenetV2')) + else: + return ssd_mobilenet_v2_feature_extractor.SSDMobileNetV2FeatureExtractor( + False, + depth_multiplier, + min_depth, + pad_to_multiple, + self.conv_hyperparams_fn, + use_explicit_padding=use_explicit_padding, + num_layers=num_layers) + + def test_extract_features_returns_correct_shapes_128(self, use_keras): + image_height = 128 + image_width = 128 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1280), + (2, 2, 2, 512), (2, 1, 1, 256), + (2, 1, 1, 256), (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape, use_keras=use_keras) + + def test_extract_features_returns_correct_shapes_128_explicit_padding( + self, use_keras): + image_height = 128 + image_width = 128 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1280), + (2, 2, 2, 512), (2, 1, 1, 256), + (2, 1, 1, 256), (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape, use_explicit_padding=True, + use_keras=use_keras) + + def test_extract_features_returns_correct_shapes_with_dynamic_inputs( + self, use_keras): + image_height = 128 + image_width = 128 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1280), + (2, 2, 2, 512), (2, 1, 1, 256), + (2, 1, 1, 256), (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shapes_with_dynamic_inputs( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape, use_keras=use_keras) + + def test_extract_features_returns_correct_shapes_299(self, use_keras): + image_height = 299 + image_width = 299 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 19, 19, 576), (2, 10, 10, 1280), + (2, 5, 5, 512), (2, 3, 3, 256), + (2, 2, 2, 256), (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape, use_keras=use_keras) + + def test_extract_features_returns_correct_shapes_enforcing_min_depth( + self, use_keras): + image_height = 299 + image_width = 299 + depth_multiplier = 0.5**12 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 19, 19, 192), (2, 10, 10, 32), + (2, 5, 5, 32), (2, 3, 3, 32), + (2, 2, 2, 32), (2, 1, 1, 32)] + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape, use_keras=use_keras) + + def 
test_extract_features_returns_correct_shapes_with_pad_to_multiple( + self, use_keras): + image_height = 299 + image_width = 299 + depth_multiplier = 1.0 + pad_to_multiple = 32 + expected_feature_map_shape = [(2, 20, 20, 576), (2, 10, 10, 1280), + (2, 5, 5, 512), (2, 3, 3, 256), + (2, 2, 2, 256), (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape, use_keras=use_keras) + + def test_extract_features_raises_error_with_invalid_image_size( + self, use_keras): + image_height = 32 + image_width = 32 + depth_multiplier = 1.0 + pad_to_multiple = 1 + self.check_extract_features_raises_error_with_invalid_image_size( + image_height, image_width, depth_multiplier, pad_to_multiple, + use_keras=use_keras) + + def test_preprocess_returns_correct_value_range(self, use_keras): + image_height = 128 + image_width = 128 + depth_multiplier = 1 + pad_to_multiple = 1 + test_image = np.random.rand(4, image_height, image_width, 3) + feature_extractor = self._create_feature_extractor(depth_multiplier, + pad_to_multiple, + use_keras=use_keras) + preprocessed_image = feature_extractor.preprocess(test_image) + self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0))) + + def test_variables_only_created_in_scope(self, use_keras): + depth_multiplier = 1 + pad_to_multiple = 1 + scope_name = 'MobilenetV2' + self.check_feature_extractor_variables_under_scope( + depth_multiplier, pad_to_multiple, scope_name, use_keras=use_keras) + + def test_variable_count(self, use_keras): + depth_multiplier = 1 + pad_to_multiple = 1 + variables = self.get_feature_extractor_variables( + depth_multiplier, pad_to_multiple, use_keras=use_keras) + self.assertEqual(len(variables), 292) + + def test_has_fused_batchnorm(self, use_keras): + image_height = 40 + image_width = 40 + depth_multiplier = 1 + pad_to_multiple = 1 + image_placeholder = tf.placeholder(tf.float32, + [1, image_height, image_width, 3]) + feature_extractor = self._create_feature_extractor(depth_multiplier, + pad_to_multiple, + use_keras=use_keras) + preprocessed_image = feature_extractor.preprocess(image_placeholder) + if use_keras: + _ = feature_extractor(preprocessed_image) + else: + _ = feature_extractor.extract_features(preprocessed_image) + self.assertTrue(any('FusedBatchNorm' in op.type + for op in tf.get_default_graph().get_operations())) + + def test_extract_features_with_fewer_layers(self, use_keras): + image_height = 128 + image_width = 128 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1280), + (2, 2, 2, 512), (2, 1, 1, 256)] + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape, use_explicit_padding=False, num_layers=4, + use_keras=use_keras) + + +if __name__ == '__main__': + tf.test.main() diff --git a/models/ssd_mobilenet_v2_fpn_feature_extractor_test.py b/models/ssd_mobilenet_v2_fpn_feature_extractor_test.py new file mode 100644 index 0000000..6a8b76e --- /dev/null +++ b/models/ssd_mobilenet_v2_fpn_feature_extractor_test.py @@ -0,0 +1,435 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for ssd_mobilenet_v2_fpn_feature_extractor. + +By using parameterized test decorator, this test serves for both Slim-based and +Keras-based Mobilenet V2 FPN feature extractors in SSD. +""" +from absl.testing import parameterized +import numpy as np +import tensorflow as tf +from tensorflow.contrib import slim as contrib_slim + +from object_detection.models import ssd_feature_extractor_test +from object_detection.models import ssd_mobilenet_v2_fpn_feature_extractor +from object_detection.models import ssd_mobilenet_v2_fpn_keras_feature_extractor + +slim = contrib_slim + + +@parameterized.parameters( + { + 'use_depthwise': False, + 'use_keras': True + }, + { + 'use_depthwise': True, + 'use_keras': True + }, + { + 'use_depthwise': False, + 'use_keras': False + }, + { + 'use_depthwise': True, + 'use_keras': False + }, +) +class SsdMobilenetV2FpnFeatureExtractorTest( + ssd_feature_extractor_test.SsdFeatureExtractorTestBase): + + def _create_feature_extractor(self, + depth_multiplier, + pad_to_multiple, + is_training=True, + use_explicit_padding=False, + use_keras=False, + use_depthwise=False): + """Constructs a new feature extractor. + + Args: + depth_multiplier: float depth multiplier for feature extractor + pad_to_multiple: the nearest multiple to zero pad the input height and + width dimensions to. + is_training: whether the network is in training mode. + use_explicit_padding: Use 'VALID' padding for convolutions, but prepad + inputs so that the output dimensions are the same as if 'SAME' padding + were used. + use_keras: if True builds a keras-based feature extractor, if False builds + a slim-based one. + use_depthwise: Whether to use depthwise convolutions. + Returns: + an ssd_meta_arch.SSDFeatureExtractor object. 
+ """ + min_depth = 32 + if use_keras: + return (ssd_mobilenet_v2_fpn_keras_feature_extractor + .SSDMobileNetV2FpnKerasFeatureExtractor( + is_training=is_training, + depth_multiplier=depth_multiplier, + min_depth=min_depth, + pad_to_multiple=pad_to_multiple, + conv_hyperparams=self._build_conv_hyperparams( + add_batch_norm=False), + freeze_batchnorm=False, + inplace_batchnorm_update=False, + use_explicit_padding=use_explicit_padding, + use_depthwise=use_depthwise, + name='MobilenetV2_FPN')) + else: + return (ssd_mobilenet_v2_fpn_feature_extractor + .SSDMobileNetV2FpnFeatureExtractor( + is_training, + depth_multiplier, + min_depth, + pad_to_multiple, + self.conv_hyperparams_fn, + use_depthwise=use_depthwise, + use_explicit_padding=use_explicit_padding)) + + def test_extract_features_returns_correct_shapes_256(self, use_keras, + use_depthwise): + image_height = 256 + image_width = 256 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 32, 32, 256), (2, 16, 16, 256), + (2, 8, 8, 256), (2, 4, 4, 256), + (2, 2, 2, 256)] + self.check_extract_features_returns_correct_shape( + 2, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_explicit_padding=False, + use_keras=use_keras, + use_depthwise=use_depthwise) + self.check_extract_features_returns_correct_shape( + 2, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_explicit_padding=True, + use_keras=use_keras, + use_depthwise=use_depthwise) + + def test_extract_features_returns_correct_shapes_384(self, use_keras, + use_depthwise): + image_height = 320 + image_width = 320 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 40, 40, 256), (2, 20, 20, 256), + (2, 10, 10, 256), (2, 5, 5, 256), + (2, 3, 3, 256)] + self.check_extract_features_returns_correct_shape( + 2, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_explicit_padding=False, + use_keras=use_keras, + use_depthwise=use_depthwise) + self.check_extract_features_returns_correct_shape( + 2, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_explicit_padding=True, + use_keras=use_keras, + use_depthwise=use_depthwise) + + def test_extract_features_with_dynamic_image_shape(self, use_keras, + use_depthwise): + image_height = 256 + image_width = 256 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 32, 32, 256), (2, 16, 16, 256), + (2, 8, 8, 256), (2, 4, 4, 256), + (2, 2, 2, 256)] + self.check_extract_features_returns_correct_shapes_with_dynamic_inputs( + 2, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_explicit_padding=False, + use_keras=use_keras, + use_depthwise=use_depthwise) + self.check_extract_features_returns_correct_shapes_with_dynamic_inputs( + 2, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_explicit_padding=True, + use_keras=use_keras, + use_depthwise=use_depthwise) + + def test_extract_features_returns_correct_shapes_with_pad_to_multiple( + self, use_keras, use_depthwise): + image_height = 299 + image_width = 299 + depth_multiplier = 1.0 + pad_to_multiple = 32 + expected_feature_map_shape = [(2, 40, 40, 256), (2, 20, 20, 256), + (2, 10, 10, 256), (2, 5, 5, 256), + (2, 3, 3, 256)] + self.check_extract_features_returns_correct_shape( + 2, + image_height, + 
image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_explicit_padding=False, + use_keras=use_keras, + use_depthwise=use_depthwise) + self.check_extract_features_returns_correct_shape( + 2, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_explicit_padding=True, + use_keras=use_keras, + use_depthwise=use_depthwise) + + def test_extract_features_returns_correct_shapes_enforcing_min_depth( + self, use_keras, use_depthwise): + image_height = 256 + image_width = 256 + depth_multiplier = 0.5**12 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 32, 32, 32), (2, 16, 16, 32), + (2, 8, 8, 32), (2, 4, 4, 32), + (2, 2, 2, 32)] + self.check_extract_features_returns_correct_shape( + 2, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_explicit_padding=False, + use_keras=use_keras, + use_depthwise=use_depthwise) + self.check_extract_features_returns_correct_shape( + 2, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_explicit_padding=True, + use_keras=use_keras, + use_depthwise=use_depthwise) + + def test_extract_features_raises_error_with_invalid_image_size( + self, use_keras, use_depthwise): + image_height = 32 + image_width = 32 + depth_multiplier = 1.0 + pad_to_multiple = 1 + self.check_extract_features_raises_error_with_invalid_image_size( + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + use_keras=use_keras, + use_depthwise=use_depthwise) + + def test_preprocess_returns_correct_value_range(self, use_keras, + use_depthwise): + image_height = 256 + image_width = 256 + depth_multiplier = 1 + pad_to_multiple = 1 + test_image = np.random.rand(2, image_height, image_width, 3) + feature_extractor = self._create_feature_extractor( + depth_multiplier, + pad_to_multiple, + use_keras=use_keras, + use_depthwise=use_depthwise) + preprocessed_image = feature_extractor.preprocess(test_image) + self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0))) + + def test_variables_only_created_in_scope(self, use_keras, use_depthwise): + depth_multiplier = 1 + pad_to_multiple = 1 + scope_name = 'MobilenetV2' + self.check_feature_extractor_variables_under_scope( + depth_multiplier, + pad_to_multiple, + scope_name, + use_keras=use_keras, + use_depthwise=use_depthwise) + + def test_fused_batchnorm(self, use_keras, use_depthwise): + image_height = 256 + image_width = 256 + depth_multiplier = 1 + pad_to_multiple = 1 + image_placeholder = tf.placeholder(tf.float32, + [1, image_height, image_width, 3]) + feature_extractor = self._create_feature_extractor( + depth_multiplier, + pad_to_multiple, + use_keras=use_keras, + use_depthwise=use_depthwise) + preprocessed_image = feature_extractor.preprocess(image_placeholder) + if use_keras: + _ = feature_extractor(preprocessed_image) + else: + _ = feature_extractor.extract_features(preprocessed_image) + self.assertTrue( + any('FusedBatchNorm' in op.type + for op in tf.get_default_graph().get_operations())) + + def test_variable_count(self, use_keras, use_depthwise): + depth_multiplier = 1 + pad_to_multiple = 1 + variables = self.get_feature_extractor_variables( + depth_multiplier, + pad_to_multiple, + use_keras=use_keras, + use_depthwise=use_depthwise) + expected_variables_len = 274 + if use_depthwise: + expected_variables_len = 278 + self.assertEqual(len(variables), expected_variables_len) + + def 
test_get_expected_feature_map_variable_names(self, use_keras, + use_depthwise): + depth_multiplier = 1.0 + pad_to_multiple = 1 + + slim_expected_feature_maps_variables = set([ + # Slim Mobilenet V2 feature maps + 'MobilenetV2/expanded_conv_4/depthwise/depthwise_weights', + 'MobilenetV2/expanded_conv_7/depthwise/depthwise_weights', + 'MobilenetV2/expanded_conv_14/depthwise/depthwise_weights', + 'MobilenetV2/Conv_1/weights', + # FPN layers + 'MobilenetV2/fpn/bottom_up_Conv2d_20/weights', + 'MobilenetV2/fpn/bottom_up_Conv2d_21/weights', + 'MobilenetV2/fpn/smoothing_1/weights', + 'MobilenetV2/fpn/smoothing_2/weights', + 'MobilenetV2/fpn/projection_1/weights', + 'MobilenetV2/fpn/projection_2/weights', + 'MobilenetV2/fpn/projection_3/weights', + ]) + slim_expected_feature_maps_variables_with_depthwise = set([ + # Slim Mobilenet V2 feature maps + 'MobilenetV2/expanded_conv_4/depthwise/depthwise_weights', + 'MobilenetV2/expanded_conv_7/depthwise/depthwise_weights', + 'MobilenetV2/expanded_conv_14/depthwise/depthwise_weights', + 'MobilenetV2/Conv_1/weights', + # FPN layers + 'MobilenetV2/fpn/bottom_up_Conv2d_20/pointwise_weights', + 'MobilenetV2/fpn/bottom_up_Conv2d_20/depthwise_weights', + 'MobilenetV2/fpn/bottom_up_Conv2d_21/pointwise_weights', + 'MobilenetV2/fpn/bottom_up_Conv2d_21/depthwise_weights', + 'MobilenetV2/fpn/smoothing_1/depthwise_weights', + 'MobilenetV2/fpn/smoothing_1/pointwise_weights', + 'MobilenetV2/fpn/smoothing_2/depthwise_weights', + 'MobilenetV2/fpn/smoothing_2/pointwise_weights', + 'MobilenetV2/fpn/projection_1/weights', + 'MobilenetV2/fpn/projection_2/weights', + 'MobilenetV2/fpn/projection_3/weights', + ]) + keras_expected_feature_maps_variables = set([ + # Keras Mobilenet V2 feature maps + 'MobilenetV2_FPN/block_4_depthwise/depthwise_kernel', + 'MobilenetV2_FPN/block_7_depthwise/depthwise_kernel', + 'MobilenetV2_FPN/block_14_depthwise/depthwise_kernel', + 'MobilenetV2_FPN/Conv_1/kernel', + # FPN layers + 'MobilenetV2_FPN/bottom_up_Conv2d_20_conv/kernel', + 'MobilenetV2_FPN/bottom_up_Conv2d_21_conv/kernel', + 'MobilenetV2_FPN/FeatureMaps/top_down/smoothing_1_conv/kernel', + 'MobilenetV2_FPN/FeatureMaps/top_down/smoothing_2_conv/kernel', + 'MobilenetV2_FPN/FeatureMaps/top_down/projection_1/kernel', + 'MobilenetV2_FPN/FeatureMaps/top_down/projection_2/kernel', + 'MobilenetV2_FPN/FeatureMaps/top_down/projection_3/kernel' + ]) + keras_expected_feature_maps_variables_with_depthwise = set([ + # Keras Mobilenet V2 feature maps + 'MobilenetV2_FPN/block_4_depthwise/depthwise_kernel', + 'MobilenetV2_FPN/block_7_depthwise/depthwise_kernel', + 'MobilenetV2_FPN/block_14_depthwise/depthwise_kernel', + 'MobilenetV2_FPN/Conv_1/kernel', + # FPN layers + 'MobilenetV2_FPN/bottom_up_Conv2d_20_depthwise_conv/depthwise_kernel', + 'MobilenetV2_FPN/bottom_up_Conv2d_20_depthwise_conv/pointwise_kernel', + 'MobilenetV2_FPN/bottom_up_Conv2d_21_depthwise_conv/depthwise_kernel', + 'MobilenetV2_FPN/bottom_up_Conv2d_21_depthwise_conv/pointwise_kernel', + ('MobilenetV2_FPN/FeatureMaps/top_down/smoothing_1_depthwise_conv/' + 'depthwise_kernel'), + ('MobilenetV2_FPN/FeatureMaps/top_down/smoothing_1_depthwise_conv/' + 'pointwise_kernel'), + ('MobilenetV2_FPN/FeatureMaps/top_down/smoothing_2_depthwise_conv/' + 'depthwise_kernel'), + ('MobilenetV2_FPN/FeatureMaps/top_down/smoothing_2_depthwise_conv/' + 'pointwise_kernel'), + 'MobilenetV2_FPN/FeatureMaps/top_down/projection_1/kernel', + 'MobilenetV2_FPN/FeatureMaps/top_down/projection_2/kernel', + 'MobilenetV2_FPN/FeatureMaps/top_down/projection_3/kernel' + ]) 
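+
+    # Build the extractor in a fresh graph and verify that every expected
+    # feature-map variable name appears among the variables actually created
+    # (subset check via set intersection below).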
+    g = tf.Graph()
+    with g.as_default():
+      preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
+      feature_extractor = self._create_feature_extractor(
+          depth_multiplier,
+          pad_to_multiple,
+          use_keras=use_keras,
+          use_depthwise=use_depthwise)
+      if use_keras:
+        _ = feature_extractor(preprocessed_inputs)
+        expected_feature_maps_variables = keras_expected_feature_maps_variables
+        if use_depthwise:
+          expected_feature_maps_variables = (
+              keras_expected_feature_maps_variables_with_depthwise)
+      else:
+        _ = feature_extractor.extract_features(preprocessed_inputs)
+        expected_feature_maps_variables = slim_expected_feature_maps_variables
+        if use_depthwise:
+          expected_feature_maps_variables = (
+              slim_expected_feature_maps_variables_with_depthwise)
+      actual_variable_set = set([
+          var.op.name for var in g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
+      ])
+      variable_intersection = expected_feature_maps_variables.intersection(
+          actual_variable_set)
+      self.assertSetEqual(expected_feature_maps_variables,
+                          variable_intersection)
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/models/ssd_mobilenet_v2_keras_feature_extractor.py b/models/ssd_mobilenet_v2_keras_feature_extractor.py
new file mode 100644
index 0000000..1cefc74
--- /dev/null
+++ b/models/ssd_mobilenet_v2_keras_feature_extractor.py
@@ -0,0 +1,167 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""SSDFeatureExtractor for MobilenetV2 features."""
+
+import tensorflow as tf
+
+from object_detection.meta_architectures import ssd_meta_arch
+from object_detection.models import feature_map_generators
+from object_detection.models.keras_models import mobilenet_v2
+from object_detection.utils import ops
+from object_detection.utils import shape_utils
+
+
+class SSDMobileNetV2KerasFeatureExtractor(
+    ssd_meta_arch.SSDKerasFeatureExtractor):
+  """SSD Feature Extractor using MobilenetV2 features."""
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams,
+               freeze_batchnorm,
+               inplace_batchnorm_update,
+               use_explicit_padding=False,
+               use_depthwise=False,
+               num_layers=6,
+               override_base_feature_extractor_hyperparams=False,
+               name=None):
+    """MobileNetV2 Feature Extractor for SSD Models.
+
+    Mobilenet v2, as described in "MobileNetV2: Inverted Residuals and Linear
+    Bottlenecks" (Sandler et al.): https://arxiv.org/abs/1801.04381
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: float depth multiplier for feature extractor (Functions
+        as a width multiplier for the mobilenet_v2 network itself).
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams: `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
+      freeze_batchnorm: Whether to freeze batch norm parameters during
+        training or not. When training with a small batch size (e.g. 1), it is
+        desirable to freeze batch norm update and use pretrained batch norm
+        params.
+      inplace_batchnorm_update: Whether to update batch norm moving average
+        values inplace. When this is false train op must add a control
+        dependency on tf.GraphKeys.UPDATE_OPS collection in order to update
+        batch norm statistics.
+      use_explicit_padding: Whether to use explicit padding when extracting
+        features. Default is False.
+      use_depthwise: Whether to use depthwise convolutions. Default is False.
+      num_layers: Number of SSD layers.
+      override_base_feature_extractor_hyperparams: Whether to override
+        hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams`.
+      name: A string name scope to assign to the model. If 'None', Keras
+        will auto-generate one from the class name.
+    """
+    super(SSDMobileNetV2KerasFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        num_layers=num_layers,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams,
+        name=name)
+    self._feature_map_layout = {
+        'from_layer': ['layer_15/expansion_output', 'layer_19', '', '', '', ''
+                      ][:self._num_layers],
+        'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers],
+        'use_depthwise': self._use_depthwise,
+        'use_explicit_padding': self._use_explicit_padding,
+    }
+
+    self.mobilenet_v2 = None
+    self.feature_map_generator = None
+
+  def build(self, input_shape):
+    full_mobilenet_v2 = mobilenet_v2.mobilenet_v2(
+        batchnorm_training=(self._is_training and not self._freeze_batchnorm),
+        conv_hyperparams=(self._conv_hyperparams
+                          if self._override_base_feature_extractor_hyperparams
+                          else None),
+        weights=None,
+        use_explicit_padding=self._use_explicit_padding,
+        alpha=self._depth_multiplier,
+        min_depth=self._min_depth,
+        include_top=False)
+    conv2d_11_pointwise = full_mobilenet_v2.get_layer(
+        name='block_13_expand_relu').output
+    conv2d_13_pointwise = full_mobilenet_v2.get_layer(name='out_relu').output
+    self.mobilenet_v2 = tf.keras.Model(
+        inputs=full_mobilenet_v2.inputs,
+        outputs=[conv2d_11_pointwise, conv2d_13_pointwise])
+    self.feature_map_generator = (
+        feature_map_generators.KerasMultiResolutionFeatureMaps(
+            feature_map_layout=self._feature_map_layout,
+            depth_multiplier=self._depth_multiplier,
+            min_depth=self._min_depth,
+            insert_1x1_conv=True,
+            is_training=self._is_training,
+            conv_hyperparams=self._conv_hyperparams,
+            freeze_batchnorm=self._freeze_batchnorm,
+            name='FeatureMaps'))
+    self.built = True
+
+  def preprocess(self, resized_inputs):
+    """SSD preprocessing.
+
+    Maps pixel values to the range [-1, 1].
+
+    Args:
+      resized_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+
+    Returns:
+      preprocessed_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+ """ + return (2.0 / 255.0) * resized_inputs - 1.0 + + def _extract_features(self, preprocessed_inputs): + """Extract features from preprocessed inputs. + + Args: + preprocessed_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + + Returns: + feature_maps: a list of tensors where the ith tensor has shape + [batch, height_i, width_i, depth_i] + """ + preprocessed_inputs = shape_utils.check_min_image_dim( + 33, preprocessed_inputs) + + image_features = self.mobilenet_v2( + ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple)) + + feature_maps = self.feature_map_generator({ + 'layer_15/expansion_output': image_features[0], + 'layer_19': image_features[1]}) + + return feature_maps.values() diff --git a/models/ssd_mobilenet_v3_feature_extractor.py b/models/ssd_mobilenet_v3_feature_extractor.py new file mode 100644 index 0000000..6c87b71 --- /dev/null +++ b/models/ssd_mobilenet_v3_feature_extractor.py @@ -0,0 +1,220 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""SSDFeatureExtractor for MobileNetV3 features.""" + +import tensorflow as tf +from tensorflow.contrib import slim as contrib_slim + +from object_detection.meta_architectures import ssd_meta_arch +from object_detection.models import feature_map_generators +from object_detection.utils import context_manager +from object_detection.utils import ops +from object_detection.utils import shape_utils +from nets.mobilenet import mobilenet +from nets.mobilenet import mobilenet_v3 + +slim = contrib_slim + + +class SSDMobileNetV3FeatureExtractorBase(ssd_meta_arch.SSDFeatureExtractor): + """Base class of SSD feature extractor using MobilenetV3 features.""" + + def __init__(self, + conv_defs, + from_layer, + is_training, + depth_multiplier, + min_depth, + pad_to_multiple, + conv_hyperparams_fn, + reuse_weights=None, + use_explicit_padding=False, + use_depthwise=False, + override_base_feature_extractor_hyperparams=False, + scope_name='MobilenetV3'): + """MobileNetV3 Feature Extractor for SSD Models. + + MobileNet v3. Details found in: + https://arxiv.org/abs/1905.02244 + + Args: + conv_defs: MobileNetV3 conv defs for backbone. + from_layer: A cell of two layer names (string) to connect to the 1st and + 2nd inputs of the SSD head. + is_training: whether the network is in training mode. + depth_multiplier: float depth multiplier for feature extractor. + min_depth: minimum feature extractor depth. + pad_to_multiple: the nearest multiple to zero pad the input height and + width dimensions to. + conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d + and separable_conv2d ops in the layers that are added on top of the base + feature extractor. + reuse_weights: Whether to reuse variables. Default is None. + use_explicit_padding: Whether to use explicit padding when extracting + features. Default is False. 
+ use_depthwise: Whether to use depthwise convolutions. Default is False. + override_base_feature_extractor_hyperparams: Whether to override + hyperparameters of the base feature extractor with the one from + `conv_hyperparams_fn`. + scope_name: scope name (string) of network variables. + """ + super(SSDMobileNetV3FeatureExtractorBase, self).__init__( + is_training=is_training, + depth_multiplier=depth_multiplier, + min_depth=min_depth, + pad_to_multiple=pad_to_multiple, + conv_hyperparams_fn=conv_hyperparams_fn, + reuse_weights=reuse_weights, + use_explicit_padding=use_explicit_padding, + use_depthwise=use_depthwise, + override_base_feature_extractor_hyperparams=override_base_feature_extractor_hyperparams + ) + self._conv_defs = conv_defs + self._from_layer = from_layer + self._scope_name = scope_name + + def preprocess(self, resized_inputs): + """SSD preprocessing. + + Maps pixel values to the range [-1, 1]. + + Args: + resized_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + + Returns: + preprocessed_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + """ + return (2.0 / 255.0) * resized_inputs - 1.0 + + def extract_features(self, preprocessed_inputs): + """Extract features from preprocessed inputs. + + Args: + preprocessed_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + + Returns: + feature_maps: a list of tensors where the ith tensor has shape + [batch, height_i, width_i, depth_i] + Raises: + ValueError if conv_defs is not provided or from_layer does not meet the + size requirement. + """ + + if not self._conv_defs: + raise ValueError('Must provide backbone conv defs.') + + if len(self._from_layer) != 2: + raise ValueError('SSD input feature names are not provided.') + + preprocessed_inputs = shape_utils.check_min_image_dim( + 33, preprocessed_inputs) + + feature_map_layout = { + 'from_layer': [ + self._from_layer[0], self._from_layer[1], '', '', '', '' + ], + 'layer_depth': [-1, -1, 512, 256, 256, 128], + 'use_depthwise': self._use_depthwise, + 'use_explicit_padding': self._use_explicit_padding, + } + + with tf.variable_scope( + self._scope_name, reuse=self._reuse_weights) as scope: + with slim.arg_scope( + mobilenet_v3.training_scope(is_training=None, bn_decay=0.9997)), \ + slim.arg_scope( + [mobilenet.depth_multiplier], min_depth=self._min_depth): + with (slim.arg_scope(self._conv_hyperparams_fn()) + if self._override_base_feature_extractor_hyperparams else + context_manager.IdentityContextManager()): + _, image_features = mobilenet_v3.mobilenet_base( + ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), + conv_defs=self._conv_defs, + final_endpoint=self._from_layer[1], + depth_multiplier=self._depth_multiplier, + use_explicit_padding=self._use_explicit_padding, + scope=scope) + with slim.arg_scope(self._conv_hyperparams_fn()): + feature_maps = feature_map_generators.multi_resolution_feature_maps( + feature_map_layout=feature_map_layout, + depth_multiplier=self._depth_multiplier, + min_depth=self._min_depth, + insert_1x1_conv=True, + image_features=image_features) + + return feature_maps.values() + + +class SSDMobileNetV3LargeFeatureExtractor(SSDMobileNetV3FeatureExtractorBase): + """Mobilenet V3-Large feature extractor.""" + + def __init__(self, + is_training, + depth_multiplier, + min_depth, + pad_to_multiple, + conv_hyperparams_fn, + reuse_weights=None, + use_explicit_padding=False, + use_depthwise=False, + 
override_base_feature_extractor_hyperparams=False, + scope_name='MobilenetV3'): + super(SSDMobileNetV3LargeFeatureExtractor, self).__init__( + conv_defs=mobilenet_v3.V3_LARGE_DETECTION, + from_layer=['layer_14/expansion_output', 'layer_17'], + is_training=is_training, + depth_multiplier=depth_multiplier, + min_depth=min_depth, + pad_to_multiple=pad_to_multiple, + conv_hyperparams_fn=conv_hyperparams_fn, + reuse_weights=reuse_weights, + use_explicit_padding=use_explicit_padding, + use_depthwise=use_depthwise, + override_base_feature_extractor_hyperparams=override_base_feature_extractor_hyperparams, + scope_name=scope_name + ) + + +class SSDMobileNetV3SmallFeatureExtractor(SSDMobileNetV3FeatureExtractorBase): + """Mobilenet V3-Small feature extractor.""" + + def __init__(self, + is_training, + depth_multiplier, + min_depth, + pad_to_multiple, + conv_hyperparams_fn, + reuse_weights=None, + use_explicit_padding=False, + use_depthwise=False, + override_base_feature_extractor_hyperparams=False, + scope_name='MobilenetV3'): + super(SSDMobileNetV3SmallFeatureExtractor, self).__init__( + conv_defs=mobilenet_v3.V3_SMALL_DETECTION, + from_layer=['layer_10/expansion_output', 'layer_13'], + is_training=is_training, + depth_multiplier=depth_multiplier, + min_depth=min_depth, + pad_to_multiple=pad_to_multiple, + conv_hyperparams_fn=conv_hyperparams_fn, + reuse_weights=reuse_weights, + use_explicit_padding=use_explicit_padding, + use_depthwise=use_depthwise, + override_base_feature_extractor_hyperparams=override_base_feature_extractor_hyperparams, + scope_name=scope_name + ) diff --git a/models/ssd_mobilenet_v3_feature_extractor_test.py b/models/ssd_mobilenet_v3_feature_extractor_test.py new file mode 100644 index 0000000..6ddde4b --- /dev/null +++ b/models/ssd_mobilenet_v3_feature_extractor_test.py @@ -0,0 +1,106 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for ssd_mobilenet_v3_feature_extractor.""" + +import tensorflow as tf +from tensorflow.contrib import slim as contrib_slim + +from object_detection.models import ssd_mobilenet_v3_feature_extractor +from object_detection.models import ssd_mobilenet_v3_feature_extractor_testbase + + +slim = contrib_slim + + +class SsdMobilenetV3LargeFeatureExtractorTest( + ssd_mobilenet_v3_feature_extractor_testbase + ._SsdMobilenetV3FeatureExtractorTestBase): + + def _get_input_sizes(self): + """Return first two input feature map sizes.""" + return [672, 480] + + def _create_feature_extractor(self, + depth_multiplier, + pad_to_multiple, + use_explicit_padding=False, + use_keras=False): + """Constructs a new Mobilenet V3-Large feature extractor. + + Args: + depth_multiplier: float depth multiplier for feature extractor + pad_to_multiple: the nearest multiple to zero pad the input height and + width dimensions to. 
+ use_explicit_padding: use 'VALID' padding for convolutions, but prepad + inputs so that the output dimensions are the same as if 'SAME' padding + were used. + use_keras: if True builds a keras-based feature extractor, if False builds + a slim-based one. + + Returns: + an ssd_meta_arch.SSDFeatureExtractor object. + """ + min_depth = 32 + return ( + ssd_mobilenet_v3_feature_extractor.SSDMobileNetV3LargeFeatureExtractor( + False, + depth_multiplier, + min_depth, + pad_to_multiple, + self.conv_hyperparams_fn, + use_explicit_padding=use_explicit_padding)) + + +class SsdMobilenetV3SmallFeatureExtractorTest( + ssd_mobilenet_v3_feature_extractor_testbase + ._SsdMobilenetV3FeatureExtractorTestBase): + + def _get_input_sizes(self): + """Return first two input feature map sizes.""" + return [288, 288] + + def _create_feature_extractor(self, + depth_multiplier, + pad_to_multiple, + use_explicit_padding=False, + use_keras=False): + """Constructs a new Mobilenet V3-Small feature extractor. + + Args: + depth_multiplier: float depth multiplier for feature extractor + pad_to_multiple: the nearest multiple to zero pad the input height and + width dimensions to. + use_explicit_padding: use 'VALID' padding for convolutions, but prepad + inputs so that the output dimensions are the same as if 'SAME' padding + were used. + use_keras: if True builds a keras-based feature extractor, if False builds + a slim-based one. + + Returns: + an ssd_meta_arch.SSDFeatureExtractor object. + """ + min_depth = 32 + return ( + ssd_mobilenet_v3_feature_extractor.SSDMobileNetV3SmallFeatureExtractor( + False, + depth_multiplier, + min_depth, + pad_to_multiple, + self.conv_hyperparams_fn, + use_explicit_padding=use_explicit_padding)) + + +if __name__ == '__main__': + tf.test.main() diff --git a/models/ssd_mobilenet_v3_feature_extractor_testbase.py b/models/ssd_mobilenet_v3_feature_extractor_testbase.py new file mode 100644 index 0000000..d2d0e09 --- /dev/null +++ b/models/ssd_mobilenet_v3_feature_extractor_testbase.py @@ -0,0 +1,116 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Base test class for ssd_mobilenet_v3_feature_extractor.""" + +import abc + +import numpy as np +import tensorflow as tf +from tensorflow.contrib import slim as contrib_slim + +from object_detection.models import ssd_feature_extractor_test + + +slim = contrib_slim + + +class _SsdMobilenetV3FeatureExtractorTestBase( + ssd_feature_extractor_test.SsdFeatureExtractorTestBase): + """Base class for MobilenetV3 tests.""" + + @abc.abstractmethod + def _get_input_sizes(self): + """Return feature map sizes for the two inputs to SSD head.""" + pass + + def test_extract_features_returns_correct_shapes_128(self): + image_height = 128 + image_width = 128 + depth_multiplier = 1.0 + pad_to_multiple = 1 + input_feature_sizes = self._get_input_sizes() + expected_feature_map_shape = [(2, 8, 8, input_feature_sizes[0]), + (2, 4, 4, input_feature_sizes[1]), + (2, 2, 2, 512), (2, 1, 1, 256), (2, 1, 1, + 256), + (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shape( + 2, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_keras=False) + + def test_extract_features_returns_correct_shapes_299(self): + image_height = 299 + image_width = 299 + depth_multiplier = 1.0 + pad_to_multiple = 1 + input_feature_sizes = self._get_input_sizes() + expected_feature_map_shape = [(2, 19, 19, input_feature_sizes[0]), + (2, 10, 10, input_feature_sizes[1]), + (2, 5, 5, 512), (2, 3, 3, 256), (2, 2, 2, + 256), + (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shape( + 2, + image_height, + image_width, + depth_multiplier, + pad_to_multiple, + expected_feature_map_shape, + use_keras=False) + + def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self): + image_height = 299 + image_width = 299 + depth_multiplier = 1.0 + pad_to_multiple = 32 + input_feature_sizes = self._get_input_sizes() + expected_feature_map_shape = [(2, 20, 20, input_feature_sizes[0]), + (2, 10, 10, input_feature_sizes[1]), + (2, 5, 5, 512), (2, 3, 3, 256), (2, 2, 2, + 256), + (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape) + + def test_preprocess_returns_correct_value_range(self): + image_height = 128 + image_width = 128 + depth_multiplier = 1 + pad_to_multiple = 1 + test_image = np.random.rand(4, image_height, image_width, 3) + feature_extractor = self._create_feature_extractor( + depth_multiplier, pad_to_multiple, use_keras=False) + preprocessed_image = feature_extractor.preprocess(test_image) + self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0))) + + def test_has_fused_batchnorm(self): + image_height = 40 + image_width = 40 + depth_multiplier = 1 + pad_to_multiple = 1 + image_placeholder = tf.placeholder(tf.float32, + [1, image_height, image_width, 3]) + feature_extractor = self._create_feature_extractor( + depth_multiplier, pad_to_multiple, use_keras=False) + preprocessed_image = feature_extractor.preprocess(image_placeholder) + _ = feature_extractor.extract_features(preprocessed_image) + self.assertTrue(any('FusedBatchNorm' in op.type + for op in tf.get_default_graph().get_operations())) diff --git a/models/ssd_pnasnet_feature_extractor.py b/models/ssd_pnasnet_feature_extractor.py new file mode 100644 index 0000000..d0475b2 --- /dev/null +++ b/models/ssd_pnasnet_feature_extractor.py @@ -0,0 +1,180 @@ +# Copyright 2017 The TensorFlow Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""SSDFeatureExtractor for PNASNet features. + +Based on PNASNet ImageNet model: https://arxiv.org/abs/1712.00559 +""" + +import tensorflow as tf +from tensorflow.contrib import slim as contrib_slim + +from object_detection.meta_architectures import ssd_meta_arch +from object_detection.models import feature_map_generators +from object_detection.utils import context_manager +from object_detection.utils import ops +from object_detection.utils import variables_helper +from nets.nasnet import pnasnet + +slim = contrib_slim + + +def pnasnet_large_arg_scope_for_detection(is_batch_norm_training=False): + """Defines the default arg scope for the PNASNet Large for object detection. + + This provides a small edit to switch batch norm training on and off. + + Args: + is_batch_norm_training: Boolean indicating whether to train with batch norm. + Default is False. + + Returns: + An `arg_scope` to use for the PNASNet Large Model. + """ + imagenet_scope = pnasnet.pnasnet_large_arg_scope() + with slim.arg_scope(imagenet_scope): + with slim.arg_scope([slim.batch_norm], + is_training=is_batch_norm_training) as sc: + return sc + + +class SSDPNASNetFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): + """SSD Feature Extractor using PNASNet features.""" + + def __init__(self, + is_training, + depth_multiplier, + min_depth, + pad_to_multiple, + conv_hyperparams_fn, + reuse_weights=None, + use_explicit_padding=False, + use_depthwise=False, + num_layers=6, + override_base_feature_extractor_hyperparams=False): + """PNASNet Feature Extractor for SSD Models. + + Args: + is_training: whether the network is in training mode. + depth_multiplier: float depth multiplier for feature extractor. + min_depth: minimum feature extractor depth. + pad_to_multiple: the nearest multiple to zero pad the input height and + width dimensions to. + conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d + and separable_conv2d ops in the layers that are added on top of the + base feature extractor. + reuse_weights: Whether to reuse variables. Default is None. + use_explicit_padding: Use 'VALID' padding for convolutions, but prepad + inputs so that the output dimensions are the same as if 'SAME' padding + were used. + use_depthwise: Whether to use depthwise convolutions. + num_layers: Number of SSD layers. + override_base_feature_extractor_hyperparams: Whether to override + hyperparameters of the base feature extractor with the one from + `conv_hyperparams_fn`. 
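+
+    For illustration only (this mirrors the construction in the unit test
+    later in this patch; `conv_hyperparams_fn` is assumed to be built
+    elsewhere, e.g. by hyperparams_builder):
+
+      feature_extractor = SSDPNASNetFeatureExtractor(
+          is_training=True, depth_multiplier=1.0, min_depth=32,
+          pad_to_multiple=1, conv_hyperparams_fn=conv_hyperparams_fn)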
+ """ + super(SSDPNASNetFeatureExtractor, self).__init__( + is_training=is_training, + depth_multiplier=depth_multiplier, + min_depth=min_depth, + pad_to_multiple=pad_to_multiple, + conv_hyperparams_fn=conv_hyperparams_fn, + reuse_weights=reuse_weights, + use_explicit_padding=use_explicit_padding, + use_depthwise=use_depthwise, + num_layers=num_layers, + override_base_feature_extractor_hyperparams= + override_base_feature_extractor_hyperparams) + + def preprocess(self, resized_inputs): + """SSD preprocessing. + + Maps pixel values to the range [-1, 1]. + + Args: + resized_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + + Returns: + preprocessed_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + """ + return (2.0 / 255.0) * resized_inputs - 1.0 + + def extract_features(self, preprocessed_inputs): + """Extract features from preprocessed inputs. + + Args: + preprocessed_inputs: a [batch, height, width, channels] float tensor + representing a batch of images. + + Returns: + feature_maps: a list of tensors where the ith tensor has shape + [batch, height_i, width_i, depth_i] + """ + + feature_map_layout = { + 'from_layer': ['Cell_7', 'Cell_11', '', '', '', ''][:self._num_layers], + 'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers], + 'use_explicit_padding': self._use_explicit_padding, + 'use_depthwise': self._use_depthwise, + } + + with slim.arg_scope( + pnasnet_large_arg_scope_for_detection( + is_batch_norm_training=self._is_training)): + with slim.arg_scope([slim.conv2d, slim.batch_norm, slim.separable_conv2d], + reuse=self._reuse_weights): + with (slim.arg_scope(self._conv_hyperparams_fn()) + if self._override_base_feature_extractor_hyperparams else + context_manager.IdentityContextManager()): + _, image_features = pnasnet.build_pnasnet_large( + ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), + num_classes=None, + is_training=self._is_training, + final_endpoint='Cell_11') + with tf.variable_scope('SSD_feature_maps', reuse=self._reuse_weights): + with slim.arg_scope(self._conv_hyperparams_fn()): + feature_maps = feature_map_generators.multi_resolution_feature_maps( + feature_map_layout=feature_map_layout, + depth_multiplier=self._depth_multiplier, + min_depth=self._min_depth, + insert_1x1_conv=True, + image_features=image_features) + + return feature_maps.values() + + def restore_from_classification_checkpoint_fn(self, feature_extractor_scope): + """Returns a map of variables to load from a foreign checkpoint. + + Note that this overrides the default implementation in + ssd_meta_arch.SSDFeatureExtractor which does not work for PNASNet + checkpoints. + + Args: + feature_extractor_scope: A scope name for the first stage feature + extractor. + + Returns: + A dict mapping variable names (to load from a checkpoint) to variables in + the model graph. + """ + variables_to_restore = {} + for variable in variables_helper.get_global_variables_safely(): + if variable.op.name.startswith(feature_extractor_scope): + var_name = variable.op.name.replace(feature_extractor_scope + '/', '') + var_name += '/ExponentialMovingAverage' + variables_to_restore[var_name] = variable + return variables_to_restore diff --git a/models/ssd_pnasnet_feature_extractor_test.py b/models/ssd_pnasnet_feature_extractor_test.py new file mode 100644 index 0000000..9dfafe5 --- /dev/null +++ b/models/ssd_pnasnet_feature_extractor_test.py @@ -0,0 +1,108 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for ssd_pnas_feature_extractor.""" +import numpy as np +import tensorflow as tf +from tensorflow.contrib import slim as contrib_slim + +from object_detection.models import ssd_feature_extractor_test +from object_detection.models import ssd_pnasnet_feature_extractor + +slim = contrib_slim + + +class SsdPnasNetFeatureExtractorTest( + ssd_feature_extractor_test.SsdFeatureExtractorTestBase): + + def _create_feature_extractor(self, + depth_multiplier, + pad_to_multiple, + use_explicit_padding=False, + num_layers=6, + is_training=True): + """Constructs a new feature extractor. + + Args: + depth_multiplier: float depth multiplier for feature extractor + pad_to_multiple: the nearest multiple to zero pad the input height and + width dimensions to. + use_explicit_padding: Use 'VALID' padding for convolutions, but prepad + inputs so that the output dimensions are the same as if 'SAME' padding + were used. + num_layers: number of SSD layers. + is_training: whether the network is in training mode. + Returns: + an ssd_meta_arch.SSDFeatureExtractor object. + """ + min_depth = 32 + return ssd_pnasnet_feature_extractor.SSDPNASNetFeatureExtractor( + is_training, + depth_multiplier, + min_depth, + pad_to_multiple, + self.conv_hyperparams_fn, + use_explicit_padding=use_explicit_padding, + num_layers=num_layers) + + def test_extract_features_returns_correct_shapes_128(self): + image_height = 128 + image_width = 128 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 8, 8, 2160), (2, 4, 4, 4320), + (2, 2, 2, 512), (2, 1, 1, 256), + (2, 1, 1, 256), (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape) + + def test_extract_features_returns_correct_shapes_299(self): + image_height = 299 + image_width = 299 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 19, 19, 2160), (2, 10, 10, 4320), + (2, 5, 5, 512), (2, 3, 3, 256), + (2, 2, 2, 256), (2, 1, 1, 128)] + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape) + + def test_preprocess_returns_correct_value_range(self): + image_height = 128 + image_width = 128 + depth_multiplier = 1 + pad_to_multiple = 1 + test_image = np.random.rand(2, image_height, image_width, 3) + feature_extractor = self._create_feature_extractor(depth_multiplier, + pad_to_multiple) + preprocessed_image = feature_extractor.preprocess(test_image) + self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0))) + + def test_extract_features_with_fewer_layers(self): + image_height = 128 + image_width = 128 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 8, 8, 2160), (2, 4, 4, 4320), + (2, 2, 2, 512), (2, 1, 1, 256)] + 
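# num_layers=4 keeps only the first four entries of the SSD feature map
+    # layout: the PNASNet-Large 'Cell_7' (2160 channels) and 'Cell_11' (4320
+    # channels) endpoints, plus the first two extra layers (512 and 256 deep).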
self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, num_layers=4)
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/models/ssd_resnet_v1_fpn_feature_extractor.py b/models/ssd_resnet_v1_fpn_feature_extractor.py
new file mode 100644
index 0000000..4e7618f
--- /dev/null
+++ b/models/ssd_resnet_v1_fpn_feature_extractor.py
@@ -0,0 +1,387 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""SSD Feature Pyramid Network (FPN) feature extractors based on Resnet v1.
+
+See https://arxiv.org/abs/1708.02002 for details.
+"""
+
+import tensorflow as tf
+from tensorflow.contrib import slim as contrib_slim
+
+from object_detection.meta_architectures import ssd_meta_arch
+from object_detection.models import feature_map_generators
+from object_detection.utils import context_manager
+from object_detection.utils import ops
+from object_detection.utils import shape_utils
+from nets import resnet_v1
+
+slim = contrib_slim
+
+
+class SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
+  """SSD FPN feature extractor based on Resnet v1 architecture."""
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams_fn,
+               resnet_base_fn,
+               resnet_scope_name,
+               fpn_scope_name,
+               fpn_min_level=3,
+               fpn_max_level=7,
+               additional_layer_depth=256,
+               reuse_weights=None,
+               use_explicit_padding=False,
+               use_depthwise=False,
+               use_native_resize_op=False,
+               override_base_feature_extractor_hyperparams=False):
+    """SSD FPN feature extractor based on Resnet v1 architecture.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: float depth multiplier for feature extractor.
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
+        and separable_conv2d ops in the layers that are added on top of the
+        base feature extractor.
+      resnet_base_fn: base resnet network to use.
+      resnet_scope_name: scope name under which to construct resnet.
+      fpn_scope_name: scope name under which to construct the feature pyramid
+        network.
+      fpn_min_level: the highest resolution feature map to use in FPN. The
+        valid values are {2, 3, 4, 5} which map to Resnet blocks {1, 2, 3, 4}
+        respectively.
+      fpn_max_level: the smallest resolution feature map to construct or use in
+        FPN. FPN construction uses feature maps starting from fpn_min_level up
+        to fpn_max_level. In the case that there are not enough feature maps
+        in the backbone network, additional feature maps are created by
+        applying stride 2 convolutions until we get the desired number of FPN
+        levels.
+      additional_layer_depth: additional feature map layer channel depth.
+      reuse_weights: Whether to reuse variables. Default is None.
+      use_explicit_padding: Whether to use explicit padding when extracting
+        features. Default is False. UNUSED currently.
+      use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
+      use_native_resize_op: Whether to use tf.image.resize_nearest_neighbor
+        to do upsampling in FPN. Default is False.
+      override_base_feature_extractor_hyperparams: Whether to override
+        hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams_fn`.
+
+    Raises:
+      ValueError: if `use_explicit_padding` is True, which is not supported by
+        this feature extractor.
+    """
+    super(SSDResnetV1FpnFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams_fn=conv_hyperparams_fn,
+        reuse_weights=reuse_weights,
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams)
+    if self._use_explicit_padding:
+      raise ValueError('Explicit padding is not a valid option.')
+    self._resnet_base_fn = resnet_base_fn
+    self._resnet_scope_name = resnet_scope_name
+    self._fpn_scope_name = fpn_scope_name
+    self._fpn_min_level = fpn_min_level
+    self._fpn_max_level = fpn_max_level
+    self._additional_layer_depth = additional_layer_depth
+    self._use_native_resize_op = use_native_resize_op
+
+  def preprocess(self, resized_inputs):
+    """SSD preprocessing.
+
+    VGG style channel mean subtraction as described here:
+    https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md.
+    Note that if the number of channels is not equal to 3, the mean subtraction
+    will be skipped and the original resized_inputs will be returned.
+
+    Args:
+      resized_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+
+    Returns:
+      preprocessed_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+    """
+    if resized_inputs.shape.as_list()[3] == 3:
+      channel_means = [123.68, 116.779, 103.939]
+      return resized_inputs - [[channel_means]]
+    else:
+      return resized_inputs
+
+  def _filter_features(self, image_features):
+    # TODO(rathodv): Change resnet endpoint to strip scope prefixes instead
+    # of munging the scope here.
+    filtered_image_features = {}
+    for key, feature in image_features.items():
+      feature_name = key.split('/')[-1]
+      if feature_name in ['block1', 'block2', 'block3', 'block4']:
+        filtered_image_features[feature_name] = feature
+    return filtered_image_features
+
+  def extract_features(self, preprocessed_inputs):
+    """Extract features from preprocessed inputs.
+
+    Args:
+      preprocessed_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+ + Returns: + feature_maps: a list of tensors where the ith tensor has shape + [batch, height_i, width_i, depth_i] + """ + preprocessed_inputs = shape_utils.check_min_image_dim( + 129, preprocessed_inputs) + + with tf.variable_scope( + self._resnet_scope_name, reuse=self._reuse_weights) as scope: + with slim.arg_scope(resnet_v1.resnet_arg_scope()): + with (slim.arg_scope(self._conv_hyperparams_fn()) + if self._override_base_feature_extractor_hyperparams else + context_manager.IdentityContextManager()): + _, image_features = self._resnet_base_fn( + inputs=ops.pad_to_multiple(preprocessed_inputs, + self._pad_to_multiple), + num_classes=None, + is_training=None, + global_pool=False, + output_stride=None, + store_non_strided_activations=True, + min_base_depth=self._min_depth, + depth_multiplier=self._depth_multiplier, + scope=scope) + image_features = self._filter_features(image_features) + depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth) + with slim.arg_scope(self._conv_hyperparams_fn()): + with tf.variable_scope(self._fpn_scope_name, + reuse=self._reuse_weights): + base_fpn_max_level = min(self._fpn_max_level, 5) + feature_block_list = [] + for level in range(self._fpn_min_level, base_fpn_max_level + 1): + feature_block_list.append('block{}'.format(level - 1)) + fpn_features = feature_map_generators.fpn_top_down_feature_maps( + [(key, image_features[key]) for key in feature_block_list], + depth=depth_fn(self._additional_layer_depth), + use_native_resize_op=self._use_native_resize_op) + feature_maps = [] + for level in range(self._fpn_min_level, base_fpn_max_level + 1): + feature_maps.append( + fpn_features['top_down_block{}'.format(level - 1)]) + last_feature_map = fpn_features['top_down_block{}'.format( + base_fpn_max_level - 1)] + # Construct coarse features + for i in range(base_fpn_max_level, self._fpn_max_level): + last_feature_map = slim.conv2d( + last_feature_map, + num_outputs=depth_fn(self._additional_layer_depth), + kernel_size=[3, 3], + stride=2, + padding='SAME', + scope='bottom_up_block{}'.format(i)) + feature_maps.append(last_feature_map) + return feature_maps + + +class SSDResnet50V1FpnFeatureExtractor(SSDResnetV1FpnFeatureExtractor): + """SSD Resnet50 V1 FPN feature extractor.""" + + def __init__(self, + is_training, + depth_multiplier, + min_depth, + pad_to_multiple, + conv_hyperparams_fn, + fpn_min_level=3, + fpn_max_level=7, + additional_layer_depth=256, + reuse_weights=None, + use_explicit_padding=False, + use_depthwise=False, + use_native_resize_op=False, + override_base_feature_extractor_hyperparams=False): + """SSD Resnet50 V1 FPN feature extractor based on Resnet v1 architecture. + + Args: + is_training: whether the network is in training mode. + depth_multiplier: float depth multiplier for feature extractor. + min_depth: minimum feature extractor depth. + pad_to_multiple: the nearest multiple to zero pad the input height and + width dimensions to. + conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d + and separable_conv2d ops in the layers that are added on top of the + base feature extractor. + fpn_min_level: the minimum level in feature pyramid networks. + fpn_max_level: the maximum level in feature pyramid networks. + additional_layer_depth: additional feature map layer channel depth. + reuse_weights: Whether to reuse variables. Default is None. + use_explicit_padding: Whether to use explicit padding when extracting + features. Default is False. UNUSED currently. 
+      use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
+      use_native_resize_op: Whether to use tf.image.resize_nearest_neighbor
+        to do upsampling in FPN. Default is False.
+      override_base_feature_extractor_hyperparams: Whether to override
+        hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams_fn`.
+    """
+    super(SSDResnet50V1FpnFeatureExtractor, self).__init__(
+        is_training,
+        depth_multiplier,
+        min_depth,
+        pad_to_multiple,
+        conv_hyperparams_fn,
+        resnet_v1.resnet_v1_50,
+        'resnet_v1_50',
+        'fpn',
+        fpn_min_level,
+        fpn_max_level,
+        additional_layer_depth,
+        reuse_weights=reuse_weights,
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        use_native_resize_op=use_native_resize_op,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams)
+
+
+class SSDResnet101V1FpnFeatureExtractor(SSDResnetV1FpnFeatureExtractor):
+  """SSD Resnet101 V1 FPN feature extractor."""
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams_fn,
+               fpn_min_level=3,
+               fpn_max_level=7,
+               additional_layer_depth=256,
+               reuse_weights=None,
+               use_explicit_padding=False,
+               use_depthwise=False,
+               use_native_resize_op=False,
+               override_base_feature_extractor_hyperparams=False):
+    """SSD Resnet101 V1 FPN feature extractor based on Resnet v1 architecture.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: float depth multiplier for feature extractor.
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
+        and separable_conv2d ops in the layers that are added on top of the
+        base feature extractor.
+      fpn_min_level: the minimum level in feature pyramid networks.
+      fpn_max_level: the maximum level in feature pyramid networks.
+      additional_layer_depth: additional feature map layer channel depth.
+      reuse_weights: Whether to reuse variables. Default is None.
+      use_explicit_padding: Whether to use explicit padding when extracting
+        features. Default is False. UNUSED currently.
+      use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
+      use_native_resize_op: Whether to use tf.image.resize_nearest_neighbor
+        to do upsampling in FPN. Default is False.
+      override_base_feature_extractor_hyperparams: Whether to override
+        hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams_fn`.
+ """ + super(SSDResnet101V1FpnFeatureExtractor, self).__init__( + is_training, + depth_multiplier, + min_depth, + pad_to_multiple, + conv_hyperparams_fn, + resnet_v1.resnet_v1_101, + 'resnet_v1_101', + 'fpn', + fpn_min_level, + fpn_max_level, + additional_layer_depth, + reuse_weights=reuse_weights, + use_explicit_padding=use_explicit_padding, + use_depthwise=use_depthwise, + use_native_resize_op=use_native_resize_op, + override_base_feature_extractor_hyperparams= + override_base_feature_extractor_hyperparams) + + +class SSDResnet152V1FpnFeatureExtractor(SSDResnetV1FpnFeatureExtractor): + """SSD Resnet152 V1 FPN feature extractor.""" + + def __init__(self, + is_training, + depth_multiplier, + min_depth, + pad_to_multiple, + conv_hyperparams_fn, + fpn_min_level=3, + fpn_max_level=7, + additional_layer_depth=256, + reuse_weights=None, + use_explicit_padding=False, + use_depthwise=False, + use_native_resize_op=False, + override_base_feature_extractor_hyperparams=False): + """SSD Resnet152 V1 FPN feature extractor based on Resnet v1 architecture. + + Args: + is_training: whether the network is in training mode. + depth_multiplier: float depth multiplier for feature extractor. + min_depth: minimum feature extractor depth. + pad_to_multiple: the nearest multiple to zero pad the input height and + width dimensions to. + conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d + and separable_conv2d ops in the layers that are added on top of the + base feature extractor. + fpn_min_level: the minimum level in feature pyramid networks. + fpn_max_level: the maximum level in feature pyramid networks. + additional_layer_depth: additional feature map layer channel depth. + reuse_weights: Whether to reuse variables. Default is None. + use_explicit_padding: Whether to use explicit padding when extracting + features. Default is False. UNUSED currently. + use_depthwise: Whether to use depthwise convolutions. UNUSED currently. + use_native_resize_op: Whether to use tf.image.nearest_neighbor_resize + to do upsampling in FPN. Default is false. + override_base_feature_extractor_hyperparams: Whether to override + hyperparameters of the base feature extractor with the one from + `conv_hyperparams_fn`. + """ + super(SSDResnet152V1FpnFeatureExtractor, self).__init__( + is_training, + depth_multiplier, + min_depth, + pad_to_multiple, + conv_hyperparams_fn, + resnet_v1.resnet_v1_152, + 'resnet_v1_152', + 'fpn', + fpn_min_level, + fpn_max_level, + additional_layer_depth, + reuse_weights=reuse_weights, + use_explicit_padding=use_explicit_padding, + use_depthwise=use_depthwise, + use_native_resize_op=use_native_resize_op, + override_base_feature_extractor_hyperparams= + override_base_feature_extractor_hyperparams) diff --git a/models/ssd_resnet_v1_fpn_feature_extractor_testbase.py b/models/ssd_resnet_v1_fpn_feature_extractor_testbase.py new file mode 100644 index 0000000..65d0d96 --- /dev/null +++ b/models/ssd_resnet_v1_fpn_feature_extractor_testbase.py @@ -0,0 +1,184 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for ssd resnet v1 FPN feature extractors."""
+import abc
+from absl.testing import parameterized
+import numpy as np
+from six.moves import zip  # pylint: disable=redefined-builtin
+import tensorflow as tf
+
+from object_detection.models import ssd_feature_extractor_test
+
+
+@parameterized.parameters(
+    {'use_keras': False},
+    {'use_keras': True},
+)
+class SSDResnetFPNFeatureExtractorTestBase(
+    ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
+  """Helper test class for SSD Resnet v1 FPN feature extractors."""
+
+  @abc.abstractmethod
+  def _resnet_scope_name(self, use_keras):
+    pass
+
+  @abc.abstractmethod
+  def _fpn_scope_name(self):
+    return 'fpn'
+
+  @abc.abstractmethod
+  def _create_feature_extractor(self,
+                                depth_multiplier,
+                                pad_to_multiple,
+                                use_explicit_padding=False,
+                                min_depth=32,
+                                use_keras=False):
+    pass
+
+  def test_extract_features_returns_correct_shapes_256(self, use_keras):
+    image_height = 256
+    image_width = 256
+    depth_multiplier = 1.0
+    pad_to_multiple = 1
+    expected_feature_map_shape = [(2, 32, 32, 256), (2, 16, 16, 256),
+                                  (2, 8, 8, 256), (2, 4, 4, 256),
+                                  (2, 2, 2, 256)]
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_keras=use_keras)
+
+  def test_extract_features_returns_correct_shapes_with_dynamic_inputs(
+      self, use_keras):
+    image_height = 256
+    image_width = 256
+    depth_multiplier = 1.0
+    pad_to_multiple = 1
+    expected_feature_map_shape = [(2, 32, 32, 256), (2, 16, 16, 256),
+                                  (2, 8, 8, 256), (2, 4, 4, 256),
+                                  (2, 2, 2, 256)]
+    self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_keras=use_keras)
+
+  def test_extract_features_returns_correct_shapes_with_depth_multiplier(
+      self, use_keras):
+    image_height = 256
+    image_width = 256
+    depth_multiplier = 0.5
+    expected_num_channels = int(256 * depth_multiplier)
+    pad_to_multiple = 1
+    expected_feature_map_shape = [(2, 32, 32, expected_num_channels),
+                                  (2, 16, 16, expected_num_channels),
+                                  (2, 8, 8, expected_num_channels),
+                                  (2, 4, 4, expected_num_channels),
+                                  (2, 2, 2, expected_num_channels)]
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_keras=use_keras)
+
+  def test_extract_features_returns_correct_shapes_with_min_depth(
+      self, use_keras):
+    image_height = 256
+    image_width = 256
+    depth_multiplier = 1.0
+    pad_to_multiple = 1
+    min_depth = 320
+    expected_feature_map_shape = [(2, 32, 32, min_depth),
+                                  (2, 16, 16, min_depth),
+                                  (2, 8, 8, min_depth),
+                                  (2, 4, 4, min_depth),
+                                  (2, 2, 2, min_depth)]
+
+    def graph_fn(image_tensor):
+      feature_extractor = self._create_feature_extractor(
+          depth_multiplier, pad_to_multiple, min_depth=min_depth,
+          use_keras=use_keras)
+      if use_keras:
+        return feature_extractor(image_tensor)
+      return feature_extractor.extract_features(image_tensor)
+
+    image_tensor = np.random.rand(2, image_height, image_width,
+                                  3).astype(np.float32)
+    feature_maps = self.execute(graph_fn, [image_tensor])
+    for feature_map, expected_shape in zip(
+        feature_maps, expected_feature_map_shape):
+      self.assertAllEqual(feature_map.shape, expected_shape)
+
+  def test_extract_features_returns_correct_shapes_with_pad_to_multiple( +
self, use_keras): + image_height = 254 + image_width = 254 + depth_multiplier = 1.0 + pad_to_multiple = 32 + expected_feature_map_shape = [(2, 32, 32, 256), (2, 16, 16, 256), + (2, 8, 8, 256), (2, 4, 4, 256), + (2, 2, 2, 256)] + + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape, use_keras=use_keras) + + def test_extract_features_raises_error_with_invalid_image_size( + self, use_keras): + image_height = 32 + image_width = 32 + depth_multiplier = 1.0 + pad_to_multiple = 1 + self.check_extract_features_raises_error_with_invalid_image_size( + image_height, image_width, depth_multiplier, pad_to_multiple, + use_keras=use_keras) + + def test_preprocess_returns_correct_value_range(self, use_keras): + image_height = 128 + image_width = 128 + depth_multiplier = 1 + pad_to_multiple = 1 + test_image = tf.constant(np.random.rand(4, image_height, image_width, 3)) + feature_extractor = self._create_feature_extractor(depth_multiplier, + pad_to_multiple, + use_keras=use_keras) + preprocessed_image = feature_extractor.preprocess(test_image) + with self.test_session() as sess: + test_image_out, preprocessed_image_out = sess.run( + [test_image, preprocessed_image]) + self.assertAllClose(preprocessed_image_out, + test_image_out - [[123.68, 116.779, 103.939]]) + + def test_variables_only_created_in_scope(self, use_keras): + depth_multiplier = 1 + pad_to_multiple = 1 + scope_name = self._resnet_scope_name(use_keras) + self.check_feature_extractor_variables_under_scope( + depth_multiplier, + pad_to_multiple, + scope_name, + use_keras=use_keras) + + def test_variable_count(self, use_keras): + depth_multiplier = 1 + pad_to_multiple = 1 + variables = self.get_feature_extractor_variables( + depth_multiplier, + pad_to_multiple, + use_keras=use_keras) + # The number of expected variables in resnet_v1_50, resnet_v1_101, + # and resnet_v1_152 is 279, 534, and 789 respectively. + expected_variables_len = 279 + scope_name = self._resnet_scope_name(use_keras) + if scope_name in ('ResNet101V1_FPN', 'resnet_v1_101'): + expected_variables_len = 534 + elif scope_name in ('ResNet152V1_FPN', 'resnet_v1_152'): + expected_variables_len = 789 + self.assertEqual(len(variables), expected_variables_len) diff --git a/models/ssd_resnet_v1_ppn_feature_extractor_test.py b/models/ssd_resnet_v1_ppn_feature_extractor_test.py new file mode 100644 index 0000000..c47cd12 --- /dev/null +++ b/models/ssd_resnet_v1_ppn_feature_extractor_test.py @@ -0,0 +1,88 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for ssd resnet v1 feature extractors.""" +import tensorflow as tf + +from object_detection.models import ssd_resnet_v1_ppn_feature_extractor +from object_detection.models import ssd_resnet_v1_ppn_feature_extractor_testbase + + +class SSDResnet50V1PpnFeatureExtractorTest( + ssd_resnet_v1_ppn_feature_extractor_testbase. + SSDResnetPpnFeatureExtractorTestBase): + """SSDResnet50v1 feature extractor test.""" + + def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, + use_explicit_padding=False): + min_depth = 32 + is_training = True + return ssd_resnet_v1_ppn_feature_extractor.SSDResnet50V1PpnFeatureExtractor( + is_training, + depth_multiplier, + min_depth, + pad_to_multiple, + self.conv_hyperparams_fn, + use_explicit_padding=use_explicit_padding) + + def _scope_name(self): + return 'resnet_v1_50' + + +class SSDResnet101V1PpnFeatureExtractorTest( + ssd_resnet_v1_ppn_feature_extractor_testbase. + SSDResnetPpnFeatureExtractorTestBase): + """SSDResnet101v1 feature extractor test.""" + + def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, + use_explicit_padding=False): + min_depth = 32 + is_training = True + return ( + ssd_resnet_v1_ppn_feature_extractor.SSDResnet101V1PpnFeatureExtractor( + is_training, + depth_multiplier, + min_depth, + pad_to_multiple, + self.conv_hyperparams_fn, + use_explicit_padding=use_explicit_padding)) + + def _scope_name(self): + return 'resnet_v1_101' + + +class SSDResnet152V1PpnFeatureExtractorTest( + ssd_resnet_v1_ppn_feature_extractor_testbase. + SSDResnetPpnFeatureExtractorTestBase): + """SSDResnet152v1 feature extractor test.""" + + def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, + use_explicit_padding=False): + min_depth = 32 + is_training = True + return ( + ssd_resnet_v1_ppn_feature_extractor.SSDResnet152V1PpnFeatureExtractor( + is_training, + depth_multiplier, + min_depth, + pad_to_multiple, + self.conv_hyperparams_fn, + use_explicit_padding=use_explicit_padding)) + + def _scope_name(self): + return 'resnet_v1_152' + + +if __name__ == '__main__': + tf.test.main() diff --git a/models/ssd_resnet_v1_ppn_feature_extractor_testbase.py b/models/ssd_resnet_v1_ppn_feature_extractor_testbase.py new file mode 100644 index 0000000..3857fc7 --- /dev/null +++ b/models/ssd_resnet_v1_ppn_feature_extractor_testbase.py @@ -0,0 +1,82 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for ssd resnet v1 feature extractors.""" +import abc +import numpy as np +import tensorflow as tf + +from object_detection.models import ssd_feature_extractor_test + + +class SSDResnetPpnFeatureExtractorTestBase( + ssd_feature_extractor_test.SsdFeatureExtractorTestBase): + """Helper test class for SSD Resnet PPN feature extractors.""" + + @abc.abstractmethod + def _scope_name(self): + pass + + def test_extract_features_returns_correct_shapes_289(self): + image_height = 289 + image_width = 289 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 19, 19, 1024), (2, 10, 10, 1024), + (2, 5, 5, 1024), (2, 3, 3, 1024), + (2, 2, 2, 1024), (2, 1, 1, 1024)] + self.check_extract_features_returns_correct_shape( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape) + + def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self): + image_height = 289 + image_width = 289 + depth_multiplier = 1.0 + pad_to_multiple = 1 + expected_feature_map_shape = [(2, 19, 19, 1024), (2, 10, 10, 1024), + (2, 5, 5, 1024), (2, 3, 3, 1024), + (2, 2, 2, 1024), (2, 1, 1, 1024)] + self.check_extract_features_returns_correct_shapes_with_dynamic_inputs( + 2, image_height, image_width, depth_multiplier, pad_to_multiple, + expected_feature_map_shape) + + def test_extract_features_raises_error_with_invalid_image_size(self): + image_height = 32 + image_width = 32 + depth_multiplier = 1.0 + pad_to_multiple = 1 + self.check_extract_features_raises_error_with_invalid_image_size( + image_height, image_width, depth_multiplier, pad_to_multiple) + + def test_preprocess_returns_correct_value_range(self): + image_height = 128 + image_width = 128 + depth_multiplier = 1 + pad_to_multiple = 1 + test_image = tf.constant(np.random.rand(4, image_height, image_width, 3)) + feature_extractor = self._create_feature_extractor(depth_multiplier, + pad_to_multiple) + preprocessed_image = feature_extractor.preprocess(test_image) + with self.test_session() as sess: + test_image_out, preprocessed_image_out = sess.run( + [test_image, preprocessed_image]) + self.assertAllClose(preprocessed_image_out, + test_image_out - [[123.68, 116.779, 103.939]]) + + def test_variables_only_created_in_scope(self): + depth_multiplier = 1 + pad_to_multiple = 1 + self.check_feature_extractor_variables_under_scope( + depth_multiplier, pad_to_multiple, self._scope_name()) diff --git a/predictors/convolutional_box_predictor.py b/predictors/convolutional_box_predictor.py new file mode 100644 index 0000000..827dfa9 --- /dev/null +++ b/predictors/convolutional_box_predictor.py @@ -0,0 +1,416 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Convolutional Box Predictors with and without weight sharing.""" +import functools +import tensorflow as tf +from object_detection.core import box_predictor +from object_detection.utils import shape_utils +from object_detection.utils import static_shape + +slim = tf.contrib.slim + +BOX_ENCODINGS = box_predictor.BOX_ENCODINGS +CLASS_PREDICTIONS_WITH_BACKGROUND = ( + box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND) +MASK_PREDICTIONS = box_predictor.MASK_PREDICTIONS + + +class _NoopVariableScope(object): + """A dummy class that does not push any scope.""" + + def __enter__(self): + return None + + def __exit__(self, exc_type, exc_value, traceback): + return False + + +class ConvolutionalBoxPredictor(box_predictor.BoxPredictor): + """Convolutional Box Predictor. + + Optionally add an intermediate 1x1 convolutional layer after features and + predict in parallel branches box_encodings and + class_predictions_with_background. + + Currently this box predictor assumes that predictions are "shared" across + classes --- that is each anchor makes box predictions which do not depend + on class. + """ + + def __init__(self, + is_training, + num_classes, + box_prediction_head, + class_prediction_head, + other_heads, + conv_hyperparams_fn, + num_layers_before_predictor, + min_depth, + max_depth): + """Constructor. + + Args: + is_training: Indicates whether the BoxPredictor is in training mode. + num_classes: number of classes. Note that num_classes *does not* + include the background category, so if groundtruth labels take values + in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the + assigned classification targets can range from {0,... K}). + box_prediction_head: The head that predicts the boxes. + class_prediction_head: The head that predicts the classes. + other_heads: A dictionary mapping head names to convolutional + head classes. + conv_hyperparams_fn: A function to generate tf-slim arg_scope with + hyperparameters for convolution ops. + num_layers_before_predictor: Number of the additional conv layers before + the predictor. + min_depth: Minimum feature depth prior to predicting box encodings + and class predictions. + max_depth: Maximum feature depth prior to predicting box encodings + and class predictions. If max_depth is set to 0, no additional + feature map will be inserted before location and class predictions. + + Raises: + ValueError: if min_depth > max_depth. + """ + super(ConvolutionalBoxPredictor, self).__init__(is_training, num_classes) + self._box_prediction_head = box_prediction_head + self._class_prediction_head = class_prediction_head + self._other_heads = other_heads + self._conv_hyperparams_fn = conv_hyperparams_fn + self._min_depth = min_depth + self._max_depth = max_depth + self._num_layers_before_predictor = num_layers_before_predictor + + @property + def num_classes(self): + return self._num_classes + + def _predict(self, image_features, num_predictions_per_location_list): + """Computes encoded object locations and corresponding confidences. + + Args: + image_features: A list of float tensors of shape [batch_size, height_i, + width_i, channels_i] containing features for a batch of images. + num_predictions_per_location_list: A list of integers representing the + number of box predictions to be made per spatial location for each + feature map. 
+ + Returns: + A dictionary containing: + box_encodings: A list of float tensors of shape + [batch_size, num_anchors_i, q, code_size] representing the location of + the objects, where q is 1 or the number of classes. Each entry in the + list corresponds to a feature map in the input `image_features` list. + class_predictions_with_background: A list of float tensors of shape + [batch_size, num_anchors_i, num_classes + 1] representing the class + predictions for the proposals. Each entry in the list corresponds to a + feature map in the input `image_features` list. + (optional) Predictions from other heads. + """ + predictions = { + BOX_ENCODINGS: [], + CLASS_PREDICTIONS_WITH_BACKGROUND: [], + } + for head_name in self._other_heads.keys(): + predictions[head_name] = [] + # TODO(rathodv): Come up with a better way to generate scope names + # in box predictor once we have time to retrain all models in the zoo. + # The following lines create scope names to be backwards compatible with the + # existing checkpoints. + box_predictor_scopes = [_NoopVariableScope()] + if len(image_features) > 1: + box_predictor_scopes = [ + tf.variable_scope('BoxPredictor_{}'.format(i)) + for i in range(len(image_features)) + ] + for (image_feature, + num_predictions_per_location, box_predictor_scope) in zip( + image_features, num_predictions_per_location_list, + box_predictor_scopes): + net = image_feature + with box_predictor_scope: + with slim.arg_scope(self._conv_hyperparams_fn()): + with slim.arg_scope([slim.dropout], is_training=self._is_training): + # Add additional conv layers before the class predictor. + features_depth = static_shape.get_depth(image_feature.get_shape()) + depth = max(min(features_depth, self._max_depth), self._min_depth) + tf.logging.info('depth of additional conv before box predictor: {}'. + format(depth)) + if depth > 0 and self._num_layers_before_predictor > 0: + for i in range(self._num_layers_before_predictor): + net = slim.conv2d( + net, + depth, [1, 1], + reuse=tf.AUTO_REUSE, + scope='Conv2d_%d_1x1_%d' % (i, depth)) + sorted_keys = sorted(self._other_heads.keys()) + sorted_keys.append(BOX_ENCODINGS) + sorted_keys.append(CLASS_PREDICTIONS_WITH_BACKGROUND) + for head_name in sorted_keys: + if head_name == BOX_ENCODINGS: + head_obj = self._box_prediction_head + elif head_name == CLASS_PREDICTIONS_WITH_BACKGROUND: + head_obj = self._class_prediction_head + else: + head_obj = self._other_heads[head_name] + prediction = head_obj.predict( + features=net, + num_predictions_per_location=num_predictions_per_location) + predictions[head_name].append(prediction) + return predictions + + +# TODO(rathodv): Replace with slim.arg_scope_func_key once its available +# externally. +def _arg_scope_func_key(op): + """Returns a key that can be used to index arg_scope dictionary.""" + return getattr(op, '_key_op', str(op)) + + +# TODO(rathodv): Merge the implementation with ConvolutionalBoxPredictor above +# since they are very similar. +class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor): + """Convolutional Box Predictor with weight sharing. + + Defines the box predictor as defined in + https://arxiv.org/abs/1708.02002. This class differs from + ConvolutionalBoxPredictor in that it shares weights and biases while + predicting from different feature maps. However, batch_norm parameters are not + shared because the statistics of the activations vary among the different + feature maps. 
+ + Also note that separate multi-layer towers are constructed for the box + encoding and class predictors respectively. + """ + + def __init__(self, + is_training, + num_classes, + box_prediction_head, + class_prediction_head, + other_heads, + conv_hyperparams_fn, + depth, + num_layers_before_predictor, + kernel_size=3, + apply_batch_norm=False, + share_prediction_tower=False, + use_depthwise=False): + """Constructor. + + Args: + is_training: Indicates whether the BoxPredictor is in training mode. + num_classes: number of classes. Note that num_classes *does not* + include the background category, so if groundtruth labels take values + in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the + assigned classification targets can range from {0,... K}). + box_prediction_head: The head that predicts the boxes. + class_prediction_head: The head that predicts the classes. + other_heads: A dictionary mapping head names to convolutional + head classes. + conv_hyperparams_fn: A function to generate tf-slim arg_scope with + hyperparameters for convolution ops. + depth: depth of conv layers. + num_layers_before_predictor: Number of the additional conv layers before + the predictor. + kernel_size: Size of final convolution kernel. + apply_batch_norm: Whether to apply batch normalization to conv layers in + this predictor. + share_prediction_tower: Whether to share the multi-layer tower among box + prediction head, class prediction head and other heads. + use_depthwise: Whether to use depthwise separable conv2d instead of + regular conv2d. + """ + super(WeightSharedConvolutionalBoxPredictor, self).__init__(is_training, + num_classes) + self._box_prediction_head = box_prediction_head + self._class_prediction_head = class_prediction_head + self._other_heads = other_heads + self._conv_hyperparams_fn = conv_hyperparams_fn + self._depth = depth + self._num_layers_before_predictor = num_layers_before_predictor + self._kernel_size = kernel_size + self._apply_batch_norm = apply_batch_norm + self._share_prediction_tower = share_prediction_tower + self._use_depthwise = use_depthwise + + @property + def num_classes(self): + return self._num_classes + + def _insert_additional_projection_layer(self, image_feature, + inserted_layer_counter, + target_channel): + if inserted_layer_counter < 0: + return image_feature, inserted_layer_counter + image_feature = slim.conv2d( + image_feature, + target_channel, [1, 1], + stride=1, + padding='SAME', + activation_fn=None, + normalizer_fn=(tf.identity if self._apply_batch_norm else None), + scope='ProjectionLayer/conv2d_{}'.format( + inserted_layer_counter)) + if self._apply_batch_norm: + image_feature = slim.batch_norm( + image_feature, + scope='ProjectionLayer/conv2d_{}/BatchNorm'.format( + inserted_layer_counter)) + inserted_layer_counter += 1 + return image_feature, inserted_layer_counter + + def _compute_base_tower(self, tower_name_scope, image_feature, feature_index): + net = image_feature + for i in range(self._num_layers_before_predictor): + if self._use_depthwise: + conv_op = functools.partial(slim.separable_conv2d, depth_multiplier=1) + else: + conv_op = slim.conv2d + net = conv_op( + net, + self._depth, [self._kernel_size, self._kernel_size], + stride=1, + padding='SAME', + activation_fn=None, + normalizer_fn=(tf.identity if self._apply_batch_norm else None), + scope='{}/conv2d_{}'.format(tower_name_scope, i)) + if self._apply_batch_norm: + net = slim.batch_norm( + net, + scope='{}/conv2d_{}/BatchNorm/feature_{}'. 
+                format(tower_name_scope, i, feature_index))
+        net = tf.nn.relu6(net)
+    return net
+
+  def _predict_head(self, head_name, head_obj, image_feature, box_tower_feature,
+                    feature_index, num_predictions_per_location):
+    if head_name == CLASS_PREDICTIONS_WITH_BACKGROUND:
+      tower_name_scope = 'ClassPredictionTower'
+    else:
+      tower_name_scope = head_name + 'PredictionTower'
+    if self._share_prediction_tower:
+      head_tower_feature = box_tower_feature
+    else:
+      head_tower_feature = self._compute_base_tower(
+          tower_name_scope=tower_name_scope,
+          image_feature=image_feature,
+          feature_index=feature_index)
+    return head_obj.predict(
+        features=head_tower_feature,
+        num_predictions_per_location=num_predictions_per_location)
+
+  def _predict(self, image_features, num_predictions_per_location_list):
+    """Computes encoded object locations and corresponding confidences.
+
+    Args:
+      image_features: A list of float tensors of shape [batch_size, height_i,
+        width_i, channels] containing features for a batch of images. Note that
+        when not all tensors in the list have the same number of channels, an
+        additional projection layer will be added on top of the tensor to
+        generate a feature map with a number of channels consistent with the
+        majority.
+      num_predictions_per_location_list: A list of integers representing the
+        number of box predictions to be made per spatial location for each
+        feature map. Note that all values must be the same since the weights
+        are shared.
+
+    Returns:
+      A dictionary containing:
+        box_encodings: A list of float tensors of shape
+          [batch_size, num_anchors_i, code_size] representing the location of
+          the objects. Each entry in the list corresponds to a feature map in
+          the input `image_features` list.
+        class_predictions_with_background: A list of float tensors of shape
+          [batch_size, num_anchors_i, num_classes + 1] representing the class
+          predictions for the proposals. Each entry in the list corresponds to
+          a feature map in the input `image_features` list.
+        (optional) Predictions from other heads.
+          E.g., mask_predictions: A list of float tensors of shape
+          [batch_size, num_anchors_i, num_classes, mask_height, mask_width].
+
+    Raises:
+      ValueError: If the number of predictions per location differs between
+        the feature maps.
+    """
+    if len(set(num_predictions_per_location_list)) > 1:
+      raise ValueError('num predictions per location must be the same for all '
+                       'feature maps, found: {}'.format(
+                           num_predictions_per_location_list))
+    feature_channels = [
+        shape_utils.get_dim_as_int(image_feature.shape[3])
+        for image_feature in image_features
+    ]
+    has_different_feature_channels = len(set(feature_channels)) > 1
+    if has_different_feature_channels:
+      inserted_layer_counter = 0
+      target_channel = max(set(feature_channels), key=feature_channels.count)
+      tf.logging.info('Not all feature maps have the same number of '
+                      'channels, found: {}, appending additional projection '
+                      'layers to bring all feature maps to uniformly have {} '
+                      'channels.'.format(feature_channels, target_channel))
+    else:
+      # Placeholder values used when has_different_feature_channels is False.
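+      # With inserted_layer_counter = -1, _insert_additional_projection_layer
+      # above returns its input unchanged, so no 1x1 projection conv is added.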
+ target_channel = -1 + inserted_layer_counter = -1 + predictions = { + BOX_ENCODINGS: [], + CLASS_PREDICTIONS_WITH_BACKGROUND: [], + } + for head_name in self._other_heads.keys(): + predictions[head_name] = [] + for feature_index, (image_feature, + num_predictions_per_location) in enumerate( + zip(image_features, + num_predictions_per_location_list)): + with tf.variable_scope('WeightSharedConvolutionalBoxPredictor', + reuse=tf.AUTO_REUSE): + with slim.arg_scope(self._conv_hyperparams_fn()): + # TODO(wangjiang) Pass is_training to the head class directly. + with slim.arg_scope([slim.dropout], is_training=self._is_training): + (image_feature, + inserted_layer_counter) = self._insert_additional_projection_layer( + image_feature, inserted_layer_counter, target_channel) + if self._share_prediction_tower: + box_tower_scope = 'PredictionTower' + else: + box_tower_scope = 'BoxPredictionTower' + box_tower_feature = self._compute_base_tower( + tower_name_scope=box_tower_scope, + image_feature=image_feature, + feature_index=feature_index) + box_encodings = self._box_prediction_head.predict( + features=box_tower_feature, + num_predictions_per_location=num_predictions_per_location) + predictions[BOX_ENCODINGS].append(box_encodings) + sorted_keys = sorted(self._other_heads.keys()) + sorted_keys.append(CLASS_PREDICTIONS_WITH_BACKGROUND) + for head_name in sorted_keys: + if head_name == CLASS_PREDICTIONS_WITH_BACKGROUND: + head_obj = self._class_prediction_head + else: + head_obj = self._other_heads[head_name] + prediction = self._predict_head( + head_name=head_name, + head_obj=head_obj, + image_feature=image_feature, + box_tower_feature=box_tower_feature, + feature_index=feature_index, + num_predictions_per_location=num_predictions_per_location) + predictions[head_name].append(prediction) + return predictions + + diff --git a/predictors/convolutional_box_predictor_test.py b/predictors/convolutional_box_predictor_test.py new file mode 100644 index 0000000..9941731 --- /dev/null +++ b/predictors/convolutional_box_predictor_test.py @@ -0,0 +1,922 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for object_detection.predictors.convolutional_box_predictor.""" + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from google.protobuf import text_format +from object_detection.builders import box_predictor_builder +from object_detection.builders import hyperparams_builder +from object_detection.predictors import convolutional_box_predictor as box_predictor +from object_detection.predictors.heads import box_head +from object_detection.predictors.heads import class_head +from object_detection.predictors.heads import mask_head +from object_detection.protos import hyperparams_pb2 +from object_detection.utils import test_case + + +class ConvolutionalBoxPredictorTest(test_case.TestCase): + + def _build_arg_scope_with_conv_hyperparams(self): + conv_hyperparams = hyperparams_pb2.Hyperparams() + conv_hyperparams_text_proto = """ + activation: RELU_6 + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) + return hyperparams_builder.build(conv_hyperparams, is_training=True) + + def test_get_boxes_for_five_aspect_ratios_per_location(self): + def graph_fn(image_features): + conv_box_predictor = ( + box_predictor_builder.build_convolutional_box_predictor( + is_training=False, + num_classes=0, + conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), + min_depth=0, + max_depth=32, + num_layers_before_predictor=1, + use_dropout=True, + dropout_keep_prob=0.8, + kernel_size=1, + box_code_size=4)) + box_predictions = conv_box_predictor.predict( + [image_features], num_predictions_per_location=[5], + scope='BoxPredictor') + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + objectness_predictions = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, objectness_predictions) + image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) + (box_encodings, objectness_predictions) = self.execute(graph_fn, + [image_features]) + self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4]) + self.assertAllEqual(objectness_predictions.shape, [4, 320, 1]) + + def test_get_boxes_for_one_aspect_ratio_per_location(self): + def graph_fn(image_features): + conv_box_predictor = ( + box_predictor_builder.build_convolutional_box_predictor( + is_training=False, + num_classes=0, + conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), + min_depth=0, + max_depth=32, + num_layers_before_predictor=1, + use_dropout=True, + dropout_keep_prob=0.8, + kernel_size=1, + box_code_size=4)) + box_predictions = conv_box_predictor.predict( + [image_features], num_predictions_per_location=[1], + scope='BoxPredictor') + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + objectness_predictions = tf.concat(box_predictions[ + box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) + return (box_encodings, objectness_predictions) + image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) + (box_encodings, objectness_predictions) = self.execute(graph_fn, + [image_features]) + self.assertAllEqual(box_encodings.shape, [4, 64, 1, 4]) + self.assertAllEqual(objectness_predictions.shape, [4, 64, 1]) + + def test_get_multi_class_predictions_for_five_aspect_ratios_per_location( + self): + num_classes_without_background = 6 + image_features = 
np.random.rand(4, 8, 8, 64).astype(np.float32) + def graph_fn(image_features): + conv_box_predictor = ( + box_predictor_builder.build_convolutional_box_predictor( + is_training=False, + num_classes=num_classes_without_background, + conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), + min_depth=0, + max_depth=32, + num_layers_before_predictor=1, + use_dropout=True, + dropout_keep_prob=0.8, + kernel_size=1, + box_code_size=4)) + box_predictions = conv_box_predictor.predict( + [image_features], + num_predictions_per_location=[5], + scope='BoxPredictor') + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, class_predictions_with_background) + (box_encodings, + class_predictions_with_background) = self.execute(graph_fn, + [image_features]) + self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4]) + self.assertAllEqual(class_predictions_with_background.shape, + [4, 320, num_classes_without_background+1]) + + def test_get_predictions_with_feature_maps_of_dynamic_shape( + self): + image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64]) + conv_box_predictor = ( + box_predictor_builder.build_convolutional_box_predictor( + is_training=False, + num_classes=0, + conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), + min_depth=0, + max_depth=32, + num_layers_before_predictor=1, + use_dropout=True, + dropout_keep_prob=0.8, + kernel_size=1, + box_code_size=4)) + box_predictions = conv_box_predictor.predict( + [image_features], num_predictions_per_location=[5], + scope='BoxPredictor') + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + objectness_predictions = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + init_op = tf.global_variables_initializer() + + resolution = 32 + expected_num_anchors = resolution*resolution*5 + with self.test_session() as sess: + sess.run(init_op) + (box_encodings_shape, + objectness_predictions_shape) = sess.run( + [tf.shape(box_encodings), tf.shape(objectness_predictions)], + feed_dict={image_features: + np.random.rand(4, resolution, resolution, 64)}) + actual_variable_set = set( + [var.op.name for var in tf.trainable_variables()]) + self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4]) + self.assertAllEqual(objectness_predictions_shape, + [4, expected_num_anchors, 1]) + expected_variable_set = set([ + 'BoxPredictor/Conv2d_0_1x1_32/biases', + 'BoxPredictor/Conv2d_0_1x1_32/weights', + 'BoxPredictor/BoxEncodingPredictor/biases', + 'BoxPredictor/BoxEncodingPredictor/weights', + 'BoxPredictor/ClassPredictor/biases', + 'BoxPredictor/ClassPredictor/weights']) + self.assertEqual(expected_variable_set, actual_variable_set) + + def test_use_depthwise_convolution(self): + image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64]) + conv_box_predictor = ( + box_predictor_builder.build_convolutional_box_predictor( + is_training=False, + num_classes=0, + conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), + min_depth=0, + max_depth=32, + num_layers_before_predictor=1, + dropout_keep_prob=0.8, + kernel_size=3, + box_code_size=4, + use_dropout=True, + use_depthwise=True)) + box_predictions = conv_box_predictor.predict( + [image_features], num_predictions_per_location=[5], + scope='BoxPredictor') + box_encodings = tf.concat( + 
box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + objectness_predictions = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + init_op = tf.global_variables_initializer() + + resolution = 32 + expected_num_anchors = resolution*resolution*5 + with self.test_session() as sess: + sess.run(init_op) + (box_encodings_shape, + objectness_predictions_shape) = sess.run( + [tf.shape(box_encodings), tf.shape(objectness_predictions)], + feed_dict={image_features: + np.random.rand(4, resolution, resolution, 64)}) + actual_variable_set = set( + [var.op.name for var in tf.trainable_variables()]) + self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4]) + self.assertAllEqual(objectness_predictions_shape, + [4, expected_num_anchors, 1]) + expected_variable_set = set([ + 'BoxPredictor/Conv2d_0_1x1_32/biases', + 'BoxPredictor/Conv2d_0_1x1_32/weights', + 'BoxPredictor/BoxEncodingPredictor_depthwise/biases', + 'BoxPredictor/BoxEncodingPredictor_depthwise/depthwise_weights', + 'BoxPredictor/BoxEncodingPredictor/biases', + 'BoxPredictor/BoxEncodingPredictor/weights', + 'BoxPredictor/ClassPredictor_depthwise/biases', + 'BoxPredictor/ClassPredictor_depthwise/depthwise_weights', + 'BoxPredictor/ClassPredictor/biases', + 'BoxPredictor/ClassPredictor/weights']) + self.assertEqual(expected_variable_set, actual_variable_set) + + def test_no_dangling_outputs(self): + image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64]) + conv_box_predictor = ( + box_predictor_builder.build_convolutional_box_predictor( + is_training=False, + num_classes=0, + conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), + min_depth=0, + max_depth=32, + num_layers_before_predictor=1, + dropout_keep_prob=0.8, + kernel_size=3, + box_code_size=4, + use_dropout=True, + use_depthwise=True)) + box_predictions = conv_box_predictor.predict( + [image_features], num_predictions_per_location=[5], + scope='BoxPredictor') + tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + + bad_dangling_ops = [] + types_safe_to_dangle = set(['Assign', 'Mul', 'Const']) + for op in tf.get_default_graph().get_operations(): + if (not op.outputs) or (not op.outputs[0].consumers()): + if 'BoxPredictor' in op.name: + if op.type not in types_safe_to_dangle: + bad_dangling_ops.append(op) + + self.assertEqual(bad_dangling_ops, []) + + +class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase): + + def _build_arg_scope_with_conv_hyperparams(self): + conv_hyperparams = hyperparams_pb2.Hyperparams() + conv_hyperparams_text_proto = """ + activation: RELU_6 + regularizer { + l2_regularizer { + } + } + initializer { + random_normal_initializer { + stddev: 0.01 + mean: 0.0 + } + } + batch_norm { + train: true, + } + """ + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) + return hyperparams_builder.build(conv_hyperparams, is_training=True) + + def _build_conv_arg_scope_no_batch_norm(self): + conv_hyperparams = hyperparams_pb2.Hyperparams() + conv_hyperparams_text_proto = """ + activation: RELU_6 + regularizer { + l2_regularizer { + } + } + initializer { + random_normal_initializer { + stddev: 0.01 + mean: 0.0 + } + } + """ + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) + return hyperparams_builder.build(conv_hyperparams, is_training=True) + + def test_get_boxes_for_five_aspect_ratios_per_location(self): + + def graph_fn(image_features): + 
conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_box_predictor( + is_training=False, + num_classes=0, + conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), + depth=32, + num_layers_before_predictor=1, + box_code_size=4)) + box_predictions = conv_box_predictor.predict( + [image_features], num_predictions_per_location=[5], + scope='BoxPredictor') + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + objectness_predictions = tf.concat(box_predictions[ + box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) + return (box_encodings, objectness_predictions) + image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) + (box_encodings, objectness_predictions) = self.execute( + graph_fn, [image_features]) + self.assertAllEqual(box_encodings.shape, [4, 320, 4]) + self.assertAllEqual(objectness_predictions.shape, [4, 320, 1]) + + def test_bias_predictions_to_background_with_sigmoid_score_conversion(self): + + def graph_fn(image_features): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_box_predictor( + is_training=True, + num_classes=2, + conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), + depth=32, + num_layers_before_predictor=1, + class_prediction_bias_init=-4.6, + box_code_size=4)) + box_predictions = conv_box_predictor.predict( + [image_features], num_predictions_per_location=[5], + scope='BoxPredictor') + class_predictions = tf.concat(box_predictions[ + box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) + return (tf.nn.sigmoid(class_predictions),) + image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) + class_predictions = self.execute(graph_fn, [image_features]) + self.assertAlmostEqual(np.mean(class_predictions), 0.01, places=3) + + def test_get_multi_class_predictions_for_five_aspect_ratios_per_location( + self): + + num_classes_without_background = 6 + def graph_fn(image_features): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_box_predictor( + is_training=False, + num_classes=num_classes_without_background, + conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), + depth=32, + num_layers_before_predictor=1, + box_code_size=4)) + box_predictions = conv_box_predictor.predict( + [image_features], + num_predictions_per_location=[5], + scope='BoxPredictor') + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat(box_predictions[ + box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) + return (box_encodings, class_predictions_with_background) + + image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) + (box_encodings, class_predictions_with_background) = self.execute( + graph_fn, [image_features]) + self.assertAllEqual(box_encodings.shape, [4, 320, 4]) + self.assertAllEqual(class_predictions_with_background.shape, + [4, 320, num_classes_without_background+1]) + + def test_get_multi_class_predictions_from_two_feature_maps( + self): + + num_classes_without_background = 6 + def graph_fn(image_features1, image_features2): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_box_predictor( + is_training=False, + num_classes=num_classes_without_background, + conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), + depth=32, + num_layers_before_predictor=1, + box_code_size=4)) + box_predictions = conv_box_predictor.predict( + [image_features1, image_features2], + 
num_predictions_per_location=[5, 5], + scope='BoxPredictor') + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, class_predictions_with_background) + + image_features1 = np.random.rand(4, 8, 8, 64).astype(np.float32) + image_features2 = np.random.rand(4, 8, 8, 64).astype(np.float32) + (box_encodings, class_predictions_with_background) = self.execute( + graph_fn, [image_features1, image_features2]) + self.assertAllEqual(box_encodings.shape, [4, 640, 4]) + self.assertAllEqual(class_predictions_with_background.shape, + [4, 640, num_classes_without_background+1]) + + def test_get_multi_class_predictions_from_feature_maps_of_different_depth( + self): + + num_classes_without_background = 6 + def graph_fn(image_features1, image_features2, image_features3): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_box_predictor( + is_training=False, + num_classes=num_classes_without_background, + conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), + depth=32, + num_layers_before_predictor=1, + box_code_size=4)) + box_predictions = conv_box_predictor.predict( + [image_features1, image_features2, image_features3], + num_predictions_per_location=[5, 5, 5], + scope='BoxPredictor') + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, class_predictions_with_background) + + image_features1 = np.random.rand(4, 8, 8, 64).astype(np.float32) + image_features2 = np.random.rand(4, 8, 8, 64).astype(np.float32) + image_features3 = np.random.rand(4, 8, 8, 32).astype(np.float32) + (box_encodings, class_predictions_with_background) = self.execute( + graph_fn, [image_features1, image_features2, image_features3]) + self.assertAllEqual(box_encodings.shape, [4, 960, 4]) + self.assertAllEqual(class_predictions_with_background.shape, + [4, 960, num_classes_without_background+1]) + + def test_predictions_multiple_feature_maps_share_weights_separate_batchnorm( + self): + num_classes_without_background = 6 + def graph_fn(image_features1, image_features2): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_box_predictor( + is_training=False, + num_classes=num_classes_without_background, + conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), + depth=32, + num_layers_before_predictor=2, + box_code_size=4)) + box_predictions = conv_box_predictor.predict( + [image_features1, image_features2], + num_predictions_per_location=[5, 5], + scope='BoxPredictor') + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, class_predictions_with_background) + + with self.test_session(graph=tf.Graph()): + graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), + tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) + actual_variable_set = set( + [var.op.name for var in tf.trainable_variables()]) + expected_variable_set = set([ + # Box prediction tower + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_0/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 
'BoxPredictionTower/conv2d_0/BatchNorm/feature_0/beta'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_0/BatchNorm/feature_1/beta'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_1/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_1/BatchNorm/feature_0/beta'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_1/BatchNorm/feature_1/beta'), + # Box prediction head + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictor/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictor/biases'), + # Class prediction tower + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_0/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_0/BatchNorm/feature_0/beta'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_0/BatchNorm/feature_1/beta'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_1/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_1/BatchNorm/feature_0/beta'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_1/BatchNorm/feature_1/beta'), + # Class prediction head + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictor/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictor/biases')]) + self.assertEqual(expected_variable_set, actual_variable_set) + + def test_predictions_multiple_feature_maps_share_weights_without_batchnorm( + self): + num_classes_without_background = 6 + def graph_fn(image_features1, image_features2): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_box_predictor( + is_training=False, + num_classes=num_classes_without_background, + conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), + depth=32, + num_layers_before_predictor=2, + box_code_size=4, + apply_batch_norm=False)) + box_predictions = conv_box_predictor.predict( + [image_features1, image_features2], + num_predictions_per_location=[5, 5], + scope='BoxPredictor') + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, class_predictions_with_background) + + with self.test_session(graph=tf.Graph()): + graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), + tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) + actual_variable_set = set( + [var.op.name for var in tf.trainable_variables()]) + expected_variable_set = set([ + # Box prediction tower + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_0/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_0/biases'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_1/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_1/biases'), + # Box prediction head + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictor/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictor/biases'), + # Class prediction tower + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 
'ClassPredictionTower/conv2d_0/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_0/biases'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_1/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_1/biases'), + # Class prediction head + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictor/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictor/biases')]) + self.assertEqual(expected_variable_set, actual_variable_set) + + def test_predictions_multiple_feature_maps_share_weights_with_depthwise( + self): + num_classes_without_background = 6 + def graph_fn(image_features1, image_features2): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_box_predictor( + is_training=False, + num_classes=num_classes_without_background, + conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), + depth=32, + num_layers_before_predictor=2, + box_code_size=4, + apply_batch_norm=False, + use_depthwise=True)) + box_predictions = conv_box_predictor.predict( + [image_features1, image_features2], + num_predictions_per_location=[5, 5], + scope='BoxPredictor') + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, class_predictions_with_background) + + with self.test_session(graph=tf.Graph()): + graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), + tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) + actual_variable_set = set( + [var.op.name for var in tf.trainable_variables()]) + expected_variable_set = set([ + # Box prediction tower + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_0/depthwise_weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_0/pointwise_weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_0/biases'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_1/depthwise_weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_1/pointwise_weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_1/biases'), + # Box prediction head + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictor/depthwise_weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictor/pointwise_weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictor/biases'), + # Class prediction tower + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_0/depthwise_weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_0/pointwise_weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_0/biases'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_1/depthwise_weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_1/pointwise_weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_1/biases'), + # Class prediction head + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 
'ClassPredictor/depthwise_weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictor/pointwise_weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictor/biases')]) + self.assertEqual(expected_variable_set, actual_variable_set) + + def test_no_batchnorm_params_when_batchnorm_is_not_configured(self): + num_classes_without_background = 6 + def graph_fn(image_features1, image_features2): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_box_predictor( + is_training=False, + num_classes=num_classes_without_background, + conv_hyperparams_fn=self._build_conv_arg_scope_no_batch_norm(), + depth=32, + num_layers_before_predictor=2, + box_code_size=4, + apply_batch_norm=False)) + box_predictions = conv_box_predictor.predict( + [image_features1, image_features2], + num_predictions_per_location=[5, 5], + scope='BoxPredictor') + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, class_predictions_with_background) + + with self.test_session(graph=tf.Graph()): + graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), + tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) + actual_variable_set = set( + [var.op.name for var in tf.trainable_variables()]) + expected_variable_set = set([ + # Box prediction tower + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_0/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_0/biases'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_1/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_1/biases'), + # Box prediction head + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictor/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictor/biases'), + # Class prediction tower + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_0/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_0/biases'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_1/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_1/biases'), + # Class prediction head + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictor/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictor/biases')]) + self.assertEqual(expected_variable_set, actual_variable_set) + + def test_predictions_share_weights_share_tower_separate_batchnorm( + self): + num_classes_without_background = 6 + def graph_fn(image_features1, image_features2): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_box_predictor( + is_training=False, + num_classes=num_classes_without_background, + conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), + depth=32, + num_layers_before_predictor=2, + box_code_size=4, + share_prediction_tower=True)) + box_predictions = conv_box_predictor.predict( + [image_features1, image_features2], + num_predictions_per_location=[5, 5], + scope='BoxPredictor') + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat( + 
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, class_predictions_with_background) + + with self.test_session(graph=tf.Graph()): + graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), + tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) + actual_variable_set = set( + [var.op.name for var in tf.trainable_variables()]) + expected_variable_set = set([ + # Shared prediction tower + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'PredictionTower/conv2d_0/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'PredictionTower/conv2d_0/BatchNorm/feature_0/beta'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'PredictionTower/conv2d_0/BatchNorm/feature_1/beta'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'PredictionTower/conv2d_1/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'PredictionTower/conv2d_1/BatchNorm/feature_0/beta'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'PredictionTower/conv2d_1/BatchNorm/feature_1/beta'), + # Box prediction head + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictor/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictor/biases'), + # Class prediction head + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictor/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictor/biases')]) + self.assertEqual(expected_variable_set, actual_variable_set) + + def test_predictions_share_weights_share_tower_without_batchnorm( + self): + num_classes_without_background = 6 + def graph_fn(image_features1, image_features2): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_box_predictor( + is_training=False, + num_classes=num_classes_without_background, + conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), + depth=32, + num_layers_before_predictor=2, + box_code_size=4, + share_prediction_tower=True, + apply_batch_norm=False)) + box_predictions = conv_box_predictor.predict( + [image_features1, image_features2], + num_predictions_per_location=[5, 5], + scope='BoxPredictor') + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, class_predictions_with_background) + + with self.test_session(graph=tf.Graph()): + graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), + tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) + actual_variable_set = set( + [var.op.name for var in tf.trainable_variables()]) + expected_variable_set = set([ + # Shared prediction tower + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'PredictionTower/conv2d_0/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'PredictionTower/conv2d_0/biases'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'PredictionTower/conv2d_1/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'PredictionTower/conv2d_1/biases'), + # Box prediction head + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictor/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictor/biases'), + # Class prediction head + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictor/weights'), + ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' + 
'ClassPredictor/biases')]) + + self.assertEqual(expected_variable_set, actual_variable_set) + + def test_get_predictions_with_feature_maps_of_dynamic_shape( + self): + image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64]) + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_box_predictor( + is_training=False, + num_classes=0, + conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), + depth=32, + num_layers_before_predictor=1, + box_code_size=4)) + box_predictions = conv_box_predictor.predict( + [image_features], num_predictions_per_location=[5], + scope='BoxPredictor') + box_encodings = tf.concat(box_predictions[box_predictor.BOX_ENCODINGS], + axis=1) + objectness_predictions = tf.concat(box_predictions[ + box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) + init_op = tf.global_variables_initializer() + + resolution = 32 + expected_num_anchors = resolution*resolution*5 + with self.test_session() as sess: + sess.run(init_op) + (box_encodings_shape, + objectness_predictions_shape) = sess.run( + [tf.shape(box_encodings), tf.shape(objectness_predictions)], + feed_dict={image_features: + np.random.rand(4, resolution, resolution, 64)}) + self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 4]) + self.assertAllEqual(objectness_predictions_shape, + [4, expected_num_anchors, 1]) + + def test_other_heads_predictions(self): + box_code_size = 4 + num_classes_without_background = 3 + other_head_name = 'Mask' + mask_height = 5 + mask_width = 5 + num_predictions_per_location = 5 + + def graph_fn(image_features): + box_prediction_head = box_head.WeightSharedConvolutionalBoxHead( + box_code_size) + class_prediction_head = class_head.WeightSharedConvolutionalClassHead( + num_classes_without_background + 1) + other_heads = { + other_head_name: + mask_head.WeightSharedConvolutionalMaskHead( + num_classes_without_background, + mask_height=mask_height, + mask_width=mask_width) + } + conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor( + is_training=False, + num_classes=num_classes_without_background, + box_prediction_head=box_prediction_head, + class_prediction_head=class_prediction_head, + other_heads=other_heads, + conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), + depth=32, + num_layers_before_predictor=2) + box_predictions = conv_box_predictor.predict( + [image_features], + num_predictions_per_location=[num_predictions_per_location], + scope='BoxPredictor') + for key, value in box_predictions.items(): + box_predictions[key] = tf.concat(value, axis=1) + assert len(box_predictions) == 3 + return (box_predictions[box_predictor.BOX_ENCODINGS], + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + box_predictions[other_head_name]) + + batch_size = 4 + feature_ht = 8 + feature_wt = 8 + image_features = np.random.rand(batch_size, feature_ht, feature_wt, + 64).astype(np.float32) + (box_encodings, class_predictions, other_head_predictions) = self.execute( + graph_fn, [image_features]) + num_anchors = feature_ht * feature_wt * num_predictions_per_location + self.assertAllEqual(box_encodings.shape, + [batch_size, num_anchors, box_code_size]) + self.assertAllEqual( + class_predictions.shape, + [batch_size, num_anchors, num_classes_without_background + 1]) + self.assertAllEqual(other_head_predictions.shape, [ + batch_size, num_anchors, num_classes_without_background, mask_height, + mask_width + ]) + + + + +if __name__ == '__main__': + tf.test.main() diff --git 
a/predictors/convolutional_keras_box_predictor.py b/predictors/convolutional_keras_box_predictor.py new file mode 100644 index 0000000..96fd7b7 --- /dev/null +++ b/predictors/convolutional_keras_box_predictor.py @@ -0,0 +1,476 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Convolutional Box Predictors with and without weight sharing.""" +import collections + +import tensorflow as tf + +from object_detection.core import box_predictor +from object_detection.utils import shape_utils +from object_detection.utils import static_shape + +keras = tf.keras.layers + +BOX_ENCODINGS = box_predictor.BOX_ENCODINGS +CLASS_PREDICTIONS_WITH_BACKGROUND = ( + box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND) +MASK_PREDICTIONS = box_predictor.MASK_PREDICTIONS + + +class _NoopVariableScope(object): + """A dummy class that does not push any scope.""" + + def __enter__(self): + return None + + def __exit__(self, exc_type, exc_value, traceback): + return False + + +class ConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor): + """Convolutional Keras Box Predictor. + + Optionally add an intermediate 1x1 convolutional layer after features and + predict in parallel branches box_encodings and + class_predictions_with_background. + + Currently this box predictor assumes that predictions are "shared" across + classes --- that is each anchor makes box predictions which do not depend + on class. + """ + + def __init__(self, + is_training, + num_classes, + box_prediction_heads, + class_prediction_heads, + other_heads, + conv_hyperparams, + num_layers_before_predictor, + min_depth, + max_depth, + freeze_batchnorm, + inplace_batchnorm_update, + name=None): + """Constructor. + + Args: + is_training: Indicates whether the BoxPredictor is in training mode. + num_classes: number of classes. Note that num_classes *does not* + include the background category, so if groundtruth labels take values + in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the + assigned classification targets can range from {0,... K}). + box_prediction_heads: A list of heads that predict the boxes. + class_prediction_heads: A list of heads that predict the classes. + other_heads: A dictionary mapping head names to lists of convolutional + heads. + conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object + containing hyperparameters for convolution ops. + num_layers_before_predictor: Number of the additional conv layers before + the predictor. + min_depth: Minimum feature depth prior to predicting box encodings + and class predictions. + max_depth: Maximum feature depth prior to predicting box encodings + and class predictions. If max_depth is set to 0, no additional + feature map will be inserted before location and class predictions. + freeze_batchnorm: Whether to freeze batch norm parameters during + training or not. When training with a small batch size (e.g. 
1), it is
+        desirable to freeze batch norm update and use pretrained batch norm
+        params.
+      inplace_batchnorm_update: Whether to update batch norm moving average
+        values inplace. When this is false, the train op must add a control
+        dependency on the tf.GraphKeys.UPDATE_OPS collection in order to
+        update batch norm statistics.
+      name: A string name scope to assign to the model. If `None`, Keras
+        will auto-generate one from the class name.
+
+    Raises:
+      ValueError: if min_depth > max_depth.
+    """
+    super(ConvolutionalBoxPredictor, self).__init__(
+        is_training, num_classes, freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        name=name)
+    if min_depth > max_depth:
+      raise ValueError('min_depth should be less than or equal to max_depth')
+    if len(box_prediction_heads) != len(class_prediction_heads):
+      raise ValueError('All lists of heads must be the same length.')
+    for other_head_list in other_heads.values():
+      if len(box_prediction_heads) != len(other_head_list):
+        raise ValueError('All lists of heads must be the same length.')
+
+    self._prediction_heads = {
+        BOX_ENCODINGS: box_prediction_heads,
+        CLASS_PREDICTIONS_WITH_BACKGROUND: class_prediction_heads,
+    }
+
+    if other_heads:
+      self._prediction_heads.update(other_heads)
+
+    # We generate a consistent ordering for the prediction head names,
+    # so that all workers build the model in the exact same order.
+    self._sorted_head_names = sorted(self._prediction_heads.keys())
+
+    self._conv_hyperparams = conv_hyperparams
+    self._min_depth = min_depth
+    self._max_depth = max_depth
+    self._num_layers_before_predictor = num_layers_before_predictor
+
+    self._shared_nets = []
+
+  def build(self, input_shapes):
+    """Creates the variables of the layer."""
+    if len(input_shapes) != len(self._prediction_heads[BOX_ENCODINGS]):
+      raise ValueError('This box predictor was constructed with %d heads, '
+                       'but there are %d inputs.' %
+                       (len(self._prediction_heads[BOX_ENCODINGS]),
+                        len(input_shapes)))
+    for stack_index, input_shape in enumerate(input_shapes):
+      net = []
+
+      # Add additional conv layers before the class predictor.
+      features_depth = static_shape.get_depth(input_shape)
+      depth = max(min(features_depth, self._max_depth), self._min_depth)
+      tf.logging.info(
+          'depth of additional conv before box predictor: {}'.format(depth))
+
+      if depth > 0 and self._num_layers_before_predictor > 0:
+        for i in range(self._num_layers_before_predictor):
+          net.append(keras.Conv2D(depth, [1, 1],
+                                  name='SharedConvolutions_%d/Conv2d_%d_1x1_%d'
+                                  % (stack_index, i, depth),
+                                  padding='SAME',
+                                  **self._conv_hyperparams.params()))
+          net.append(self._conv_hyperparams.build_batch_norm(
+              training=(self._is_training and not self._freeze_batchnorm),
+              name='SharedConvolutions_%d/Conv2d_%d_1x1_%d_norm'
+              % (stack_index, i, depth)))
+          net.append(self._conv_hyperparams.build_activation_layer(
+              name='SharedConvolutions_%d/Conv2d_%d_1x1_%d_activation'
+              % (stack_index, i, depth)))
+      # Until certain bugs are fixed in checkpointable lists, this net must be
+      # appended only once it has been filled with layers.
+      self._shared_nets.append(net)
+    self.built = True
+
+  def _predict(self, image_features, **kwargs):
+    """Computes encoded object locations and corresponding confidences.
+
+    Args:
+      image_features: A list of float tensors of shape [batch_size, height_i,
+        width_i, channels_i] containing features for a batch of images.
+      **kwargs: Unused keyword arguments.
+
+    Returns:
+      box_encodings: A list of float tensors of shape
+        [batch_size, num_anchors_i, q, code_size] representing the location of
+        the objects, where q is 1 or the number of classes. Each entry in the
+        list corresponds to a feature map in the input `image_features` list.
+      class_predictions_with_background: A list of float tensors of shape
+        [batch_size, num_anchors_i, num_classes + 1] representing the class
+        predictions for the proposals. Each entry in the list corresponds to a
+        feature map in the input `image_features` list.
+    """
+    predictions = collections.defaultdict(list)
+
+    for (index, net) in enumerate(image_features):
+
+      # Apply shared conv layers before the head predictors.
+      for layer in self._shared_nets[index]:
+        net = layer(net)
+
+      for head_name in self._sorted_head_names:
+        head_obj = self._prediction_heads[head_name][index]
+        prediction = head_obj(net)
+        predictions[head_name].append(prediction)
+
+    return predictions
+
+
+class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
+  """Convolutional Box Predictor with weight sharing based on Keras.
+
+  Defines the box predictor as defined in
+  https://arxiv.org/abs/1708.02002. This class differs from
+  ConvolutionalBoxPredictor in that it shares weights and biases while
+  predicting from different feature maps. However, batch_norm parameters are
+  not shared because the statistics of the activations vary among the
+  different feature maps.
+
+  Also note that separate multi-layer towers are constructed for the box
+  encoding and class predictors respectively.
+  """
+
+  def __init__(self,
+               is_training,
+               num_classes,
+               box_prediction_head,
+               class_prediction_head,
+               other_heads,
+               conv_hyperparams,
+               depth,
+               num_layers_before_predictor,
+               freeze_batchnorm,
+               inplace_batchnorm_update,
+               kernel_size=3,
+               apply_batch_norm=False,
+               share_prediction_tower=False,
+               use_depthwise=False,
+               name=None):
+    """Constructor.
+
+    Args:
+      is_training: Indicates whether the BoxPredictor is in training mode.
+      num_classes: number of classes. Note that num_classes *does not*
+        include the background category, so if groundtruth labels take values
+        in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
+        assigned classification targets can range from {0, ..., K}).
+      box_prediction_head: The head that predicts the boxes.
+      class_prediction_head: The head that predicts the classes.
+      other_heads: A dictionary mapping head names to convolutional
+        head classes.
+      conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
+        containing hyperparameters for convolution ops.
+      depth: depth of conv layers.
+      num_layers_before_predictor: Number of additional conv layers before
+        the predictor.
+      freeze_batchnorm: Whether to freeze batch norm parameters during
+        training or not. When training with a small batch size (e.g. 1), it is
+        desirable to freeze batch norm update and use pretrained batch norm
+        params.
+      inplace_batchnorm_update: Whether to update batch norm moving average
+        values inplace. When this is false, the train op must add a control
+        dependency on the tf.GraphKeys.UPDATE_OPS collection in order to
+        update batch norm statistics.
+      kernel_size: Size of final convolution kernel.
+      apply_batch_norm: Whether to apply batch normalization to conv layers in
+        this predictor.
+      share_prediction_tower: Whether to share the multi-layer tower among box
+        prediction head, class prediction head and other heads.
+ use_depthwise: Whether to use depthwise separable conv2d instead of + regular conv2d. + name: A string name scope to assign to the model. If `None`, Keras + will auto-generate one from the class name. + """ + super(WeightSharedConvolutionalBoxPredictor, self).__init__( + is_training, num_classes, freeze_batchnorm=freeze_batchnorm, + inplace_batchnorm_update=inplace_batchnorm_update, + name=name) + + self._box_prediction_head = box_prediction_head + self._prediction_heads = { + CLASS_PREDICTIONS_WITH_BACKGROUND: class_prediction_head, + } + if other_heads: + self._prediction_heads.update(other_heads) + # We generate a consistent ordering for the prediction head names, + # so that all workers build the model in the exact same order. + self._sorted_head_names = sorted(self._prediction_heads.keys()) + + self._conv_hyperparams = conv_hyperparams + self._depth = depth + self._num_layers_before_predictor = num_layers_before_predictor + self._kernel_size = kernel_size + self._apply_batch_norm = apply_batch_norm + self._share_prediction_tower = share_prediction_tower + self._use_depthwise = use_depthwise + + # Additional projection layers to bring all feature maps to uniform + # channels. + self._additional_projection_layers = [] + # The base tower layers for each head. + self._base_tower_layers_for_heads = { + BOX_ENCODINGS: [], + CLASS_PREDICTIONS_WITH_BACKGROUND: [], + } + for head_name in other_heads.keys(): + self._base_tower_layers_for_heads[head_name] = [] + + # A dict maps the tower_name_scope of each head to the shared conv layers in + # the base tower for different feature map levels. + self._head_scope_conv_layers = {} + + def _insert_additional_projection_layer( + self, inserted_layer_counter, target_channel): + projection_layers = [] + if inserted_layer_counter >= 0: + use_bias = False if self._apply_batch_norm else True + projection_layers.append(keras.Conv2D( + target_channel, [1, 1], strides=1, padding='SAME', + name='ProjectionLayer/conv2d_{}'.format(inserted_layer_counter), + **self._conv_hyperparams.params(use_bias=use_bias))) + if self._apply_batch_norm: + projection_layers.append(self._conv_hyperparams.build_batch_norm( + training=(self._is_training and not self._freeze_batchnorm), + name='ProjectionLayer/conv2d_{}/BatchNorm'.format( + inserted_layer_counter))) + inserted_layer_counter += 1 + return inserted_layer_counter, projection_layers + + def _compute_base_tower(self, tower_name_scope, feature_index): + conv_layers = [] + batch_norm_layers = [] + activation_layers = [] + use_bias = False if self._apply_batch_norm else True + for additional_conv_layer_idx in range(self._num_layers_before_predictor): + layer_name = '{}/conv2d_{}'.format( + tower_name_scope, additional_conv_layer_idx) + if tower_name_scope not in self._head_scope_conv_layers: + if self._use_depthwise: + kwargs = self._conv_hyperparams.params(use_bias=use_bias) + # Both the regularizer and initializer apply to the depthwise layer, + # so we remap the kernel_* to depthwise_* here. 
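+          # (Note: only the depthwise kernel picks up the configured
+          # initializer and regularizer; the pointwise 1x1 part of the
+          # separable conv is left at its Keras defaults here.)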
+          kwargs['depthwise_regularizer'] = kwargs['kernel_regularizer']
+          kwargs['depthwise_initializer'] = kwargs['kernel_initializer']
+          conv_layers.append(
+              tf.keras.layers.SeparableConv2D(
+                  self._depth, [self._kernel_size, self._kernel_size],
+                  padding='SAME',
+                  name=layer_name,
+                  **kwargs))
+        else:
+          conv_layers.append(
+              tf.keras.layers.Conv2D(
+                  self._depth,
+                  [self._kernel_size, self._kernel_size],
+                  padding='SAME',
+                  name=layer_name,
+                  **self._conv_hyperparams.params(use_bias=use_bias)))
+      # Each feature map gets a separate batch norm layer even though the
+      # convolution weights are shared across feature maps.
+      if self._apply_batch_norm:
+        batch_norm_layers.append(self._conv_hyperparams.build_batch_norm(
+            training=(self._is_training and not self._freeze_batchnorm),
+            name='{}/conv2d_{}/BatchNorm/feature_{}'.format(
+                tower_name_scope, additional_conv_layer_idx, feature_index)))
+      activation_layers.append(tf.keras.layers.Lambda(tf.nn.relu6))
+
+    # Reuse the conv layers already built for another feature map when the
+    # same tower_name_scope has been registered before.
+    if tower_name_scope in self._head_scope_conv_layers:
+      conv_layers = self._head_scope_conv_layers[tower_name_scope]
+
+    # Stack the base tower layers in the order conv_layer, batch_norm_layer,
+    # activation_layer.
+    base_tower_layers = []
+    for i in range(self._num_layers_before_predictor):
+      base_tower_layers.append(conv_layers[i])
+      if self._apply_batch_norm:
+        base_tower_layers.append(batch_norm_layers[i])
+      base_tower_layers.append(activation_layers[i])
+    return conv_layers, base_tower_layers
+
+  def build(self, input_shapes):
+    """Creates the variables of the layer."""
+    feature_channels = [
+        shape_utils.get_dim_as_int(input_shape[3])
+        for input_shape in input_shapes
+    ]
+    has_different_feature_channels = len(set(feature_channels)) > 1
+    if has_different_feature_channels:
+      inserted_layer_counter = 0
+      target_channel = max(set(feature_channels), key=feature_channels.count)
+      tf.logging.info('Not all feature maps have the same number of '
+                      'channels, found: {}, appending additional projection '
+                      'layers to bring all feature maps to uniformly have {} '
+                      'channels.'.format(feature_channels, target_channel))
+    else:
+      # Placeholder values when has_different_feature_channels is False.
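+      # A negative inserted_layer_counter disables the projection path:
+      # _insert_additional_projection_layer returns an empty layer list
+      # unless the counter is >= 0. With mixed depths such as [256, 256, 64],
+      # the branch above would instead pick target_channel = 256 (the most
+      # common channel count) and start the counter at 0.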
+      target_channel = -1
+      inserted_layer_counter = -1
+
+    def _build_layers(tower_name_scope, feature_index):
+      conv_layers, base_tower_layers = self._compute_base_tower(
+          tower_name_scope=tower_name_scope, feature_index=feature_index)
+      if tower_name_scope not in self._head_scope_conv_layers:
+        self._head_scope_conv_layers[tower_name_scope] = conv_layers
+      return base_tower_layers
+
+    for feature_index, input_shape in enumerate(input_shapes):
+      # Additional projection layers should not be shared because input
+      # channels (and thus weight shapes) differ across feature maps.
+      inserted_layer_counter, projection_layers = (
+          self._insert_additional_projection_layer(
+              inserted_layer_counter, target_channel))
+      self._additional_projection_layers.append(projection_layers)
+
+      if self._share_prediction_tower:
+        box_tower_scope = 'PredictionTower'
+      else:
+        box_tower_scope = 'BoxPredictionTower'
+      # Build the base of the box tower.
+      box_tower_layers = _build_layers(box_tower_scope, feature_index)
+      self._base_tower_layers_for_heads[BOX_ENCODINGS].append(box_tower_layers)
+
+      for head_name in self._sorted_head_names:
+        if head_name == CLASS_PREDICTIONS_WITH_BACKGROUND:
+          tower_name_scope = 'ClassPredictionTower'
+        else:
+          tower_name_scope = '{}PredictionTower'.format(head_name)
+        head_tower_layers = _build_layers(tower_name_scope, feature_index)
+        self._base_tower_layers_for_heads[head_name].append(head_tower_layers)
+
+    self.built = True
+
+  def _predict(self, image_features, **kwargs):
+    """Computes encoded object locations and corresponding confidences.
+
+    Args:
+      image_features: A list of float tensors of shape [batch_size, height_i,
+        width_i, channels_i] containing features for a batch of images.
+      **kwargs: Unused keyword arguments.
+
+    Returns:
+      box_encodings: A list of float tensors of shape
+        [batch_size, num_anchors_i, q, code_size] representing the location of
+        the objects, where q is 1 or the number of classes. Each entry in the
+        list corresponds to a feature map in the input `image_features` list.
+      class_predictions_with_background: A list of float tensors of shape
+        [batch_size, num_anchors_i, num_classes + 1] representing the class
+        predictions for the proposals. Each entry in the list corresponds to a
+        feature map in the input `image_features` list.
+    """
+    predictions = collections.defaultdict(list)
+
+    def _apply_layers(base_tower_layers, image_feature):
+      for layer in base_tower_layers:
+        image_feature = layer(image_feature)
+      return image_feature
+
+    for (index, image_feature) in enumerate(image_features):
+      # Apply additional projection layers to the image features.
+      for layer in self._additional_projection_layers[index]:
+        image_feature = layer(image_feature)
+
+      # Apply box tower layers.
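+      # The box tower output is computed before the loop over the remaining
+      # heads so that, when share_prediction_tower is True, those heads can
+      # reuse box_tower_feature instead of running a separate tower.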
+ box_tower_feature = _apply_layers( + self._base_tower_layers_for_heads[BOX_ENCODINGS][index], + image_feature) + box_encodings = self._box_prediction_head(box_tower_feature) + predictions[BOX_ENCODINGS].append(box_encodings) + + for head_name in self._sorted_head_names: + head_obj = self._prediction_heads[head_name] + if self._share_prediction_tower: + head_tower_feature = box_tower_feature + else: + head_tower_feature = _apply_layers( + self._base_tower_layers_for_heads[head_name][index], + image_feature) + prediction = head_obj(head_tower_feature) + predictions[head_name].append(prediction) + return predictions diff --git a/predictors/convolutional_keras_box_predictor_test.py b/predictors/convolutional_keras_box_predictor_test.py new file mode 100644 index 0000000..c3ad839 --- /dev/null +++ b/predictors/convolutional_keras_box_predictor_test.py @@ -0,0 +1,908 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for object_detection.predictors.convolutional_keras_box_predictor.""" +import numpy as np +import tensorflow as tf + +from google.protobuf import text_format +from object_detection.builders import box_predictor_builder +from object_detection.builders import hyperparams_builder +from object_detection.predictors import convolutional_keras_box_predictor as box_predictor +from object_detection.predictors.heads import keras_box_head +from object_detection.predictors.heads import keras_class_head +from object_detection.predictors.heads import keras_mask_head +from object_detection.protos import hyperparams_pb2 +from object_detection.utils import test_case + + +class ConvolutionalKerasBoxPredictorTest(test_case.TestCase): + + def _build_conv_hyperparams(self): + conv_hyperparams = hyperparams_pb2.Hyperparams() + conv_hyperparams_text_proto = """ + activation: RELU_6 + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) + return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams) + + def test_get_boxes_for_five_aspect_ratios_per_location(self): + def graph_fn(image_features): + conv_box_predictor = ( + box_predictor_builder.build_convolutional_keras_box_predictor( + is_training=False, + num_classes=0, + conv_hyperparams=self._build_conv_hyperparams(), + freeze_batchnorm=False, + inplace_batchnorm_update=False, + num_predictions_per_location_list=[5], + min_depth=0, + max_depth=32, + num_layers_before_predictor=1, + use_dropout=True, + dropout_keep_prob=0.8, + kernel_size=1, + box_code_size=4 + )) + box_predictions = conv_box_predictor([image_features]) + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + objectness_predictions = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, objectness_predictions) + image_features 
= np.random.rand(4, 8, 8, 64).astype(np.float32) + (box_encodings, objectness_predictions) = self.execute(graph_fn, + [image_features]) + self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4]) + self.assertAllEqual(objectness_predictions.shape, [4, 320, 1]) + + def test_get_boxes_for_one_aspect_ratio_per_location(self): + def graph_fn(image_features): + conv_box_predictor = ( + box_predictor_builder.build_convolutional_keras_box_predictor( + is_training=False, + num_classes=0, + conv_hyperparams=self._build_conv_hyperparams(), + freeze_batchnorm=False, + inplace_batchnorm_update=False, + num_predictions_per_location_list=[1], + min_depth=0, + max_depth=32, + num_layers_before_predictor=1, + use_dropout=True, + dropout_keep_prob=0.8, + kernel_size=1, + box_code_size=4 + )) + box_predictions = conv_box_predictor([image_features]) + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + objectness_predictions = tf.concat(box_predictions[ + box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) + return (box_encodings, objectness_predictions) + image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) + (box_encodings, objectness_predictions) = self.execute(graph_fn, + [image_features]) + self.assertAllEqual(box_encodings.shape, [4, 64, 1, 4]) + self.assertAllEqual(objectness_predictions.shape, [4, 64, 1]) + + def test_get_multi_class_predictions_for_five_aspect_ratios_per_location( + self): + num_classes_without_background = 6 + image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) + def graph_fn(image_features): + conv_box_predictor = ( + box_predictor_builder.build_convolutional_keras_box_predictor( + is_training=False, + num_classes=num_classes_without_background, + conv_hyperparams=self._build_conv_hyperparams(), + freeze_batchnorm=False, + inplace_batchnorm_update=False, + num_predictions_per_location_list=[5], + min_depth=0, + max_depth=32, + num_layers_before_predictor=1, + use_dropout=True, + dropout_keep_prob=0.8, + kernel_size=1, + box_code_size=4 + )) + box_predictions = conv_box_predictor([image_features]) + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, class_predictions_with_background) + (box_encodings, + class_predictions_with_background) = self.execute(graph_fn, + [image_features]) + self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4]) + self.assertAllEqual(class_predictions_with_background.shape, + [4, 320, num_classes_without_background+1]) + + def test_get_predictions_with_feature_maps_of_dynamic_shape( + self): + image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64]) + conv_box_predictor = ( + box_predictor_builder.build_convolutional_keras_box_predictor( + is_training=False, + num_classes=0, + conv_hyperparams=self._build_conv_hyperparams(), + freeze_batchnorm=False, + inplace_batchnorm_update=False, + num_predictions_per_location_list=[5], + min_depth=0, + max_depth=32, + num_layers_before_predictor=1, + use_dropout=True, + dropout_keep_prob=0.8, + kernel_size=1, + box_code_size=4 + )) + box_predictions = conv_box_predictor([image_features]) + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + objectness_predictions = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + init_op = tf.global_variables_initializer() + + resolution = 32 + 
expected_num_anchors = resolution*resolution*5 + with self.test_session() as sess: + sess.run(init_op) + (box_encodings_shape, + objectness_predictions_shape) = sess.run( + [tf.shape(box_encodings), tf.shape(objectness_predictions)], + feed_dict={image_features: + np.random.rand(4, resolution, resolution, 64)}) + actual_variable_set = set( + [var.op.name for var in tf.trainable_variables()]) + self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4]) + self.assertAllEqual(objectness_predictions_shape, + [4, expected_num_anchors, 1]) + expected_variable_set = set([ + 'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/bias', + 'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/kernel', + 'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/bias', + 'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/kernel', + 'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor/bias', + 'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor/kernel']) + self.assertEqual(expected_variable_set, actual_variable_set) + self.assertEqual(conv_box_predictor._sorted_head_names, + ['box_encodings', 'class_predictions_with_background']) + + def test_use_depthwise_convolution(self): + image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64]) + conv_box_predictor = ( + box_predictor_builder.build_convolutional_keras_box_predictor( + is_training=False, + num_classes=0, + conv_hyperparams=self._build_conv_hyperparams(), + freeze_batchnorm=False, + inplace_batchnorm_update=False, + num_predictions_per_location_list=[5], + min_depth=0, + max_depth=32, + num_layers_before_predictor=1, + use_dropout=True, + dropout_keep_prob=0.8, + kernel_size=3, + box_code_size=4, + use_depthwise=True + )) + box_predictions = conv_box_predictor([image_features]) + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + objectness_predictions = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + init_op = tf.global_variables_initializer() + + resolution = 32 + expected_num_anchors = resolution*resolution*5 + with self.test_session() as sess: + sess.run(init_op) + (box_encodings_shape, + objectness_predictions_shape) = sess.run( + [tf.shape(box_encodings), tf.shape(objectness_predictions)], + feed_dict={image_features: + np.random.rand(4, resolution, resolution, 64)}) + actual_variable_set = set( + [var.op.name for var in tf.trainable_variables()]) + self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4]) + self.assertAllEqual(objectness_predictions_shape, + [4, expected_num_anchors, 1]) + expected_variable_set = set([ + 'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/bias', + 'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/kernel', + + 'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor_depthwise/' + 'bias', + + 'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor_depthwise/' + 'depthwise_kernel', + + 'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/bias', + 'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/kernel', + 'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor_depthwise/bias', + + 'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor_depthwise/' + 'depthwise_kernel', + + 'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor/bias', + 'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor/kernel']) + self.assertEqual(expected_variable_set, actual_variable_set) + self.assertEqual(conv_box_predictor._sorted_head_names, + ['box_encodings', 
'class_predictions_with_background']) + + +class WeightSharedConvolutionalKerasBoxPredictorTest(test_case.TestCase): + + def _build_conv_hyperparams(self, add_batch_norm=True): + conv_hyperparams = hyperparams_pb2.Hyperparams() + conv_hyperparams_text_proto = """ + activation: RELU_6 + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + stddev: 0.01 + mean: 0.0 + } + } + """ + if add_batch_norm: + batch_norm_proto = """ + batch_norm { + train: true, + } + """ + conv_hyperparams_text_proto += batch_norm_proto + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) + return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams) + + # pylint: disable=line-too-long + def test_get_boxes_for_five_aspect_ratios_per_location(self): + + def graph_fn(image_features): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor( + is_training=False, + num_classes=0, + conv_hyperparams=self._build_conv_hyperparams(), + freeze_batchnorm=False, + inplace_batchnorm_update=False, + num_predictions_per_location_list=[5], + depth=32, + num_layers_before_predictor=1, + box_code_size=4)) + box_predictions = conv_box_predictor([image_features]) + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + objectness_predictions = tf.concat(box_predictions[ + box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) + return (box_encodings, objectness_predictions) + image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) + (box_encodings, objectness_predictions) = self.execute( + graph_fn, [image_features]) + self.assertAllEqual(box_encodings.shape, [4, 320, 4]) + self.assertAllEqual(objectness_predictions.shape, [4, 320, 1]) + + def test_bias_predictions_to_background_with_sigmoid_score_conversion(self): + + def graph_fn(image_features): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor( + is_training=True, + num_classes=2, + conv_hyperparams=self._build_conv_hyperparams(), + freeze_batchnorm=False, + inplace_batchnorm_update=False, + num_predictions_per_location_list=[5], + depth=32, + num_layers_before_predictor=1, + class_prediction_bias_init=-4.6, + box_code_size=4)) + box_predictions = conv_box_predictor([image_features]) + class_predictions = tf.concat(box_predictions[ + box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) + return (tf.nn.sigmoid(class_predictions),) + + image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) + class_predictions = self.execute(graph_fn, [image_features]) + self.assertAlmostEqual(np.mean(class_predictions), 0.01, places=3) + + def test_get_multi_class_predictions_for_five_aspect_ratios_per_location( + self): + + num_classes_without_background = 6 + def graph_fn(image_features): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor( + is_training=False, + num_classes=num_classes_without_background, + conv_hyperparams=self._build_conv_hyperparams(), + freeze_batchnorm=False, + inplace_batchnorm_update=False, + num_predictions_per_location_list=[5], + depth=32, + num_layers_before_predictor=1, + box_code_size=4)) + box_predictions = conv_box_predictor([image_features]) + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat(box_predictions[ + box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) + return (box_encodings, class_predictions_with_background) 
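+    # An 8x8 feature map with 5 predictions per location gives
+    # 8 * 8 * 5 = 320 anchors, which the shape checks below rely on.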
+ + image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) + (box_encodings, class_predictions_with_background) = self.execute( + graph_fn, [image_features]) + self.assertAllEqual(box_encodings.shape, [4, 320, 4]) + self.assertAllEqual(class_predictions_with_background.shape, + [4, 320, num_classes_without_background+1]) + + def test_get_multi_class_predictions_from_two_feature_maps( + self): + + num_classes_without_background = 6 + def graph_fn(image_features1, image_features2): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor( + is_training=False, + num_classes=num_classes_without_background, + conv_hyperparams=self._build_conv_hyperparams(), + freeze_batchnorm=False, + inplace_batchnorm_update=False, + num_predictions_per_location_list=[5, 5], + depth=32, + num_layers_before_predictor=1, + box_code_size=4)) + box_predictions = conv_box_predictor([image_features1, image_features2]) + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, class_predictions_with_background) + + image_features1 = np.random.rand(4, 8, 8, 64).astype(np.float32) + image_features2 = np.random.rand(4, 8, 8, 64).astype(np.float32) + (box_encodings, class_predictions_with_background) = self.execute( + graph_fn, [image_features1, image_features2]) + self.assertAllEqual(box_encodings.shape, [4, 640, 4]) + self.assertAllEqual(class_predictions_with_background.shape, + [4, 640, num_classes_without_background+1]) + + def test_get_multi_class_predictions_from_feature_maps_of_different_depth( + self): + + num_classes_without_background = 6 + def graph_fn(image_features1, image_features2, image_features3): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor( + is_training=False, + num_classes=num_classes_without_background, + conv_hyperparams=self._build_conv_hyperparams(), + freeze_batchnorm=False, + inplace_batchnorm_update=False, + num_predictions_per_location_list=[5, 5, 5], + depth=32, + num_layers_before_predictor=1, + box_code_size=4)) + box_predictions = conv_box_predictor( + [image_features1, image_features2, image_features3]) + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, class_predictions_with_background) + + image_features1 = np.random.rand(4, 8, 8, 64).astype(np.float32) + image_features2 = np.random.rand(4, 8, 8, 64).astype(np.float32) + image_features3 = np.random.rand(4, 8, 8, 32).astype(np.float32) + (box_encodings, class_predictions_with_background) = self.execute( + graph_fn, [image_features1, image_features2, image_features3]) + self.assertAllEqual(box_encodings.shape, [4, 960, 4]) + self.assertAllEqual(class_predictions_with_background.shape, + [4, 960, num_classes_without_background+1]) + + def test_predictions_multiple_feature_maps_share_weights_separate_batchnorm( + self): + num_classes_without_background = 6 + def graph_fn(image_features1, image_features2): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor( + is_training=False, + num_classes=num_classes_without_background, + conv_hyperparams=self._build_conv_hyperparams(), + freeze_batchnorm=False, + 
inplace_batchnorm_update=False, + num_predictions_per_location_list=[5, 5], + depth=32, + num_layers_before_predictor=2, + box_code_size=4)) + box_predictions = conv_box_predictor([image_features1, image_features2]) + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, class_predictions_with_background) + + with self.test_session(graph=tf.Graph()): + graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), + tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) + actual_variable_set = set( + [var.op.name for var in tf.trainable_variables()]) + expected_variable_set = set([ + # Box prediction tower + ('WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_0/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_0/BatchNorm/feature_0/beta'), + ('WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_0/BatchNorm/feature_1/beta'), + ('WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_1/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_1/BatchNorm/feature_0/beta'), + ('WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_1/BatchNorm/feature_1/beta'), + # Box prediction head + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalBoxHead/BoxPredictor/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalBoxHead/BoxPredictor/bias'), + # Class prediction tower + ('WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_0/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_0/BatchNorm/feature_0/beta'), + ('WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_0/BatchNorm/feature_1/beta'), + ('WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_1/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_1/BatchNorm/feature_0/beta'), + ('WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_1/BatchNorm/feature_1/beta'), + # Class prediction head + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalClassHead/ClassPredictor/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalClassHead/ClassPredictor/bias')]) + self.assertEqual(expected_variable_set, actual_variable_set) + + def test_predictions_multiple_feature_maps_share_weights_without_batchnorm( + self): + num_classes_without_background = 6 + def graph_fn(image_features1, image_features2): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor( + is_training=False, + num_classes=num_classes_without_background, + conv_hyperparams=self._build_conv_hyperparams(), + freeze_batchnorm=False, + inplace_batchnorm_update=False, + num_predictions_per_location_list=[5, 5], + depth=32, + num_layers_before_predictor=2, + box_code_size=4, + apply_batch_norm=False)) + box_predictions = conv_box_predictor([image_features1, image_features2]) + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, class_predictions_with_background) + + with self.test_session(graph=tf.Graph()): + graph_fn(tf.random_uniform([4, 32, 32, 
3], dtype=tf.float32), + tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) + actual_variable_set = set( + [var.op.name for var in tf.trainable_variables()]) + expected_variable_set = set([ + # Box prediction tower + ('WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_0/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_0/bias'), + ('WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_1/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_1/bias'), + # Box prediction head + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalBoxHead/BoxPredictor/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalBoxHead/BoxPredictor/bias'), + # Class prediction tower + ('WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_0/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_0/bias'), + ('WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_1/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_1/bias'), + # Class prediction head + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalClassHead/ClassPredictor/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalClassHead/ClassPredictor/bias')]) + self.assertEqual(expected_variable_set, actual_variable_set) + + def test_predictions_multiple_feature_maps_share_weights_with_depthwise( + self): + num_classes_without_background = 6 + def graph_fn(image_features1, image_features2): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor( + is_training=False, + num_classes=num_classes_without_background, + conv_hyperparams=self._build_conv_hyperparams( + add_batch_norm=False), + freeze_batchnorm=False, + inplace_batchnorm_update=False, + num_predictions_per_location_list=[5, 5], + depth=32, + num_layers_before_predictor=2, + box_code_size=4, + apply_batch_norm=False, + use_depthwise=True)) + box_predictions = conv_box_predictor([image_features1, image_features2]) + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, class_predictions_with_background) + + with self.test_session(graph=tf.Graph()): + graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), + tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) + actual_variable_set = set( + [var.op.name for var in tf.trainable_variables()]) + expected_variable_set = set([ + # Box prediction tower + ('WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_0/depthwise_kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_0/pointwise_kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_0/bias'), + ('WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_1/depthwise_kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_1/pointwise_kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_1/bias'), + # Box prediction head + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalBoxHead/BoxPredictor/depthwise_kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalBoxHead/BoxPredictor/pointwise_kernel'), + 
('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalBoxHead/BoxPredictor/bias'), + # Class prediction tower + ('WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_0/depthwise_kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_0/pointwise_kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_0/bias'), + ('WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_1/depthwise_kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_1/pointwise_kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_1/bias'), + # Class prediction head + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalClassHead/ClassPredictor/depthwise_kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalClassHead/ClassPredictor/pointwise_kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalClassHead/ClassPredictor/bias')]) + self.assertEqual(expected_variable_set, actual_variable_set) + + def test_no_batchnorm_params_when_batchnorm_is_not_configured(self): + num_classes_without_background = 6 + def graph_fn(image_features1, image_features2): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor( + is_training=False, + num_classes=num_classes_without_background, + conv_hyperparams=self._build_conv_hyperparams( + add_batch_norm=False), + freeze_batchnorm=False, + inplace_batchnorm_update=False, + num_predictions_per_location_list=[5, 5], + depth=32, + num_layers_before_predictor=2, + box_code_size=4, + apply_batch_norm=False)) + box_predictions = conv_box_predictor( + [image_features1, image_features2]) + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, class_predictions_with_background) + + with self.test_session(graph=tf.Graph()): + graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), + tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) + actual_variable_set = set( + [var.op.name for var in tf.trainable_variables()]) + expected_variable_set = set([ + # Box prediction tower + ('WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_0/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_0/bias'), + ('WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_1/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'BoxPredictionTower/conv2d_1/bias'), + # Box prediction head + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalBoxHead/BoxPredictor/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalBoxHead/BoxPredictor/bias'), + # Class prediction tower + ('WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_0/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_0/bias'), + ('WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_1/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'ClassPredictionTower/conv2d_1/bias'), + # Class prediction head + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalClassHead/ClassPredictor/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalClassHead/ClassPredictor/bias')]) + 
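+    # No BatchNorm betas appear above: batch norm was not configured, so
+    # only convolution kernels and biases should have been created.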
self.assertEqual(expected_variable_set, actual_variable_set) + + def test_predictions_share_weights_share_tower_separate_batchnorm( + self): + num_classes_without_background = 6 + def graph_fn(image_features1, image_features2): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor( + is_training=False, + num_classes=num_classes_without_background, + conv_hyperparams=self._build_conv_hyperparams(), + freeze_batchnorm=False, + inplace_batchnorm_update=False, + num_predictions_per_location_list=[5, 5], + depth=32, + num_layers_before_predictor=2, + box_code_size=4, + share_prediction_tower=True)) + box_predictions = conv_box_predictor( + [image_features1, image_features2]) + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, class_predictions_with_background) + + with self.test_session(graph=tf.Graph()): + graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), + tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) + actual_variable_set = set( + [var.op.name for var in tf.trainable_variables()]) + expected_variable_set = set([ + # Shared prediction tower + ('WeightSharedConvolutionalBoxPredictor/' + 'PredictionTower/conv2d_0/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'PredictionTower/conv2d_0/BatchNorm/feature_0/beta'), + ('WeightSharedConvolutionalBoxPredictor/' + 'PredictionTower/conv2d_0/BatchNorm/feature_1/beta'), + ('WeightSharedConvolutionalBoxPredictor/' + 'PredictionTower/conv2d_1/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'PredictionTower/conv2d_1/BatchNorm/feature_0/beta'), + ('WeightSharedConvolutionalBoxPredictor/' + 'PredictionTower/conv2d_1/BatchNorm/feature_1/beta'), + # Box prediction head + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalBoxHead/BoxPredictor/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalBoxHead/BoxPredictor/bias'), + # Class prediction head + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalClassHead/ClassPredictor/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalClassHead/ClassPredictor/bias')]) + self.assertEqual(expected_variable_set, actual_variable_set) + + def test_predictions_share_weights_share_tower_without_batchnorm( + self): + num_classes_without_background = 6 + def graph_fn(image_features1, image_features2): + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor( + is_training=False, + num_classes=num_classes_without_background, + conv_hyperparams=self._build_conv_hyperparams( + add_batch_norm=False), + freeze_batchnorm=False, + inplace_batchnorm_update=False, + num_predictions_per_location_list=[5, 5], + depth=32, + num_layers_before_predictor=2, + box_code_size=4, + share_prediction_tower=True, + apply_batch_norm=False)) + box_predictions = conv_box_predictor( + [image_features1, image_features2]) + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, class_predictions_with_background) + + with self.test_session(graph=tf.Graph()): + graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), + tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) + 
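+      # Building the predictor over both feature maps creates its variables;
+      # with share_prediction_tower=True a single shared tower plus the two
+      # heads is expected below.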
actual_variable_set = set( + [var.op.name for var in tf.trainable_variables()]) + expected_variable_set = set([ + # Shared prediction tower + ('WeightSharedConvolutionalBoxPredictor/' + 'PredictionTower/conv2d_0/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'PredictionTower/conv2d_0/bias'), + ('WeightSharedConvolutionalBoxPredictor/' + 'PredictionTower/conv2d_1/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'PredictionTower/conv2d_1/bias'), + # Box prediction head + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalBoxHead/BoxPredictor/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalBoxHead/BoxPredictor/bias'), + # Class prediction head + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalClassHead/ClassPredictor/kernel'), + ('WeightSharedConvolutionalBoxPredictor/' + 'WeightSharedConvolutionalClassHead/ClassPredictor/bias')]) + + self.assertEqual(expected_variable_set, actual_variable_set) + + def test_get_predictions_with_feature_maps_of_dynamic_shape( + self): + image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64]) + conv_box_predictor = ( + box_predictor_builder.build_weight_shared_convolutional_keras_box_predictor( + is_training=False, + num_classes=0, + conv_hyperparams=self._build_conv_hyperparams(), + freeze_batchnorm=False, + inplace_batchnorm_update=False, + num_predictions_per_location_list=[5], + depth=32, + num_layers_before_predictor=1, + box_code_size=4)) + box_predictions = conv_box_predictor([image_features]) + box_encodings = tf.concat(box_predictions[box_predictor.BOX_ENCODINGS], + axis=1) + objectness_predictions = tf.concat(box_predictions[ + box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) + init_op = tf.global_variables_initializer() + + resolution = 32 + expected_num_anchors = resolution*resolution*5 + with self.test_session() as sess: + sess.run(init_op) + (box_encodings_shape, + objectness_predictions_shape) = sess.run( + [tf.shape(box_encodings), tf.shape(objectness_predictions)], + feed_dict={image_features: + np.random.rand(4, resolution, resolution, 64)}) + self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 4]) + self.assertAllEqual(objectness_predictions_shape, + [4, expected_num_anchors, 1]) + + def test_other_heads_predictions(self): + box_code_size = 4 + num_classes_without_background = 3 + other_head_name = 'Mask' + mask_height = 5 + mask_width = 5 + num_predictions_per_location = 5 + + def graph_fn(image_features): + box_prediction_head = keras_box_head.WeightSharedConvolutionalBoxHead( + box_code_size=box_code_size, + conv_hyperparams=self._build_conv_hyperparams(), + num_predictions_per_location=num_predictions_per_location) + class_prediction_head = keras_class_head.WeightSharedConvolutionalClassHead( + num_class_slots=num_classes_without_background + 1, + conv_hyperparams=self._build_conv_hyperparams(), + num_predictions_per_location=num_predictions_per_location) + other_heads = { + other_head_name: + keras_mask_head.WeightSharedConvolutionalMaskHead( + num_classes=num_classes_without_background, + conv_hyperparams=self._build_conv_hyperparams(), + num_predictions_per_location=num_predictions_per_location, + mask_height=mask_height, + mask_width=mask_width) + } + + conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor( + is_training=False, + num_classes=num_classes_without_background, + box_prediction_head=box_prediction_head, + class_prediction_head=class_prediction_head, + 
other_heads=other_heads, + conv_hyperparams=self._build_conv_hyperparams(), + freeze_batchnorm=False, + inplace_batchnorm_update=False, + depth=32, + num_layers_before_predictor=2) + box_predictions = conv_box_predictor([image_features]) + for key, value in box_predictions.items(): + box_predictions[key] = tf.concat(value, axis=1) + assert len(box_predictions) == 3 + return (box_predictions[box_predictor.BOX_ENCODINGS], + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + box_predictions[other_head_name]) + + batch_size = 4 + feature_ht = 8 + feature_wt = 8 + image_features = np.random.rand(batch_size, feature_ht, feature_wt, + 64).astype(np.float32) + (box_encodings, class_predictions, other_head_predictions) = self.execute( + graph_fn, [image_features]) + num_anchors = feature_ht * feature_wt * num_predictions_per_location + self.assertAllEqual(box_encodings.shape, + [batch_size, num_anchors, box_code_size]) + self.assertAllEqual( + class_predictions.shape, + [batch_size, num_anchors, num_classes_without_background + 1]) + self.assertAllEqual(other_head_predictions.shape, [ + batch_size, num_anchors, num_classes_without_background, mask_height, + mask_width + ]) + +if __name__ == '__main__': + tf.test.main() diff --git a/predictors/heads/box_head.py b/predictors/heads/box_head.py new file mode 100644 index 0000000..41f6355 --- /dev/null +++ b/predictors/heads/box_head.py @@ -0,0 +1,282 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Box Head. + +Contains Box prediction head classes for different meta architectures. +All the box prediction heads have a predict function that receives the +`features` as the first argument and returns `box_encodings`. +""" +import functools +import tensorflow as tf +from tensorflow.contrib import slim as contrib_slim + +from object_detection.predictors.heads import head + +slim = contrib_slim + + +class MaskRCNNBoxHead(head.Head): + """Box prediction head. + + Please refer to Mask RCNN paper: + https://arxiv.org/abs/1703.06870 + """ + + def __init__(self, + is_training, + num_classes, + fc_hyperparams_fn, + use_dropout, + dropout_keep_prob, + box_code_size, + share_box_across_classes=False): + """Constructor. + + Args: + is_training: Indicates whether the BoxPredictor is in training mode. + num_classes: number of classes. Note that num_classes *does not* + include the background category, so if groundtruth labels take values + in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the + assigned classification targets can range from {0,... K}). + fc_hyperparams_fn: A function to generate tf-slim arg_scope with + hyperparameters for fully connected ops. + use_dropout: Option to use dropout or not. Note that a single dropout + op is applied here prior to both box and class predictions, which stands + in contrast to the ConvolutionalBoxPredictor below. 
+      dropout_keep_prob: Keep probability for dropout.
+        This is only used if use_dropout is True.
+      box_code_size: Size of encoding for each box.
+      share_box_across_classes: Whether to share boxes across classes rather
+        than use a different box for each class.
+    """
+    super(MaskRCNNBoxHead, self).__init__()
+    self._is_training = is_training
+    self._num_classes = num_classes
+    self._fc_hyperparams_fn = fc_hyperparams_fn
+    self._use_dropout = use_dropout
+    self._dropout_keep_prob = dropout_keep_prob
+    self._box_code_size = box_code_size
+    self._share_box_across_classes = share_box_across_classes
+
+  def predict(self, features, num_predictions_per_location=1):
+    """Predicts boxes.
+
+    Args:
+      features: A float tensor of shape [batch_size, height, width,
+        channels] containing features for a batch of images.
+      num_predictions_per_location: Int containing number of predictions per
+        location.
+
+    Returns:
+      box_encodings: A float tensor of shape
+        [batch_size, 1, num_classes, code_size] representing the location of
+        the objects.
+
+    Raises:
+      ValueError: If num_predictions_per_location is not 1.
+    """
+    if num_predictions_per_location != 1:
+      raise ValueError('Only num_predictions_per_location=1 is supported')
+    spatial_averaged_roi_pooled_features = tf.reduce_mean(
+        features, [1, 2], keep_dims=True, name='AvgPool')
+    flattened_roi_pooled_features = slim.flatten(
+        spatial_averaged_roi_pooled_features)
+    if self._use_dropout:
+      flattened_roi_pooled_features = slim.dropout(
+          flattened_roi_pooled_features,
+          keep_prob=self._dropout_keep_prob,
+          is_training=self._is_training)
+    number_of_boxes = 1
+    if not self._share_box_across_classes:
+      number_of_boxes = self._num_classes
+
+    with slim.arg_scope(self._fc_hyperparams_fn()):
+      box_encodings = slim.fully_connected(
+          flattened_roi_pooled_features,
+          number_of_boxes * self._box_code_size,
+          activation_fn=None,
+          scope='BoxEncodingPredictor')
+    box_encodings = tf.reshape(box_encodings,
+                               [-1, 1, number_of_boxes, self._box_code_size])
+    return box_encodings
+
+
+class ConvolutionalBoxHead(head.Head):
+  """Convolutional box prediction head."""
+
+  def __init__(self,
+               is_training,
+               box_code_size,
+               kernel_size,
+               use_depthwise=False,
+               box_encodings_clip_range=None):
+    """Constructor.
+
+    Args:
+      is_training: Indicates whether the BoxPredictor is in training mode.
+      box_code_size: Size of encoding for each box.
+      kernel_size: Size of final convolution kernel. If the
+        spatial resolution of the feature map is smaller than the kernel size,
+        then the kernel size is automatically set to be
+        min(feature_width, feature_height).
+      use_depthwise: Whether to use depthwise convolutions for prediction
+        steps. Default is False.
+      box_encodings_clip_range: Min and max values for clipping box_encodings.
+
+    Raises:
+      ValueError: if use_depthwise is True and kernel_size is 1.
+    """
+    if use_depthwise and (kernel_size == 1):
+      raise ValueError('Should not use 1x1 kernel when using depthwise conv')
+
+    super(ConvolutionalBoxHead, self).__init__()
+    self._is_training = is_training
+    self._box_code_size = box_code_size
+    self._kernel_size = kernel_size
+    self._use_depthwise = use_depthwise
+    self._box_encodings_clip_range = box_encodings_clip_range
+
+  def predict(self, features, num_predictions_per_location):
+    """Predicts boxes.
+
+    Args:
+      features: A float tensor of shape [batch_size, height, width, channels]
+        containing image features.
+      num_predictions_per_location: Number of box predictions to be made per
+        spatial location. Int specifying number of boxes per location.
+
+    Returns:
+      box_encodings: A float tensor of shape
+        [batch_size, num_anchors, q, code_size] representing the location of
+        the objects, where q is 1 or the number of classes.
+    """
+    net = features
+    if self._use_depthwise:
+      box_encodings = slim.separable_conv2d(
+          net, None, [self._kernel_size, self._kernel_size],
+          padding='SAME', depth_multiplier=1, stride=1,
+          rate=1, scope='BoxEncodingPredictor_depthwise')
+      box_encodings = slim.conv2d(
+          box_encodings,
+          num_predictions_per_location * self._box_code_size, [1, 1],
+          activation_fn=None,
+          normalizer_fn=None,
+          normalizer_params=None,
+          scope='BoxEncodingPredictor')
+    else:
+      box_encodings = slim.conv2d(
+          net, num_predictions_per_location * self._box_code_size,
+          [self._kernel_size, self._kernel_size],
+          activation_fn=None,
+          normalizer_fn=None,
+          normalizer_params=None,
+          scope='BoxEncodingPredictor')
+    batch_size = features.get_shape().as_list()[0]
+    if batch_size is None:
+      batch_size = tf.shape(features)[0]
+    # Clipping the box encodings to make the inference graph TPU friendly.
+    if self._box_encodings_clip_range is not None:
+      box_encodings = tf.clip_by_value(
+          box_encodings, self._box_encodings_clip_range.min,
+          self._box_encodings_clip_range.max)
+    box_encodings = tf.reshape(box_encodings,
+                               [batch_size, -1, 1, self._box_code_size])
+    return box_encodings
+
+
+# TODO(alirezafathi): See if possible to unify Weight Shared with regular
+# convolutional box head.
+class WeightSharedConvolutionalBoxHead(head.Head):
+  """Weight shared convolutional box prediction head.
+
+  This head allows sharing the same set of parameters (weights) when called
+  more than once on different feature maps.
+  """
+
+  def __init__(self,
+               box_code_size,
+               kernel_size=3,
+               use_depthwise=False,
+               box_encodings_clip_range=None,
+               return_flat_predictions=True):
+    """Constructor.
+
+    Args:
+      box_code_size: Size of encoding for each box.
+      kernel_size: Size of final convolution kernel.
+      use_depthwise: Whether to use depthwise convolutions for prediction
+        steps. Default is False.
+      box_encodings_clip_range: Min and max values for clipping box_encodings.
+      return_flat_predictions: If true, returns flattened prediction tensor
+        of shape [batch, height * width * num_predictions_per_location,
+        box_code_size]. Otherwise returns the prediction tensor before
+        reshaping, whose shape is [batch, height, width,
+        num_predictions_per_location * box_code_size].
+
+    Raises:
+      ValueError: if use_depthwise is True and kernel_size is 1.
+    """
+    if use_depthwise and (kernel_size == 1):
+      raise ValueError('Should not use 1x1 kernel when using depthwise conv')
+
+    super(WeightSharedConvolutionalBoxHead, self).__init__()
+    self._box_code_size = box_code_size
+    self._kernel_size = kernel_size
+    self._use_depthwise = use_depthwise
+    self._box_encodings_clip_range = box_encodings_clip_range
+    self._return_flat_predictions = return_flat_predictions
+
+  def predict(self, features, num_predictions_per_location):
+    """Predicts boxes.
+
+    Args:
+      features: A float tensor of shape [batch_size, height, width, channels]
+        containing image features.
+      num_predictions_per_location: Number of box predictions to be made per
+        spatial location.
+ + Returns: + box_encodings: A float tensor of shape + [batch_size, num_anchors, code_size] representing the location of + the objects, or a float tensor of shape [batch, height, width, + num_predictions_per_location * box_code_size] representing grid box + location predictions if self._return_flat_predictions is False. + """ + box_encodings_net = features + if self._use_depthwise: + conv_op = functools.partial(slim.separable_conv2d, depth_multiplier=1) + else: + conv_op = slim.conv2d + box_encodings = conv_op( + box_encodings_net, + num_predictions_per_location * self._box_code_size, + [self._kernel_size, self._kernel_size], + activation_fn=None, stride=1, padding='SAME', + normalizer_fn=None, + scope='BoxPredictor') + batch_size = features.get_shape().as_list()[0] + if batch_size is None: + batch_size = tf.shape(features)[0] + # Clipping the box encodings to make the inference graph TPU friendly. + if self._box_encodings_clip_range is not None: + box_encodings = tf.clip_by_value( + box_encodings, self._box_encodings_clip_range.min, + self._box_encodings_clip_range.max) + if self._return_flat_predictions: + box_encodings = tf.reshape(box_encodings, + [batch_size, -1, self._box_code_size]) + return box_encodings diff --git a/predictors/heads/box_head_test.py b/predictors/heads/box_head_test.py new file mode 100644 index 0000000..34df8f4 --- /dev/null +++ b/predictors/heads/box_head_test.py @@ -0,0 +1,127 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for object_detection.predictors.heads.box_head.""" +import tensorflow as tf + +from google.protobuf import text_format +from object_detection.builders import hyperparams_builder +from object_detection.predictors.heads import box_head +from object_detection.protos import hyperparams_pb2 +from object_detection.utils import test_case + + +class MaskRCNNBoxHeadTest(test_case.TestCase): + + def _build_arg_scope_with_hyperparams(self, + op_type=hyperparams_pb2.Hyperparams.FC): + hyperparams = hyperparams_pb2.Hyperparams() + hyperparams_text_proto = """ + activation: NONE + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(hyperparams_text_proto, hyperparams) + hyperparams.op = op_type + return hyperparams_builder.build(hyperparams, is_training=True) + + def test_prediction_size(self): + box_prediction_head = box_head.MaskRCNNBoxHead( + is_training=False, + num_classes=20, + fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(), + use_dropout=True, + dropout_keep_prob=0.5, + box_code_size=4, + share_box_across_classes=False) + roi_pooled_features = tf.random_uniform( + [64, 7, 7, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + prediction = box_prediction_head.predict( + features=roi_pooled_features, num_predictions_per_location=1) + self.assertAllEqual([64, 1, 20, 4], prediction.get_shape().as_list()) + + +class ConvolutionalBoxPredictorTest(test_case.TestCase): + + def _build_arg_scope_with_hyperparams( + self, op_type=hyperparams_pb2.Hyperparams.CONV): + hyperparams = hyperparams_pb2.Hyperparams() + hyperparams_text_proto = """ + activation: NONE + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(hyperparams_text_proto, hyperparams) + hyperparams.op = op_type + return hyperparams_builder.build(hyperparams, is_training=True) + + def test_prediction_size(self): + box_prediction_head = box_head.ConvolutionalBoxHead( + is_training=True, + box_code_size=4, + kernel_size=3) + image_feature = tf.random_uniform( + [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + box_encodings = box_prediction_head.predict( + features=image_feature, + num_predictions_per_location=1) + self.assertAllEqual([64, 323, 1, 4], box_encodings.get_shape().as_list()) + + +class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase): + + def _build_arg_scope_with_hyperparams( + self, op_type=hyperparams_pb2.Hyperparams.CONV): + hyperparams = hyperparams_pb2.Hyperparams() + hyperparams_text_proto = """ + activation: NONE + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(hyperparams_text_proto, hyperparams) + hyperparams.op = op_type + return hyperparams_builder.build(hyperparams, is_training=True) + + def test_prediction_size(self): + box_prediction_head = box_head.WeightSharedConvolutionalBoxHead( + box_code_size=4) + image_feature = tf.random_uniform( + [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + box_encodings = box_prediction_head.predict( + features=image_feature, + num_predictions_per_location=1) + self.assertAllEqual([64, 323, 4], box_encodings.get_shape().as_list()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/predictors/heads/class_head.py b/predictors/heads/class_head.py new file mode 100644 index 0000000..64b2df9 --- /dev/null +++ 
b/predictors/heads/class_head.py @@ -0,0 +1,316 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Class Head. + +Contains Class prediction head classes for different meta architectures. +All the class prediction heads have a predict function that receives the +`features` as the first argument and returns class predictions with background. +""" +import functools +import tensorflow as tf +from tensorflow.contrib import slim as contrib_slim + +from object_detection.predictors.heads import head + +slim = contrib_slim + + +class MaskRCNNClassHead(head.Head): + """Mask RCNN class prediction head. + + Please refer to Mask RCNN paper: + https://arxiv.org/abs/1703.06870 + """ + + def __init__(self, + is_training, + num_class_slots, + fc_hyperparams_fn, + use_dropout, + dropout_keep_prob, + scope='ClassPredictor'): + """Constructor. + + Args: + is_training: Indicates whether the BoxPredictor is in training mode. + num_class_slots: number of class slots. Note that num_class_slots may or + may not include an implicit background category. + fc_hyperparams_fn: A function to generate tf-slim arg_scope with + hyperparameters for fully connected ops. + use_dropout: Option to use dropout or not. Note that a single dropout + op is applied here prior to both box and class predictions, which stands + in contrast to the ConvolutionalBoxPredictor below. + dropout_keep_prob: Keep probability for dropout. + This is only used if use_dropout is True. + scope: Scope name for the convolution operation. + """ + super(MaskRCNNClassHead, self).__init__() + self._is_training = is_training + self._num_class_slots = num_class_slots + self._fc_hyperparams_fn = fc_hyperparams_fn + self._use_dropout = use_dropout + self._dropout_keep_prob = dropout_keep_prob + self._scope = scope + + def predict(self, features, num_predictions_per_location=1): + """Predicts boxes and class scores. + + Args: + features: A float tensor of shape [batch_size, height, width, channels] + containing features for a batch of images. + num_predictions_per_location: Int containing number of predictions per + location. + + Returns: + class_predictions_with_background: A float tensor of shape + [batch_size, 1, num_class_slots] representing the class predictions for + the proposals. + + Raises: + ValueError: If num_predictions_per_location is not 1. 
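+
+    A minimal usage sketch (illustrative only; the 91 class slots, the
+    `roi_features` tensor, and the `fc_hyperparams_fn` built elsewhere via
+    `hyperparams_builder` are assumptions, not fixed by this class):
+
+      head = MaskRCNNClassHead(
+          is_training=False,
+          num_class_slots=91,
+          fc_hyperparams_fn=fc_hyperparams_fn,
+          use_dropout=False,
+          dropout_keep_prob=1.0)
+      # roi_features: [num_proposals, crop_height, crop_width, depth]
+      scores = head.predict(roi_features)  # -> [num_proposals, 1, 91]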
+    """
+    if num_predictions_per_location != 1:
+      raise ValueError('Only num_predictions_per_location=1 is supported')
+    spatial_averaged_roi_pooled_features = tf.reduce_mean(
+        features, [1, 2], keep_dims=True, name='AvgPool')
+    flattened_roi_pooled_features = slim.flatten(
+        spatial_averaged_roi_pooled_features)
+    if self._use_dropout:
+      flattened_roi_pooled_features = slim.dropout(
+          flattened_roi_pooled_features,
+          keep_prob=self._dropout_keep_prob,
+          is_training=self._is_training)
+
+    with slim.arg_scope(self._fc_hyperparams_fn()):
+      class_predictions_with_background = slim.fully_connected(
+          flattened_roi_pooled_features,
+          self._num_class_slots,
+          activation_fn=None,
+          scope=self._scope)
+    class_predictions_with_background = tf.reshape(
+        class_predictions_with_background,
+        [-1, 1, self._num_class_slots])
+    return class_predictions_with_background
+
+
+class ConvolutionalClassHead(head.Head):
+  """Convolutional class prediction head."""
+
+  def __init__(self,
+               is_training,
+               num_class_slots,
+               use_dropout,
+               dropout_keep_prob,
+               kernel_size,
+               apply_sigmoid_to_scores=False,
+               class_prediction_bias_init=0.0,
+               use_depthwise=False,
+               scope='ClassPredictor'):
+    """Constructor.
+
+    Args:
+      is_training: Indicates whether the BoxPredictor is in training mode.
+      num_class_slots: number of class slots. Note that num_class_slots may or
+        may not include an implicit background category.
+      use_dropout: Option to use dropout or not. Note that when enabled,
+        dropout is applied to the input features just before the class
+        prediction layers of this head.
+      dropout_keep_prob: Keep probability for dropout.
+        This is only used if use_dropout is True.
+      kernel_size: Size of final convolution kernel. If the
+        spatial resolution of the feature map is smaller than the kernel size,
+        then the kernel size is automatically set to be
+        min(feature_width, feature_height).
+      apply_sigmoid_to_scores: if True, apply the sigmoid on the output
+        class_predictions.
+      class_prediction_bias_init: constant value to initialize bias of the last
+        conv2d layer before class prediction.
+      use_depthwise: Whether to use depthwise convolutions for prediction
+        steps. Default is False.
+      scope: Scope name for the convolution operation.
+
+    Raises:
+      ValueError: if use_depthwise is True and kernel_size is 1.
+    """
+    if use_depthwise and (kernel_size == 1):
+      raise ValueError('Should not use 1x1 kernel when using depthwise conv')
+
+    super(ConvolutionalClassHead, self).__init__()
+    self._is_training = is_training
+    self._num_class_slots = num_class_slots
+    self._use_dropout = use_dropout
+    self._dropout_keep_prob = dropout_keep_prob
+    self._kernel_size = kernel_size
+    self._apply_sigmoid_to_scores = apply_sigmoid_to_scores
+    self._class_prediction_bias_init = class_prediction_bias_init
+    self._use_depthwise = use_depthwise
+    self._scope = scope
+
+  def predict(self, features, num_predictions_per_location):
+    """Predicts class scores.
+
+    Args:
+      features: A float tensor of shape [batch_size, height, width, channels]
+        containing image features.
+      num_predictions_per_location: Number of box predictions to be made per
+        spatial location.
+
+    Returns:
+      class_predictions_with_background: A float tensor of shape
+        [batch_size, num_anchors, num_class_slots] representing the class
+        predictions for the proposals.
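+
+    As a worked shape example (illustrative numbers): a [2, 10, 10, 64]
+    feature map with num_predictions_per_location=6 and num_class_slots=91
+    yields a tensor of shape [2, 10 * 10 * 6, 91] = [2, 600, 91].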
+    """
+    net = features
+    if self._use_dropout:
+      net = slim.dropout(net, keep_prob=self._dropout_keep_prob)
+    if self._use_depthwise:
+      depthwise_scope = self._scope + '_depthwise'
+      class_predictions_with_background = slim.separable_conv2d(
+          net, None, [self._kernel_size, self._kernel_size],
+          padding='SAME', depth_multiplier=1, stride=1,
+          rate=1, scope=depthwise_scope)
+      class_predictions_with_background = slim.conv2d(
+          class_predictions_with_background,
+          num_predictions_per_location * self._num_class_slots, [1, 1],
+          activation_fn=None,
+          normalizer_fn=None,
+          normalizer_params=None,
+          scope=self._scope)
+    else:
+      class_predictions_with_background = slim.conv2d(
+          net,
+          num_predictions_per_location * self._num_class_slots,
+          [self._kernel_size, self._kernel_size],
+          activation_fn=None,
+          normalizer_fn=None,
+          normalizer_params=None,
+          scope=self._scope,
+          biases_initializer=tf.constant_initializer(
+              self._class_prediction_bias_init))
+    if self._apply_sigmoid_to_scores:
+      class_predictions_with_background = tf.sigmoid(
+          class_predictions_with_background)
+    batch_size = features.get_shape().as_list()[0]
+    if batch_size is None:
+      batch_size = tf.shape(features)[0]
+    class_predictions_with_background = tf.reshape(
+        class_predictions_with_background,
+        [batch_size, -1, self._num_class_slots])
+    return class_predictions_with_background
+
+
+# TODO(alirezafathi): See if possible to unify Weight Shared with regular
+# convolutional class head.
+class WeightSharedConvolutionalClassHead(head.Head):
+  """Weight shared convolutional class prediction head.
+
+  This head allows sharing the same set of parameters (weights) when called
+  more than once on different feature maps.
+  """
+
+  def __init__(self,
+               num_class_slots,
+               kernel_size=3,
+               class_prediction_bias_init=0.0,
+               use_dropout=False,
+               dropout_keep_prob=0.8,
+               use_depthwise=False,
+               score_converter_fn=tf.identity,
+               return_flat_predictions=True,
+               scope='ClassPredictor'):
+    """Constructor.
+
+    Args:
+      num_class_slots: number of class slots. Note that num_class_slots may or
+        may not include an implicit background category.
+      kernel_size: Size of final convolution kernel.
+      class_prediction_bias_init: constant value to initialize bias of the last
+        conv2d layer before class prediction.
+      use_dropout: Whether to apply dropout to class prediction head.
+      dropout_keep_prob: Probability of keeping activations.
+      use_depthwise: Whether to use depthwise convolutions for prediction
+        steps. Default is False.
+      score_converter_fn: Callable elementwise nonlinearity (that takes tensors
+        as inputs and returns tensors).
+      return_flat_predictions: If true, returns flattened prediction tensor
+        of shape [batch, height * width * num_predictions_per_location,
+        num_class_slots]. Otherwise returns the prediction tensor before
+        reshaping, whose shape is [batch, height, width,
+        num_predictions_per_location * num_class_slots].
+      scope: Scope name for the convolution operation.
+
+    Raises:
+      ValueError: if use_depthwise is True and kernel_size is 1.
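+
+    A sketch of the intended weight sharing (illustrative; `feature_maps` and
+    the sizes are placeholders):
+
+      head = WeightSharedConvolutionalClassHead(num_class_slots=91)
+      # Reusing one head instance under a shared variable scope applies the
+      # same convolution weights to every feature map.
+      predictions = [head.predict(fmap, num_predictions_per_location=6)
+                     for fmap in feature_maps]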
+    """
+    if use_depthwise and (kernel_size == 1):
+      raise ValueError('Should not use 1x1 kernel when using depthwise conv')
+
+    super(WeightSharedConvolutionalClassHead, self).__init__()
+    self._num_class_slots = num_class_slots
+    self._kernel_size = kernel_size
+    self._class_prediction_bias_init = class_prediction_bias_init
+    self._use_dropout = use_dropout
+    self._dropout_keep_prob = dropout_keep_prob
+    self._use_depthwise = use_depthwise
+    self._score_converter_fn = score_converter_fn
+    self._return_flat_predictions = return_flat_predictions
+    self._scope = scope
+
+  def predict(self, features, num_predictions_per_location):
+    """Predicts class scores.
+
+    Args:
+      features: A float tensor of shape [batch_size, height, width, channels]
+        containing image features.
+      num_predictions_per_location: Number of box predictions to be made per
+        spatial location.
+
+    Returns:
+      class_predictions_with_background: A tensor of shape
+        [batch_size, num_anchors, num_class_slots] representing the class
+        predictions for the proposals, or a tensor of shape [batch, height,
+        width, num_predictions_per_location * num_class_slots] representing
+        class predictions before reshaping if self._return_flat_predictions is
+        False.
+    """
+    class_predictions_net = features
+    if self._use_dropout:
+      class_predictions_net = slim.dropout(
+          class_predictions_net, keep_prob=self._dropout_keep_prob)
+    if self._use_depthwise:
+      conv_op = functools.partial(slim.separable_conv2d, depth_multiplier=1)
+    else:
+      conv_op = slim.conv2d
+    class_predictions_with_background = conv_op(
+        class_predictions_net,
+        num_predictions_per_location * self._num_class_slots,
+        [self._kernel_size, self._kernel_size],
+        activation_fn=None, stride=1, padding='SAME',
+        normalizer_fn=None,
+        biases_initializer=tf.constant_initializer(
+            self._class_prediction_bias_init),
+        scope=self._scope)
+    batch_size = features.get_shape().as_list()[0]
+    if batch_size is None:
+      batch_size = tf.shape(features)[0]
+    class_predictions_with_background = self._score_converter_fn(
+        class_predictions_with_background)
+    if self._return_flat_predictions:
+      class_predictions_with_background = tf.reshape(
+          class_predictions_with_background,
+          [batch_size, -1, self._num_class_slots])
+    return class_predictions_with_background
diff --git a/predictors/heads/class_head_test.py b/predictors/heads/class_head_test.py
new file mode 100644
index 0000000..4680524
--- /dev/null
+++ b/predictors/heads/class_head_test.py
@@ -0,0 +1,194 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================== + +"""Tests for object_detection.predictors.heads.class_head.""" +import tensorflow as tf + +from google.protobuf import text_format +from object_detection.builders import hyperparams_builder +from object_detection.predictors.heads import class_head +from object_detection.protos import hyperparams_pb2 +from object_detection.utils import test_case + + +class MaskRCNNClassHeadTest(test_case.TestCase): + + def _build_arg_scope_with_hyperparams(self, + op_type=hyperparams_pb2.Hyperparams.FC): + hyperparams = hyperparams_pb2.Hyperparams() + hyperparams_text_proto = """ + activation: NONE + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(hyperparams_text_proto, hyperparams) + hyperparams.op = op_type + return hyperparams_builder.build(hyperparams, is_training=True) + + def test_prediction_size(self): + class_prediction_head = class_head.MaskRCNNClassHead( + is_training=False, + num_class_slots=20, + fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(), + use_dropout=True, + dropout_keep_prob=0.5) + roi_pooled_features = tf.random_uniform( + [64, 7, 7, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + prediction = class_prediction_head.predict( + features=roi_pooled_features, num_predictions_per_location=1) + self.assertAllEqual([64, 1, 20], prediction.get_shape().as_list()) + + def test_scope_name(self): + expected_var_names = set([ + """ClassPredictor/weights""", + """ClassPredictor/biases""" + ]) + + g = tf.Graph() + with g.as_default(): + class_prediction_head = class_head.MaskRCNNClassHead( + is_training=True, + num_class_slots=20, + fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(), + use_dropout=True, + dropout_keep_prob=0.5) + image_feature = tf.random_uniform( + [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + class_prediction_head.predict( + features=image_feature, + num_predictions_per_location=1) + actual_variable_set = set([ + var.op.name for var in g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) + ]) + self.assertSetEqual(expected_var_names, actual_variable_set) + + +class ConvolutionalClassPredictorTest(test_case.TestCase): + + def _build_arg_scope_with_hyperparams( + self, op_type=hyperparams_pb2.Hyperparams.CONV): + hyperparams = hyperparams_pb2.Hyperparams() + hyperparams_text_proto = """ + activation: NONE + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(hyperparams_text_proto, hyperparams) + hyperparams.op = op_type + return hyperparams_builder.build(hyperparams, is_training=True) + + def test_prediction_size(self): + class_prediction_head = class_head.ConvolutionalClassHead( + is_training=True, + num_class_slots=20, + use_dropout=True, + dropout_keep_prob=0.5, + kernel_size=3) + image_feature = tf.random_uniform( + [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + class_predictions = class_prediction_head.predict( + features=image_feature, + num_predictions_per_location=1) + self.assertAllEqual([64, 323, 20], + class_predictions.get_shape().as_list()) + + def test_scope_name(self): + expected_var_names = set([ + """ClassPredictor/weights""", + """ClassPredictor/biases""" + ]) + g = tf.Graph() + with g.as_default(): + class_prediction_head = class_head.ConvolutionalClassHead( + is_training=True, + num_class_slots=20, + use_dropout=True, + dropout_keep_prob=0.5, + kernel_size=3) + image_feature 
= tf.random_uniform( + [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + class_prediction_head.predict( + features=image_feature, + num_predictions_per_location=1) + actual_variable_set = set([ + var.op.name for var in g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) + ]) + self.assertSetEqual(expected_var_names, actual_variable_set) + + +class WeightSharedConvolutionalClassPredictorTest(test_case.TestCase): + + def _build_arg_scope_with_hyperparams( + self, op_type=hyperparams_pb2.Hyperparams.CONV): + hyperparams = hyperparams_pb2.Hyperparams() + hyperparams_text_proto = """ + activation: NONE + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(hyperparams_text_proto, hyperparams) + hyperparams.op = op_type + return hyperparams_builder.build(hyperparams, is_training=True) + + def test_prediction_size(self): + class_prediction_head = ( + class_head.WeightSharedConvolutionalClassHead(num_class_slots=20)) + image_feature = tf.random_uniform( + [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + class_predictions = class_prediction_head.predict( + features=image_feature, + num_predictions_per_location=1) + self.assertAllEqual([64, 323, 20], class_predictions.get_shape().as_list()) + + def test_scope_name(self): + expected_var_names = set([ + """ClassPredictor/weights""", + """ClassPredictor/biases""" + ]) + g = tf.Graph() + with g.as_default(): + class_prediction_head = class_head.WeightSharedConvolutionalClassHead( + num_class_slots=20) + image_feature = tf.random_uniform( + [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + class_prediction_head.predict( + features=image_feature, + num_predictions_per_location=1) + actual_variable_set = set([ + var.op.name for var in g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) + ]) + self.assertSetEqual(expected_var_names, actual_variable_set) + + +if __name__ == '__main__': + tf.test.main() diff --git a/predictors/heads/head.py b/predictors/heads/head.py new file mode 100644 index 0000000..cd1607a --- /dev/null +++ b/predictors/heads/head.py @@ -0,0 +1,81 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Base head class. + +All the different kinds of prediction heads in different models will inherit +from this class. What is in common between all head classes is that they have a +`predict` function that receives `features` as its first argument. + +How to add a new prediction head to an existing meta architecture? +For example, how can we add a `3d shape` prediction head to Mask RCNN? + +We have to take the following steps to add a new prediction head to an +existing meta arch: +(a) Add a class for predicting the head. This class should inherit from the +`Head` class below and have a `predict` function that receives the features +and predicts the output. 
The output is always a tf.float32 tensor. +(b) Add the head to the meta architecture. For example in case of Mask RCNN, +go to box_predictor_builder and put in the logic for adding the new head to the +Mask RCNN box predictor. +(c) Add the logic for computing the loss for the new head. +(d) Add the necessary metrics for the new head. +(e) (optional) Add visualization for the new head. +""" +from abc import abstractmethod + +import tensorflow as tf + + +class Head(object): + """Mask RCNN head base class.""" + + def __init__(self): + """Constructor.""" + pass + + @abstractmethod + def predict(self, features, num_predictions_per_location): + """Returns the head's predictions. + + Args: + features: A float tensor of features. + num_predictions_per_location: Int containing number of predictions per + location. + + Returns: + A tf.float32 tensor. + """ + pass + + +class KerasHead(tf.keras.Model): + """Keras head base class.""" + + def call(self, features): + """The Keras model call will delegate to the `_predict` method.""" + return self._predict(features) + + @abstractmethod + def _predict(self, features): + """Returns the head's predictions. + + Args: + features: A float tensor of features. + + Returns: + A tf.float32 tensor. + """ + pass diff --git a/predictors/heads/keras_box_head.py b/predictors/heads/keras_box_head.py new file mode 100644 index 0000000..3c1c114 --- /dev/null +++ b/predictors/heads/keras_box_head.py @@ -0,0 +1,333 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Box Head. + +Contains Box prediction head classes for different meta architectures. +All the box prediction heads have a _predict function that receives the +`features` as the first argument and returns `box_encodings`. +""" +import tensorflow as tf + +from object_detection.predictors.heads import head + + +class ConvolutionalBoxHead(head.KerasHead): + """Convolutional box prediction head.""" + + def __init__(self, + is_training, + box_code_size, + kernel_size, + num_predictions_per_location, + conv_hyperparams, + freeze_batchnorm, + use_depthwise=False, + box_encodings_clip_range=None, + name=None): + """Constructor. + + Args: + is_training: Indicates whether the BoxPredictor is in training mode. + box_code_size: Size of encoding for each box. + kernel_size: Size of final convolution kernel. If the + spatial resolution of the feature map is smaller than the kernel size, + then the kernel size is automatically set to be + min(feature_width, feature_height). + num_predictions_per_location: Number of box predictions to be made per + spatial location. Int specifying number of boxes per location. + conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object + containing hyperparameters for convolution ops. + freeze_batchnorm: Bool. Whether to freeze batch norm parameters during + training or not. When training with a small batch size (e.g. 
1), it is + desirable to freeze batch norm update and use pretrained batch norm + params. + use_depthwise: Whether to use depthwise convolutions for prediction + steps. Default is False. + box_encodings_clip_range: Min and max values for clipping box_encodings. + name: A string name scope to assign to the model. If `None`, Keras + will auto-generate one from the class name. + + Raises: + ValueError: if min_depth > max_depth. + ValueError: if use_depthwise is True and kernel_size is 1. + """ + if use_depthwise and (kernel_size == 1): + raise ValueError('Should not use 1x1 kernel when using depthwise conv') + + super(ConvolutionalBoxHead, self).__init__(name=name) + self._is_training = is_training + self._box_code_size = box_code_size + self._kernel_size = kernel_size + self._num_predictions_per_location = num_predictions_per_location + self._use_depthwise = use_depthwise + self._box_encodings_clip_range = box_encodings_clip_range + + self._box_encoder_layers = [] + + if self._use_depthwise: + self._box_encoder_layers.append( + tf.keras.layers.DepthwiseConv2D( + [self._kernel_size, self._kernel_size], + padding='SAME', + depth_multiplier=1, + strides=1, + dilation_rate=1, + name='BoxEncodingPredictor_depthwise', + **conv_hyperparams.params())) + self._box_encoder_layers.append( + conv_hyperparams.build_batch_norm( + training=(is_training and not freeze_batchnorm), + name='BoxEncodingPredictor_depthwise_batchnorm')) + self._box_encoder_layers.append( + conv_hyperparams.build_activation_layer( + name='BoxEncodingPredictor_depthwise_activation')) + self._box_encoder_layers.append( + tf.keras.layers.Conv2D( + num_predictions_per_location * self._box_code_size, [1, 1], + name='BoxEncodingPredictor', + **conv_hyperparams.params(use_bias=True))) + else: + self._box_encoder_layers.append( + tf.keras.layers.Conv2D( + num_predictions_per_location * self._box_code_size, + [self._kernel_size, self._kernel_size], + padding='SAME', + name='BoxEncodingPredictor', + **conv_hyperparams.params(use_bias=True))) + + def _predict(self, features): + """Predicts boxes. + + Args: + features: A float tensor of shape [batch_size, height, width, channels] + containing image features. + + Returns: + box_encodings: A float tensor of shape + [batch_size, num_anchors, q, code_size] representing the location of + the objects, where q is 1 or the number of classes. + """ + box_encodings = features + for layer in self._box_encoder_layers: + box_encodings = layer(box_encodings) + batch_size = features.get_shape().as_list()[0] + if batch_size is None: + batch_size = tf.shape(features)[0] + # Clipping the box encodings to make the inference graph TPU friendly. + if self._box_encodings_clip_range is not None: + box_encodings = tf.clip_by_value( + box_encodings, self._box_encodings_clip_range.min, + self._box_encodings_clip_range.max) + box_encodings = tf.reshape(box_encodings, + [batch_size, -1, 1, self._box_code_size]) + return box_encodings + + +class MaskRCNNBoxHead(head.KerasHead): + """Box prediction head. + + This is a piece of Mask RCNN which is responsible for predicting + just the box encodings. + + Please refer to Mask RCNN paper: + https://arxiv.org/abs/1703.06870 + """ + + def __init__(self, + is_training, + num_classes, + fc_hyperparams, + freeze_batchnorm, + use_dropout, + dropout_keep_prob, + box_code_size, + share_box_across_classes=False, + name=None): + """Constructor. + + Args: + is_training: Indicates whether the BoxPredictor is in training mode. + num_classes: number of classes. 
Note that num_classes *does not*
+        include the background category, so if groundtruth labels take values
+        in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
+        assigned classification targets can range from {0,... K}).
+      fc_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
+        containing hyperparameters for fully connected dense ops.
+      freeze_batchnorm: Whether to freeze batch norm parameters during
+        training or not. When training with a small batch size (e.g. 1), it is
+        desirable to freeze batch norm update and use pretrained batch norm
+        params.
+      use_dropout: Option to use dropout or not. Note that a single dropout
+        op is applied here prior to both box and class predictions, which
+        stands in contrast to the ConvolutionalBoxPredictor below.
+      dropout_keep_prob: Keep probability for dropout.
+        This is only used if use_dropout is True.
+      box_code_size: Size of encoding for each box.
+      share_box_across_classes: Whether to share boxes across classes rather
+        than use a different box for each class.
+      name: A string name scope to assign to the box head. If `None`, Keras
+        will auto-generate one from the class name.
+    """
+    super(MaskRCNNBoxHead, self).__init__(name=name)
+    self._is_training = is_training
+    self._num_classes = num_classes
+    self._fc_hyperparams = fc_hyperparams
+    self._freeze_batchnorm = freeze_batchnorm
+    self._use_dropout = use_dropout
+    self._dropout_keep_prob = dropout_keep_prob
+    self._box_code_size = box_code_size
+    self._share_box_across_classes = share_box_across_classes
+
+    self._box_encoder_layers = [tf.keras.layers.Flatten()]
+
+    if self._use_dropout:
+      self._box_encoder_layers.append(
+          tf.keras.layers.Dropout(rate=1.0 - self._dropout_keep_prob))
+
+    self._number_of_boxes = 1
+    if not self._share_box_across_classes:
+      self._number_of_boxes = self._num_classes
+
+    self._box_encoder_layers.append(
+        tf.keras.layers.Dense(self._number_of_boxes * self._box_code_size,
+                              name='BoxEncodingPredictor_dense'))
+    self._box_encoder_layers.append(
+        fc_hyperparams.build_batch_norm(training=(is_training and
+                                                  not freeze_batchnorm),
+                                        name='BoxEncodingPredictor_batchnorm'))
+
+  def _predict(self, features):
+    """Predicts box encodings.
+
+    Args:
+      features: A float tensor of shape [batch_size, height, width,
+        channels] containing features for a batch of images.
+
+    Returns:
+      box_encodings: A float tensor of shape
+        [batch_size, 1, num_classes, code_size] representing the location of
+        the objects.
+    """
+    spatial_averaged_roi_pooled_features = tf.reduce_mean(
+        features, [1, 2], keep_dims=True, name='AvgPool')
+    net = spatial_averaged_roi_pooled_features
+    for layer in self._box_encoder_layers:
+      net = layer(net)
+    box_encodings = tf.reshape(net,
+                               [-1, 1,
+                                self._number_of_boxes,
+                                self._box_code_size])
+    return box_encodings
+
+
+# TODO(b/128922690): Unify the implementations of ConvolutionalBoxHead
+# and WeightSharedConvolutionalBoxHead
+class WeightSharedConvolutionalBoxHead(head.KerasHead):
+  """Weight shared convolutional box prediction head based on Keras.
+
+  This head allows sharing the same set of parameters (weights) when called
+  more than once on different feature maps.
+  """
+
+  def __init__(self,
+               box_code_size,
+               num_predictions_per_location,
+               conv_hyperparams,
+               kernel_size=3,
+               use_depthwise=False,
+               box_encodings_clip_range=None,
+               return_flat_predictions=True,
+               name=None):
+    """Constructor.
+
+    Args:
+      box_code_size: Size of encoding for each box.
+ num_predictions_per_location: Number of box predictions to be made per + spatial location. Int specifying number of boxes per location. + conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object + containing hyperparameters for convolution ops. + kernel_size: Size of final convolution kernel. + use_depthwise: Whether to use depthwise convolutions for prediction steps. + Default is False. + box_encodings_clip_range: Min and max values for clipping box_encodings. + return_flat_predictions: If true, returns flattened prediction tensor + of shape [batch, height * width * num_predictions_per_location, + box_coder]. Otherwise returns the prediction tensor before reshaping, + whose shape is [batch, height, width, num_predictions_per_location * + num_class_slots]. + name: A string name scope to assign to the model. If `None`, Keras + will auto-generate one from the class name. + + Raises: + ValueError: if use_depthwise is True and kernel_size is 1. + """ + if use_depthwise and (kernel_size == 1): + raise ValueError('Should not use 1x1 kernel when using depthwise conv') + + super(WeightSharedConvolutionalBoxHead, self).__init__(name=name) + self._box_code_size = box_code_size + self._kernel_size = kernel_size + self._num_predictions_per_location = num_predictions_per_location + self._use_depthwise = use_depthwise + self._box_encodings_clip_range = box_encodings_clip_range + self._return_flat_predictions = return_flat_predictions + + self._box_encoder_layers = [] + + if self._use_depthwise: + self._box_encoder_layers.append( + tf.keras.layers.SeparableConv2D( + num_predictions_per_location * self._box_code_size, + [self._kernel_size, self._kernel_size], + padding='SAME', + name='BoxPredictor', + **conv_hyperparams.params(use_bias=True))) + else: + self._box_encoder_layers.append( + tf.keras.layers.Conv2D( + num_predictions_per_location * self._box_code_size, + [self._kernel_size, self._kernel_size], + padding='SAME', + name='BoxPredictor', + **conv_hyperparams.params(use_bias=True))) + + def _predict(self, features): + """Predicts boxes. + + Args: + features: A float tensor of shape [batch_size, height, width, channels] + containing image features. + + Returns: + box_encodings: A float tensor of shape + [batch_size, num_anchors, q, code_size] representing the location of + the objects, where q is 1 or the number of classes. + """ + box_encodings = features + for layer in self._box_encoder_layers: + box_encodings = layer(box_encodings) + batch_size = features.get_shape().as_list()[0] + if batch_size is None: + batch_size = tf.shape(features)[0] + # Clipping the box encodings to make the inference graph TPU friendly. + if self._box_encodings_clip_range is not None: + box_encodings = tf.clip_by_value( + box_encodings, self._box_encodings_clip_range.min, + self._box_encodings_clip_range.max) + if self._return_flat_predictions: + box_encodings = tf.reshape(box_encodings, + [batch_size, -1, self._box_code_size]) + return box_encodings diff --git a/predictors/heads/keras_box_head_test.py b/predictors/heads/keras_box_head_test.py new file mode 100644 index 0000000..929b5f9 --- /dev/null +++ b/predictors/heads/keras_box_head_test.py @@ -0,0 +1,184 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for object_detection.predictors.heads.box_head.""" +import tensorflow as tf + +from google.protobuf import text_format +from object_detection.builders import hyperparams_builder +from object_detection.predictors.heads import keras_box_head +from object_detection.protos import hyperparams_pb2 +from object_detection.utils import test_case + + +class ConvolutionalKerasBoxHeadTest(test_case.TestCase): + + def _build_conv_hyperparams(self): + conv_hyperparams = hyperparams_pb2.Hyperparams() + conv_hyperparams_text_proto = """ + activation: NONE + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) + return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams) + + def test_prediction_size_depthwise_false(self): + conv_hyperparams = self._build_conv_hyperparams() + box_prediction_head = keras_box_head.ConvolutionalBoxHead( + is_training=True, + box_code_size=4, + kernel_size=3, + conv_hyperparams=conv_hyperparams, + freeze_batchnorm=False, + num_predictions_per_location=1, + use_depthwise=False) + image_feature = tf.random_uniform( + [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + box_encodings = box_prediction_head(image_feature) + self.assertAllEqual([64, 323, 1, 4], box_encodings.get_shape().as_list()) + + def test_prediction_size_depthwise_true(self): + conv_hyperparams = self._build_conv_hyperparams() + box_prediction_head = keras_box_head.ConvolutionalBoxHead( + is_training=True, + box_code_size=4, + kernel_size=3, + conv_hyperparams=conv_hyperparams, + freeze_batchnorm=False, + num_predictions_per_location=1, + use_depthwise=True) + image_feature = tf.random_uniform( + [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + box_encodings = box_prediction_head(image_feature) + self.assertAllEqual([64, 323, 1, 4], box_encodings.get_shape().as_list()) + + +class MaskRCNNKerasBoxHeadTest(test_case.TestCase): + + def _build_fc_hyperparams( + self, op_type=hyperparams_pb2.Hyperparams.FC): + hyperparams = hyperparams_pb2.Hyperparams() + hyperparams_text_proto = """ + activation: NONE + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(hyperparams_text_proto, hyperparams) + hyperparams.op = op_type + return hyperparams_builder.KerasLayerHyperparams(hyperparams) + + def test_prediction_size(self): + box_prediction_head = keras_box_head.MaskRCNNBoxHead( + is_training=False, + num_classes=20, + fc_hyperparams=self._build_fc_hyperparams(), + freeze_batchnorm=False, + use_dropout=True, + dropout_keep_prob=0.5, + box_code_size=4, + share_box_across_classes=False) + roi_pooled_features = tf.random_uniform( + [64, 7, 7, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + prediction = box_prediction_head(roi_pooled_features) + self.assertAllEqual([64, 1, 20, 4], prediction.get_shape().as_list()) + + +class WeightSharedConvolutionalKerasBoxHead(test_case.TestCase): + + def 
_build_conv_hyperparams(self):
+    conv_hyperparams = hyperparams_pb2.Hyperparams()
+    conv_hyperparams_text_proto = """
+      activation: NONE
+      regularizer {
+        l2_regularizer {
+        }
+      }
+      initializer {
+        truncated_normal_initializer {
+        }
+      }
+    """
+    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
+    return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
+
+  def test_prediction_size_depthwise_false(self):
+    conv_hyperparams = self._build_conv_hyperparams()
+    box_prediction_head = keras_box_head.WeightSharedConvolutionalBoxHead(
+        box_code_size=4,
+        conv_hyperparams=conv_hyperparams,
+        num_predictions_per_location=1,
+        use_depthwise=False)
+    image_feature = tf.random_uniform(
+        [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+    box_encodings = box_prediction_head(image_feature)
+    self.assertAllEqual([64, 323, 4], box_encodings.get_shape().as_list())
+
+  def test_prediction_size_depthwise_true(self):
+    conv_hyperparams = self._build_conv_hyperparams()
+    box_prediction_head = keras_box_head.WeightSharedConvolutionalBoxHead(
+        box_code_size=4,
+        conv_hyperparams=conv_hyperparams,
+        num_predictions_per_location=1,
+        use_depthwise=True)
+    image_feature = tf.random_uniform(
+        [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+    box_encodings = box_prediction_head(image_feature)
+    self.assertAllEqual([64, 323, 4], box_encodings.get_shape().as_list())
+
+  def test_variable_count_depth_wise_true(self):
+    g = tf.Graph()
+    with g.as_default():
+      conv_hyperparams = self._build_conv_hyperparams()
+      box_prediction_head = keras_box_head.WeightSharedConvolutionalBoxHead(
+          box_code_size=4,
+          conv_hyperparams=conv_hyperparams,
+          num_predictions_per_location=1,
+          use_depthwise=True)
+      image_feature = tf.random_uniform(
+          [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+      _ = box_prediction_head(image_feature)
+      variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
+      self.assertEqual(len(variables), 3)
+
+  def test_variable_count_depth_wise_false(self):
+    g = tf.Graph()
+    with g.as_default():
+      conv_hyperparams = self._build_conv_hyperparams()
+      box_prediction_head = keras_box_head.WeightSharedConvolutionalBoxHead(
+          box_code_size=4,
+          conv_hyperparams=conv_hyperparams,
+          num_predictions_per_location=1,
+          use_depthwise=False)
+      image_feature = tf.random_uniform(
+          [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+      _ = box_prediction_head(image_feature)
+      variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
+      self.assertEqual(len(variables), 2)
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/predictors/heads/keras_class_head.py b/predictors/heads/keras_class_head.py
new file mode 100644
index 0000000..632fef6
--- /dev/null
+++ b/predictors/heads/keras_class_head.py
@@ -0,0 +1,351 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Class Head.
+ +Contains Class prediction head classes for different meta architectures. +All the class prediction heads have a predict function that receives the +`features` as the first argument and returns class predictions with background. +""" +import tensorflow as tf + +from object_detection.predictors.heads import head + + +class ConvolutionalClassHead(head.KerasHead): + """Convolutional class prediction head.""" + + def __init__(self, + is_training, + num_class_slots, + use_dropout, + dropout_keep_prob, + kernel_size, + num_predictions_per_location, + conv_hyperparams, + freeze_batchnorm, + class_prediction_bias_init=0.0, + use_depthwise=False, + name=None): + """Constructor. + + Args: + is_training: Indicates whether the BoxPredictor is in training mode. + num_class_slots: number of class slots. Note that num_class_slots may or + may not include an implicit background category. + use_dropout: Option to use dropout or not. Note that a single dropout + op is applied here prior to both box and class predictions, which stands + in contrast to the ConvolutionalBoxPredictor below. + dropout_keep_prob: Keep probability for dropout. + This is only used if use_dropout is True. + kernel_size: Size of final convolution kernel. If the + spatial resolution of the feature map is smaller than the kernel size, + then the kernel size is automatically set to be + min(feature_width, feature_height). + num_predictions_per_location: Number of box predictions to be made per + spatial location. Int specifying number of boxes per location. + conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object + containing hyperparameters for convolution ops. + freeze_batchnorm: Bool. Whether to freeze batch norm parameters during + training or not. When training with a small batch size (e.g. 1), it is + desirable to freeze batch norm update and use pretrained batch norm + params. + class_prediction_bias_init: constant value to initialize bias of the last + conv2d layer before class prediction. + use_depthwise: Whether to use depthwise convolutions for prediction + steps. Default is False. + name: A string name scope to assign to the model. If `None`, Keras + will auto-generate one from the class name. + + Raises: + ValueError: if min_depth > max_depth. + ValueError: if use_depthwise is True and kernel_size is 1. + """ + if use_depthwise and (kernel_size == 1): + raise ValueError('Should not use 1x1 kernel when using depthwise conv') + + super(ConvolutionalClassHead, self).__init__(name=name) + self._is_training = is_training + self._use_dropout = use_dropout + self._dropout_keep_prob = dropout_keep_prob + self._kernel_size = kernel_size + self._class_prediction_bias_init = class_prediction_bias_init + self._use_depthwise = use_depthwise + self._num_class_slots = num_class_slots + + self._class_predictor_layers = [] + + if self._use_dropout: + self._class_predictor_layers.append( + # The Dropout layer's `training` parameter for the call method must + # be set implicitly by the Keras set_learning_phase. The object + # detection training code takes care of this. 
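+          # Keras Dropout expects a *drop* rate, while dropout_keep_prob is a
+          # *keep* probability, hence the 1.0 - dropout_keep_prob below.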
+          tf.keras.layers.Dropout(rate=1.0 - self._dropout_keep_prob))
+    if self._use_depthwise:
+      self._class_predictor_layers.append(
+          tf.keras.layers.DepthwiseConv2D(
+              [self._kernel_size, self._kernel_size],
+              padding='SAME',
+              depth_multiplier=1,
+              strides=1,
+              dilation_rate=1,
+              name='ClassPredictor_depthwise',
+              **conv_hyperparams.params()))
+      self._class_predictor_layers.append(
+          conv_hyperparams.build_batch_norm(
+              training=(is_training and not freeze_batchnorm),
+              name='ClassPredictor_depthwise_batchnorm'))
+      self._class_predictor_layers.append(
+          conv_hyperparams.build_activation_layer(
+              name='ClassPredictor_depthwise_activation'))
+      self._class_predictor_layers.append(
+          tf.keras.layers.Conv2D(
+              num_predictions_per_location * self._num_class_slots, [1, 1],
+              name='ClassPredictor',
+              **conv_hyperparams.params(use_bias=True)))
+    else:
+      self._class_predictor_layers.append(
+          tf.keras.layers.Conv2D(
+              num_predictions_per_location * self._num_class_slots,
+              [self._kernel_size, self._kernel_size],
+              padding='SAME',
+              name='ClassPredictor',
+              bias_initializer=tf.constant_initializer(
+                  self._class_prediction_bias_init),
+              **conv_hyperparams.params(use_bias=True)))
+
+  def _predict(self, features):
+    """Predicts class scores, including background.
+
+    Args:
+      features: A float tensor of shape [batch_size, height, width, channels]
+        containing image features.
+
+    Returns:
+      class_predictions_with_background: A float tensor of shape
+        [batch_size, num_anchors, num_class_slots] representing the class
+        predictions for the proposals.
+    """
+    class_predictions_with_background = features
+    for layer in self._class_predictor_layers:
+      class_predictions_with_background = layer(
+          class_predictions_with_background)
+    batch_size = features.get_shape().as_list()[0]
+    if batch_size is None:
+      batch_size = tf.shape(features)[0]
+    class_predictions_with_background = tf.reshape(
+        class_predictions_with_background,
+        [batch_size, -1, self._num_class_slots])
+    return class_predictions_with_background
+
+
+class MaskRCNNClassHead(head.KerasHead):
+  """Mask RCNN class prediction head.
+
+  This is a piece of Mask RCNN which is responsible for predicting
+  just the class scores of boxes.
+
+  Please refer to Mask RCNN paper:
+  https://arxiv.org/abs/1703.06870
+  """
+
+  def __init__(self,
+               is_training,
+               num_class_slots,
+               fc_hyperparams,
+               freeze_batchnorm,
+               use_dropout,
+               dropout_keep_prob,
+               name=None):
+    """Constructor.
+
+    Args:
+      is_training: Indicates whether the BoxPredictor is in training mode.
+      num_class_slots: number of class slots. Note that num_class_slots may or
+        may not include an implicit background category.
+      fc_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
+        containing hyperparameters for fully connected dense ops.
+      freeze_batchnorm: Whether to freeze batch norm parameters during
+        training or not. When training with a small batch size (e.g. 1), it is
+        desirable to freeze batch norm update and use pretrained batch norm
+        params.
+      use_dropout: Option to use dropout or not. Note that a single dropout
+        op is applied here prior to both box and class predictions, which
+        stands in contrast to the ConvolutionalBoxPredictor below.
+      dropout_keep_prob: Keep probability for dropout.
+        This is only used if use_dropout is True.
+      name: A string name scope to assign to the class head. If `None`, Keras
+        will auto-generate one from the class name.
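+
+    For example, with num_class_slots=K, calling this head on a batch of ROI
+    features returns a [batch_size, 1, K] float tensor of class scores (see
+    `_predict` below and keras_class_head_test.py).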
+ """ + super(MaskRCNNClassHead, self).__init__(name=name) + self._is_training = is_training + self._freeze_batchnorm = freeze_batchnorm + self._num_class_slots = num_class_slots + self._fc_hyperparams = fc_hyperparams + self._use_dropout = use_dropout + self._dropout_keep_prob = dropout_keep_prob + + self._class_predictor_layers = [tf.keras.layers.Flatten()] + + if self._use_dropout: + self._class_predictor_layers.append( + tf.keras.layers.Dropout(rate=1.0 - self._dropout_keep_prob)) + + self._class_predictor_layers.append( + tf.keras.layers.Dense(self._num_class_slots, + name='ClassPredictor_dense')) + self._class_predictor_layers.append( + fc_hyperparams.build_batch_norm(training=(is_training and + not freeze_batchnorm), + name='ClassPredictor_batchnorm')) + + def _predict(self, features): + """Predicts the class scores for boxes. + + Args: + features: A float tensor of shape [batch_size, height, width, channels] + containing features for a batch of images. + + Returns: + class_predictions_with_background: A float tensor of shape + [batch_size, 1, num_class_slots] representing the class predictions for + the proposals. + """ + spatial_averaged_roi_pooled_features = tf.reduce_mean( + features, [1, 2], keep_dims=True, name='AvgPool') + net = spatial_averaged_roi_pooled_features + for layer in self._class_predictor_layers: + net = layer(net) + class_predictions_with_background = tf.reshape( + net, + [-1, 1, self._num_class_slots]) + return class_predictions_with_background + + +class WeightSharedConvolutionalClassHead(head.KerasHead): + """Weight shared convolutional class prediction head. + + This head allows sharing the same set of parameters (weights) when called more + then once on different feature maps. + """ + + def __init__(self, + num_class_slots, + num_predictions_per_location, + conv_hyperparams, + kernel_size=3, + class_prediction_bias_init=0.0, + use_dropout=False, + dropout_keep_prob=0.8, + use_depthwise=False, + score_converter_fn=tf.identity, + return_flat_predictions=True, + name=None): + """Constructor. + + Args: + num_class_slots: number of class slots. Note that num_class_slots may or + may not include an implicit background category. + num_predictions_per_location: Number of box predictions to be made per + spatial location. Int specifying number of boxes per location. + conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object + containing hyperparameters for convolution ops. + kernel_size: Size of final convolution kernel. + class_prediction_bias_init: constant value to initialize bias of the last + conv2d layer before class prediction. + use_dropout: Whether to apply dropout to class prediction head. + dropout_keep_prob: Probability of keeping activiations. + use_depthwise: Whether to use depthwise convolutions for prediction + steps. Default is False. + score_converter_fn: Callable elementwise nonlinearity (that takes tensors + as inputs and returns tensors). + return_flat_predictions: If true, returns flattened prediction tensor + of shape [batch, height * width * num_predictions_per_location, + box_coder]. Otherwise returns the prediction tensor before reshaping, + whose shape is [batch, height, width, num_predictions_per_location * + num_class_slots]. + name: A string name scope to assign to the model. If `None`, Keras + will auto-generate one from the class name. + + Raises: + ValueError: if use_depthwise is True and kernel_size is 1. 
+ """ + if use_depthwise and (kernel_size == 1): + raise ValueError('Should not use 1x1 kernel when using depthwise conv') + + super(WeightSharedConvolutionalClassHead, self).__init__(name=name) + self._num_class_slots = num_class_slots + self._kernel_size = kernel_size + self._class_prediction_bias_init = class_prediction_bias_init + self._use_dropout = use_dropout + self._dropout_keep_prob = dropout_keep_prob + self._use_depthwise = use_depthwise + self._score_converter_fn = score_converter_fn + self._return_flat_predictions = return_flat_predictions + + self._class_predictor_layers = [] + + if self._use_dropout: + self._class_predictor_layers.append( + tf.keras.layers.Dropout(rate=1.0 - self._dropout_keep_prob)) + if self._use_depthwise: + self._class_predictor_layers.append( + tf.keras.layers.SeparableConv2D( + num_predictions_per_location * self._num_class_slots, + [self._kernel_size, self._kernel_size], + padding='SAME', + depth_multiplier=1, + strides=1, + name='ClassPredictor', + bias_initializer=tf.constant_initializer( + self._class_prediction_bias_init), + **conv_hyperparams.params(use_bias=True))) + else: + self._class_predictor_layers.append( + tf.keras.layers.Conv2D( + num_predictions_per_location * self._num_class_slots, + [self._kernel_size, self._kernel_size], + padding='SAME', + name='ClassPredictor', + bias_initializer=tf.constant_initializer( + self._class_prediction_bias_init), + **conv_hyperparams.params(use_bias=True))) + + def _predict(self, features): + """Predicts boxes. + + Args: + features: A float tensor of shape [batch_size, height, width, channels] + containing image features. + + Returns: + class_predictions_with_background: A float tensor of shape + [batch_size, num_anchors, num_class_slots] representing the class + predictions for the proposals. + """ + class_predictions_with_background = features + for layer in self._class_predictor_layers: + class_predictions_with_background = layer( + class_predictions_with_background) + batch_size = features.get_shape().as_list()[0] + if batch_size is None: + batch_size = tf.shape(features)[0] + class_predictions_with_background = self._score_converter_fn( + class_predictions_with_background) + if self._return_flat_predictions: + class_predictions_with_background = tf.reshape( + class_predictions_with_background, + [batch_size, -1, self._num_class_slots]) + return class_predictions_with_background diff --git a/predictors/heads/keras_class_head_test.py b/predictors/heads/keras_class_head_test.py new file mode 100644 index 0000000..1c339ec --- /dev/null +++ b/predictors/heads/keras_class_head_test.py @@ -0,0 +1,191 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for object_detection.predictors.heads.class_head.""" +import tensorflow as tf + +from google.protobuf import text_format +from object_detection.builders import hyperparams_builder +from object_detection.predictors.heads import keras_class_head +from object_detection.protos import hyperparams_pb2 +from object_detection.utils import test_case + + +class ConvolutionalKerasClassPredictorTest(test_case.TestCase): + + def _build_conv_hyperparams(self): + conv_hyperparams = hyperparams_pb2.Hyperparams() + conv_hyperparams_text_proto = """ + activation: NONE + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) + return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams) + + def test_prediction_size_depthwise_false(self): + conv_hyperparams = self._build_conv_hyperparams() + class_prediction_head = keras_class_head.ConvolutionalClassHead( + is_training=True, + num_class_slots=20, + use_dropout=True, + dropout_keep_prob=0.5, + kernel_size=3, + conv_hyperparams=conv_hyperparams, + freeze_batchnorm=False, + num_predictions_per_location=1, + use_depthwise=False) + image_feature = tf.random_uniform( + [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + class_predictions = class_prediction_head(image_feature,) + self.assertAllEqual([64, 323, 20], + class_predictions.get_shape().as_list()) + + def test_prediction_size_depthwise_true(self): + conv_hyperparams = self._build_conv_hyperparams() + class_prediction_head = keras_class_head.ConvolutionalClassHead( + is_training=True, + num_class_slots=20, + use_dropout=True, + dropout_keep_prob=0.5, + kernel_size=3, + conv_hyperparams=conv_hyperparams, + freeze_batchnorm=False, + num_predictions_per_location=1, + use_depthwise=True) + image_feature = tf.random_uniform( + [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + class_predictions = class_prediction_head(image_feature,) + self.assertAllEqual([64, 323, 20], + class_predictions.get_shape().as_list()) + + +class MaskRCNNClassHeadTest(test_case.TestCase): + + def _build_fc_hyperparams(self, + op_type=hyperparams_pb2.Hyperparams.FC): + hyperparams = hyperparams_pb2.Hyperparams() + hyperparams_text_proto = """ + activation: NONE + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(hyperparams_text_proto, hyperparams) + hyperparams.op = op_type + return hyperparams_builder.KerasLayerHyperparams(hyperparams) + + def test_prediction_size(self): + class_prediction_head = keras_class_head.MaskRCNNClassHead( + is_training=False, + num_class_slots=20, + fc_hyperparams=self._build_fc_hyperparams(), + freeze_batchnorm=False, + use_dropout=True, + dropout_keep_prob=0.5) + roi_pooled_features = tf.random_uniform( + [64, 7, 7, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + prediction = class_prediction_head(roi_pooled_features) + self.assertAllEqual([64, 1, 20], prediction.get_shape().as_list()) + + +class WeightSharedConvolutionalKerasClassPredictorTest(test_case.TestCase): + + def _build_conv_hyperparams(self): + conv_hyperparams = hyperparams_pb2.Hyperparams() + conv_hyperparams_text_proto = """ + activation: NONE + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) + return 
hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
+
+  def test_prediction_size_depthwise_false(self):
+    conv_hyperparams = self._build_conv_hyperparams()
+    class_prediction_head = keras_class_head.WeightSharedConvolutionalClassHead(
+        num_class_slots=20,
+        conv_hyperparams=conv_hyperparams,
+        num_predictions_per_location=1,
+        use_depthwise=False)
+    image_feature = tf.random_uniform(
+        [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+    class_predictions = class_prediction_head(image_feature)
+    self.assertAllEqual([64, 323, 20], class_predictions.get_shape().as_list())
+
+  def test_prediction_size_depthwise_true(self):
+    conv_hyperparams = self._build_conv_hyperparams()
+    class_prediction_head = keras_class_head.WeightSharedConvolutionalClassHead(
+        num_class_slots=20,
+        conv_hyperparams=conv_hyperparams,
+        num_predictions_per_location=1,
+        use_depthwise=True)
+    image_feature = tf.random_uniform(
+        [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+    class_predictions = class_prediction_head(image_feature)
+    self.assertAllEqual([64, 323, 20], class_predictions.get_shape().as_list())
+
+  def test_variable_count_depth_wise_true(self):
+    g = tf.Graph()
+    with g.as_default():
+      conv_hyperparams = self._build_conv_hyperparams()
+      class_prediction_head = (
+          keras_class_head.WeightSharedConvolutionalClassHead(
+              num_class_slots=20,
+              conv_hyperparams=conv_hyperparams,
+              num_predictions_per_location=1,
+              use_depthwise=True))
+      image_feature = tf.random_uniform(
+          [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+      _ = class_prediction_head(image_feature)
+      variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
+      self.assertEqual(len(variables), 3)
+
+  def test_variable_count_depth_wise_false(self):
+    g = tf.Graph()
+    with g.as_default():
+      conv_hyperparams = self._build_conv_hyperparams()
+      class_prediction_head = (
+          keras_class_head.WeightSharedConvolutionalClassHead(
+              num_class_slots=20,
+              conv_hyperparams=conv_hyperparams,
+              num_predictions_per_location=1,
+              use_depthwise=False))
+      image_feature = tf.random_uniform(
+          [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+      _ = class_prediction_head(image_feature)
+      variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
+      self.assertEqual(len(variables), 2)
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/predictors/heads/keras_mask_head.py b/predictors/heads/keras_mask_head.py
new file mode 100644
index 0000000..86cc48e
--- /dev/null
+++ b/predictors/heads/keras_mask_head.py
@@ -0,0 +1,441 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Keras Mask Heads.
+
+Contains Mask prediction head classes for different meta architectures.
+All the mask prediction heads have a predict function that receives the
+`features` as the first argument and returns `mask_predictions`.
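+
+As a rough shape guide: the convolutional heads below return tensors of shape
+[batch_size, num_anchors, num_masks, mask_height, mask_width], while
+MaskRCNNMaskHead returns [batch_size, 1, num_classes, mask_height,
+mask_width]; see the individual _predict docstrings for details.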
+""" +import math +import tensorflow as tf + +from object_detection.predictors.heads import head +from object_detection.utils import ops +from object_detection.utils import shape_utils + + +class ConvolutionalMaskHead(head.KerasHead): + """Convolutional class prediction head.""" + + def __init__(self, + is_training, + num_classes, + use_dropout, + dropout_keep_prob, + kernel_size, + num_predictions_per_location, + conv_hyperparams, + freeze_batchnorm, + use_depthwise=False, + mask_height=7, + mask_width=7, + masks_are_class_agnostic=False, + name=None): + """Constructor. + + Args: + is_training: Indicates whether the BoxPredictor is in training mode. + num_classes: Number of classes. + use_dropout: Option to use dropout or not. Note that a single dropout + op is applied here prior to both box and class predictions, which stands + in contrast to the ConvolutionalBoxPredictor below. + dropout_keep_prob: Keep probability for dropout. + This is only used if use_dropout is True. + kernel_size: Size of final convolution kernel. If the + spatial resolution of the feature map is smaller than the kernel size, + then the kernel size is automatically set to be + min(feature_width, feature_height). + num_predictions_per_location: Number of box predictions to be made per + spatial location. Int specifying number of boxes per location. + conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object + containing hyperparameters for convolution ops. + freeze_batchnorm: Bool. Whether to freeze batch norm parameters during + training or not. When training with a small batch size (e.g. 1), it is + desirable to freeze batch norm update and use pretrained batch norm + params. + use_depthwise: Whether to use depthwise convolutions for prediction + steps. Default is False. + mask_height: Desired output mask height. The default value is 7. + mask_width: Desired output mask width. The default value is 7. + masks_are_class_agnostic: Boolean determining if the mask-head is + class-agnostic or not. + name: A string name scope to assign to the model. If `None`, Keras + will auto-generate one from the class name. + + Raises: + ValueError: if min_depth > max_depth. + """ + super(ConvolutionalMaskHead, self).__init__(name=name) + self._is_training = is_training + self._num_classes = num_classes + self._use_dropout = use_dropout + self._dropout_keep_prob = dropout_keep_prob + self._kernel_size = kernel_size + self._num_predictions_per_location = num_predictions_per_location + self._use_depthwise = use_depthwise + self._mask_height = mask_height + self._mask_width = mask_width + self._masks_are_class_agnostic = masks_are_class_agnostic + + self._mask_predictor_layers = [] + + # Add a slot for the background class. + if self._masks_are_class_agnostic: + self._num_masks = 1 + else: + self._num_masks = self._num_classes + + num_mask_channels = self._num_masks * self._mask_height * self._mask_width + + if self._use_dropout: + self._mask_predictor_layers.append( + # The Dropout layer's `training` parameter for the call method must + # be set implicitly by the Keras set_learning_phase. The object + # detection training code takes care of this. 
+          tf.keras.layers.Dropout(rate=1.0 - self._dropout_keep_prob))
+    if self._use_depthwise:
+      self._mask_predictor_layers.append(
+          tf.keras.layers.DepthwiseConv2D(
+              [self._kernel_size, self._kernel_size],
+              padding='SAME',
+              depth_multiplier=1,
+              strides=1,
+              dilation_rate=1,
+              name='MaskPredictor_depthwise',
+              **conv_hyperparams.params()))
+      self._mask_predictor_layers.append(
+          conv_hyperparams.build_batch_norm(
+              training=(is_training and not freeze_batchnorm),
+              name='MaskPredictor_depthwise_batchnorm'))
+      self._mask_predictor_layers.append(
+          conv_hyperparams.build_activation_layer(
+              name='MaskPredictor_depthwise_activation'))
+      self._mask_predictor_layers.append(
+          tf.keras.layers.Conv2D(
+              num_predictions_per_location * num_mask_channels, [1, 1],
+              name='MaskPredictor',
+              **conv_hyperparams.params(use_bias=True)))
+    else:
+      self._mask_predictor_layers.append(
+          tf.keras.layers.Conv2D(
+              num_predictions_per_location * num_mask_channels,
+              [self._kernel_size, self._kernel_size],
+              padding='SAME',
+              name='MaskPredictor',
+              **conv_hyperparams.params(use_bias=True)))
+
+  def _predict(self, features):
+    """Predicts masks.
+
+    Args:
+      features: A float tensor of shape [batch_size, height, width, channels]
+        containing image features.
+
+    Returns:
+      mask_predictions: A float tensor of shape
+        [batch_size, num_anchors, num_masks, mask_height, mask_width]
+        representing the mask predictions for the proposals.
+    """
+    mask_predictions = features
+    for layer in self._mask_predictor_layers:
+      mask_predictions = layer(mask_predictions)
+    batch_size = features.get_shape().as_list()[0]
+    if batch_size is None:
+      batch_size = tf.shape(features)[0]
+    mask_predictions = tf.reshape(
+        mask_predictions,
+        [batch_size, -1, self._num_masks, self._mask_height, self._mask_width])
+    return mask_predictions
+
+
+class MaskRCNNMaskHead(head.KerasHead):
+  """Mask RCNN mask prediction head.
+
+  This is a piece of Mask RCNN which is responsible for predicting
+  just the pixelwise foreground scores for regions within the boxes.
+
+  Please refer to Mask RCNN paper:
+  https://arxiv.org/abs/1703.06870
+  """
+
+  def __init__(self,
+               is_training,
+               num_classes,
+               freeze_batchnorm,
+               conv_hyperparams,
+               mask_height=14,
+               mask_width=14,
+               mask_prediction_num_conv_layers=2,
+               mask_prediction_conv_depth=256,
+               masks_are_class_agnostic=False,
+               convolve_then_upsample=False,
+               name=None):
+    """Constructor.
+
+    Args:
+      is_training: Indicates whether the Mask head is in training mode.
+      num_classes: number of classes. Note that num_classes *does not*
+        include the background category, so if groundtruth labels take values
+        in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
+        assigned classification targets can range from {0,... K}).
+      freeze_batchnorm: Whether to freeze batch norm parameters during
+        training or not. When training with a small batch size (e.g. 1), it is
+        desirable to freeze batch norm update and use pretrained batch norm
+        params.
+      conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
+        containing hyperparameters for convolution ops.
+      mask_height: Desired output mask height. The default value is 14.
+      mask_width: Desired output mask width. The default value is 14.
+      mask_prediction_num_conv_layers: Number of convolution layers applied to
+        the image_features in mask prediction branch.
+      mask_prediction_conv_depth: The depth for the first conv2d_transpose op
+        applied to the image_features in the mask prediction branch.
If set + to 0, the depth of the convolution layers will be automatically chosen + based on the number of object classes and the number of channels in the + image features. + masks_are_class_agnostic: Boolean determining if the mask-head is + class-agnostic or not. + convolve_then_upsample: Whether to apply convolutions on mask features + before upsampling using nearest neighbor resizing. Otherwise, mask + features are resized to [`mask_height`, `mask_width`] using bilinear + resizing before applying convolutions. + name: A string name scope to assign to the mask head. If `None`, Keras + will auto-generate one from the class name. + """ + super(MaskRCNNMaskHead, self).__init__(name=name) + self._is_training = is_training + self._freeze_batchnorm = freeze_batchnorm + self._num_classes = num_classes + self._conv_hyperparams = conv_hyperparams + self._mask_height = mask_height + self._mask_width = mask_width + self._mask_prediction_num_conv_layers = mask_prediction_num_conv_layers + self._mask_prediction_conv_depth = mask_prediction_conv_depth + self._masks_are_class_agnostic = masks_are_class_agnostic + self._convolve_then_upsample = convolve_then_upsample + + self._mask_predictor_layers = [] + + def build(self, input_shapes): + num_conv_channels = self._mask_prediction_conv_depth + if num_conv_channels == 0: + num_feature_channels = input_shapes.as_list()[3] + num_conv_channels = self._get_mask_predictor_conv_depth( + num_feature_channels, self._num_classes) + + for i in range(self._mask_prediction_num_conv_layers - 1): + self._mask_predictor_layers.append( + tf.keras.layers.Conv2D( + num_conv_channels, + [3, 3], + padding='SAME', + name='MaskPredictor_conv2d_{}'.format(i), + **self._conv_hyperparams.params())) + self._mask_predictor_layers.append( + self._conv_hyperparams.build_batch_norm( + training=(self._is_training and not self._freeze_batchnorm), + name='MaskPredictor_batchnorm_{}'.format(i))) + self._mask_predictor_layers.append( + self._conv_hyperparams.build_activation_layer( + name='MaskPredictor_activation_{}'.format(i))) + + if self._convolve_then_upsample: + # Replace Transposed Convolution with a Nearest Neighbor upsampling step + # followed by 3x3 convolution. 
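+      # For example, with mask_height=28 and a 14x14 input feature map,
+      # height_scale below works out to 28 / 14 = 2, i.e. each spatial cell
+      # is replicated twice along the height axis.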
+ height_scale = self._mask_height / shape_utils.get_dim_as_int( + input_shapes[1]) + width_scale = self._mask_width / shape_utils.get_dim_as_int( + input_shapes[2]) + # pylint: disable=g-long-lambda + self._mask_predictor_layers.append(tf.keras.layers.Lambda( + lambda features: ops.nearest_neighbor_upsampling( + features, height_scale=height_scale, width_scale=width_scale) + )) + # pylint: enable=g-long-lambda + self._mask_predictor_layers.append( + tf.keras.layers.Conv2D( + num_conv_channels, + [3, 3], + padding='SAME', + name='MaskPredictor_upsample_conv2d', + **self._conv_hyperparams.params())) + self._mask_predictor_layers.append( + self._conv_hyperparams.build_batch_norm( + training=(self._is_training and not self._freeze_batchnorm), + name='MaskPredictor_upsample_batchnorm')) + self._mask_predictor_layers.append( + self._conv_hyperparams.build_activation_layer( + name='MaskPredictor_upsample_activation')) + + num_masks = 1 if self._masks_are_class_agnostic else self._num_classes + self._mask_predictor_layers.append( + tf.keras.layers.Conv2D( + num_masks, + [3, 3], + padding='SAME', + name='MaskPredictor_last_conv2d', + **self._conv_hyperparams.params(use_bias=True))) + + self.built = True + + def _get_mask_predictor_conv_depth(self, + num_feature_channels, + num_classes, + class_weight=3.0, + feature_weight=2.0): + """Computes the depth of the mask predictor convolutions. + + Computes the depth of the mask predictor convolutions given feature channels + and number of classes by performing a weighted average of the two in + log space to compute the number of convolution channels. The weights that + are used for computing the weighted average do not need to sum to 1. + + Args: + num_feature_channels: An integer containing the number of feature + channels. + num_classes: An integer containing the number of classes. + class_weight: Class weight used in computing the weighted average. + feature_weight: Feature weight used in computing the weighted average. + + Returns: + An integer containing the number of convolution channels used by mask + predictor. + """ + num_feature_channels_log = math.log(float(num_feature_channels), 2.0) + num_classes_log = math.log(float(num_classes), 2.0) + weighted_num_feature_channels_log = ( + num_feature_channels_log * feature_weight) + weighted_num_classes_log = num_classes_log * class_weight + total_weight = feature_weight + class_weight + num_conv_channels_log = round( + (weighted_num_feature_channels_log + weighted_num_classes_log) / + total_weight) + return int(math.pow(2.0, num_conv_channels_log)) + + def _predict(self, features): + """Predicts pixelwise foreground scores for regions within the boxes. + + Args: + features: A float tensor of shape [batch_size, height, width, channels] + containing features for a batch of images. + + Returns: + instance_masks: A float tensor of shape + [batch_size, 1, num_classes, mask_height, mask_width]. 
+ """ + if not self._convolve_then_upsample: + features = tf.image.resize_bilinear( + features, [self._mask_height, self._mask_width], + align_corners=True) + + mask_predictions = features + for layer in self._mask_predictor_layers: + mask_predictions = layer(mask_predictions) + return tf.expand_dims( + tf.transpose(mask_predictions, perm=[0, 3, 1, 2]), + axis=1, + name='MaskPredictor') + + +class WeightSharedConvolutionalMaskHead(head.KerasHead): + """Weight shared convolutional mask prediction head based on Keras.""" + + def __init__(self, + num_classes, + num_predictions_per_location, + conv_hyperparams, + kernel_size=3, + use_dropout=False, + dropout_keep_prob=0.8, + mask_height=7, + mask_width=7, + masks_are_class_agnostic=False, + name=None): + """Constructor. + + Args: + num_classes: number of classes. Note that num_classes *does not* + include the background category, so if groundtruth labels take values + in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the + assigned classification targets can range from {0,... K}). + num_predictions_per_location: Number of box predictions to be made per + spatial location. Int specifying number of boxes per location. + conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object + containing hyperparameters for convolution ops. + kernel_size: Size of final convolution kernel. + use_dropout: Whether to apply dropout to class prediction head. + dropout_keep_prob: Probability of keeping activiations. + mask_height: Desired output mask height. The default value is 7. + mask_width: Desired output mask width. The default value is 7. + masks_are_class_agnostic: Boolean determining if the mask-head is + class-agnostic or not. + name: A string name scope to assign to the model. If `None`, Keras + will auto-generate one from the class name. + + Raises: + ValueError: if min_depth > max_depth. + """ + super(WeightSharedConvolutionalMaskHead, self).__init__(name=name) + self._num_classes = num_classes + self._num_predictions_per_location = num_predictions_per_location + self._kernel_size = kernel_size + self._use_dropout = use_dropout + self._dropout_keep_prob = dropout_keep_prob + self._mask_height = mask_height + self._mask_width = mask_width + self._masks_are_class_agnostic = masks_are_class_agnostic + + self._mask_predictor_layers = [] + + if self._masks_are_class_agnostic: + self._num_masks = 1 + else: + self._num_masks = self._num_classes + num_mask_channels = self._num_masks * self._mask_height * self._mask_width + + if self._use_dropout: + self._mask_predictor_layers.append( + tf.keras.layers.Dropout(rate=1.0 - self._dropout_keep_prob)) + self._mask_predictor_layers.append( + tf.keras.layers.Conv2D( + num_predictions_per_location * num_mask_channels, + [self._kernel_size, self._kernel_size], + padding='SAME', + name='MaskPredictor', + **conv_hyperparams.params(use_bias=True))) + + def _predict(self, features): + """Predicts boxes. + + Args: + features: A float tensor of shape [batch_size, height, width, channels] + containing image features. + + Returns: + mask_predictions: A tensor of shape + [batch_size, num_anchors, num_classes, mask_height, mask_width] + representing the mask predictions for the proposals. 
+ """ + mask_predictions = features + for layer in self._mask_predictor_layers: + mask_predictions = layer(mask_predictions) + batch_size = features.get_shape().as_list()[0] + if batch_size is None: + batch_size = tf.shape(features)[0] + mask_predictions = tf.reshape( + mask_predictions, + [batch_size, -1, self._num_masks, self._mask_height, self._mask_width]) + return mask_predictions diff --git a/predictors/heads/keras_mask_head_test.py b/predictors/heads/keras_mask_head_test.py new file mode 100644 index 0000000..46baeb1 --- /dev/null +++ b/predictors/heads/keras_mask_head_test.py @@ -0,0 +1,229 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for object_detection.predictors.heads.mask_head.""" +import tensorflow as tf + +from google.protobuf import text_format +from object_detection.builders import hyperparams_builder +from object_detection.predictors.heads import keras_mask_head +from object_detection.protos import hyperparams_pb2 +from object_detection.utils import test_case + + +class ConvolutionalMaskPredictorTest(test_case.TestCase): + + def _build_conv_hyperparams(self): + conv_hyperparams = hyperparams_pb2.Hyperparams() + conv_hyperparams_text_proto = """ + activation: NONE + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) + return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams) + + def test_prediction_size_use_depthwise_false(self): + conv_hyperparams = self._build_conv_hyperparams() + mask_prediction_head = keras_mask_head.ConvolutionalMaskHead( + is_training=True, + num_classes=20, + use_dropout=True, + dropout_keep_prob=0.5, + kernel_size=3, + conv_hyperparams=conv_hyperparams, + freeze_batchnorm=False, + num_predictions_per_location=1, + use_depthwise=False, + mask_height=7, + mask_width=7) + image_feature = tf.random_uniform( + [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + mask_predictions = mask_prediction_head(image_feature) + self.assertAllEqual([64, 323, 20, 7, 7], + mask_predictions.get_shape().as_list()) + + def test_prediction_size_use_depthwise_true(self): + conv_hyperparams = self._build_conv_hyperparams() + mask_prediction_head = keras_mask_head.ConvolutionalMaskHead( + is_training=True, + num_classes=20, + use_dropout=True, + dropout_keep_prob=0.5, + kernel_size=3, + conv_hyperparams=conv_hyperparams, + freeze_batchnorm=False, + num_predictions_per_location=1, + use_depthwise=True, + mask_height=7, + mask_width=7) + image_feature = tf.random_uniform( + [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + mask_predictions = mask_prediction_head(image_feature) + self.assertAllEqual([64, 323, 20, 7, 7], + mask_predictions.get_shape().as_list()) + + def test_class_agnostic_prediction_size_use_depthwise_false(self): + conv_hyperparams = 
self._build_conv_hyperparams() + mask_prediction_head = keras_mask_head.ConvolutionalMaskHead( + is_training=True, + num_classes=20, + use_dropout=True, + dropout_keep_prob=0.5, + kernel_size=3, + conv_hyperparams=conv_hyperparams, + freeze_batchnorm=False, + num_predictions_per_location=1, + use_depthwise=False, + mask_height=7, + mask_width=7, + masks_are_class_agnostic=True) + image_feature = tf.random_uniform( + [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + mask_predictions = mask_prediction_head(image_feature) + self.assertAllEqual([64, 323, 1, 7, 7], + mask_predictions.get_shape().as_list()) + + def test_class_agnostic_prediction_size_use_depthwise_true(self): + conv_hyperparams = self._build_conv_hyperparams() + mask_prediction_head = keras_mask_head.ConvolutionalMaskHead( + is_training=True, + num_classes=20, + use_dropout=True, + dropout_keep_prob=0.5, + kernel_size=3, + conv_hyperparams=conv_hyperparams, + freeze_batchnorm=False, + num_predictions_per_location=1, + use_depthwise=True, + mask_height=7, + mask_width=7, + masks_are_class_agnostic=True) + image_feature = tf.random_uniform( + [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + mask_predictions = mask_prediction_head(image_feature) + self.assertAllEqual([64, 323, 1, 7, 7], + mask_predictions.get_shape().as_list()) + + +class MaskRCNNMaskHeadTest(test_case.TestCase): + + def _build_conv_hyperparams(self, + op_type=hyperparams_pb2.Hyperparams.CONV): + hyperparams = hyperparams_pb2.Hyperparams() + hyperparams_text_proto = """ + activation: NONE + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(hyperparams_text_proto, hyperparams) + hyperparams.op = op_type + return hyperparams_builder.KerasLayerHyperparams(hyperparams) + + def test_prediction_size(self): + mask_prediction_head = keras_mask_head.MaskRCNNMaskHead( + is_training=True, + num_classes=20, + conv_hyperparams=self._build_conv_hyperparams(), + freeze_batchnorm=False, + mask_height=14, + mask_width=14, + mask_prediction_num_conv_layers=2, + mask_prediction_conv_depth=256, + masks_are_class_agnostic=False) + roi_pooled_features = tf.random_uniform( + [64, 7, 7, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + prediction = mask_prediction_head(roi_pooled_features) + self.assertAllEqual([64, 1, 20, 14, 14], prediction.get_shape().as_list()) + + def test_prediction_size_with_convolve_then_upsample(self): + mask_prediction_head = keras_mask_head.MaskRCNNMaskHead( + is_training=True, + num_classes=20, + conv_hyperparams=self._build_conv_hyperparams(), + freeze_batchnorm=False, + mask_height=28, + mask_width=28, + mask_prediction_num_conv_layers=2, + mask_prediction_conv_depth=256, + masks_are_class_agnostic=True, + convolve_then_upsample=True) + roi_pooled_features = tf.random_uniform( + [64, 14, 14, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + prediction = mask_prediction_head(roi_pooled_features) + self.assertAllEqual([64, 1, 1, 28, 28], prediction.get_shape().as_list()) + + +class WeightSharedConvolutionalMaskPredictorTest(test_case.TestCase): + + def _build_conv_hyperparams(self): + conv_hyperparams = hyperparams_pb2.Hyperparams() + conv_hyperparams_text_proto = """ + activation: NONE + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) + return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams) + + def 
test_prediction_size(self):
+ mask_prediction_head = (
+ keras_mask_head.WeightSharedConvolutionalMaskHead(
+ num_classes=20,
+ num_predictions_per_location=1,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ mask_height=7,
+ mask_width=7))
+ image_feature = tf.random_uniform(
+ [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ mask_predictions = mask_prediction_head(image_feature)
+ self.assertAllEqual([64, 323, 20, 7, 7],
+ mask_predictions.get_shape().as_list())
+
+ def test_class_agnostic_prediction_size(self):
+ mask_prediction_head = (
+ keras_mask_head.WeightSharedConvolutionalMaskHead(
+ num_classes=20,
+ num_predictions_per_location=1,
+ conv_hyperparams=self._build_conv_hyperparams(),
+ mask_height=7,
+ mask_width=7,
+ masks_are_class_agnostic=True))
+ image_feature = tf.random_uniform(
+ [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+ mask_predictions = mask_prediction_head(image_feature)
+ self.assertAllEqual([64, 323, 1, 7, 7],
+ mask_predictions.get_shape().as_list())
+
+if __name__ == '__main__':
+ tf.test.main() diff --git a/predictors/heads/keypoint_head.py b/predictors/heads/keypoint_head.py new file mode 100644 index 0000000..18cac4a --- /dev/null +++ b/predictors/heads/keypoint_head.py @@ -0,0 +1,110 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Keypoint Head.
+
+Contains Keypoint prediction head classes for different meta architectures.
+All the keypoint prediction heads have a predict function that receives the
+`features` as the first argument and returns `keypoint_predictions`.
+Keypoints can be used to represent human body joint locations, as in the
+Mask R-CNN paper, or to represent different part locations of objects.
+"""
+import tensorflow as tf
+from tensorflow.contrib import slim as contrib_slim
+
+from object_detection.predictors.heads import head
+slim = contrib_slim
+
+
+class MaskRCNNKeypointHead(head.Head):
+ """Mask RCNN keypoint prediction head.
+
+ Please refer to Mask RCNN paper:
+ https://arxiv.org/abs/1703.06870
+ """
+
+ def __init__(self,
+ num_keypoints=17,
+ conv_hyperparams_fn=None,
+ keypoint_heatmap_height=56,
+ keypoint_heatmap_width=56,
+ keypoint_prediction_num_conv_layers=8,
+ keypoint_prediction_conv_depth=512):
+ """Constructor.
+
+ Args:
+ num_keypoints: (int scalar) number of keypoints.
+ conv_hyperparams_fn: A function to generate tf-slim arg_scope with
+ hyperparameters for convolution ops.
+ keypoint_heatmap_height: Desired output heatmap height. The default
+ value is 56.
+ keypoint_heatmap_width: Desired output heatmap width. The default
+ value is 56.
+ keypoint_prediction_num_conv_layers: Number of convolution layers applied
+ to the image_features in the keypoint prediction branch.
+ keypoint_prediction_conv_depth: The depth of the convolution layers
+ applied to the image_features in the keypoint prediction branch.
If set + to 0, the depth of the convolution layers will be automatically chosen + based on the number of object classes and the number of channels in the + image features. + """ + super(MaskRCNNKeypointHead, self).__init__() + self._num_keypoints = num_keypoints + self._conv_hyperparams_fn = conv_hyperparams_fn + self._keypoint_heatmap_height = keypoint_heatmap_height + self._keypoint_heatmap_width = keypoint_heatmap_width + self._keypoint_prediction_num_conv_layers = ( + keypoint_prediction_num_conv_layers) + self._keypoint_prediction_conv_depth = keypoint_prediction_conv_depth + + def predict(self, features, num_predictions_per_location=1): + """Performs keypoint prediction. + + Args: + features: A float tensor of shape [batch_size, height, width, + channels] containing features for a batch of images. + num_predictions_per_location: Int containing number of predictions per + location. + + Returns: + instance_masks: A float tensor of shape + [batch_size, 1, num_keypoints, heatmap_height, heatmap_width]. + + Raises: + ValueError: If num_predictions_per_location is not 1. + """ + if num_predictions_per_location != 1: + raise ValueError('Only num_predictions_per_location=1 is supported') + with slim.arg_scope(self._conv_hyperparams_fn()): + net = slim.conv2d( + features, + self._keypoint_prediction_conv_depth, [3, 3], + scope='conv_1') + for i in range(1, self._keypoint_prediction_num_conv_layers): + net = slim.conv2d( + net, + self._keypoint_prediction_conv_depth, [3, 3], + scope='conv_%d' % (i + 1)) + net = slim.conv2d_transpose( + net, self._num_keypoints, [2, 2], scope='deconv1') + heatmaps_mask = tf.image.resize_bilinear( + net, [self._keypoint_heatmap_height, self._keypoint_heatmap_width], + align_corners=True, + name='upsample') + return tf.expand_dims( + tf.transpose(heatmaps_mask, perm=[0, 3, 1, 2]), + axis=1, + name='KeypointPredictor') diff --git a/predictors/heads/keypoint_head_test.py b/predictors/heads/keypoint_head_test.py new file mode 100644 index 0000000..626d59c --- /dev/null +++ b/predictors/heads/keypoint_head_test.py @@ -0,0 +1,57 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for object_detection.predictors.heads.keypoint_head.""" +import tensorflow as tf + +from google.protobuf import text_format +from object_detection.builders import hyperparams_builder +from object_detection.predictors.heads import keypoint_head +from object_detection.protos import hyperparams_pb2 +from object_detection.utils import test_case + + +class MaskRCNNKeypointHeadTest(test_case.TestCase): + + def _build_arg_scope_with_hyperparams(self, + op_type=hyperparams_pb2.Hyperparams.FC): + hyperparams = hyperparams_pb2.Hyperparams() + hyperparams_text_proto = """ + activation: NONE + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(hyperparams_text_proto, hyperparams) + hyperparams.op = op_type + return hyperparams_builder.build(hyperparams, is_training=True) + + def test_prediction_size(self): + keypoint_prediction_head = keypoint_head.MaskRCNNKeypointHead( + conv_hyperparams_fn=self._build_arg_scope_with_hyperparams()) + roi_pooled_features = tf.random_uniform( + [64, 14, 14, 1024], minval=-2.0, maxval=2.0, dtype=tf.float32) + prediction = keypoint_prediction_head.predict( + features=roi_pooled_features, num_predictions_per_location=1) + self.assertAllEqual([64, 1, 17, 56, 56], prediction.get_shape().as_list()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/predictors/heads/mask_head.py b/predictors/heads/mask_head.py new file mode 100644 index 0000000..d30a523 --- /dev/null +++ b/predictors/heads/mask_head.py @@ -0,0 +1,356 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Mask Head. + +Contains Mask prediction head classes for different meta architectures. +All the mask prediction heads have a predict function that receives the +`features` as the first argument and returns `mask_predictions`. +""" +import math +import tensorflow as tf +from tensorflow.contrib import slim as contrib_slim + +from object_detection.predictors.heads import head +from object_detection.utils import ops + +slim = contrib_slim + + +class MaskRCNNMaskHead(head.Head): + """Mask RCNN mask prediction head. + + Please refer to Mask RCNN paper: + https://arxiv.org/abs/1703.06870 + """ + + def __init__(self, + num_classes, + conv_hyperparams_fn=None, + mask_height=14, + mask_width=14, + mask_prediction_num_conv_layers=2, + mask_prediction_conv_depth=256, + masks_are_class_agnostic=False, + convolve_then_upsample=False): + """Constructor. + + Args: + num_classes: number of classes. Note that num_classes *does not* + include the background category, so if groundtruth labels take values + in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the + assigned classification targets can range from {0,... K}). 
+ conv_hyperparams_fn: A function to generate tf-slim arg_scope with + hyperparameters for convolution ops. + mask_height: Desired output mask height. The default value is 14. + mask_width: Desired output mask width. The default value is 14. + mask_prediction_num_conv_layers: Number of convolution layers applied to + the image_features in mask prediction branch. + mask_prediction_conv_depth: The depth for the first conv2d_transpose op + applied to the image_features in the mask prediction branch. If set + to 0, the depth of the convolution layers will be automatically chosen + based on the number of object classes and the number of channels in the + image features. + masks_are_class_agnostic: Boolean determining if the mask-head is + class-agnostic or not. + convolve_then_upsample: Whether to apply convolutions on mask features + before upsampling using nearest neighbor resizing. Otherwise, mask + features are resized to [`mask_height`, `mask_width`] using bilinear + resizing before applying convolutions. + + Raises: + ValueError: conv_hyperparams_fn is None. + """ + super(MaskRCNNMaskHead, self).__init__() + self._num_classes = num_classes + self._conv_hyperparams_fn = conv_hyperparams_fn + self._mask_height = mask_height + self._mask_width = mask_width + self._mask_prediction_num_conv_layers = mask_prediction_num_conv_layers + self._mask_prediction_conv_depth = mask_prediction_conv_depth + self._masks_are_class_agnostic = masks_are_class_agnostic + self._convolve_then_upsample = convolve_then_upsample + if conv_hyperparams_fn is None: + raise ValueError('conv_hyperparams_fn is None.') + + def _get_mask_predictor_conv_depth(self, + num_feature_channels, + num_classes, + class_weight=3.0, + feature_weight=2.0): + """Computes the depth of the mask predictor convolutions. + + Computes the depth of the mask predictor convolutions given feature channels + and number of classes by performing a weighted average of the two in + log space to compute the number of convolution channels. The weights that + are used for computing the weighted average do not need to sum to 1. + + Args: + num_feature_channels: An integer containing the number of feature + channels. + num_classes: An integer containing the number of classes. + class_weight: Class weight used in computing the weighted average. + feature_weight: Feature weight used in computing the weighted average. + + Returns: + An integer containing the number of convolution channels used by mask + predictor. + """ + num_feature_channels_log = math.log(float(num_feature_channels), 2.0) + num_classes_log = math.log(float(num_classes), 2.0) + weighted_num_feature_channels_log = ( + num_feature_channels_log * feature_weight) + weighted_num_classes_log = num_classes_log * class_weight + total_weight = feature_weight + class_weight + num_conv_channels_log = round( + (weighted_num_feature_channels_log + weighted_num_classes_log) / + total_weight) + return int(math.pow(2.0, num_conv_channels_log)) + + def predict(self, features, num_predictions_per_location=1): + """Performs mask prediction. + + Args: + features: A float tensor of shape [batch_size, height, width, channels] + containing features for a batch of images. + num_predictions_per_location: Int containing number of predictions per + location. + + Returns: + instance_masks: A float tensor of shape + [batch_size, 1, num_classes, mask_height, mask_width]. + + Raises: + ValueError: If num_predictions_per_location is not 1. 
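+
+ For illustration: with mask_prediction_conv_depth=0, 1024 feature
+ channels and 20 classes (assumed values), the automatically chosen depth
+ is 2**round((2 * log2(1024) + 3 * log2(20)) / 5) = 2**7 = 128 channels.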
+ """ + if num_predictions_per_location != 1: + raise ValueError('Only num_predictions_per_location=1 is supported') + num_conv_channels = self._mask_prediction_conv_depth + if num_conv_channels == 0: + num_feature_channels = features.get_shape().as_list()[3] + num_conv_channels = self._get_mask_predictor_conv_depth( + num_feature_channels, self._num_classes) + with slim.arg_scope(self._conv_hyperparams_fn()): + if not self._convolve_then_upsample: + features = tf.image.resize_bilinear( + features, [self._mask_height, self._mask_width], + align_corners=True) + for _ in range(self._mask_prediction_num_conv_layers - 1): + features = slim.conv2d( + features, + num_outputs=num_conv_channels, + kernel_size=[3, 3]) + if self._convolve_then_upsample: + # Replace Transposed Convolution with a Nearest Neighbor upsampling step + # followed by 3x3 convolution. + height_scale = self._mask_height / features.shape[1].value + width_scale = self._mask_width / features.shape[2].value + features = ops.nearest_neighbor_upsampling( + features, height_scale=height_scale, width_scale=width_scale) + features = slim.conv2d( + features, + num_outputs=num_conv_channels, + kernel_size=[3, 3]) + + num_masks = 1 if self._masks_are_class_agnostic else self._num_classes + mask_predictions = slim.conv2d( + features, + num_outputs=num_masks, + activation_fn=None, + normalizer_fn=None, + kernel_size=[3, 3]) + return tf.expand_dims( + tf.transpose(mask_predictions, perm=[0, 3, 1, 2]), + axis=1, + name='MaskPredictor') + + +class ConvolutionalMaskHead(head.Head): + """Convolutional class prediction head.""" + + def __init__(self, + is_training, + num_classes, + use_dropout, + dropout_keep_prob, + kernel_size, + use_depthwise=False, + mask_height=7, + mask_width=7, + masks_are_class_agnostic=False): + """Constructor. + + Args: + is_training: Indicates whether the BoxPredictor is in training mode. + num_classes: Number of classes. + use_dropout: Option to use dropout or not. Note that a single dropout + op is applied here prior to both box and class predictions, which stands + in contrast to the ConvolutionalBoxPredictor below. + dropout_keep_prob: Keep probability for dropout. + This is only used if use_dropout is True. + kernel_size: Size of final convolution kernel. If the + spatial resolution of the feature map is smaller than the kernel size, + then the kernel size is automatically set to be + min(feature_width, feature_height). + use_depthwise: Whether to use depthwise convolutions for prediction + steps. Default is False. + mask_height: Desired output mask height. The default value is 7. + mask_width: Desired output mask width. The default value is 7. + masks_are_class_agnostic: Boolean determining if the mask-head is + class-agnostic or not. + + Raises: + ValueError: if min_depth > max_depth. + """ + super(ConvolutionalMaskHead, self).__init__() + self._is_training = is_training + self._num_classes = num_classes + self._use_dropout = use_dropout + self._dropout_keep_prob = dropout_keep_prob + self._kernel_size = kernel_size + self._use_depthwise = use_depthwise + self._mask_height = mask_height + self._mask_width = mask_width + self._masks_are_class_agnostic = masks_are_class_agnostic + + def predict(self, features, num_predictions_per_location): + """Predicts boxes. + + Args: + features: A float tensor of shape [batch_size, height, width, channels] + containing image features. + num_predictions_per_location: Number of box predictions to be made per + spatial location. 
+
+ Returns:
+ mask_predictions: A float tensor of shape
+ [batch_size, num_anchors, num_masks, mask_height, mask_width]
+ representing the mask predictions for the proposals.
+ """
+ image_feature = features
+ # Predict a single mask if masks are class-agnostic, otherwise one per
+ # class.
+ if self._masks_are_class_agnostic:
+ num_masks = 1
+ else:
+ num_masks = self._num_classes
+ num_mask_channels = num_masks * self._mask_height * self._mask_width
+ net = image_feature
+ if self._use_dropout:
+ net = slim.dropout(net, keep_prob=self._dropout_keep_prob)
+ if self._use_depthwise:
+ mask_predictions = slim.separable_conv2d(
+ net, None, [self._kernel_size, self._kernel_size],
+ padding='SAME', depth_multiplier=1, stride=1,
+ rate=1, scope='MaskPredictor_depthwise')
+ mask_predictions = slim.conv2d(
+ mask_predictions,
+ num_predictions_per_location * num_mask_channels,
+ [1, 1],
+ activation_fn=None,
+ normalizer_fn=None,
+ normalizer_params=None,
+ scope='MaskPredictor')
+ else:
+ mask_predictions = slim.conv2d(
+ net,
+ num_predictions_per_location * num_mask_channels,
+ [self._kernel_size, self._kernel_size],
+ activation_fn=None,
+ normalizer_fn=None,
+ normalizer_params=None,
+ scope='MaskPredictor')
+ batch_size = features.get_shape().as_list()[0]
+ if batch_size is None:
+ batch_size = tf.shape(features)[0]
+ mask_predictions = tf.reshape(
+ mask_predictions,
+ [batch_size, -1, num_masks, self._mask_height, self._mask_width])
+ return mask_predictions
+
+
+# TODO(alirezafathi): See if possible to unify Weight Shared with regular
+# convolutional mask head.
+class WeightSharedConvolutionalMaskHead(head.Head):
+ """Weight shared convolutional mask prediction head."""
+
+ def __init__(self,
+ num_classes,
+ kernel_size=3,
+ use_dropout=False,
+ dropout_keep_prob=0.8,
+ mask_height=7,
+ mask_width=7,
+ masks_are_class_agnostic=False):
+ """Constructor.
+
+ Args:
+ num_classes: number of classes. Note that num_classes *does not*
+ include the background category, so if groundtruth labels take values
+ in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
+ assigned classification targets can range from {0,... K}).
+ kernel_size: Size of final convolution kernel.
+ use_dropout: Whether to apply dropout to the mask prediction head.
+ dropout_keep_prob: Probability of keeping activations.
+ mask_height: Desired output mask height. The default value is 7.
+ mask_width: Desired output mask width. The default value is 7.
+ masks_are_class_agnostic: Boolean determining if the mask-head is
+ class-agnostic or not.
+ """
+ super(WeightSharedConvolutionalMaskHead, self).__init__()
+ self._num_classes = num_classes
+ self._kernel_size = kernel_size
+ self._use_dropout = use_dropout
+ self._dropout_keep_prob = dropout_keep_prob
+ self._mask_height = mask_height
+ self._mask_width = mask_width
+ self._masks_are_class_agnostic = masks_are_class_agnostic
+
+ def predict(self, features, num_predictions_per_location):
+ """Predicts masks.
+
+ Args:
+ features: A float tensor of shape [batch_size, height, width, channels]
+ containing image features.
+ num_predictions_per_location: Number of mask predictions to be made per
+ spatial location.
+
+ Returns:
+ mask_predictions: A tensor of shape
+ [batch_size, num_anchors, num_classes, mask_height, mask_width]
+ representing the mask predictions for the proposals.
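+
+ For illustration (values used by the unit tests below): a feature map of
+ shape [64, 17, 19, 1024] with num_predictions_per_location=1 yields
+ num_anchors = 17 * 19 * 1 = 323, so the reshaped output has shape
+ [64, 323, num_masks, 7, 7].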
+ """ + mask_predictions_net = features + if self._masks_are_class_agnostic: + num_masks = 1 + else: + num_masks = self._num_classes + num_mask_channels = num_masks * self._mask_height * self._mask_width + if self._use_dropout: + mask_predictions_net = slim.dropout( + mask_predictions_net, keep_prob=self._dropout_keep_prob) + mask_predictions = slim.conv2d( + mask_predictions_net, + num_predictions_per_location * num_mask_channels, + [self._kernel_size, self._kernel_size], + activation_fn=None, stride=1, padding='SAME', + normalizer_fn=None, + scope='MaskPredictor') + batch_size = features.get_shape().as_list()[0] + if batch_size is None: + batch_size = tf.shape(features)[0] + mask_predictions = tf.reshape( + mask_predictions, + [batch_size, -1, num_masks, self._mask_height, self._mask_width]) + return mask_predictions diff --git a/predictors/heads/mask_head_test.py b/predictors/heads/mask_head_test.py new file mode 100644 index 0000000..ae46d6a --- /dev/null +++ b/predictors/heads/mask_head_test.py @@ -0,0 +1,185 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for object_detection.predictors.heads.mask_head.""" +import tensorflow as tf + +from google.protobuf import text_format +from object_detection.builders import hyperparams_builder +from object_detection.predictors.heads import mask_head +from object_detection.protos import hyperparams_pb2 +from object_detection.utils import test_case + + +class MaskRCNNMaskHeadTest(test_case.TestCase): + + def _build_arg_scope_with_hyperparams(self, + op_type=hyperparams_pb2.Hyperparams.FC): + hyperparams = hyperparams_pb2.Hyperparams() + hyperparams_text_proto = """ + activation: NONE + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(hyperparams_text_proto, hyperparams) + hyperparams.op = op_type + return hyperparams_builder.build(hyperparams, is_training=True) + + def test_prediction_size(self): + mask_prediction_head = mask_head.MaskRCNNMaskHead( + num_classes=20, + conv_hyperparams_fn=self._build_arg_scope_with_hyperparams(), + mask_height=14, + mask_width=14, + mask_prediction_num_conv_layers=2, + mask_prediction_conv_depth=256, + masks_are_class_agnostic=False) + roi_pooled_features = tf.random_uniform( + [64, 7, 7, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + prediction = mask_prediction_head.predict( + features=roi_pooled_features, num_predictions_per_location=1) + self.assertAllEqual([64, 1, 20, 14, 14], prediction.get_shape().as_list()) + + def test_prediction_size_with_convolve_then_upsample(self): + mask_prediction_head = mask_head.MaskRCNNMaskHead( + num_classes=20, + conv_hyperparams_fn=self._build_arg_scope_with_hyperparams(), + mask_height=28, + mask_width=28, + mask_prediction_num_conv_layers=2, + mask_prediction_conv_depth=256, + masks_are_class_agnostic=True, + convolve_then_upsample=True) + 
roi_pooled_features = tf.random_uniform( + [64, 14, 14, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + prediction = mask_prediction_head.predict( + features=roi_pooled_features, num_predictions_per_location=1) + self.assertAllEqual([64, 1, 1, 28, 28], prediction.get_shape().as_list()) + + +class ConvolutionalMaskPredictorTest(test_case.TestCase): + + def _build_arg_scope_with_hyperparams( + self, op_type=hyperparams_pb2.Hyperparams.CONV): + hyperparams = hyperparams_pb2.Hyperparams() + hyperparams_text_proto = """ + activation: NONE + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(hyperparams_text_proto, hyperparams) + hyperparams.op = op_type + return hyperparams_builder.build(hyperparams, is_training=True) + + def test_prediction_size(self): + mask_prediction_head = mask_head.ConvolutionalMaskHead( + is_training=True, + num_classes=20, + use_dropout=True, + dropout_keep_prob=0.5, + kernel_size=3, + mask_height=7, + mask_width=7) + image_feature = tf.random_uniform( + [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + mask_predictions = mask_prediction_head.predict( + features=image_feature, + num_predictions_per_location=1) + self.assertAllEqual([64, 323, 20, 7, 7], + mask_predictions.get_shape().as_list()) + + def test_class_agnostic_prediction_size(self): + mask_prediction_head = mask_head.ConvolutionalMaskHead( + is_training=True, + num_classes=20, + use_dropout=True, + dropout_keep_prob=0.5, + kernel_size=3, + mask_height=7, + mask_width=7, + masks_are_class_agnostic=True) + image_feature = tf.random_uniform( + [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + mask_predictions = mask_prediction_head.predict( + features=image_feature, + num_predictions_per_location=1) + self.assertAllEqual([64, 323, 1, 7, 7], + mask_predictions.get_shape().as_list()) + + +class WeightSharedConvolutionalMaskPredictorTest(test_case.TestCase): + + def _build_arg_scope_with_hyperparams( + self, op_type=hyperparams_pb2.Hyperparams.CONV): + hyperparams = hyperparams_pb2.Hyperparams() + hyperparams_text_proto = """ + activation: NONE + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(hyperparams_text_proto, hyperparams) + hyperparams.op = op_type + return hyperparams_builder.build(hyperparams, is_training=True) + + def test_prediction_size(self): + mask_prediction_head = ( + mask_head.WeightSharedConvolutionalMaskHead( + num_classes=20, + mask_height=7, + mask_width=7)) + image_feature = tf.random_uniform( + [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + mask_predictions = mask_prediction_head.predict( + features=image_feature, + num_predictions_per_location=1) + self.assertAllEqual([64, 323, 20, 7, 7], + mask_predictions.get_shape().as_list()) + + def test_class_agnostic_prediction_size(self): + mask_prediction_head = ( + mask_head.WeightSharedConvolutionalMaskHead( + num_classes=20, + mask_height=7, + mask_width=7, + masks_are_class_agnostic=True)) + image_feature = tf.random_uniform( + [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) + mask_predictions = mask_prediction_head.predict( + features=image_feature, + num_predictions_per_location=1) + self.assertAllEqual([64, 323, 1, 7, 7], + mask_predictions.get_shape().as_list()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/predictors/mask_rcnn_box_predictor.py b/predictors/mask_rcnn_box_predictor.py new file mode 
100644 index 0000000..2fd1b69 --- /dev/null +++ b/predictors/mask_rcnn_box_predictor.py @@ -0,0 +1,144 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Mask R-CNN Box Predictor.""" +import tensorflow as tf + +from object_detection.core import box_predictor + +slim = tf.contrib.slim + +BOX_ENCODINGS = box_predictor.BOX_ENCODINGS +CLASS_PREDICTIONS_WITH_BACKGROUND = ( + box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND) +MASK_PREDICTIONS = box_predictor.MASK_PREDICTIONS + + +class MaskRCNNBoxPredictor(box_predictor.BoxPredictor): + """Mask R-CNN Box Predictor. + + See Mask R-CNN: He, K., Gkioxari, G., Dollar, P., & Girshick, R. (2017). + Mask R-CNN. arXiv preprint arXiv:1703.06870. + + This is used for the second stage of the Mask R-CNN detector where proposals + cropped from an image are arranged along the batch dimension of the input + image_features tensor. Notice that locations are *not* shared across classes, + thus for each anchor, a separate prediction is made for each class. + + In addition to predicting boxes and classes, optionally this class allows + predicting masks and/or keypoints inside detection boxes. + + Currently this box predictor makes per-class predictions; that is, each + anchor makes a separate box prediction for each class. + """ + + def __init__(self, + is_training, + num_classes, + box_prediction_head, + class_prediction_head, + third_stage_heads): + """Constructor. + + Args: + is_training: Indicates whether the BoxPredictor is in training mode. + num_classes: number of classes. Note that num_classes *does not* + include the background category, so if groundtruth labels take values + in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the + assigned classification targets can range from {0,... K}). + box_prediction_head: The head that predicts the boxes in second stage. + class_prediction_head: The head that predicts the classes in second stage. + third_stage_heads: A dictionary mapping head names to mask rcnn head + classes. + """ + super(MaskRCNNBoxPredictor, self).__init__(is_training, num_classes) + self._box_prediction_head = box_prediction_head + self._class_prediction_head = class_prediction_head + self._third_stage_heads = third_stage_heads + + @property + def num_classes(self): + return self._num_classes + + def get_second_stage_prediction_heads(self): + return BOX_ENCODINGS, CLASS_PREDICTIONS_WITH_BACKGROUND + + def get_third_stage_prediction_heads(self): + return sorted(self._third_stage_heads.keys()) + + def _predict(self, + image_features, + num_predictions_per_location, + prediction_stage=2): + """Optionally computes encoded object locations, confidences, and masks. + + Predicts the heads belonging to the given prediction stage. + + Args: + image_features: A list of float tensors of shape + [batch_size, height_i, width_i, channels_i] containing roi pooled + features for each image. 
The length of the list should be 1 otherwise
+ a ValueError will be raised.
+ num_predictions_per_location: A list of integers representing the number
+ of box predictions to be made per spatial location for each feature map.
+ Currently, this must be set to [1], or an error will be raised.
+ prediction_stage: Prediction stage. Acceptable values are 2 and 3.
+
+ Returns:
+ A dictionary containing the predicted tensors that are listed in
+ self._prediction_heads. A subset of the following keys will exist in the
+ dictionary:
+ BOX_ENCODINGS: A float tensor of shape
+ [batch_size, 1, num_classes, code_size] representing the
+ location of the objects.
+ CLASS_PREDICTIONS_WITH_BACKGROUND: A float tensor of shape
+ [batch_size, 1, num_classes + 1] representing the class
+ predictions for the proposals.
+ MASK_PREDICTIONS: A float tensor of shape
+ [batch_size, 1, num_classes, image_height, image_width]
+
+ Raises:
+ ValueError: If num_predictions_per_location is not 1 or if
+ len(image_features) is not 1.
+ ValueError: if prediction_stage is not 2 or 3.
+ """
+ if (len(num_predictions_per_location) != 1 or
+ num_predictions_per_location[0] != 1):
+ raise ValueError('Currently MaskRCNNBoxPredictor only supports '
+ 'predicting a single box per class per location.')
+ if len(image_features) != 1:
+ raise ValueError('length of `image_features` must be 1. Found {}'.format(
+ len(image_features)))
+ image_feature = image_features[0]
+ predictions_dict = {}
+
+ if prediction_stage == 2:
+ predictions_dict[BOX_ENCODINGS] = self._box_prediction_head.predict(
+ features=image_feature,
+ num_predictions_per_location=num_predictions_per_location[0])
+ predictions_dict[CLASS_PREDICTIONS_WITH_BACKGROUND] = (
+ self._class_prediction_head.predict(
+ features=image_feature,
+ num_predictions_per_location=num_predictions_per_location[0]))
+ elif prediction_stage == 3:
+ for prediction_head in self.get_third_stage_prediction_heads():
+ head_object = self._third_stage_heads[prediction_head]
+ predictions_dict[prediction_head] = head_object.predict(
+ features=image_feature,
+ num_predictions_per_location=num_predictions_per_location[0])
+ else:
+ raise ValueError('prediction_stage should be either 2 or 3.')
+
+ return predictions_dict diff --git a/predictors/mask_rcnn_box_predictor_test.py b/predictors/mask_rcnn_box_predictor_test.py new file mode 100644 index 0000000..77042b5 --- /dev/null +++ b/predictors/mask_rcnn_box_predictor_test.py @@ -0,0 +1,151 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================== + +"""Tests for object_detection.predictors.mask_rcnn_box_predictor.""" +import numpy as np +import tensorflow as tf + +from google.protobuf import text_format +from object_detection.builders import box_predictor_builder +from object_detection.builders import hyperparams_builder +from object_detection.predictors import mask_rcnn_box_predictor as box_predictor +from object_detection.protos import hyperparams_pb2 +from object_detection.utils import test_case + + +class MaskRCNNBoxPredictorTest(test_case.TestCase): + + def _build_arg_scope_with_hyperparams(self, + op_type=hyperparams_pb2.Hyperparams.FC): + hyperparams = hyperparams_pb2.Hyperparams() + hyperparams_text_proto = """ + activation: NONE + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(hyperparams_text_proto, hyperparams) + hyperparams.op = op_type + return hyperparams_builder.build(hyperparams, is_training=True) + + def test_get_boxes_with_five_classes(self): + def graph_fn(image_features): + mask_box_predictor = box_predictor_builder.build_mask_rcnn_box_predictor( + is_training=False, + num_classes=5, + fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(), + use_dropout=False, + dropout_keep_prob=0.5, + box_code_size=4, + ) + box_predictions = mask_box_predictor.predict( + [image_features], + num_predictions_per_location=[1], + scope='BoxPredictor', + prediction_stage=2) + return (box_predictions[box_predictor.BOX_ENCODINGS], + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]) + image_features = np.random.rand(2, 7, 7, 3).astype(np.float32) + (box_encodings, + class_predictions_with_background) = self.execute(graph_fn, + [image_features]) + self.assertAllEqual(box_encodings.shape, [2, 1, 5, 4]) + self.assertAllEqual(class_predictions_with_background.shape, [2, 1, 6]) + + def test_get_boxes_with_five_classes_share_box_across_classes(self): + def graph_fn(image_features): + mask_box_predictor = box_predictor_builder.build_mask_rcnn_box_predictor( + is_training=False, + num_classes=5, + fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(), + use_dropout=False, + dropout_keep_prob=0.5, + box_code_size=4, + share_box_across_classes=True + ) + box_predictions = mask_box_predictor.predict( + [image_features], + num_predictions_per_location=[1], + scope='BoxPredictor', + prediction_stage=2) + return (box_predictions[box_predictor.BOX_ENCODINGS], + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]) + image_features = np.random.rand(2, 7, 7, 3).astype(np.float32) + (box_encodings, + class_predictions_with_background) = self.execute(graph_fn, + [image_features]) + self.assertAllEqual(box_encodings.shape, [2, 1, 1, 4]) + self.assertAllEqual(class_predictions_with_background.shape, [2, 1, 6]) + + def test_value_error_on_predict_instance_masks_with_no_conv_hyperparms(self): + with self.assertRaises(ValueError): + box_predictor_builder.build_mask_rcnn_box_predictor( + is_training=False, + num_classes=5, + fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(), + use_dropout=False, + dropout_keep_prob=0.5, + box_code_size=4, + predict_instance_masks=True) + + def test_get_instance_masks(self): + def graph_fn(image_features): + mask_box_predictor = box_predictor_builder.build_mask_rcnn_box_predictor( + is_training=False, + num_classes=5, + fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(), + use_dropout=False, + dropout_keep_prob=0.5, 
+ box_code_size=4, + conv_hyperparams_fn=self._build_arg_scope_with_hyperparams( + op_type=hyperparams_pb2.Hyperparams.CONV), + predict_instance_masks=True) + box_predictions = mask_box_predictor.predict( + [image_features], + num_predictions_per_location=[1], + scope='BoxPredictor', + prediction_stage=3) + return (box_predictions[box_predictor.MASK_PREDICTIONS],) + image_features = np.random.rand(2, 7, 7, 3).astype(np.float32) + mask_predictions = self.execute(graph_fn, [image_features]) + self.assertAllEqual(mask_predictions.shape, [2, 1, 5, 14, 14]) + + def test_do_not_return_instance_masks_without_request(self): + image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32) + mask_box_predictor = box_predictor_builder.build_mask_rcnn_box_predictor( + is_training=False, + num_classes=5, + fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(), + use_dropout=False, + dropout_keep_prob=0.5, + box_code_size=4) + box_predictions = mask_box_predictor.predict( + [image_features], + num_predictions_per_location=[1], + scope='BoxPredictor', + prediction_stage=2) + self.assertEqual(len(box_predictions), 2) + self.assertTrue(box_predictor.BOX_ENCODINGS in box_predictions) + self.assertTrue(box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND + in box_predictions) + + +if __name__ == '__main__': + tf.test.main() diff --git a/predictors/mask_rcnn_keras_box_predictor.py b/predictors/mask_rcnn_keras_box_predictor.py new file mode 100644 index 0000000..baca02e --- /dev/null +++ b/predictors/mask_rcnn_keras_box_predictor.py @@ -0,0 +1,139 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Mask R-CNN Box Predictor.""" +from object_detection.core import box_predictor + + +BOX_ENCODINGS = box_predictor.BOX_ENCODINGS +CLASS_PREDICTIONS_WITH_BACKGROUND = ( + box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND) +MASK_PREDICTIONS = box_predictor.MASK_PREDICTIONS + + +class MaskRCNNKerasBoxPredictor(box_predictor.KerasBoxPredictor): + """Mask R-CNN Box Predictor. + + See Mask R-CNN: He, K., Gkioxari, G., Dollar, P., & Girshick, R. (2017). + Mask R-CNN. arXiv preprint arXiv:1703.06870. + + This is used for the second stage of the Mask R-CNN detector where proposals + cropped from an image are arranged along the batch dimension of the input + image_features tensor. Notice that locations are *not* shared across classes, + thus for each anchor, a separate prediction is made for each class. + + In addition to predicting boxes and classes, optionally this class allows + predicting masks and/or keypoints inside detection boxes. + + Currently this box predictor makes per-class predictions; that is, each + anchor makes a separate box prediction for each class. + """ + + def __init__(self, + is_training, + num_classes, + freeze_batchnorm, + box_prediction_head, + class_prediction_head, + third_stage_heads, + name=None): + """Constructor. 
+
+ Args:
+ is_training: Indicates whether the BoxPredictor is in training mode.
+ num_classes: number of classes. Note that num_classes *does not*
+ include the background category, so if groundtruth labels take values
+ in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
+ assigned classification targets can range from {0,... K}).
+ freeze_batchnorm: Whether to freeze batch norm parameters during
+ training or not. When training with a small batch size (e.g. 1), it is
+ desirable to freeze batch norm update and use pretrained batch norm
+ params.
+ box_prediction_head: The head that predicts the boxes in the second
+ stage.
+ class_prediction_head: The head that predicts the classes in the second
+ stage.
+ third_stage_heads: A dictionary mapping head names to mask rcnn head
+ classes.
+ name: A string name scope to assign to the model. If `None`, Keras
+ will auto-generate one from the class name.
+ """
+ super(MaskRCNNKerasBoxPredictor, self).__init__(
+ is_training, num_classes, freeze_batchnorm=freeze_batchnorm,
+ inplace_batchnorm_update=False, name=name)
+ self._box_prediction_head = box_prediction_head
+ self._class_prediction_head = class_prediction_head
+ self._third_stage_heads = third_stage_heads
+
+ @property
+ def num_classes(self):
+ return self._num_classes
+
+ def get_second_stage_prediction_heads(self):
+ return BOX_ENCODINGS, CLASS_PREDICTIONS_WITH_BACKGROUND
+
+ def get_third_stage_prediction_heads(self):
+ return sorted(self._third_stage_heads.keys())
+
+ def _predict(self,
+ image_features,
+ prediction_stage=2,
+ **kwargs):
+ """Optionally computes encoded object locations, confidences, and masks.
+
+ Predicts the heads belonging to the given prediction stage.
+
+ Args:
+ image_features: A list of float tensors of shape
+ [batch_size, height_i, width_i, channels_i] containing roi pooled
+ features for each image. The length of the list should be 1 otherwise
+ a ValueError will be raised.
+ prediction_stage: Prediction stage. Acceptable values are 2 and 3.
+ **kwargs: Unused keyword args.
+
+ Returns:
+ A dictionary containing the predicted tensors that are listed in
+ self._prediction_heads. A subset of the following keys will exist in the
+ dictionary:
+ BOX_ENCODINGS: A float tensor of shape
+ [batch_size, 1, num_classes, code_size] representing the
+ location of the objects.
+ CLASS_PREDICTIONS_WITH_BACKGROUND: A float tensor of shape
+ [batch_size, 1, num_classes + 1] representing the class
+ predictions for the proposals.
+ MASK_PREDICTIONS: A float tensor of shape
+ [batch_size, 1, num_classes, image_height, image_width]
+
+ Raises:
+ ValueError: if len(image_features) is not 1.
+ ValueError: if prediction_stage is not 2 or 3.
+ """
+ if len(image_features) != 1:
+ raise ValueError('length of `image_features` must be 1.
Found {}'.format( + len(image_features))) + image_feature = image_features[0] + predictions_dict = {} + + if prediction_stage == 2: + predictions_dict[BOX_ENCODINGS] = self._box_prediction_head(image_feature) + predictions_dict[CLASS_PREDICTIONS_WITH_BACKGROUND] = ( + self._class_prediction_head(image_feature)) + elif prediction_stage == 3: + for prediction_head in self.get_third_stage_prediction_heads(): + head_object = self._third_stage_heads[prediction_head] + predictions_dict[prediction_head] = head_object(image_feature) + else: + raise ValueError('prediction_stage should be either 2 or 3.') + + return predictions_dict diff --git a/predictors/mask_rcnn_keras_box_predictor_test.py b/predictors/mask_rcnn_keras_box_predictor_test.py new file mode 100644 index 0000000..03cad61 --- /dev/null +++ b/predictors/mask_rcnn_keras_box_predictor_test.py @@ -0,0 +1,140 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for object_detection.predictors.mask_rcnn_box_predictor.""" +import numpy as np +import tensorflow as tf + +from google.protobuf import text_format +from object_detection.builders import box_predictor_builder +from object_detection.builders import hyperparams_builder +from object_detection.predictors import mask_rcnn_keras_box_predictor as box_predictor +from object_detection.protos import hyperparams_pb2 +from object_detection.utils import test_case + + +class MaskRCNNKerasBoxPredictorTest(test_case.TestCase): + + def _build_hyperparams(self, + op_type=hyperparams_pb2.Hyperparams.FC): + hyperparams = hyperparams_pb2.Hyperparams() + hyperparams_text_proto = """ + activation: NONE + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(hyperparams_text_proto, hyperparams) + hyperparams.op = op_type + return hyperparams_builder.KerasLayerHyperparams(hyperparams) + + def test_get_boxes_with_five_classes(self): + def graph_fn(image_features): + mask_box_predictor = ( + box_predictor_builder.build_mask_rcnn_keras_box_predictor( + is_training=False, + num_classes=5, + fc_hyperparams=self._build_hyperparams(), + freeze_batchnorm=False, + use_dropout=False, + dropout_keep_prob=0.5, + box_code_size=4, + )) + box_predictions = mask_box_predictor( + [image_features], + prediction_stage=2) + return (box_predictions[box_predictor.BOX_ENCODINGS], + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]) + image_features = np.random.rand(2, 7, 7, 3).astype(np.float32) + (box_encodings, + class_predictions_with_background) = self.execute(graph_fn, + [image_features]) + self.assertAllEqual(box_encodings.shape, [2, 1, 5, 4]) + self.assertAllEqual(class_predictions_with_background.shape, [2, 1, 6]) + + def test_get_boxes_with_five_classes_share_box_across_classes(self): + def graph_fn(image_features): + mask_box_predictor = ( + 
box_predictor_builder.build_mask_rcnn_keras_box_predictor( + is_training=False, + num_classes=5, + fc_hyperparams=self._build_hyperparams(), + freeze_batchnorm=False, + use_dropout=False, + dropout_keep_prob=0.5, + box_code_size=4, + share_box_across_classes=True + )) + box_predictions = mask_box_predictor( + [image_features], + prediction_stage=2) + return (box_predictions[box_predictor.BOX_ENCODINGS], + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]) + image_features = np.random.rand(2, 7, 7, 3).astype(np.float32) + (box_encodings, + class_predictions_with_background) = self.execute(graph_fn, + [image_features]) + self.assertAllEqual(box_encodings.shape, [2, 1, 1, 4]) + self.assertAllEqual(class_predictions_with_background.shape, [2, 1, 6]) + + def test_get_instance_masks(self): + def graph_fn(image_features): + mask_box_predictor = ( + box_predictor_builder.build_mask_rcnn_keras_box_predictor( + is_training=False, + num_classes=5, + fc_hyperparams=self._build_hyperparams(), + freeze_batchnorm=False, + use_dropout=False, + dropout_keep_prob=0.5, + box_code_size=4, + conv_hyperparams=self._build_hyperparams( + op_type=hyperparams_pb2.Hyperparams.CONV), + predict_instance_masks=True)) + box_predictions = mask_box_predictor( + [image_features], + prediction_stage=3) + return (box_predictions[box_predictor.MASK_PREDICTIONS],) + image_features = np.random.rand(2, 7, 7, 3).astype(np.float32) + mask_predictions = self.execute(graph_fn, [image_features]) + self.assertAllEqual(mask_predictions.shape, [2, 1, 5, 14, 14]) + + def test_do_not_return_instance_masks_without_request(self): + image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32) + mask_box_predictor = ( + box_predictor_builder.build_mask_rcnn_keras_box_predictor( + is_training=False, + num_classes=5, + fc_hyperparams=self._build_hyperparams(), + freeze_batchnorm=False, + use_dropout=False, + dropout_keep_prob=0.5, + box_code_size=4)) + box_predictions = mask_box_predictor( + [image_features], + prediction_stage=2) + self.assertEqual(len(box_predictions), 2) + self.assertTrue(box_predictor.BOX_ENCODINGS in box_predictions) + self.assertTrue(box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND + in box_predictions) + + +if __name__ == '__main__': + tf.test.main() diff --git a/predictors/rfcn_box_predictor.py b/predictors/rfcn_box_predictor.py new file mode 100644 index 0000000..a63ce20 --- /dev/null +++ b/predictors/rfcn_box_predictor.py @@ -0,0 +1,159 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""RFCN Box Predictor.""" +import tensorflow as tf +from object_detection.core import box_predictor +from object_detection.utils import ops + +slim = tf.contrib.slim + +BOX_ENCODINGS = box_predictor.BOX_ENCODINGS +CLASS_PREDICTIONS_WITH_BACKGROUND = ( + box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND) +MASK_PREDICTIONS = box_predictor.MASK_PREDICTIONS + + +class RfcnBoxPredictor(box_predictor.BoxPredictor): + """RFCN Box Predictor. + + Applies a position sensitive ROI pooling on position sensitive feature maps to + predict classes and refined locations. See https://arxiv.org/abs/1605.06409 + for details. + + This is used for the second stage of the RFCN meta architecture. Notice that + locations are *not* shared across classes, thus for each anchor, a separate + prediction is made for each class. + """ + + def __init__(self, + is_training, + num_classes, + conv_hyperparams_fn, + num_spatial_bins, + depth, + crop_size, + box_code_size): + """Constructor. + + Args: + is_training: Indicates whether the BoxPredictor is in training mode. + num_classes: number of classes. Note that num_classes *does not* + include the background category, so if groundtruth labels take values + in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the + assigned classification targets can range from {0,... K}). + conv_hyperparams_fn: A function to construct tf-slim arg_scope with + hyperparameters for convolutional layers. + num_spatial_bins: A list of two integers `[spatial_bins_y, + spatial_bins_x]`. + depth: Target depth to reduce the input feature maps to. + crop_size: A list of two integers `[crop_height, crop_width]`. + box_code_size: Size of encoding for each box. + """ + super(RfcnBoxPredictor, self).__init__(is_training, num_classes) + self._conv_hyperparams_fn = conv_hyperparams_fn + self._num_spatial_bins = num_spatial_bins + self._depth = depth + self._crop_size = crop_size + self._box_code_size = box_code_size + + @property + def num_classes(self): + return self._num_classes + + def _predict(self, image_features, num_predictions_per_location, + proposal_boxes): + """Computes encoded object locations and corresponding confidences. + + Args: + image_features: A list of float tensors of shape [batch_size, height_i, + width_i, channels_i] containing features for a batch of images. + num_predictions_per_location: A list of integers representing the number + of box predictions to be made per spatial location for each feature map. + Currently, this must be set to [1], or an error will be raised. + proposal_boxes: A float tensor of shape [batch_size, num_proposals, + box_code_size]. + + Returns: + box_encodings: A list of float tensors of shape + [batch_size, num_anchors_i, q, code_size] representing the location of + the objects, where q is 1 or the number of classes. Each entry in the + list corresponds to a feature map in the input `image_features` list. + class_predictions_with_background: A list of float tensors of shape + [batch_size, num_anchors_i, num_classes + 1] representing the class + predictions for the proposals. Each entry in the list corresponds to a + feature map in the input `image_features` list. + + Raises: + ValueError: if num_predictions_per_location is not 1 or if + len(image_features) is not 1. 
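+
+    Note: in the tensors actually returned, the proposal dimension is folded
+    into the batch dimension, i.e. the leading dimension equals
+    batch_size * num_proposals (see the tf.reshape calls in the body).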
+ """ + if (len(num_predictions_per_location) != 1 or + num_predictions_per_location[0] != 1): + raise ValueError('Currently RfcnBoxPredictor only supports ' + 'predicting a single box per class per location.') + if len(image_features) != 1: + raise ValueError('length of `image_features` must be 1. Found {}'. + format(len(image_features))) + image_feature = image_features[0] + num_predictions_per_location = num_predictions_per_location[0] + batch_size = tf.shape(proposal_boxes)[0] + num_boxes = tf.shape(proposal_boxes)[1] + net = image_feature + with slim.arg_scope(self._conv_hyperparams_fn()): + net = slim.conv2d(net, self._depth, [1, 1], scope='reduce_depth') + # Location predictions. + location_feature_map_depth = (self._num_spatial_bins[0] * + self._num_spatial_bins[1] * + self.num_classes * + self._box_code_size) + location_feature_map = slim.conv2d(net, location_feature_map_depth, + [1, 1], activation_fn=None, + scope='refined_locations') + box_encodings = ops.batch_position_sensitive_crop_regions( + location_feature_map, + boxes=proposal_boxes, + crop_size=self._crop_size, + num_spatial_bins=self._num_spatial_bins, + global_pool=True) + box_encodings = tf.squeeze(box_encodings, axis=[2, 3]) + box_encodings = tf.reshape(box_encodings, + [batch_size * num_boxes, 1, self.num_classes, + self._box_code_size]) + + # Class predictions. + total_classes = self.num_classes + 1 # Account for background class. + class_feature_map_depth = (self._num_spatial_bins[0] * + self._num_spatial_bins[1] * + total_classes) + class_feature_map = slim.conv2d(net, class_feature_map_depth, [1, 1], + activation_fn=None, + scope='class_predictions') + class_predictions_with_background = ( + ops.batch_position_sensitive_crop_regions( + class_feature_map, + boxes=proposal_boxes, + crop_size=self._crop_size, + num_spatial_bins=self._num_spatial_bins, + global_pool=True)) + class_predictions_with_background = tf.squeeze( + class_predictions_with_background, axis=[2, 3]) + class_predictions_with_background = tf.reshape( + class_predictions_with_background, + [batch_size * num_boxes, 1, total_classes]) + + return {BOX_ENCODINGS: [box_encodings], + CLASS_PREDICTIONS_WITH_BACKGROUND: + [class_predictions_with_background]} diff --git a/predictors/rfcn_box_predictor_test.py b/predictors/rfcn_box_predictor_test.py new file mode 100644 index 0000000..104246d --- /dev/null +++ b/predictors/rfcn_box_predictor_test.py @@ -0,0 +1,77 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for object_detection.predictors.rfcn_box_predictor.""" +import numpy as np +import tensorflow as tf + +from google.protobuf import text_format +from object_detection.builders import hyperparams_builder +from object_detection.predictors import rfcn_box_predictor as box_predictor +from object_detection.protos import hyperparams_pb2 +from object_detection.utils import test_case + + +class RfcnBoxPredictorTest(test_case.TestCase): + + def _build_arg_scope_with_conv_hyperparams(self): + conv_hyperparams = hyperparams_pb2.Hyperparams() + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + truncated_normal_initializer { + } + } + """ + text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) + return hyperparams_builder.build(conv_hyperparams, is_training=True) + + def test_get_correct_box_encoding_and_class_prediction_shapes(self): + + def graph_fn(image_features, proposal_boxes): + rfcn_box_predictor = box_predictor.RfcnBoxPredictor( + is_training=False, + num_classes=2, + conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), + num_spatial_bins=[3, 3], + depth=4, + crop_size=[12, 12], + box_code_size=4 + ) + box_predictions = rfcn_box_predictor.predict( + [image_features], num_predictions_per_location=[1], + scope='BoxPredictor', + proposal_boxes=proposal_boxes) + box_encodings = tf.concat( + box_predictions[box_predictor.BOX_ENCODINGS], axis=1) + class_predictions_with_background = tf.concat( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1) + return (box_encodings, class_predictions_with_background) + + image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) + proposal_boxes = np.random.rand(4, 2, 4).astype(np.float32) + (box_encodings, class_predictions_with_background) = self.execute( + graph_fn, [image_features, proposal_boxes]) + + self.assertAllEqual(box_encodings.shape, [8, 1, 2, 4]) + self.assertAllEqual(class_predictions_with_background.shape, [8, 1, 3]) + + +if __name__ == '__main__': + tf.test.main() diff --git a/predictors/rfcn_keras_box_predictor.py b/predictors/rfcn_keras_box_predictor.py new file mode 100644 index 0000000..3329e0a --- /dev/null +++ b/predictors/rfcn_keras_box_predictor.py @@ -0,0 +1,204 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""RFCN Box Predictor.""" +import tensorflow as tf +from object_detection.core import box_predictor +from object_detection.utils import ops + +BOX_ENCODINGS = box_predictor.BOX_ENCODINGS +CLASS_PREDICTIONS_WITH_BACKGROUND = ( + box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND) +MASK_PREDICTIONS = box_predictor.MASK_PREDICTIONS + + +class RfcnKerasBoxPredictor(box_predictor.KerasBoxPredictor): + """RFCN Box Predictor. 
+ + Applies a position sensitive ROI pooling on position sensitive feature maps to + predict classes and refined locations. See https://arxiv.org/abs/1605.06409 + for details. + + This is used for the second stage of the RFCN meta architecture. Notice that + locations are *not* shared across classes, thus for each anchor, a separate + prediction is made for each class. + """ + + def __init__(self, + is_training, + num_classes, + conv_hyperparams, + freeze_batchnorm, + num_spatial_bins, + depth, + crop_size, + box_code_size, + name=None): + """Constructor. + + Args: + is_training: Indicates whether the BoxPredictor is in training mode. + num_classes: number of classes. Note that num_classes *does not* + include the background category, so if groundtruth labels take values + in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the + assigned classification targets can range from {0,... K}). + conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object + containing hyperparameters for convolution ops. + freeze_batchnorm: Whether to freeze batch norm parameters during + training or not. When training with a small batch size (e.g. 1), it is + desirable to freeze batch norm update and use pretrained batch norm + params. + num_spatial_bins: A list of two integers `[spatial_bins_y, + spatial_bins_x]`. + depth: Target depth to reduce the input feature maps to. + crop_size: A list of two integers `[crop_height, crop_width]`. + box_code_size: Size of encoding for each box. + name: A string name scope to assign to the box predictor. If `None`, Keras + will auto-generate one from the class name. + """ + super(RfcnKerasBoxPredictor, self).__init__( + is_training, num_classes, freeze_batchnorm=freeze_batchnorm, + inplace_batchnorm_update=False, name=name) + self._freeze_batchnorm = freeze_batchnorm + self._conv_hyperparams = conv_hyperparams + self._num_spatial_bins = num_spatial_bins + self._depth = depth + self._crop_size = crop_size + self._box_code_size = box_code_size + + # Build the shared layers used for both heads + self._shared_conv_layers = [] + self._shared_conv_layers.append( + tf.keras.layers.Conv2D( + self._depth, + [1, 1], + padding='SAME', + name='reduce_depth_conv', + **self._conv_hyperparams.params())) + self._shared_conv_layers.append( + self._conv_hyperparams.build_batch_norm( + training=(self._is_training and not self._freeze_batchnorm), + name='reduce_depth_batchnorm')) + self._shared_conv_layers.append( + self._conv_hyperparams.build_activation_layer( + name='reduce_depth_activation')) + + self._box_encoder_layers = [] + location_feature_map_depth = (self._num_spatial_bins[0] * + self._num_spatial_bins[1] * + self.num_classes * + self._box_code_size) + self._box_encoder_layers.append( + tf.keras.layers.Conv2D( + location_feature_map_depth, + [1, 1], + padding='SAME', + name='refined_locations_conv', + **self._conv_hyperparams.params())) + self._box_encoder_layers.append( + self._conv_hyperparams.build_batch_norm( + training=(self._is_training and not self._freeze_batchnorm), + name='refined_locations_batchnorm')) + + self._class_predictor_layers = [] + self._total_classes = self.num_classes + 1 # Account for background class. 
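+    # Position-sensitive pooling needs one score map per (spatial bin, class)
+    # pair, so the 1x1 class-prediction conv below emits
+    # num_spatial_bins_y * num_spatial_bins_x * (num_classes + 1) channels,
+    # e.g. 3x3 bins with 2 classes plus background gives 3 * 3 * 3 = 27.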
+    class_feature_map_depth = (self._num_spatial_bins[0] *
+                               self._num_spatial_bins[1] *
+                               self._total_classes)
+    self._class_predictor_layers.append(
+        tf.keras.layers.Conv2D(
+            class_feature_map_depth,
+            [1, 1],
+            padding='SAME',
+            name='class_predictions_conv',
+            **self._conv_hyperparams.params()))
+    self._class_predictor_layers.append(
+        self._conv_hyperparams.build_batch_norm(
+            training=(self._is_training and not self._freeze_batchnorm),
+            name='class_predictions_batchnorm'))
+
+  @property
+  def num_classes(self):
+    return self._num_classes
+
+  def _predict(self, image_features, proposal_boxes, **kwargs):
+    """Computes encoded object locations and corresponding confidences.
+
+    Args:
+      image_features: A list of float tensors of shape [batch_size, height_i,
+        width_i, channels_i] containing features for a batch of images.
+      proposal_boxes: A float tensor of shape [batch_size, num_proposals,
+        box_code_size].
+      **kwargs: Unused keyword arguments.
+
+    Returns:
+      box_encodings: A list of float tensors of shape
+        [batch_size, num_anchors_i, q, code_size] representing the location of
+        the objects, where q is 1 or the number of classes. Each entry in the
+        list corresponds to a feature map in the input `image_features` list.
+      class_predictions_with_background: A list of float tensors of shape
+        [batch_size, num_anchors_i, num_classes + 1] representing the class
+        predictions for the proposals. Each entry in the list corresponds to a
+        feature map in the input `image_features` list.
+
+    Raises:
+      ValueError: if len(image_features) is not 1.
+    """
+    if len(image_features) != 1:
+      raise ValueError('length of `image_features` must be 1. Found {}'.
+                       format(len(image_features)))
+    image_feature = image_features[0]
+    batch_size = tf.shape(proposal_boxes)[0]
+    num_boxes = tf.shape(proposal_boxes)[1]
+    net = image_feature
+    for layer in self._shared_conv_layers:
+      net = layer(net)
+
+    # Location predictions.
+    box_net = net
+    for layer in self._box_encoder_layers:
+      box_net = layer(box_net)
+    box_encodings = ops.batch_position_sensitive_crop_regions(
+        box_net,
+        boxes=proposal_boxes,
+        crop_size=self._crop_size,
+        num_spatial_bins=self._num_spatial_bins,
+        global_pool=True)
+    box_encodings = tf.squeeze(box_encodings, axis=[2, 3])
+    box_encodings = tf.reshape(box_encodings,
+                               [batch_size * num_boxes, 1, self.num_classes,
+                                self._box_code_size])
+
+    # Class predictions.
+    class_net = net
+    for layer in self._class_predictor_layers:
+      class_net = layer(class_net)
+    class_predictions_with_background = (
+        ops.batch_position_sensitive_crop_regions(
+            class_net,
+            boxes=proposal_boxes,
+            crop_size=self._crop_size,
+            num_spatial_bins=self._num_spatial_bins,
+            global_pool=True))
+    class_predictions_with_background = tf.squeeze(
+        class_predictions_with_background, axis=[2, 3])
+    class_predictions_with_background = tf.reshape(
+        class_predictions_with_background,
+        [batch_size * num_boxes, 1, self._total_classes])
+
+    return {BOX_ENCODINGS: [box_encodings],
+            CLASS_PREDICTIONS_WITH_BACKGROUND:
+                [class_predictions_with_background]}
diff --git a/predictors/rfcn_keras_box_predictor_test.py b/predictors/rfcn_keras_box_predictor_test.py
new file mode 100644
index 0000000..30a5bb5
--- /dev/null
+++ b/predictors/rfcn_keras_box_predictor_test.py
@@ -0,0 +1,77 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.predictors.rfcn_keras_box_predictor."""
+import numpy as np
+import tensorflow as tf
+
+from google.protobuf import text_format
+from object_detection.builders import hyperparams_builder
+from object_detection.predictors import rfcn_keras_box_predictor as box_predictor
+from object_detection.protos import hyperparams_pb2
+from object_detection.utils import test_case
+
+
+class RfcnKerasBoxPredictorTest(test_case.TestCase):
+
+  def _build_conv_hyperparams(self):
+    conv_hyperparams = hyperparams_pb2.Hyperparams()
+    conv_hyperparams_text_proto = """
+      regularizer {
+        l2_regularizer {
+        }
+      }
+      initializer {
+        truncated_normal_initializer {
+        }
+      }
+    """
+    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
+    return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
+
+  def test_get_correct_box_encoding_and_class_prediction_shapes(self):
+
+    def graph_fn(image_features, proposal_boxes):
+      rfcn_box_predictor = box_predictor.RfcnKerasBoxPredictor(
+          is_training=False,
+          num_classes=2,
+          conv_hyperparams=self._build_conv_hyperparams(),
+          freeze_batchnorm=False,
+          num_spatial_bins=[3, 3],
+          depth=4,
+          crop_size=[12, 12],
+          box_code_size=4
+      )
+      box_predictions = rfcn_box_predictor(
+          [image_features],
+          proposal_boxes=proposal_boxes)
+      box_encodings = tf.concat(
+          box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
+      class_predictions_with_background = tf.concat(
+          box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
+          axis=1)
+      return (box_encodings, class_predictions_with_background)
+
+    image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
+    proposal_boxes = np.random.rand(4, 2, 4).astype(np.float32)
+    (box_encodings, class_predictions_with_background) = self.execute(
+        graph_fn, [image_features, proposal_boxes])
+
+    self.assertAllEqual(box_encodings.shape, [8, 1, 2, 4])
+    self.assertAllEqual(class_predictions_with_background.shape, [8, 1, 3])
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/protos/anchor_generator.proto b/protos/anchor_generator.proto
new file mode 100644
index 0000000..9608ca4
--- /dev/null
+++ b/protos/anchor_generator.proto
@@ -0,0 +1,19 @@
+syntax = "proto2";
+
+package object_detection.protos;
+
+import "object_detection/protos/flexible_grid_anchor_generator.proto";
+import "object_detection/protos/grid_anchor_generator.proto";
+import "object_detection/protos/multiscale_anchor_generator.proto";
+import "object_detection/protos/ssd_anchor_generator.proto";
+
+// Configuration proto for the anchor generator to use in the object detection
+// pipeline. See core/anchor_generator.py for details.
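+//
+// For example, an SSD pipeline would typically populate the
+// ssd_anchor_generator branch of the oneof below (illustrative values only;
+// see ssd_anchor_generator.proto for the full set of fields):
+//
+//   anchor_generator {
+//     ssd_anchor_generator {
+//       num_layers: 6
+//       min_scale: 0.2
+//       max_scale: 0.95
+//     }
+//   }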
+message AnchorGenerator { + oneof anchor_generator_oneof { + GridAnchorGenerator grid_anchor_generator = 1; + SsdAnchorGenerator ssd_anchor_generator = 2; + MultiscaleAnchorGenerator multiscale_anchor_generator = 3; + FlexibleGridAnchorGenerator flexible_grid_anchor_generator = 4; + } +} diff --git a/protos/anchor_generator_pb2.py b/protos/anchor_generator_pb2.py new file mode 100644 index 0000000..bea9978 --- /dev/null +++ b/protos/anchor_generator_pb2.py @@ -0,0 +1,114 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: object_detection/protos/anchor_generator.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from object_detection.protos import flexible_grid_anchor_generator_pb2 as object__detection_dot_protos_dot_flexible__grid__anchor__generator__pb2 +from object_detection.protos import grid_anchor_generator_pb2 as object__detection_dot_protos_dot_grid__anchor__generator__pb2 +from object_detection.protos import multiscale_anchor_generator_pb2 as object__detection_dot_protos_dot_multiscale__anchor__generator__pb2 +from object_detection.protos import ssd_anchor_generator_pb2 as object__detection_dot_protos_dot_ssd__anchor__generator__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='object_detection/protos/anchor_generator.proto', + package='object_detection.protos', + syntax='proto2', + serialized_pb=_b('\n.object_detection/protos/anchor_generator.proto\x12\x17object_detection.protos\x1a.object_detection.protos.WeightSharedConvolutionalBoxPredictorH\x00\x42\x15\n\x13\x62ox_predictor_oneof\"\xaf\x04\n\x19\x43onvolutionalBoxPredictor\x12>\n\x10\x63onv_hyperparams\x18\x01 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12\x14\n\tmin_depth\x18\x02 \x01(\x05:\x01\x30\x12\x14\n\tmax_depth\x18\x03 \x01(\x05:\x01\x30\x12&\n\x1bnum_layers_before_predictor\x18\x04 \x01(\x05:\x01\x30\x12\x19\n\x0buse_dropout\x18\x05 \x01(\x08:\x04true\x12%\n\x18\x64ropout_keep_probability\x18\x06 \x01(\x02:\x03\x30.8\x12\x16\n\x0bkernel_size\x18\x07 \x01(\x05:\x01\x31\x12\x18\n\rbox_code_size\x18\x08 \x01(\x05:\x01\x34\x12&\n\x17\x61pply_sigmoid_to_scores\x18\t \x01(\x08:\x05\x66\x61lse\x12%\n\x1a\x63lass_prediction_bias_init\x18\n \x01(\x02:\x01\x30\x12\x1c\n\ruse_depthwise\x18\x0b \x01(\x08:\x05\x66\x61lse\x12j\n\x18\x62ox_encodings_clip_range\x18\x0c \x01(\x0b\x32H.object_detection.protos.ConvolutionalBoxPredictor.BoxEncodingsClipRange\x1a\x31\n\x15\x42oxEncodingsClipRange\x12\x0b\n\x03min\x18\x01 \x01(\x02\x12\x0b\n\x03max\x18\x02 \x01(\x02\"\xcc\x05\n%WeightSharedConvolutionalBoxPredictor\x12>\n\x10\x63onv_hyperparams\x18\x01 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12&\n\x1bnum_layers_before_predictor\x18\x04 \x01(\x05:\x01\x30\x12\x10\n\x05\x64\x65pth\x18\x02 \x01(\x05:\x01\x30\x12\x16\n\x0bkernel_size\x18\x07 \x01(\x05:\x01\x33\x12\x18\n\rbox_code_size\x18\x08 \x01(\x05:\x01\x34\x12%\n\x1a\x63lass_prediction_bias_init\x18\n \x01(\x02:\x01\x30\x12\x1a\n\x0buse_dropout\x18\x0b \x01(\x08:\x05\x66\x61lse\x12%\n\x18\x64ropout_keep_probability\x18\x0c \x01(\x02:\x03\x30.8\x12%\n\x16share_prediction_tower\x18\r \x01(\x08:\x05\x66\x61lse\x12\x1c\n\ruse_depthwise\x18\x0e 
\x01(\x08:\x05\x66\x61lse\x12p\n\x0fscore_converter\x18\x10 \x01(\x0e\x32M.object_detection.protos.WeightSharedConvolutionalBoxPredictor.ScoreConverter:\x08IDENTITY\x12v\n\x18\x62ox_encodings_clip_range\x18\x11 \x01(\x0b\x32T.object_detection.protos.WeightSharedConvolutionalBoxPredictor.BoxEncodingsClipRange\x1a\x31\n\x15\x42oxEncodingsClipRange\x12\x0b\n\x03min\x18\x01 \x01(\x02\x12\x0b\n\x03max\x18\x02 \x01(\x02\"+\n\x0eScoreConverter\x12\x0c\n\x08IDENTITY\x10\x00\x12\x0b\n\x07SIGMOID\x10\x01\"\xbf\x04\n\x14MaskRCNNBoxPredictor\x12<\n\x0e\x66\x63_hyperparams\x18\x01 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12\x1a\n\x0buse_dropout\x18\x02 \x01(\x08:\x05\x66\x61lse\x12%\n\x18\x64ropout_keep_probability\x18\x03 \x01(\x02:\x03\x30.5\x12\x18\n\rbox_code_size\x18\x04 \x01(\x05:\x01\x34\x12>\n\x10\x63onv_hyperparams\x18\x05 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12%\n\x16predict_instance_masks\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\'\n\x1amask_prediction_conv_depth\x18\x07 \x01(\x05:\x03\x32\x35\x36\x12 \n\x11predict_keypoints\x18\x08 \x01(\x08:\x05\x66\x61lse\x12\x17\n\x0bmask_height\x18\t \x01(\x05:\x02\x31\x35\x12\x16\n\nmask_width\x18\n \x01(\x05:\x02\x31\x35\x12*\n\x1fmask_prediction_num_conv_layers\x18\x0b \x01(\x05:\x01\x32\x12\'\n\x18masks_are_class_agnostic\x18\x0c \x01(\x08:\x05\x66\x61lse\x12\'\n\x18share_box_across_classes\x18\r \x01(\x08:\x05\x66\x61lse\x12+\n\x1c\x63onvolve_then_upsample_masks\x18\x0e \x01(\x08:\x05\x66\x61lse\"\xf9\x01\n\x10RfcnBoxPredictor\x12>\n\x10\x63onv_hyperparams\x18\x01 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12\"\n\x17num_spatial_bins_height\x18\x02 \x01(\x05:\x01\x33\x12!\n\x16num_spatial_bins_width\x18\x03 \x01(\x05:\x01\x33\x12\x13\n\x05\x64\x65pth\x18\x04 \x01(\x05:\x04\x31\x30\x32\x34\x12\x18\n\rbox_code_size\x18\x05 \x01(\x05:\x01\x34\x12\x17\n\x0b\x63rop_height\x18\x06 \x01(\x05:\x02\x31\x32\x12\x16\n\ncrop_width\x18\x07 \x01(\x05:\x02\x31\x32') + , + dependencies=[object__detection_dot_protos_dot_hyperparams__pb2.DESCRIPTOR,]) +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + + + +_WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR_SCORECONVERTER = _descriptor.EnumDescriptor( + name='ScoreConverter', + full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.ScoreConverter', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='IDENTITY', index=0, number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='SIGMOID', index=1, number=1, + options=None, + type=None), + ], + containing_type=None, + options=None, + serialized_start=1754, + serialized_end=1797, +) +_sym_db.RegisterEnumDescriptor(_WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR_SCORECONVERTER) + + +_BOXPREDICTOR = _descriptor.Descriptor( + name='BoxPredictor', + full_name='object_detection.protos.BoxPredictor', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='convolutional_box_predictor', full_name='object_detection.protos.BoxPredictor.convolutional_box_predictor', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='mask_rcnn_box_predictor', full_name='object_detection.protos.BoxPredictor.mask_rcnn_box_predictor', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, 
enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='rfcn_box_predictor', full_name='object_detection.protos.BoxPredictor.rfcn_box_predictor', index=2, + number=3, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='weight_shared_convolutional_box_predictor', full_name='object_detection.protos.BoxPredictor.weight_shared_convolutional_box_predictor', index=3, + number=4, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + _descriptor.OneofDescriptor( + name='box_predictor_oneof', full_name='object_detection.protos.BoxPredictor.box_predictor_oneof', + index=0, containing_type=None, fields=[]), + ], + serialized_start=116, + serialized_end=516, +) + + +_CONVOLUTIONALBOXPREDICTOR_BOXENCODINGSCLIPRANGE = _descriptor.Descriptor( + name='BoxEncodingsClipRange', + full_name='object_detection.protos.ConvolutionalBoxPredictor.BoxEncodingsClipRange', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='min', full_name='object_detection.protos.ConvolutionalBoxPredictor.BoxEncodingsClipRange.min', index=0, + number=1, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='max', full_name='object_detection.protos.ConvolutionalBoxPredictor.BoxEncodingsClipRange.max', index=1, + number=2, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1029, + serialized_end=1078, +) + +_CONVOLUTIONALBOXPREDICTOR = _descriptor.Descriptor( + name='ConvolutionalBoxPredictor', + full_name='object_detection.protos.ConvolutionalBoxPredictor', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='conv_hyperparams', full_name='object_detection.protos.ConvolutionalBoxPredictor.conv_hyperparams', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='min_depth', full_name='object_detection.protos.ConvolutionalBoxPredictor.min_depth', index=1, + number=2, type=5, cpp_type=1, label=1, + has_default_value=True, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='max_depth', full_name='object_detection.protos.ConvolutionalBoxPredictor.max_depth', index=2, + number=3, type=5, cpp_type=1, label=1, + has_default_value=True, 
default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='num_layers_before_predictor', full_name='object_detection.protos.ConvolutionalBoxPredictor.num_layers_before_predictor', index=3, + number=4, type=5, cpp_type=1, label=1, + has_default_value=True, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='use_dropout', full_name='object_detection.protos.ConvolutionalBoxPredictor.use_dropout', index=4, + number=5, type=8, cpp_type=7, label=1, + has_default_value=True, default_value=True, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='dropout_keep_probability', full_name='object_detection.protos.ConvolutionalBoxPredictor.dropout_keep_probability', index=5, + number=6, type=2, cpp_type=6, label=1, + has_default_value=True, default_value=float(0.8), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='kernel_size', full_name='object_detection.protos.ConvolutionalBoxPredictor.kernel_size', index=6, + number=7, type=5, cpp_type=1, label=1, + has_default_value=True, default_value=1, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='box_code_size', full_name='object_detection.protos.ConvolutionalBoxPredictor.box_code_size', index=7, + number=8, type=5, cpp_type=1, label=1, + has_default_value=True, default_value=4, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='apply_sigmoid_to_scores', full_name='object_detection.protos.ConvolutionalBoxPredictor.apply_sigmoid_to_scores', index=8, + number=9, type=8, cpp_type=7, label=1, + has_default_value=True, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='class_prediction_bias_init', full_name='object_detection.protos.ConvolutionalBoxPredictor.class_prediction_bias_init', index=9, + number=10, type=2, cpp_type=6, label=1, + has_default_value=True, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='use_depthwise', full_name='object_detection.protos.ConvolutionalBoxPredictor.use_depthwise', index=10, + number=11, type=8, cpp_type=7, label=1, + has_default_value=True, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='box_encodings_clip_range', full_name='object_detection.protos.ConvolutionalBoxPredictor.box_encodings_clip_range', index=11, + number=12, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[_CONVOLUTIONALBOXPREDICTOR_BOXENCODINGSCLIPRANGE, ], + enum_types=[ + ], + options=None, + is_extendable=False, + 
syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=519, + serialized_end=1078, +) + + +_WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR_BOXENCODINGSCLIPRANGE = _descriptor.Descriptor( + name='BoxEncodingsClipRange', + full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.BoxEncodingsClipRange', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='min', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.BoxEncodingsClipRange.min', index=0, + number=1, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='max', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.BoxEncodingsClipRange.max', index=1, + number=2, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1029, + serialized_end=1078, +) + +_WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR = _descriptor.Descriptor( + name='WeightSharedConvolutionalBoxPredictor', + full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='conv_hyperparams', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.conv_hyperparams', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='num_layers_before_predictor', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.num_layers_before_predictor', index=1, + number=4, type=5, cpp_type=1, label=1, + has_default_value=True, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='depth', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.depth', index=2, + number=2, type=5, cpp_type=1, label=1, + has_default_value=True, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='kernel_size', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.kernel_size', index=3, + number=7, type=5, cpp_type=1, label=1, + has_default_value=True, default_value=3, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='box_code_size', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.box_code_size', index=4, + number=8, type=5, cpp_type=1, label=1, + has_default_value=True, default_value=4, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='class_prediction_bias_init', 
full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.class_prediction_bias_init', index=5, + number=10, type=2, cpp_type=6, label=1, + has_default_value=True, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='use_dropout', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.use_dropout', index=6, + number=11, type=8, cpp_type=7, label=1, + has_default_value=True, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='dropout_keep_probability', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.dropout_keep_probability', index=7, + number=12, type=2, cpp_type=6, label=1, + has_default_value=True, default_value=float(0.8), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='share_prediction_tower', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.share_prediction_tower', index=8, + number=13, type=8, cpp_type=7, label=1, + has_default_value=True, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='use_depthwise', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.use_depthwise', index=9, + number=14, type=8, cpp_type=7, label=1, + has_default_value=True, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='score_converter', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.score_converter', index=10, + number=16, type=14, cpp_type=8, label=1, + has_default_value=True, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='box_encodings_clip_range', full_name='object_detection.protos.WeightSharedConvolutionalBoxPredictor.box_encodings_clip_range', index=11, + number=17, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[_WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR_BOXENCODINGSCLIPRANGE, ], + enum_types=[ + _WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR_SCORECONVERTER, + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1081, + serialized_end=1797, +) + + +_MASKRCNNBOXPREDICTOR = _descriptor.Descriptor( + name='MaskRCNNBoxPredictor', + full_name='object_detection.protos.MaskRCNNBoxPredictor', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='fc_hyperparams', full_name='object_detection.protos.MaskRCNNBoxPredictor.fc_hyperparams', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='use_dropout', 
full_name='object_detection.protos.MaskRCNNBoxPredictor.use_dropout', index=1, + number=2, type=8, cpp_type=7, label=1, + has_default_value=True, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='dropout_keep_probability', full_name='object_detection.protos.MaskRCNNBoxPredictor.dropout_keep_probability', index=2, + number=3, type=2, cpp_type=6, label=1, + has_default_value=True, default_value=float(0.5), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='box_code_size', full_name='object_detection.protos.MaskRCNNBoxPredictor.box_code_size', index=3, + number=4, type=5, cpp_type=1, label=1, + has_default_value=True, default_value=4, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='conv_hyperparams', full_name='object_detection.protos.MaskRCNNBoxPredictor.conv_hyperparams', index=4, + number=5, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='predict_instance_masks', full_name='object_detection.protos.MaskRCNNBoxPredictor.predict_instance_masks', index=5, + number=6, type=8, cpp_type=7, label=1, + has_default_value=True, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='mask_prediction_conv_depth', full_name='object_detection.protos.MaskRCNNBoxPredictor.mask_prediction_conv_depth', index=6, + number=7, type=5, cpp_type=1, label=1, + has_default_value=True, default_value=256, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='predict_keypoints', full_name='object_detection.protos.MaskRCNNBoxPredictor.predict_keypoints', index=7, + number=8, type=8, cpp_type=7, label=1, + has_default_value=True, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='mask_height', full_name='object_detection.protos.MaskRCNNBoxPredictor.mask_height', index=8, + number=9, type=5, cpp_type=1, label=1, + has_default_value=True, default_value=15, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='mask_width', full_name='object_detection.protos.MaskRCNNBoxPredictor.mask_width', index=9, + number=10, type=5, cpp_type=1, label=1, + has_default_value=True, default_value=15, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='mask_prediction_num_conv_layers', full_name='object_detection.protos.MaskRCNNBoxPredictor.mask_prediction_num_conv_layers', index=10, + number=11, type=5, cpp_type=1, label=1, + has_default_value=True, default_value=2, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='masks_are_class_agnostic', 
full_name='object_detection.protos.MaskRCNNBoxPredictor.masks_are_class_agnostic', index=11, + number=12, type=8, cpp_type=7, label=1, + has_default_value=True, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='share_box_across_classes', full_name='object_detection.protos.MaskRCNNBoxPredictor.share_box_across_classes', index=12, + number=13, type=8, cpp_type=7, label=1, + has_default_value=True, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='convolve_then_upsample_masks', full_name='object_detection.protos.MaskRCNNBoxPredictor.convolve_then_upsample_masks', index=13, + number=14, type=8, cpp_type=7, label=1, + has_default_value=True, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1800, + serialized_end=2375, +) + + +_RFCNBOXPREDICTOR = _descriptor.Descriptor( + name='RfcnBoxPredictor', + full_name='object_detection.protos.RfcnBoxPredictor', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='conv_hyperparams', full_name='object_detection.protos.RfcnBoxPredictor.conv_hyperparams', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='num_spatial_bins_height', full_name='object_detection.protos.RfcnBoxPredictor.num_spatial_bins_height', index=1, + number=2, type=5, cpp_type=1, label=1, + has_default_value=True, default_value=3, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='num_spatial_bins_width', full_name='object_detection.protos.RfcnBoxPredictor.num_spatial_bins_width', index=2, + number=3, type=5, cpp_type=1, label=1, + has_default_value=True, default_value=3, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='depth', full_name='object_detection.protos.RfcnBoxPredictor.depth', index=3, + number=4, type=5, cpp_type=1, label=1, + has_default_value=True, default_value=1024, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='box_code_size', full_name='object_detection.protos.RfcnBoxPredictor.box_code_size', index=4, + number=5, type=5, cpp_type=1, label=1, + has_default_value=True, default_value=4, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='crop_height', full_name='object_detection.protos.RfcnBoxPredictor.crop_height', index=5, + number=6, type=5, cpp_type=1, label=1, + has_default_value=True, default_value=12, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + 
name='crop_width', full_name='object_detection.protos.RfcnBoxPredictor.crop_width', index=6, + number=7, type=5, cpp_type=1, label=1, + has_default_value=True, default_value=12, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=2378, + serialized_end=2627, +) + +_BOXPREDICTOR.fields_by_name['convolutional_box_predictor'].message_type = _CONVOLUTIONALBOXPREDICTOR +_BOXPREDICTOR.fields_by_name['mask_rcnn_box_predictor'].message_type = _MASKRCNNBOXPREDICTOR +_BOXPREDICTOR.fields_by_name['rfcn_box_predictor'].message_type = _RFCNBOXPREDICTOR +_BOXPREDICTOR.fields_by_name['weight_shared_convolutional_box_predictor'].message_type = _WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR +_BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'].fields.append( + _BOXPREDICTOR.fields_by_name['convolutional_box_predictor']) +_BOXPREDICTOR.fields_by_name['convolutional_box_predictor'].containing_oneof = _BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'] +_BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'].fields.append( + _BOXPREDICTOR.fields_by_name['mask_rcnn_box_predictor']) +_BOXPREDICTOR.fields_by_name['mask_rcnn_box_predictor'].containing_oneof = _BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'] +_BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'].fields.append( + _BOXPREDICTOR.fields_by_name['rfcn_box_predictor']) +_BOXPREDICTOR.fields_by_name['rfcn_box_predictor'].containing_oneof = _BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'] +_BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'].fields.append( + _BOXPREDICTOR.fields_by_name['weight_shared_convolutional_box_predictor']) +_BOXPREDICTOR.fields_by_name['weight_shared_convolutional_box_predictor'].containing_oneof = _BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'] +_CONVOLUTIONALBOXPREDICTOR_BOXENCODINGSCLIPRANGE.containing_type = _CONVOLUTIONALBOXPREDICTOR +_CONVOLUTIONALBOXPREDICTOR.fields_by_name['conv_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS +_CONVOLUTIONALBOXPREDICTOR.fields_by_name['box_encodings_clip_range'].message_type = _CONVOLUTIONALBOXPREDICTOR_BOXENCODINGSCLIPRANGE +_WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR_BOXENCODINGSCLIPRANGE.containing_type = _WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR +_WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR.fields_by_name['conv_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS +_WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR.fields_by_name['score_converter'].enum_type = _WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR_SCORECONVERTER +_WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR.fields_by_name['box_encodings_clip_range'].message_type = _WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR_BOXENCODINGSCLIPRANGE +_WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR_SCORECONVERTER.containing_type = _WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR +_MASKRCNNBOXPREDICTOR.fields_by_name['fc_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS +_MASKRCNNBOXPREDICTOR.fields_by_name['conv_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS +_RFCNBOXPREDICTOR.fields_by_name['conv_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS +DESCRIPTOR.message_types_by_name['BoxPredictor'] = _BOXPREDICTOR 
+DESCRIPTOR.message_types_by_name['ConvolutionalBoxPredictor'] = _CONVOLUTIONALBOXPREDICTOR +DESCRIPTOR.message_types_by_name['WeightSharedConvolutionalBoxPredictor'] = _WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR +DESCRIPTOR.message_types_by_name['MaskRCNNBoxPredictor'] = _MASKRCNNBOXPREDICTOR +DESCRIPTOR.message_types_by_name['RfcnBoxPredictor'] = _RFCNBOXPREDICTOR + +BoxPredictor = _reflection.GeneratedProtocolMessageType('BoxPredictor', (_message.Message,), dict( + DESCRIPTOR = _BOXPREDICTOR, + __module__ = 'object_detection.protos.box_predictor_pb2' + # @@protoc_insertion_point(class_scope:object_detection.protos.BoxPredictor) + )) +_sym_db.RegisterMessage(BoxPredictor) + +ConvolutionalBoxPredictor = _reflection.GeneratedProtocolMessageType('ConvolutionalBoxPredictor', (_message.Message,), dict( + + BoxEncodingsClipRange = _reflection.GeneratedProtocolMessageType('BoxEncodingsClipRange', (_message.Message,), dict( + DESCRIPTOR = _CONVOLUTIONALBOXPREDICTOR_BOXENCODINGSCLIPRANGE, + __module__ = 'object_detection.protos.box_predictor_pb2' + # @@protoc_insertion_point(class_scope:object_detection.protos.ConvolutionalBoxPredictor.BoxEncodingsClipRange) + )) + , + DESCRIPTOR = _CONVOLUTIONALBOXPREDICTOR, + __module__ = 'object_detection.protos.box_predictor_pb2' + # @@protoc_insertion_point(class_scope:object_detection.protos.ConvolutionalBoxPredictor) + )) +_sym_db.RegisterMessage(ConvolutionalBoxPredictor) +_sym_db.RegisterMessage(ConvolutionalBoxPredictor.BoxEncodingsClipRange) + +WeightSharedConvolutionalBoxPredictor = _reflection.GeneratedProtocolMessageType('WeightSharedConvolutionalBoxPredictor', (_message.Message,), dict( + + BoxEncodingsClipRange = _reflection.GeneratedProtocolMessageType('BoxEncodingsClipRange', (_message.Message,), dict( + DESCRIPTOR = _WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR_BOXENCODINGSCLIPRANGE, + __module__ = 'object_detection.protos.box_predictor_pb2' + # @@protoc_insertion_point(class_scope:object_detection.protos.WeightSharedConvolutionalBoxPredictor.BoxEncodingsClipRange) + )) + , + DESCRIPTOR = _WEIGHTSHAREDCONVOLUTIONALBOXPREDICTOR, + __module__ = 'object_detection.protos.box_predictor_pb2' + # @@protoc_insertion_point(class_scope:object_detection.protos.WeightSharedConvolutionalBoxPredictor) + )) +_sym_db.RegisterMessage(WeightSharedConvolutionalBoxPredictor) +_sym_db.RegisterMessage(WeightSharedConvolutionalBoxPredictor.BoxEncodingsClipRange) + +MaskRCNNBoxPredictor = _reflection.GeneratedProtocolMessageType('MaskRCNNBoxPredictor', (_message.Message,), dict( + DESCRIPTOR = _MASKRCNNBOXPREDICTOR, + __module__ = 'object_detection.protos.box_predictor_pb2' + # @@protoc_insertion_point(class_scope:object_detection.protos.MaskRCNNBoxPredictor) + )) +_sym_db.RegisterMessage(MaskRCNNBoxPredictor) + +RfcnBoxPredictor = _reflection.GeneratedProtocolMessageType('RfcnBoxPredictor', (_message.Message,), dict( + DESCRIPTOR = _RFCNBOXPREDICTOR, + __module__ = 'object_detection.protos.box_predictor_pb2' + # @@protoc_insertion_point(class_scope:object_detection.protos.RfcnBoxPredictor) + )) +_sym_db.RegisterMessage(RfcnBoxPredictor) + + +# @@protoc_insertion_point(module_scope) diff --git a/protos/calibration.proto b/protos/calibration.proto new file mode 100644 index 0000000..6025117 --- /dev/null +++ b/protos/calibration.proto @@ -0,0 +1,90 @@ +// These protos contain the calibration parameters necessary for transforming +// a model's original detection scores or logits. The parameters result from +// fitting a calibration function on the model's outputs. 
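+//
+// For instance, the SigmoidParameters message below defines a Platt-style
+// transform of the form f(x) = 1 / (1 + exp(a * x + b)); with its defaults
+// a = -1.0 and b = 0.0 this reduces to a standard sigmoid of the score.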
+
+syntax = "proto2";
+
+package object_detection.protos;
+
+// Message wrapper for various calibration configurations.
+message CalibrationConfig {
+  oneof calibrator {
+    // Class-agnostic calibration via linear interpolation (usually output from
+    // isotonic regression).
+    FunctionApproximation function_approximation = 1;
+
+    // Per-class calibration via linear interpolation.
+    ClassIdFunctionApproximations class_id_function_approximations = 2;
+
+    // Class-agnostic sigmoid calibration.
+    SigmoidCalibration sigmoid_calibration = 3;
+
+    // Per-class sigmoid calibration.
+    ClassIdSigmoidCalibrations class_id_sigmoid_calibrations = 4;
+
+    // Temperature scaling calibration.
+    TemperatureScalingCalibration temperature_scaling_calibration = 5;
+  }
+}
+
+// Message for class-agnostic domain/range mapping for function
+// approximations.
+message FunctionApproximation {
+  // Message mapping class labels to indices
+  optional XYPairs x_y_pairs = 1;
+}
+
+// Message for class-specific domain/range mapping for function
+// approximations.
+message ClassIdFunctionApproximations {
+  // Message mapping class ids to indices.
+  map<int32, XYPairs> class_id_xy_pairs_map = 1;
+}
+
+// Message for class-agnostic Sigmoid Calibration.
+message SigmoidCalibration {
+  // Message mapping class index to Sigmoid Parameters
+  optional SigmoidParameters sigmoid_parameters = 1;
+}
+
+// Message for class-specific Sigmoid Calibration.
+message ClassIdSigmoidCalibrations {
+  // Message mapping class index to Sigmoid Parameters.
+  map<int32, SigmoidParameters> class_id_sigmoid_parameters_map = 1;
+}
+
+// Message for Temperature Scaling Calibration.
+message TemperatureScalingCalibration {
+  optional float scaler = 1;
+}
+
+// Description of data used to fit the calibration model. CLASS_SPECIFIC
+// indicates that the calibration parameters are derived from detections
+// pertaining to a single class. ALL_CLASSES indicates that parameters were
+// obtained by fitting a model on detections from all classes (including the
+// background class).
+enum TrainingDataType {
+  DATA_TYPE_UNKNOWN = 0;
+  ALL_CLASSES = 1;
+  CLASS_SPECIFIC = 2;
+}
+
+// Message to store a domain/range pair for function to be approximated.
+message XYPairs {
+  message XYPair {
+    optional float x = 1;
+    optional float y = 2;
+  }
+
+  // Sequence of x/y pairs for function approximation.
+  repeated XYPair x_y_pair = 1;
+
+  // Description of data used to fit the calibration model.
+  optional TrainingDataType training_data_type = 2;
+}
+
+// Message defining parameters for sigmoid calibration.
+message SigmoidParameters {
+  optional float a = 1 [default = -1.0];
+  optional float b = 2 [default = 0.0];
+}
diff --git a/protos/calibration_pb2.py b/protos/calibration_pb2.py
new file mode 100644
index 0000000..3c381f2
--- /dev/null
+++ b/protos/calibration_pb2.py
@@ -0,0 +1,589 @@
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: object_detection/protos/calibration.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf.internal import enum_type_wrapper +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='object_detection/protos/calibration.proto', + package='object_detection.protos', + syntax='proto2', + serialized_pb=_b('\n)object_detection/protos/calibration.proto\x12\x17object_detection.protos\"\xe4\x03\n\x11\x43\x61librationConfig\x12P\n\x16\x66unction_approximation\x18\x01 \x01(\x0b\x32..object_detection.protos.FunctionApproximationH\x00\x12\x62\n class_id_function_approximations\x18\x02 \x01(\x0b\x32\x36.object_detection.protos.ClassIdFunctionApproximationsH\x00\x12J\n\x13sigmoid_calibration\x18\x03 \x01(\x0b\x32+.object_detection.protos.SigmoidCalibrationH\x00\x12\\\n\x1d\x63lass_id_sigmoid_calibrations\x18\x04 \x01(\x0b\x32\x33.object_detection.protos.ClassIdSigmoidCalibrationsH\x00\x12\x61\n\x1ftemperature_scaling_calibration\x18\x05 \x01(\x0b\x32\x36.object_detection.protos.TemperatureScalingCalibrationH\x00\x42\x0c\n\ncalibrator\"L\n\x15\x46unctionApproximation\x12\x33\n\tx_y_pairs\x18\x01 \x01(\x0b\x32 .object_detection.protos.XYPairs\"\xe9\x01\n\x1d\x43lassIdFunctionApproximations\x12l\n\x15\x63lass_id_xy_pairs_map\x18\x01 \x03(\x0b\x32M.object_detection.protos.ClassIdFunctionApproximations.ClassIdXyPairsMapEntry\x1aZ\n\x16\x43lassIdXyPairsMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\x05\x12/\n\x05value\x18\x02 \x01(\x0b\x32 .object_detection.protos.XYPairs:\x02\x38\x01\"\\\n\x12SigmoidCalibration\x12\x46\n\x12sigmoid_parameters\x18\x01 \x01(\x0b\x32*.object_detection.protos.SigmoidParameters\"\x8b\x02\n\x1a\x43lassIdSigmoidCalibrations\x12}\n\x1f\x63lass_id_sigmoid_parameters_map\x18\x01 \x03(\x0b\x32T.object_detection.protos.ClassIdSigmoidCalibrations.ClassIdSigmoidParametersMapEntry\x1an\n ClassIdSigmoidParametersMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\x05\x12\x39\n\x05value\x18\x02 \x01(\x0b\x32*.object_detection.protos.SigmoidParameters:\x02\x38\x01\"/\n\x1dTemperatureScalingCalibration\x12\x0e\n\x06scaler\x18\x01 \x01(\x02\"\xab\x01\n\x07XYPairs\x12\x39\n\x08x_y_pair\x18\x01 \x03(\x0b\x32\'.object_detection.protos.XYPairs.XYPair\x12\x45\n\x12training_data_type\x18\x02 \x01(\x0e\x32).object_detection.protos.TrainingDataType\x1a\x1e\n\x06XYPair\x12\t\n\x01x\x18\x01 \x01(\x02\x12\t\n\x01y\x18\x02 \x01(\x02\"0\n\x11SigmoidParameters\x12\r\n\x01\x61\x18\x01 \x01(\x02:\x02-1\x12\x0c\n\x01\x62\x18\x02 \x01(\x02:\x01\x30*N\n\x10TrainingDataType\x12\x15\n\x11\x44\x41TA_TYPE_UNKNOWN\x10\x00\x12\x0f\n\x0b\x41LL_CLASSES\x10\x01\x12\x12\n\x0e\x43LASS_SPECIFIC\x10\x02') +) +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +_TRAININGDATATYPE = _descriptor.EnumDescriptor( + name='TrainingDataType', + full_name='object_detection.protos.TrainingDataType', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='DATA_TYPE_UNKNOWN', index=0, number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='ALL_CLASSES', index=1, number=1, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='CLASS_SPECIFIC', index=2, number=2, + 
options=None, + type=None), + ], + containing_type=None, + options=None, + serialized_start=1508, + serialized_end=1586, +) +_sym_db.RegisterEnumDescriptor(_TRAININGDATATYPE) + +TrainingDataType = enum_type_wrapper.EnumTypeWrapper(_TRAININGDATATYPE) +DATA_TYPE_UNKNOWN = 0 +ALL_CLASSES = 1 +CLASS_SPECIFIC = 2 + + + +_CALIBRATIONCONFIG = _descriptor.Descriptor( + name='CalibrationConfig', + full_name='object_detection.protos.CalibrationConfig', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='function_approximation', full_name='object_detection.protos.CalibrationConfig.function_approximation', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='class_id_function_approximations', full_name='object_detection.protos.CalibrationConfig.class_id_function_approximations', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='sigmoid_calibration', full_name='object_detection.protos.CalibrationConfig.sigmoid_calibration', index=2, + number=3, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='class_id_sigmoid_calibrations', full_name='object_detection.protos.CalibrationConfig.class_id_sigmoid_calibrations', index=3, + number=4, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='temperature_scaling_calibration', full_name='object_detection.protos.CalibrationConfig.temperature_scaling_calibration', index=4, + number=5, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + _descriptor.OneofDescriptor( + name='calibrator', full_name='object_detection.protos.CalibrationConfig.calibrator', + index=0, containing_type=None, fields=[]), + ], + serialized_start=71, + serialized_end=555, +) + + +_FUNCTIONAPPROXIMATION = _descriptor.Descriptor( + name='FunctionApproximation', + full_name='object_detection.protos.FunctionApproximation', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='x_y_pairs', full_name='object_detection.protos.FunctionApproximation.x_y_pairs', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=557, + serialized_end=633, +) + + 
+_CLASSIDFUNCTIONAPPROXIMATIONS_CLASSIDXYPAIRSMAPENTRY = _descriptor.Descriptor( + name='ClassIdXyPairsMapEntry', + full_name='object_detection.protos.ClassIdFunctionApproximations.ClassIdXyPairsMapEntry', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='key', full_name='object_detection.protos.ClassIdFunctionApproximations.ClassIdXyPairsMapEntry.key', index=0, + number=1, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='value', full_name='object_detection.protos.ClassIdFunctionApproximations.ClassIdXyPairsMapEntry.value', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=_descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')), + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=779, + serialized_end=869, +) + +_CLASSIDFUNCTIONAPPROXIMATIONS = _descriptor.Descriptor( + name='ClassIdFunctionApproximations', + full_name='object_detection.protos.ClassIdFunctionApproximations', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='class_id_xy_pairs_map', full_name='object_detection.protos.ClassIdFunctionApproximations.class_id_xy_pairs_map', index=0, + number=1, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[_CLASSIDFUNCTIONAPPROXIMATIONS_CLASSIDXYPAIRSMAPENTRY, ], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=636, + serialized_end=869, +) + + +_SIGMOIDCALIBRATION = _descriptor.Descriptor( + name='SigmoidCalibration', + full_name='object_detection.protos.SigmoidCalibration', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='sigmoid_parameters', full_name='object_detection.protos.SigmoidCalibration.sigmoid_parameters', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=871, + serialized_end=963, +) + + +_CLASSIDSIGMOIDCALIBRATIONS_CLASSIDSIGMOIDPARAMETERSMAPENTRY = _descriptor.Descriptor( + name='ClassIdSigmoidParametersMapEntry', + full_name='object_detection.protos.ClassIdSigmoidCalibrations.ClassIdSigmoidParametersMapEntry', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='key', full_name='object_detection.protos.ClassIdSigmoidCalibrations.ClassIdSigmoidParametersMapEntry.key', index=0, + number=1, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, 
extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='value', full_name='object_detection.protos.ClassIdSigmoidCalibrations.ClassIdSigmoidParametersMapEntry.value', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=_descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')), + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1123, + serialized_end=1233, +) + +_CLASSIDSIGMOIDCALIBRATIONS = _descriptor.Descriptor( + name='ClassIdSigmoidCalibrations', + full_name='object_detection.protos.ClassIdSigmoidCalibrations', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='class_id_sigmoid_parameters_map', full_name='object_detection.protos.ClassIdSigmoidCalibrations.class_id_sigmoid_parameters_map', index=0, + number=1, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[_CLASSIDSIGMOIDCALIBRATIONS_CLASSIDSIGMOIDPARAMETERSMAPENTRY, ], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=966, + serialized_end=1233, +) + + +_TEMPERATURESCALINGCALIBRATION = _descriptor.Descriptor( + name='TemperatureScalingCalibration', + full_name='object_detection.protos.TemperatureScalingCalibration', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='scaler', full_name='object_detection.protos.TemperatureScalingCalibration.scaler', index=0, + number=1, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1235, + serialized_end=1282, +) + + +_XYPAIRS_XYPAIR = _descriptor.Descriptor( + name='XYPair', + full_name='object_detection.protos.XYPairs.XYPair', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='x', full_name='object_detection.protos.XYPairs.XYPair.x', index=0, + number=1, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='y', full_name='object_detection.protos.XYPairs.XYPair.y', index=1, + number=2, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1426, + serialized_end=1456, +) + +_XYPAIRS = _descriptor.Descriptor( + name='XYPairs', + full_name='object_detection.protos.XYPairs', + filename=None, + file=DESCRIPTOR, + 
containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='x_y_pair', full_name='object_detection.protos.XYPairs.x_y_pair', index=0, + number=1, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='training_data_type', full_name='object_detection.protos.XYPairs.training_data_type', index=1, + number=2, type=14, cpp_type=8, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[_XYPAIRS_XYPAIR, ], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1285, + serialized_end=1456, +) + + +_SIGMOIDPARAMETERS = _descriptor.Descriptor( + name='SigmoidParameters', + full_name='object_detection.protos.SigmoidParameters', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='a', full_name='object_detection.protos.SigmoidParameters.a', index=0, + number=1, type=2, cpp_type=6, label=1, + has_default_value=True, default_value=float(-1), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='b', full_name='object_detection.protos.SigmoidParameters.b', index=1, + number=2, type=2, cpp_type=6, label=1, + has_default_value=True, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1458, + serialized_end=1506, +) + +_CALIBRATIONCONFIG.fields_by_name['function_approximation'].message_type = _FUNCTIONAPPROXIMATION +_CALIBRATIONCONFIG.fields_by_name['class_id_function_approximations'].message_type = _CLASSIDFUNCTIONAPPROXIMATIONS +_CALIBRATIONCONFIG.fields_by_name['sigmoid_calibration'].message_type = _SIGMOIDCALIBRATION +_CALIBRATIONCONFIG.fields_by_name['class_id_sigmoid_calibrations'].message_type = _CLASSIDSIGMOIDCALIBRATIONS +_CALIBRATIONCONFIG.fields_by_name['temperature_scaling_calibration'].message_type = _TEMPERATURESCALINGCALIBRATION +_CALIBRATIONCONFIG.oneofs_by_name['calibrator'].fields.append( + _CALIBRATIONCONFIG.fields_by_name['function_approximation']) +_CALIBRATIONCONFIG.fields_by_name['function_approximation'].containing_oneof = _CALIBRATIONCONFIG.oneofs_by_name['calibrator'] +_CALIBRATIONCONFIG.oneofs_by_name['calibrator'].fields.append( + _CALIBRATIONCONFIG.fields_by_name['class_id_function_approximations']) +_CALIBRATIONCONFIG.fields_by_name['class_id_function_approximations'].containing_oneof = _CALIBRATIONCONFIG.oneofs_by_name['calibrator'] +_CALIBRATIONCONFIG.oneofs_by_name['calibrator'].fields.append( + _CALIBRATIONCONFIG.fields_by_name['sigmoid_calibration']) +_CALIBRATIONCONFIG.fields_by_name['sigmoid_calibration'].containing_oneof = _CALIBRATIONCONFIG.oneofs_by_name['calibrator'] +_CALIBRATIONCONFIG.oneofs_by_name['calibrator'].fields.append( + _CALIBRATIONCONFIG.fields_by_name['class_id_sigmoid_calibrations']) +_CALIBRATIONCONFIG.fields_by_name['class_id_sigmoid_calibrations'].containing_oneof = 
_CALIBRATIONCONFIG.oneofs_by_name['calibrator'] +_CALIBRATIONCONFIG.oneofs_by_name['calibrator'].fields.append( + _CALIBRATIONCONFIG.fields_by_name['temperature_scaling_calibration']) +_CALIBRATIONCONFIG.fields_by_name['temperature_scaling_calibration'].containing_oneof = _CALIBRATIONCONFIG.oneofs_by_name['calibrator'] +_FUNCTIONAPPROXIMATION.fields_by_name['x_y_pairs'].message_type = _XYPAIRS +_CLASSIDFUNCTIONAPPROXIMATIONS_CLASSIDXYPAIRSMAPENTRY.fields_by_name['value'].message_type = _XYPAIRS +_CLASSIDFUNCTIONAPPROXIMATIONS_CLASSIDXYPAIRSMAPENTRY.containing_type = _CLASSIDFUNCTIONAPPROXIMATIONS +_CLASSIDFUNCTIONAPPROXIMATIONS.fields_by_name['class_id_xy_pairs_map'].message_type = _CLASSIDFUNCTIONAPPROXIMATIONS_CLASSIDXYPAIRSMAPENTRY +_SIGMOIDCALIBRATION.fields_by_name['sigmoid_parameters'].message_type = _SIGMOIDPARAMETERS +_CLASSIDSIGMOIDCALIBRATIONS_CLASSIDSIGMOIDPARAMETERSMAPENTRY.fields_by_name['value'].message_type = _SIGMOIDPARAMETERS +_CLASSIDSIGMOIDCALIBRATIONS_CLASSIDSIGMOIDPARAMETERSMAPENTRY.containing_type = _CLASSIDSIGMOIDCALIBRATIONS +_CLASSIDSIGMOIDCALIBRATIONS.fields_by_name['class_id_sigmoid_parameters_map'].message_type = _CLASSIDSIGMOIDCALIBRATIONS_CLASSIDSIGMOIDPARAMETERSMAPENTRY +_XYPAIRS_XYPAIR.containing_type = _XYPAIRS +_XYPAIRS.fields_by_name['x_y_pair'].message_type = _XYPAIRS_XYPAIR +_XYPAIRS.fields_by_name['training_data_type'].enum_type = _TRAININGDATATYPE +DESCRIPTOR.message_types_by_name['CalibrationConfig'] = _CALIBRATIONCONFIG +DESCRIPTOR.message_types_by_name['FunctionApproximation'] = _FUNCTIONAPPROXIMATION +DESCRIPTOR.message_types_by_name['ClassIdFunctionApproximations'] = _CLASSIDFUNCTIONAPPROXIMATIONS +DESCRIPTOR.message_types_by_name['SigmoidCalibration'] = _SIGMOIDCALIBRATION +DESCRIPTOR.message_types_by_name['ClassIdSigmoidCalibrations'] = _CLASSIDSIGMOIDCALIBRATIONS +DESCRIPTOR.message_types_by_name['TemperatureScalingCalibration'] = _TEMPERATURESCALINGCALIBRATION +DESCRIPTOR.message_types_by_name['XYPairs'] = _XYPAIRS +DESCRIPTOR.message_types_by_name['SigmoidParameters'] = _SIGMOIDPARAMETERS +DESCRIPTOR.enum_types_by_name['TrainingDataType'] = _TRAININGDATATYPE + +CalibrationConfig = _reflection.GeneratedProtocolMessageType('CalibrationConfig', (_message.Message,), dict( + DESCRIPTOR = _CALIBRATIONCONFIG, + __module__ = 'object_detection.protos.calibration_pb2' + # @@protoc_insertion_point(class_scope:object_detection.protos.CalibrationConfig) + )) +_sym_db.RegisterMessage(CalibrationConfig) + +FunctionApproximation = _reflection.GeneratedProtocolMessageType('FunctionApproximation', (_message.Message,), dict( + DESCRIPTOR = _FUNCTIONAPPROXIMATION, + __module__ = 'object_detection.protos.calibration_pb2' + # @@protoc_insertion_point(class_scope:object_detection.protos.FunctionApproximation) + )) +_sym_db.RegisterMessage(FunctionApproximation) + +ClassIdFunctionApproximations = _reflection.GeneratedProtocolMessageType('ClassIdFunctionApproximations', (_message.Message,), dict( + + ClassIdXyPairsMapEntry = _reflection.GeneratedProtocolMessageType('ClassIdXyPairsMapEntry', (_message.Message,), dict( + DESCRIPTOR = _CLASSIDFUNCTIONAPPROXIMATIONS_CLASSIDXYPAIRSMAPENTRY, + __module__ = 'object_detection.protos.calibration_pb2' + # @@protoc_insertion_point(class_scope:object_detection.protos.ClassIdFunctionApproximations.ClassIdXyPairsMapEntry) + )) + , + DESCRIPTOR = _CLASSIDFUNCTIONAPPROXIMATIONS, + __module__ = 'object_detection.protos.calibration_pb2' + # @@protoc_insertion_point(class_scope:object_detection.protos.ClassIdFunctionApproximations) + 
)) +_sym_db.RegisterMessage(ClassIdFunctionApproximations) +_sym_db.RegisterMessage(ClassIdFunctionApproximations.ClassIdXyPairsMapEntry) + +SigmoidCalibration = _reflection.GeneratedProtocolMessageType('SigmoidCalibration', (_message.Message,), dict( + DESCRIPTOR = _SIGMOIDCALIBRATION, + __module__ = 'object_detection.protos.calibration_pb2' + # @@protoc_insertion_point(class_scope:object_detection.protos.SigmoidCalibration) + )) +_sym_db.RegisterMessage(SigmoidCalibration) + +ClassIdSigmoidCalibrations = _reflection.GeneratedProtocolMessageType('ClassIdSigmoidCalibrations', (_message.Message,), dict( + + ClassIdSigmoidParametersMapEntry = _reflection.GeneratedProtocolMessageType('ClassIdSigmoidParametersMapEntry', (_message.Message,), dict( + DESCRIPTOR = _CLASSIDSIGMOIDCALIBRATIONS_CLASSIDSIGMOIDPARAMETERSMAPENTRY, + __module__ = 'object_detection.protos.calibration_pb2' + # @@protoc_insertion_point(class_scope:object_detection.protos.ClassIdSigmoidCalibrations.ClassIdSigmoidParametersMapEntry) + )) + , + DESCRIPTOR = _CLASSIDSIGMOIDCALIBRATIONS, + __module__ = 'object_detection.protos.calibration_pb2' + # @@protoc_insertion_point(class_scope:object_detection.protos.ClassIdSigmoidCalibrations) + )) +_sym_db.RegisterMessage(ClassIdSigmoidCalibrations) +_sym_db.RegisterMessage(ClassIdSigmoidCalibrations.ClassIdSigmoidParametersMapEntry) + +TemperatureScalingCalibration = _reflection.GeneratedProtocolMessageType('TemperatureScalingCalibration', (_message.Message,), dict( + DESCRIPTOR = _TEMPERATURESCALINGCALIBRATION, + __module__ = 'object_detection.protos.calibration_pb2' + # @@protoc_insertion_point(class_scope:object_detection.protos.TemperatureScalingCalibration) + )) +_sym_db.RegisterMessage(TemperatureScalingCalibration) + +XYPairs = _reflection.GeneratedProtocolMessageType('XYPairs', (_message.Message,), dict( + + XYPair = _reflection.GeneratedProtocolMessageType('XYPair', (_message.Message,), dict( + DESCRIPTOR = _XYPAIRS_XYPAIR, + __module__ = 'object_detection.protos.calibration_pb2' + # @@protoc_insertion_point(class_scope:object_detection.protos.XYPairs.XYPair) + )) + , + DESCRIPTOR = _XYPAIRS, + __module__ = 'object_detection.protos.calibration_pb2' + # @@protoc_insertion_point(class_scope:object_detection.protos.XYPairs) + )) +_sym_db.RegisterMessage(XYPairs) +_sym_db.RegisterMessage(XYPairs.XYPair) + +SigmoidParameters = _reflection.GeneratedProtocolMessageType('SigmoidParameters', (_message.Message,), dict( + DESCRIPTOR = _SIGMOIDPARAMETERS, + __module__ = 'object_detection.protos.calibration_pb2' + # @@protoc_insertion_point(class_scope:object_detection.protos.SigmoidParameters) + )) +_sym_db.RegisterMessage(SigmoidParameters) + + +_CLASSIDFUNCTIONAPPROXIMATIONS_CLASSIDXYPAIRSMAPENTRY.has_options = True +_CLASSIDFUNCTIONAPPROXIMATIONS_CLASSIDXYPAIRSMAPENTRY._options = _descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')) +_CLASSIDSIGMOIDCALIBRATIONS_CLASSIDSIGMOIDPARAMETERSMAPENTRY.has_options = True +_CLASSIDSIGMOIDCALIBRATIONS_CLASSIDSIGMOIDPARAMETERSMAPENTRY._options = _descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')) +# @@protoc_insertion_point(module_scope) diff --git a/protos/eval.proto b/protos/eval.proto new file mode 100644 index 0000000..357edc9 --- /dev/null +++ b/protos/eval.proto @@ -0,0 +1,94 @@ +syntax = "proto2"; + +package object_detection.protos; + +// Message for configuring DetectionModel evaluation jobs (eval.py). 
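+//
+// An illustrative eval_config snippet in text format; the field names come
+// from the EvalConfig message below, and the values are examples only (the
+// export directory is a placeholder):
+//
+//   eval_config {
+//     metrics_set: "coco_detection_metrics"
+//     num_visualizations: 10
+//     min_score_threshold: 0.3
+//     visualization_export_dir: "/tmp/eval_vis"
+//   }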
+// Next id - 30
+message EvalConfig {
+  // Batch size used during evaluation.
+  optional uint32 batch_size = 25 [default = 1];
+
+  // Number of visualization images to generate.
+  optional uint32 num_visualizations = 1 [default = 10];
+
+  // Number of examples to process for evaluation.
+  optional uint32 num_examples = 2 [default = 5000, deprecated = true];
+
+  // How often to run evaluation.
+  optional uint32 eval_interval_secs = 3 [default = 300];
+
+  // Maximum number of times to run evaluation. If set to 0, will run forever.
+  optional uint32 max_evals = 4 [default = 0, deprecated = true];
+
+  // Whether the TensorFlow graph used for evaluation should be saved to disk.
+  optional bool save_graph = 5 [default = false];
+
+  // Path to directory to store visualizations in. If empty, visualization
+  // images are not exported (only shown on Tensorboard).
+  optional string visualization_export_dir = 6 [default = ""];
+
+  // BNS name of the TensorFlow master.
+  optional string eval_master = 7 [default = ""];
+
+  // Type of metrics to use for evaluation.
+  repeated string metrics_set = 8;
+
+  // Path to export detections to, in COCO-compatible JSON format.
+  optional string export_path = 9 [default = ""];
+
+  // Option to not read groundtruth labels and only export detections to a
+  // COCO-compatible JSON file.
+  optional bool ignore_groundtruth = 10 [default = false];
+
+  // Use exponential moving averages of variables for evaluation.
+  // TODO(rathodv): When this is false make sure the model is constructed
+  // without moving averages in restore_fn.
+  optional bool use_moving_averages = 11 [default = false];
+
+  // Whether to evaluate instance masks.
+  // Note that since there is currently no evaluation code for instance
+  // segmentation, this option is unused.
+  optional bool eval_instance_masks = 12 [default = false];
+
+  // Minimum score threshold for a detected object box to be visualized.
+  optional float min_score_threshold = 13 [default = 0.5];
+
+  // Maximum number of detections to visualize.
+  optional int32 max_num_boxes_to_visualize = 14 [default = 20];
+
+  // When drawing a single detection, each label is by default visualized as
+  //