diff --git a/datumaro/datumaro/plugins/coco_format/converter.py b/datumaro/datumaro/plugins/coco_format/converter.py index 7b91aef4f5d5..42d5497c4719 100644 --- a/datumaro/datumaro/plugins/coco_format/converter.py +++ b/datumaro/datumaro/plugins/coco_format/converter.py @@ -14,12 +14,13 @@ from datumaro.components.converter import Converter from datumaro.components.extractor import (DEFAULT_SUBSET_NAME, - AnnotationType, Points, Mask + AnnotationType, Points ) from datumaro.components.cli_plugin import CliPlugin from datumaro.util import find from datumaro.util.image import save_image import datumaro.util.mask_tools as mask_tools +import datumaro.util.annotation_tools as anno_tools from .format import CocoTask, CocoPath @@ -194,7 +195,7 @@ def crop_segments(cls, instances, img_width, img_height): if inst[1]: inst[1] = sum(new_segments, []) else: - mask = cls.merge_masks(new_segments) + mask = mask_tools.merge_masks(new_segments) inst[2] = mask_tools.mask_to_rle(mask) return instances @@ -205,8 +206,8 @@ def find_instance_parts(self, group, img_width, img_height): masks = [a for a in group if a.type == AnnotationType.mask] anns = boxes + polygons + masks - leader = self.find_group_leader(anns) - bbox = self.compute_bbox(anns) + leader = anno_tools.find_group_leader(anns) + bbox = anno_tools.compute_bbox(anns) mask = None polygons = [p.points for p in polygons] @@ -228,68 +229,29 @@ def find_instance_parts(self, group, img_width, img_height): if masks: if mask is not None: masks += [mask] - mask = self.merge_masks(masks) + mask = mask_tools.merge_masks([m.image for m in masks]) if mask is not None: mask = mask_tools.mask_to_rle(mask) polygons = [] else: if masks: - mask = self.merge_masks(masks) + mask = mask_tools.merge_masks([m.image for m in masks]) polygons += mask_tools.mask_to_polygons(mask) mask = None return [leader, polygons, mask, bbox] - @staticmethod - def find_group_leader(group): - return max(group, key=lambda x: x.get_area()) - - @staticmethod - def merge_masks(masks): - if not masks: - return None - - def get_mask(m): - if isinstance(m, Mask): - return m.image - else: - return m - - binary_mask = get_mask(masks[0]) - for m in masks[1:]: - binary_mask |= get_mask(m) - - return binary_mask - - @staticmethod - def compute_bbox(annotations): - boxes = [ann.get_bbox() for ann in annotations] - x0 = min((b[0] for b in boxes), default=0) - y0 = min((b[1] for b in boxes), default=0) - x1 = max((b[0] + b[2] for b in boxes), default=0) - y1 = max((b[1] + b[3] for b in boxes), default=0) - return [x0, y0, x1 - x0, y1 - y0] - @staticmethod def find_instance_anns(annotations): return [a for a in annotations - if a.type in { AnnotationType.bbox, AnnotationType.polygon } or \ - a.type == AnnotationType.mask and a.label is not None + if a.type in { AnnotationType.bbox, + AnnotationType.polygon, AnnotationType.mask } ] @classmethod def find_instances(cls, annotations): - instance_anns = cls.find_instance_anns(annotations) - - ann_groups = [] - for g_id, group in groupby(instance_anns, lambda a: a.group): - if not g_id: - ann_groups.extend(([a] for a in group)) - else: - ann_groups.append(list(group)) - - return ann_groups + return anno_tools.find_instances(cls.find_instance_anns(annotations)) def save_annotations(self, item): instances = self.find_instances(item.annotations) diff --git a/datumaro/datumaro/plugins/tf_detection_api_format/converter.py b/datumaro/datumaro/plugins/tf_detection_api_format/converter.py index 340492638fe5..2a32d4f151af 100644 --- a/datumaro/datumaro/plugins/tf_detection_api_format/converter.py +++ b/datumaro/datumaro/plugins/tf_detection_api_format/converter.py @@ -16,98 +16,34 @@ from datumaro.components.converter import Converter from datumaro.components.cli_plugin import CliPlugin from datumaro.util.image import encode_image +from datumaro.util.mask_tools import merge_masks +from datumaro.util.annotation_tools import (compute_bbox, + find_group_leader, find_instances) from datumaro.util.tf_util import import_tf as _import_tf from .format import DetectionApiPath tf = _import_tf() -# we need it to filter out non-ASCII characters, otherwise training will crash +# filter out non-ASCII characters, otherwise training will crash _printable = set(string.printable) def _make_printable(s): return ''.join(filter(lambda x: x in _printable, s)) -def _make_tf_example(item, get_label_id, get_label, save_images=False): - def int64_feature(value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) - - def int64_list_feature(value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) - - def bytes_feature(value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - - def bytes_list_feature(value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) - - def float_list_feature(value): - return tf.train.Feature(float_list=tf.train.FloatList(value=value)) - - - features = { - 'image/source_id': bytes_feature(str(item.id).encode('utf-8')), - 'image/filename': bytes_feature( - ('%s%s' % (item.id, DetectionApiPath.IMAGE_EXT)).encode('utf-8')), - } - - if not item.has_image: - raise Exception("Failed to export dataset item '%s': " - "item has no image info" % item.id) - height, width = item.image.size - - features.update({ - 'image/height': int64_feature(height), - 'image/width': int64_feature(width), - }) - - features.update({ - 'image/encoded': bytes_feature(b''), - 'image/format': bytes_feature(b'') - }) - if save_images: - if item.has_image and item.image.has_data: - fmt = DetectionApiPath.IMAGE_FORMAT - buffer = encode_image(item.image.data, DetectionApiPath.IMAGE_EXT) - - features.update({ - 'image/encoded': bytes_feature(buffer), - 'image/format': bytes_feature(fmt.encode('utf-8')), - }) - else: - log.warning("Item '%s' has no image" % item.id) - - xmins = [] # List of normalized left x coordinates in bounding box (1 per box) - xmaxs = [] # List of normalized right x coordinates in bounding box (1 per box) - ymins = [] # List of normalized top y coordinates in bounding box (1 per box) - ymaxs = [] # List of normalized bottom y coordinates in bounding box (1 per box) - classes_text = [] # List of string class name of bounding box (1 per box) - classes = [] # List of integer class id of bounding box (1 per box) - - boxes = [ann for ann in item.annotations if ann.type is AnnotationType.bbox] - for box in boxes: - box_label = _make_printable(get_label(box.label)) - - xmins.append(box.points[0] / width) - xmaxs.append(box.points[2] / width) - ymins.append(box.points[1] / height) - ymaxs.append(box.points[3] / height) - classes_text.append(box_label.encode('utf-8')) - classes.append(get_label_id(box.label)) - - if boxes: - features.update({ - 'image/object/bbox/xmin': float_list_feature(xmins), - 'image/object/bbox/xmax': float_list_feature(xmaxs), - 'image/object/bbox/ymin': float_list_feature(ymins), - 'image/object/bbox/ymax': float_list_feature(ymaxs), - 'image/object/class/text': bytes_list_feature(classes_text), - 'image/object/class/label': int64_list_feature(classes), - }) +def int64_feature(value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) + +def int64_list_feature(value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) - tf_example = tf.train.Example( - features=tf.train.Features(feature=features)) +def bytes_feature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - return tf_example +def bytes_list_feature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) + +def float_list_feature(value): + return tf.train.Feature(float_list=tf.train.FloatList(value=value)) class TfDetectionApiConverter(Converter, CliPlugin): @classmethod @@ -115,16 +51,29 @@ def build_cmdline_parser(cls, **kwargs): parser = super().build_cmdline_parser(**kwargs) parser.add_argument('--save-images', action='store_true', help="Save images (default: %(default)s)") + parser.add_argument('--save-masks', action='store_true', + help="Include instance masks (default: %(default)s)") return parser - def __init__(self, save_images=False): + def __init__(self, save_images=False, save_masks=False): super().__init__() self._save_images = save_images + self._save_masks = save_masks def __call__(self, extractor, save_dir): os.makedirs(save_dir, exist_ok=True) + label_categories = extractor.categories().get(AnnotationType.label, + LabelCategories()) + get_label = lambda label_id: label_categories.items[label_id].name \ + if label_id is not None else '' + label_ids = OrderedDict((label.name, 1 + idx) + for idx, label in enumerate(label_categories.items)) + map_label_id = lambda label_id: label_ids.get(get_label(label_id), 0) + self._get_label = get_label + self._get_label_id = map_label_id + subsets = extractor.subsets() if len(subsets) == 0: subsets = [ None ] @@ -136,14 +85,6 @@ def __call__(self, extractor, save_dir): subset_name = DEFAULT_SUBSET_NAME subset = extractor - label_categories = subset.categories().get(AnnotationType.label, - LabelCategories()) - get_label = lambda label_id: label_categories.items[label_id].name \ - if label_id is not None else '' - label_ids = OrderedDict((label.name, 1 + idx) - for idx, label in enumerate(label_categories.items)) - map_label_id = lambda label_id: label_ids.get(get_label(label_id), 0) - labelmap_path = osp.join(save_dir, DetectionApiPath.LABELMAP_FILE) with codecs.open(labelmap_path, 'w', encoding='utf8') as f: for label, idx in label_ids.items(): @@ -157,10 +98,106 @@ def __call__(self, extractor, save_dir): anno_path = osp.join(save_dir, '%s.tfrecord' % (subset_name)) with tf.io.TFRecordWriter(anno_path) as writer: for item in subset: - tf_example = _make_tf_example( - item, - get_label=get_label, - get_label_id=map_label_id, - save_images=self._save_images, - ) + tf_example = self._make_tf_example(item) writer.write(tf_example.SerializeToString()) + + @staticmethod + def _find_instances(annotations): + return find_instances(a for a in annotations + if a.type in { AnnotationType.bbox, AnnotationType.mask }) + + def _find_instance_parts(self, group, img_width, img_height): + boxes = [a for a in group if a.type == AnnotationType.bbox] + masks = [a for a in group if a.type == AnnotationType.mask] + + anns = boxes + masks + leader = find_group_leader(anns) + bbox = compute_bbox(anns) + + mask = None + if self._save_masks: + mask = merge_masks([m.image for m in masks]) + + return [leader, mask, bbox] + + def _export_instances(self, instances, width, height): + xmins = [] # List of normalized left x coordinates of bounding boxes (1 per box) + xmaxs = [] # List of normalized right x coordinates of bounding boxes (1 per box) + ymins = [] # List of normalized top y coordinates of bounding boxes (1 per box) + ymaxs = [] # List of normalized bottom y coordinates of bounding boxes (1 per box) + classes_text = [] # List of class names of bounding boxes (1 per box) + classes = [] # List of class ids of bounding boxes (1 per box) + masks = [] # List of PNG-encoded instance masks (1 per box) + + for leader, mask, box in instances: + label = _make_printable(self._get_label(leader.label)) + classes_text.append(label.encode('utf-8')) + classes.append(self._get_label_id(leader.label)) + + xmins.append(box[0] / width) + xmaxs.append((box[0] + box[2]) / width) + ymins.append(box[1] / height) + ymaxs.append((box[1] + box[3]) / height) + + if self._save_masks: + if mask is not None: + mask = encode_image(mask, '.png') + else: + mask = b'' + masks.append(mask) + + result = {} + if classes: + result = { + 'image/object/bbox/xmin': float_list_feature(xmins), + 'image/object/bbox/xmax': float_list_feature(xmaxs), + 'image/object/bbox/ymin': float_list_feature(ymins), + 'image/object/bbox/ymax': float_list_feature(ymaxs), + 'image/object/class/text': bytes_list_feature(classes_text), + 'image/object/class/label': int64_list_feature(classes), + } + if masks: + result['image/object/mask'] = bytes_list_feature(masks) + return result + + def _make_tf_example(self, item): + features = { + 'image/source_id': bytes_feature(str(item.id).encode('utf-8')), + 'image/filename': bytes_feature( + ('%s%s' % (item.id, DetectionApiPath.IMAGE_EXT)).encode('utf-8')), + } + + if not item.has_image: + raise Exception("Failed to export dataset item '%s': " + "item has no image info" % item.id) + height, width = item.image.size + + features.update({ + 'image/height': int64_feature(height), + 'image/width': int64_feature(width), + }) + + features.update({ + 'image/encoded': bytes_feature(b''), + 'image/format': bytes_feature(b'') + }) + if self._save_images: + if item.has_image and item.image.has_data: + fmt = DetectionApiPath.IMAGE_FORMAT + buffer = encode_image(item.image.data, DetectionApiPath.IMAGE_EXT) + + features.update({ + 'image/encoded': bytes_feature(buffer), + 'image/format': bytes_feature(fmt.encode('utf-8')), + }) + else: + log.warning("Item '%s' has no image" % item.id) + + instances = self._find_instances(item.annotations) + instances = [self._find_instance_parts(i, width, height) for i in instances] + features.update(self._export_instances(instances, width, height)) + + tf_example = tf.train.Example( + features=tf.train.Features(feature=features)) + + return tf_example diff --git a/datumaro/datumaro/plugins/tf_detection_api_format/extractor.py b/datumaro/datumaro/plugins/tf_detection_api_format/extractor.py index 8974c65d8053..eebff4a19dc0 100644 --- a/datumaro/datumaro/plugins/tf_detection_api_format/extractor.py +++ b/datumaro/datumaro/plugins/tf_detection_api_format/extractor.py @@ -10,7 +10,7 @@ from datumaro.components.extractor import (SourceExtractor, DEFAULT_SUBSET_NAME, DatasetItem, - AnnotationType, Bbox, LabelCategories + AnnotationType, Bbox, Mask, LabelCategories ) from datumaro.util.image import Image, decode_image, lazy_image from datumaro.util.tf_util import import_tf as _import_tf @@ -147,6 +147,8 @@ def _parse_tfrecord_file(cls, filepath, subset_name, images_dir): labels = tf.sparse.to_dense( parsed_record['image/object/class/text'], default_value=b'').numpy() + masks = tf.sparse.to_dense( + parsed_record['image/object/mask']).numpy() for label, label_id in zip(labels, label_ids): label = label.decode('utf-8') @@ -163,15 +165,38 @@ def _parse_tfrecord_file(cls, filepath, subset_name, images_dir): item_id = osp.splitext(frame_filename)[0] annotations = [] - for shape in np.dstack((labels, xmins, ymins, xmaxs, ymaxs))[0]: + for shape_id, shape in enumerate( + np.dstack((labels, xmins, ymins, xmaxs, ymaxs))[0]): label = shape[0].decode('utf-8') - x = clamp(shape[1] * frame_width, 0, frame_width) - y = clamp(shape[2] * frame_height, 0, frame_height) - w = clamp(shape[3] * frame_width, 0, frame_width) - x - h = clamp(shape[4] * frame_height, 0, frame_height) - y - annotations.append(Bbox(x, y, w, h, - label=dataset_labels.get(label) - )) + + mask = None + if len(masks) != 0: + mask = masks[shape_id] + + if mask is not None: + if isinstance(mask, bytes): + mask = lazy_image(mask, decode_image) + annotations.append(Mask(image=mask, + label=dataset_labels.get(label) + )) + else: + x = clamp(shape[1] * frame_width, 0, frame_width) + y = clamp(shape[2] * frame_height, 0, frame_height) + w = clamp(shape[3] * frame_width, 0, frame_width) - x + h = clamp(shape[4] * frame_height, 0, frame_height) - y + annotations.append(Bbox(x, y, w, h, + label=dataset_labels.get(label) + )) + + image_size = None + if frame_height and frame_width: + image_size = (frame_height, frame_width) + + image_params = {} + if frame_image and frame_format: + image_params['data'] = lazy_image(frame_image, decode_image) + if frame_filename and images_dir: + image_params['path'] = osp.join(images_dir, frame_filename) image_size = None if frame_height and frame_width: diff --git a/datumaro/datumaro/plugins/transforms.py b/datumaro/datumaro/plugins/transforms.py index 81c5ff501872..693edbc339c2 100644 --- a/datumaro/datumaro/plugins/transforms.py +++ b/datumaro/datumaro/plugins/transforms.py @@ -3,16 +3,16 @@ # # SPDX-License-Identifier: MIT -from itertools import groupby import logging as log import os.path as osp import pycocotools.mask as mask_utils from datumaro.components.extractor import (Transform, AnnotationType, - Mask, RleMask, Polygon, Bbox) + RleMask, Polygon, Bbox) from datumaro.components.cli_plugin import CliPlugin import datumaro.util.mask_tools as mask_tools +from datumaro.util.annotation_tools import find_group_leader, find_instances class CropCoveredSegments(Transform, CliPlugin): @@ -125,7 +125,7 @@ def merge_segments(cls, instance, img_width, img_height, if not polygons and not masks: return [] - leader = cls.find_group_leader(polygons + masks) + leader = find_group_leader(polygons + masks) instance = [] # Build the resulting mask @@ -138,9 +138,10 @@ def merge_segments(cls, instance, img_width, img_height, instance += polygons # keep unused polygons if masks: + masks = [m.image for m in masks] if mask is not None: masks += [mask] - mask = cls.merge_masks(masks) + mask = mask_tools.merge_masks(masks) if mask is None: return instance @@ -154,41 +155,10 @@ def merge_segments(cls, instance, img_width, img_height, ) return instance - @staticmethod - def find_group_leader(group): - return max(group, key=lambda x: x.get_area()) - - @staticmethod - def merge_masks(masks): - if not masks: - return None - - def get_mask(m): - if isinstance(m, Mask): - return m.image - else: - return m - - binary_mask = get_mask(masks[0]) - for m in masks[1:]: - binary_mask |= get_mask(m) - - return binary_mask - @staticmethod def find_instances(annotations): - segment_anns = (a for a in annotations - if a.type in {AnnotationType.polygon, AnnotationType.mask} - ) - - ann_groups = [] - for g_id, group in groupby(segment_anns, lambda a: a.group): - if g_id is None: - ann_groups.extend(([a] for a in group)) - else: - ann_groups.append(list(group)) - - return ann_groups + return find_instances(a for a in annotations + if a.type in {AnnotationType.polygon, AnnotationType.mask}) class PolygonsToMasks(Transform, CliPlugin): def transform_item(self, item): diff --git a/datumaro/datumaro/util/annotation_tools.py b/datumaro/datumaro/util/annotation_tools.py new file mode 100644 index 000000000000..00871b157ec3 --- /dev/null +++ b/datumaro/datumaro/util/annotation_tools.py @@ -0,0 +1,28 @@ + +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from itertools import groupby + + +def find_instances(instance_anns): + ann_groups = [] + for g_id, group in groupby(instance_anns, lambda a: a.group): + if not g_id: + ann_groups.extend(([a] for a in group)) + else: + ann_groups.append(list(group)) + + return ann_groups + +def find_group_leader(group): + return max(group, key=lambda x: x.get_area()) + +def compute_bbox(annotations): + boxes = [ann.get_bbox() for ann in annotations] + x0 = min((b[0] for b in boxes), default=0) + y0 = min((b[1] for b in boxes), default=0) + x1 = max((b[0] + b[2] for b in boxes), default=0) + y1 = max((b[1] + b[3] for b in boxes), default=0) + return [x0, y0, x1 - x0, y1 - y0] \ No newline at end of file diff --git a/datumaro/datumaro/util/image.py b/datumaro/datumaro/util/image.py index 712a4f789ea4..2d465f71a4c4 100644 --- a/datumaro/datumaro/util/image.py +++ b/datumaro/datumaro/util/image.py @@ -169,8 +169,6 @@ def __init__(self, data=None, path=None, loader=None, cache=None, if size is not None: assert len(size) == 2 and 0 < size[0] and 0 < size[1], size size = tuple(size) - else: - size = None self._size = size # (H, W) assert path is None or isinstance(path, str) diff --git a/datumaro/tests/test_tfrecord_format.py b/datumaro/tests/test_tfrecord_format.py index efbef0fd2b81..0bd29ae41794 100644 --- a/datumaro/tests/test_tfrecord_format.py +++ b/datumaro/tests/test_tfrecord_format.py @@ -3,7 +3,7 @@ from unittest import TestCase from datumaro.components.extractor import (Extractor, DatasetItem, - AnnotationType, Bbox, LabelCategories + AnnotationType, Bbox, Mask, LabelCategories ) from datumaro.plugins.tf_detection_api_format.importer import TfDetectionApiImporter from datumaro.plugins.tf_detection_api_format.extractor import TfDetectionApiExtractor @@ -65,6 +65,35 @@ def categories(self): TestExtractor(), TfDetectionApiConverter(save_images=True), test_dir) + def test_can_save_masks(self): + class TestExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=1, subset='train', image=np.ones((4, 5, 3)), + annotations=[ + Mask(image=np.array([ + [1, 0, 0, 1], + [0, 1, 1, 0], + [0, 1, 1, 0], + [1, 0, 0, 1], + ]), label=1), + ] + ), + ]) + + def categories(self): + label_cat = LabelCategories() + for label in range(10): + label_cat.add('label_' + str(label)) + return { + AnnotationType.label: label_cat, + } + + with TestDir() as test_dir: + self._test_save_and_load( + TestExtractor(), TfDetectionApiConverter(save_masks=True), + test_dir) + def test_can_save_dataset_with_no_subsets(self): class TestExtractor(Extractor): def __iter__(self): diff --git a/datumaro/tests/test_transforms.py b/datumaro/tests/test_transforms.py index 11e997b19d73..6260fe517fd0 100644 --- a/datumaro/tests/test_transforms.py +++ b/datumaro/tests/test_transforms.py @@ -159,8 +159,10 @@ def __iter__(self): [1, 0, 0, 0, 0], [1, 1, 1, 0, 0]], ), - z_order=0), + z_order=0, group=1), Polygon([1, 1, 4, 1, 4, 4, 1, 4], + z_order=1, group=1), + Polygon([0, 0, 0, 2, 2, 2, 2, 0], z_order=1), ] ), @@ -178,7 +180,15 @@ def __iter__(self): [1, 1, 1, 1, 0], [1, 1, 1, 0, 0]], ), - z_order=0), + z_order=0, group=1), + Mask(np.array([ + [1, 1, 0, 0, 0], + [1, 1, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0]], + ), + z_order=1), ] ), ])