Add COCODetection starter code

amdegroot · Mar 5, 2018 · 0628cb0 · 0628cb0
1 parent 2196f7b
commit 0628cb0
Show file tree

Hide file tree

Showing 4 changed files with 105 additions and 1 deletion.
diff --git a/data/__init__.py b/data/__init__.py
@@ -1,4 +1,5 @@
 from .voc0712 import VOCDetection, AnnotationTransform, detection_collate, VOC_CLASSES
+from .coco import COCODetection, COCOAnnotationTransform
 from .config import *
 import cv2
 import numpy as np

diff --git a/data/coco.py b/data/coco.py
@@ -0,0 +1,103 @@
+import os
+
+import cv2
+import numpy as np
+import torch
+import torch.utils.data as data
+
+
+class COCOAnnotationTransform(object):
+    """Convert COCO annotation dicts into normalized corner-form boxes.
+
+    Each annotation that has a ``'bbox'`` entry is turned into
+    ``[xmin, ymin, xmax, ymax, label]`` where the coordinates are divided
+    by the image width/height passed to ``__call__``.
+    """
+
+    def __call__(self, target, width, height):
+        """
+        Arguments:
+            target (iterable of dict): annotations for one image; each dict
+                is read for ``'bbox'`` (treated as ``[x, y, w, h]``) and
+                ``'category_id'``. Dicts without ``'bbox'`` are skipped.
+            width (int): image width used to scale x coordinates
+            height (int): image height used to scale y coordinates
+        Returns:
+            a list of ``[xmin, ymin, xmax, ymax, label_ind]`` lists, one per
+            annotation that carried a ``'bbox'``
+        """
+        scale = np.array([width, height, width, height])
+        res = []
+        for obj in target:
+            if 'bbox' not in obj:
+                continue
+            # Build the corner form from unpacked values instead of editing
+            # obj['bbox'] in place: the caller's annotation may be reused
+            # (e.g. on the next epoch) and must stay in [x, y, w, h] form.
+            x, y, w, h = obj['bbox']
+            corners = np.array([x, y, x + w, y + h])
+            # NOTE(review): the raw ``category_id`` is used as the label;
+            # confirm whether downstream code expects contiguous indices.
+            label_idx = obj['category_id']
+            final_box = list(corners / scale)
+            final_box.append(label_idx)
+            res.append(final_box)  # [xmin, ymin, xmax, ymax, label_ind]
+        return res  # [[xmin, ymin, xmax, ymax, label_ind], ... ]
+
+
+class COCODetection(data.Dataset):
+    """`MS Coco Detection <http://mscoco.org/dataset/#detections-challenge2016>`_ Dataset.
+
+    Args:
+        root (string): Root directory where images are downloaded to.
+        annFile (string): Path to json annotation file.
+        transform (callable, optional): Called as
+            ``transform(img, boxes, labels)`` with the image array read by
+            ``cv2.imread`` and must return ``(img, boxes, labels)``.
+        target_transform (callable, optional): Called as
+            ``target_transform(target, width, height)`` on the list returned
+            by ``coco.loadAnns``.
+    """
+
+    def __init__(self, root, annFile, transform=None, target_transform=None):
+        # Imported here so pycocotools is only required when the COCO
+        # dataset is actually constructed.
+        from pycocotools.coco import COCO
+        self.root = root
+        self.coco = COCO(annFile)
+        self.ids = list(self.coco.imgs.keys())
+        self.transform = transform
+        self.target_transform = target_transform
+
+    def __getitem__(self, index):
+        """
+        Args:
+            index (int): Index
+        Returns:
+            tuple: Tuple (image, target) -- the first two values produced by
+                ``pull_item``; the image height and width are dropped.
+        """
+        im, gt, h, w = self.pull_item(index)
+        return im, gt
+
+    def __len__(self):
+        """Return the number of image ids in the annotation file."""
+        return len(self.ids)
+
+    def pull_item(self, index):
+        """
+        Args:
+            index (int): Index
+        Returns:
+            tuple: Tuple (image, target, height, width). ``image`` is a
+                channels-first tensor; ``target`` is the output of
+                ``coco.loadAnns``, passed through ``target_transform`` and
+                ``transform`` when those are set; ``height`` and ``width``
+                are the dimensions of the image as read from disk.
+        Raises:
+            IOError: if the image file cannot be read.
+        """
+        coco = self.coco
+        img_id = self.ids[index]
+        target = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
+        img_path = os.path.join(self.root, coco.loadImgs(img_id)[0]['file_name'])
+        img = cv2.imread(img_path)
+        if img is None:
+            # cv2.imread signals failure by returning None instead of raising.
+            raise IOError('Could not read image: {}'.format(img_path))
+        height, width = img.shape[:2]
+        if self.target_transform is not None:
+            target = self.target_transform(target, width, height)
+        if self.transform is not None:
+            target = np.array(target)
+            if target.size == 0:
+                # No annotations for this image: keep the column slicing
+                # below valid by using an empty (0, 5) array.
+                target = np.zeros((0, 5))
+            img, boxes, labels = self.transform(img, target[:, :4], target[:, 4])
+            # to rgb
+            # NOTE(review): the channel swap only happens on this branch;
+            # without a transform the image keeps cv2's channel order.
+            img = img[:, :, (2, 1, 0)]
+            target = np.hstack((boxes, np.expand_dims(labels, axis=1)))
+        return torch.from_numpy(img).permute(2, 0, 1), target, height, width
+
+    def __repr__(self):
+        """Return a multi-line summary: size, root and both transforms."""
+        fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
+        fmt_str += '    Number of datapoints: {}\n'.format(len(self))
+        fmt_str += '    Root Location: {}\n'.format(self.root)
+        tmp = '    Transforms (if any): '
+        # Continuation lines of a multi-line repr are indented under the label.
+        fmt_str += '{0}{1}\n'.format(tmp, repr(self.transform).replace('\n', '\n' + ' ' * len(tmp)))
+        tmp = '    Target Transforms (if any): '
+        fmt_str += '{0}{1}'.format(tmp, repr(self.target_transform).replace('\n', '\n' + ' ' * len(tmp)))
+        return fmt_str
diff --git a/data/voc0712.py b/data/voc0712.py
@@ -115,7 +115,6 @@ def __init__(self, root, image_sets, transform=None, target_transform=None,
 
     def __getitem__(self, index):
         im, gt, h, w = self.pull_item(index)
-
         return im, gt
 
     def __len__(self):

diff --git a/utils/__init__.py b/utils/__init__.py
@@ -0,0 +1 @@
+from .augmentations import SSDAugmentation