-
Notifications
You must be signed in to change notification settings - Fork 103
/
voc.py
177 lines (148 loc) · 7.06 KB
/
voc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import os
from xml.etree import ElementTree
from .utils import get_class_names
import numpy as np
from ..abstract import Loader
class VOC(Loader):
    """ Dataset loader for the Pascal VOC detection datasets
    (``VOC2007`` and ``VOC2012``).

    # Arguments
        path: Data path to the directory that contains the ``VOC2007``
            and/or ``VOC2012`` folders.
        split: String determining the data split to load.
            e.g. `train`, `val` or `test`. It must be a list of strings
            (one per dataset) when ``name`` is a list.
        class_names: `all` or list. If list it should contain as elements
            strings indicating each class name.
        name: String or list indicating which dataset or datasets to load.
            e.g. ``VOC2007`` or ``['VOC2007', 'VOC2012']``.
        with_difficult_samples: Boolean. If ``True`` flagged difficult boxes
            will be added to the returned data.
        evaluate: Boolean. If ``True`` returned data will be loaded without
            normalization for a direct evaluation.

    # Return
        data: List of dictionaries, one per image. Each dictionary has the
            key ``image`` with the image path and the key ``boxes`` with a
            numpy array of shape ``[num_objects, 4 + 1]`` where the ``+ 1``
            contains the ``class_arg`` and ``num_objects`` refers to the
            amount of boxes in the image. If ``evaluate`` is ``True`` the
            key ``difficulties`` with a boolean array is also included.
    """
    # TODO check for split
    def __init__(self, path=None, split='train', class_names='all',
                 name='VOC2007', with_difficult_samples=True, evaluate=False):
        # NOTE(review): ``load_data`` reads ``self.path``, ``self.split`` and
        # ``self.name``; presumably ``Loader.__init__`` stores them -- confirm.
        super(VOC, self).__init__(path, split, class_names, name)
        self.with_difficult_samples = with_difficult_samples
        self.evaluate = evaluate
        self._class_names = class_names
        if class_names == 'all':
            self._class_names = get_class_names('VOC')
        # Both attributes are filled in once a dataset has been parsed.
        self.images_path = None
        self.arg_to_class = None

    def load_data(self):
        """Loads the ground truth of the requested dataset(s).

        # Return
            List of dictionaries as described in the class docstring. When
            several datasets are requested their samples are concatenated
            in the same order as given in ``name``.

        # Raises
            TypeError: If ``name`` is a list but ``split`` is not.
            ValueError: If ``name`` is not a valid dataset name (or list of
                valid names), or if ``name`` and ``split`` have different
                lengths.
        """
        valid_names = ('VOC2007', 'VOC2012')
        if isinstance(self.name, str):
            if self.name not in valid_names:
                raise ValueError('Invalid name given.')
            return self._load_VOC(self.name, self.split)

        if not isinstance(self.name, (list, tuple)):
            raise ValueError('Invalid name given.')
        if not isinstance(self.split, (list, tuple)):
            raise TypeError("'split' should also be a list")
        if len(self.name) == 0 or not set(self.name).issubset(valid_names):
            raise ValueError('Invalid name given.')
        if len(self.name) != len(self.split):
            raise ValueError("'name' and 'split' should have the same length")

        # A fresh list is built so the parsers' own lists are never mutated.
        ground_truth_data = []
        for dataset_name, split in zip(self.name, self.split):
            ground_truth_data.extend(self._load_VOC(dataset_name, split))
        return ground_truth_data

    def _load_VOC(self, dataset_name, split):
        """Parses a single dataset split and returns its samples.

        Also updates ``self.parser``, ``self.images_path`` and
        ``self.arg_to_class`` so that they refer to the last parsed dataset.
        """
        self.parser = VOCParser(dataset_name,
                                split,
                                self._class_names,
                                self.with_difficult_samples,
                                self.path,
                                self.evaluate)
        self.images_path = self.parser.images_path
        self.arg_to_class = self.parser.arg_to_class
        return self.parser.load_data()
class VOCParser(object):
    """ Preprocess the Pascal VOC xml annotations data.

    # TODO: Add background label

    # Arguments
        dataset_name: String. Either ``VOC2007`` or ``VOC2012``.
        split: String. Name (without ``.txt``) of the split file located
            in ``ImageSets/Main/``. e.g. `train`, `val` or `test`.
        class_names: `all` or list of strings with the class names to keep.
            The position of each name in the list is its ``class_arg``.
        with_difficult_samples: Boolean. If ``False`` objects flagged as
            difficult are skipped.
        dataset_path: String. Directory containing the ``dataset_name``
            folder.
        evaluate: Boolean. If ``True`` box coordinates are not divided by
            the image size and the difficulty flags are stored per sample.

    # Return
        data: List of dictionaries, one per image having at least one kept
            box. Each dictionary has the key ``image`` (image path) and the
            key ``boxes`` (numpy array of shape (num_objects, 4 + 1) with
            rows ``[xmin, ymin, xmax, ymax, class_arg]``). If ``evaluate``
            is ``True`` the key ``difficulties`` (boolean numpy array) is
            also included. num_objects refers to the number of objects in
            that specific image.

    # Raises
        ValueError: If ``dataset_name`` is not ``VOC2007`` or ``VOC2012``.
    """

    def __init__(self, dataset_name='VOC2007', split='train',
                 class_names='all', with_difficult_samples=True,
                 dataset_path='../datasets/VOCdevkit/',
                 evaluate=False):

        if dataset_name not in ['VOC2007', 'VOC2012']:
            raise ValueError('Invalid dataset name.')

        # creating data set prefix paths variables
        self.dataset_name = dataset_name
        self.dataset_path = os.path.join(dataset_path, dataset_name)
        self.split = split
        self.split_prefix = os.path.join(self.dataset_path, 'ImageSets/Main/')
        self.annotations_path = os.path.join(self.dataset_path, 'Annotations/')
        self.images_path = os.path.join(self.dataset_path, 'JPEGImages/')
        self.with_difficult_samples = with_difficult_samples
        self.evaluate = evaluate

        self.class_names = class_names
        if self.class_names == 'all':
            self.class_names = get_class_names('VOC')
        self.num_classes = len(self.class_names)
        class_keys = np.arange(self.num_classes)
        self.arg_to_class = dict(zip(class_keys, self.class_names))
        self.class_to_arg = {value: key for key, value
                             in self.arg_to_class.items()}
        self.data = []
        # All annotations are parsed eagerly at construction time.
        self._preprocess_XML()

    def _load_filenames(self):
        """Returns the annotation file names listed in the split file."""
        split_file = os.path.join(self.split_prefix, self.split) + '.txt'
        splitted_filenames = []
        # The context manager guarantees the split file gets closed.
        with open(split_file) as filedata:
            for line in filedata:
                image_id = line.strip()
                # Blank lines (e.g. a trailing newline) carry no image id.
                if not image_id:
                    continue
                splitted_filenames.append(image_id + '.xml')
        return splitted_filenames

    def _preprocess_XML(self):
        """Parses every annotation of the split and fills ``self.data``."""
        for filename in self._load_filenames():
            filename_path = os.path.join(self.annotations_path, filename)
            root = ElementTree.parse(filename_path).getroot()
            image_name = root.find('filename').text
            size_tree = root.find('size')
            width = float(size_tree.find('width').text)
            height = float(size_tree.find('height').text)
            # check evaluate flag: dividing by 1 keeps the coordinates
            # un-normalized.
            if self.evaluate:
                width = 1
                height = 1

            box_data = []
            difficulties = []
            for object_tree in root.findall('object'):
                # A missing or empty ``difficult`` tag is treated as
                # "not difficult" instead of crashing.
                difficult_text = object_tree.findtext('difficult')
                if difficult_text is None or not difficult_text.strip():
                    difficulty = 0
                else:
                    difficulty = int(difficult_text)
                if difficulty == 1 and not self.with_difficult_samples:
                    continue
                class_name = object_tree.find('name').text
                if class_name not in self.class_names:
                    continue
                class_arg = self.class_to_arg[class_name]
                bounding_box = object_tree.find('bndbox')
                # VOC dataset format follows Matlab, in which indexes start
                # from 1, so 1 is subtracted to get 0-based coordinates.
                xmin = (float(bounding_box.find('xmin').text) - 1.0) / width
                ymin = (float(bounding_box.find('ymin').text) - 1.0) / height
                xmax = (float(bounding_box.find('xmax').text) - 1.0) / width
                ymax = (float(bounding_box.find('ymax').text) - 1.0) / height
                box_data.append([xmin, ymin, xmax, ymax, class_arg])
                difficulties.append(difficulty)

            # Images without any kept box are not added to the data.
            if not box_data:
                continue

            sample = {'image': os.path.join(self.images_path, image_name),
                      'boxes': np.asarray(box_data)}
            if self.evaluate:
                sample['difficulties'] = np.asarray(difficulties, dtype=bool)
            self.data.append(sample)

    def load_data(self):
        """Returns the list of samples parsed at construction time."""
        return self.data