Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: refactoring rotated boxes #731

Merged
merged 36 commits into from
Dec 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
aa4e253
refacto: rboxes
charlesmindee Dec 16, 2021
dc95b33
feat: builder modifications
charlesmindee Dec 17, 2021
3b3418f
fix: rotate_abs_boxes
charlesmindee Dec 17, 2021
d940277
refacto: viz + metrics
charlesmindee Dec 17, 2021
5090582
fix: merging issues
charlesmindee Dec 20, 2021
29d9d53
fix: flake8 + typing
charlesmindee Dec 20, 2021
17c8572
Merge branch 'main' into refacto_polys
charlesmindee Dec 20, 2021
2903f02
fix: debug 1
charlesmindee Dec 20, 2021
35332e6
fix: debug 2 tests
charlesmindee Dec 21, 2021
e0e32c7
fix: debug test 3
charlesmindee Dec 21, 2021
2876eb9
fix: requested changes
charlesmindee Dec 21, 2021
6a70b70
Merge branch 'main' into refacto_polys
charlesmindee Dec 21, 2021
adb95bf
fix: test rotate
charlesmindee Dec 21, 2021
183ce86
fix: debug 4
charlesmindee Dec 22, 2021
a934869
fix: cv2 tests metrics
charlesmindee Dec 22, 2021
a1b76b5
fix: revert changes
charlesmindee Dec 22, 2021
ad593d2
fix: isort
charlesmindee Dec 23, 2021
3db928f
fix: thresh for rotated ckpt
charlesmindee Dec 23, 2021
b388bf5
fix: utils + scripts
charlesmindee Dec 23, 2021
6ae8240
fix: warnings
charlesmindee Dec 23, 2021
cef1931
fix: tests
charlesmindee Dec 23, 2021
8772c63
Merge branch 'main' into refacto_polys
charlesmindee Dec 23, 2021
2e29f9f
fix: tf zoo tests
charlesmindee Dec 23, 2021
badbb6a
Merge branch 'main' into refacto_polys
charlesmindee Dec 23, 2021
213ac99
fix: requested changes 1
charlesmindee Dec 23, 2021
5f38ab4
fix: tests
charlesmindee Dec 24, 2021
6877601
fix: tests
charlesmindee Dec 24, 2021
3552a97
fix: scripts
charlesmindee Dec 24, 2021
bbf689d
fix: dataset stack
charlesmindee Dec 24, 2021
445f9d4
fix: typing
charlesmindee Dec 24, 2021
6bd4c59
fix: test zoo tf
charlesmindee Dec 24, 2021
17f1758
fix: builder
charlesmindee Dec 24, 2021
a6f01c8
fix: merging conflicts
charlesmindee Dec 24, 2021
4f99940
fix: tests merging conflicts
charlesmindee Dec 24, 2021
219661f
fix: minor changes
charlesmindee Dec 26, 2021
715040a
fix: crop extraction fn
charlesmindee Dec 26, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions doctr/datasets/cord.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@

import numpy as np

from doctr.utils.geometry import fit_rbbox

from .datasets import VisionDataset

__all__ = ['CORD']
Expand All @@ -28,7 +26,7 @@ class CORD(VisionDataset):

Args:
train: whether the subset should be the training one
rotated_bbox: whether polygons should be considered as rotated bounding box (instead of straight ones)
use_polygons: whether targets should be returned as polygons, i.e. arrays of (x, y) corner points (instead of straight bounding boxes)
**kwargs: keyword arguments from `VisionDataset`.
"""
TRAIN = ('https://github.com/mindee/doctr/releases/download/v0.1.1/cord_train.zip',
Expand All @@ -40,7 +38,7 @@ class CORD(VisionDataset):
def __init__(
self,
train: bool = True,
rotated_bbox: bool = False,
use_polygons: bool = False,
**kwargs: Any,
) -> None:

Expand All @@ -65,13 +63,13 @@ def __init__(
if len(word["text"]) > 0:
x = word["quad"]["x1"], word["quad"]["x2"], word["quad"]["x3"], word["quad"]["x4"]
y = word["quad"]["y1"], word["quad"]["y2"], word["quad"]["y3"], word["quad"]["y4"]
if rotated_bbox:
box = list(fit_rbbox(np.array([
if use_polygons:
box = np.array([
[x[0], y[0]],
[x[1], y[1]],
[x[2], y[2]],
[x[3], y[3]],
], dtype=np.float32)))
], dtype=np.float32)
else:
# Reduce 8 coords to 4
box = [min(x), min(y), max(x), max(y)]
Expand Down
19 changes: 6 additions & 13 deletions doctr/datasets/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@

import numpy as np

from doctr.utils.geometry import fit_rbbox

from .datasets import AbstractDataset

__all__ = ["DetectionDataset"]
Expand All @@ -27,14 +25,14 @@ class DetectionDataset(AbstractDataset):
Args:
img_folder: folder with all the images of the dataset
label_path: path to the annotations of each image
rotated_bbox: whether polygons should be considered as rotated bounding box (instead of straight ones)
use_polygons: whether the annotated polygons should be kept as (x, y) points (instead of being reduced to straight bounding boxes)
"""

def __init__(
self,
img_folder: str,
label_path: str,
rotated_bbox: bool = False,
use_polygons: bool = False,
**kwargs: Any,
) -> None:
super().__init__(img_folder, **kwargs)
Expand All @@ -52,14 +50,9 @@ def __init__(
raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_name)}")

polygons = np.asarray(label['polygons'])
if rotated_bbox:
# Switch to rotated rects
boxes = np.asarray([list(fit_rbbox(poly)) for poly in polygons])
else:
# Switch to xmin, ymin, xmax, ymax
boxes = np.concatenate((polygons.min(axis=1), polygons.max(axis=1)), axis=1)
geoms = polygons if use_polygons else np.concatenate((polygons.min(axis=1), polygons.max(axis=1)), axis=1)

self.data.append((img_name, np.asarray(boxes, dtype=np.float32)))
self.data.append((img_name, np.asarray(geoms, dtype=np.float32)))

def __getitem__(
self,
Expand All @@ -76,8 +69,8 @@ def __getitem__(

# Boxes
target = target.copy()
target[..., [0, 2]] /= w
target[..., [1, 3]] /= h
target[..., 0] /= w
target[..., 1] /= h
target = target.clip(0, 1)

return img, target
22 changes: 11 additions & 11 deletions doctr/datasets/doc_artefacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class DocArtefacts(VisionDataset):

Args:
train: whether the subset should be the training one
rotated_bbox: whether polygons should be considered as rotated bounding box (instead of straight ones)
use_polygons: whether targets should be returned as polygons, i.e. arrays of (x, y) corner points (instead of straight bounding boxes)
**kwargs: keyword arguments from `VisionDataset`.
"""

Expand All @@ -36,7 +36,7 @@ class DocArtefacts(VisionDataset):
def __init__(
self,
train: bool = True,
rotated_bbox: bool = False,
use_polygons: bool = False,
**kwargs: Any,
) -> None:

Expand All @@ -61,15 +61,15 @@ def __init__(

boxes = np.asarray([obj['geometry'] for obj in label], dtype=np_dtype)
classes = np.asarray([self.CLASSES.index(obj['label']) for obj in label], dtype=np.int64)
if rotated_bbox:
# box_targets: xmin, ymin, xmax, ymax -> x, y, w, h, alpha = 0
boxes = np.stack((
boxes[:, [0, 2]].mean(axis=1),
boxes[:, [1, 3]].mean(axis=1),
boxes[:, 2] - boxes[:, 0],
boxes[:, 3] - boxes[:, 1],
np.zeros(boxes.shape[0], dtype=np_dtype),
), axis=1)
if use_polygons:
boxes = np.stack(
[
np.stack([boxes[:, 0], boxes[:, 1]], axis=-1),
np.stack([boxes[:, 2], boxes[:, 1]], axis=-1),
np.stack([boxes[:, 2], boxes[:, 3]], axis=-1),
np.stack([boxes[:, 0], boxes[:, 3]], axis=-1),
], axis=1
)
self.data.append((img_name, dict(boxes=boxes, labels=classes)))
self.root = tmp_root

Expand Down
6 changes: 3 additions & 3 deletions doctr/datasets/funsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class FUNSD(VisionDataset):

Args:
train: whether the subset should be the training one
rotated_bbox: whether polygons should be considered as rotated bounding box (instead of straight ones)
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
**kwargs: keyword arguments from `VisionDataset`.
"""

Expand All @@ -37,7 +37,7 @@ class FUNSD(VisionDataset):
def __init__(
self,
train: bool = True,
rotated_bbox: bool = False,
use_polygons: bool = False,
**kwargs: Any,
) -> None:

Expand All @@ -62,7 +62,7 @@ def __init__(
_targets = [(word['text'], word['box']) for block in data['form']
for word in block['words'] if len(word['text']) > 0]
text_targets, box_targets = zip(*_targets)
if rotated_bbox:
if use_polygons:
# box_targets: xmin, ymin, xmax, ymax -> x, y, w, h, alpha = 0
box_targets = [
[
Expand Down
33 changes: 21 additions & 12 deletions doctr/datasets/ic03.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class IC03(VisionDataset):

Args:
train: whether the subset should be the training one
rotated_bbox: whether polygons should be considered as rotated bounding box (instead of straight ones)
use_polygons: whether targets should be returned as polygons, i.e. arrays of (x, y) corner points (instead of straight bounding boxes)
**kwargs: keyword arguments from `VisionDataset`.
"""

Expand All @@ -39,7 +39,7 @@ class IC03(VisionDataset):
def __init__(
self,
train: bool = True,
rotated_bbox: bool = False,
use_polygons: bool = False,
**kwargs: Any,
) -> None:

Expand All @@ -62,20 +62,25 @@ def __init__(
if not os.path.exists(os.path.join(tmp_root, name.text)):
raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, name.text)}")

if rotated_bbox:
# x_center, y_center, width, height, 0
if use_polygons:
_boxes = [
[float(rect.attrib['x']) + float(rect.attrib['width']) / 2,
float(rect.attrib['y']) + float(rect.attrib['height']) / 2,
float(rect.attrib['width']), float(rect.attrib['height']), float(rect.attrib['rotation'])]
[
[float(rect.attrib['x']), float(rect.attrib['y'])],
[float(rect.attrib['x']) + float(rect.attrib['width']), float(rect.attrib['y'])],
[
float(rect.attrib['x']) + float(rect.attrib['width']),
float(rect.attrib['y']) + float(rect.attrib['height'])
],
[float(rect.attrib['x']), float(rect.attrib['y']) + float(rect.attrib['height'])],
]
for rect in rectangles
]
else:
# x_min, y_min, x_max, y_max
_boxes = [
[float(rect.attrib['x']), float(rect.attrib['y']),
float(rect.attrib['x']) + float(rect.attrib['width']),
float(rect.attrib['y']) + float(rect.attrib['height'])]
[float(rect.attrib['x']), float(rect.attrib['y']), # type: ignore[list-item]
float(rect.attrib['x']) + float(rect.attrib['width']), # type: ignore[list-item]
float(rect.attrib['y']) + float(rect.attrib['height'])] # type: ignore[list-item]
for rect in rectangles
]

Expand All @@ -84,8 +89,12 @@ def __init__(
# Convert them to relative
w, h = int(resolution.attrib['x']), int(resolution.attrib['y'])
boxes = np.asarray(_boxes, dtype=np_dtype)
boxes[:, [0, 2]] /= w
boxes[:, [1, 3]] /= h
if use_polygons:
boxes[:, :, 0] /= w
boxes[:, :, 1] /= h
else:
boxes[:, [0, 2]] /= w
boxes[:, [1, 3]] /= h

# Get the labels
labels = [lab.text for rect in rectangles for lab in rect if lab.text]
Expand Down
29 changes: 19 additions & 10 deletions doctr/datasets/ic13.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@ class IC13(AbstractDataset):
Args:
img_folder: folder with all the images of the dataset
label_folder: folder with all annotation files for the images
rotated_bbox: whether polygons should be considered as rotated bounding box (instead of straight ones)
use_polygons: whether targets should be returned as polygons, i.e. arrays of (x, y) corner points (instead of straight bounding boxes)
"""

def __init__(
self,
img_folder: str,
label_folder: str,
rotated_bbox: bool = False,
use_polygons: bool = False,
**kwargs: Any,
) -> None:
super().__init__(img_folder, **kwargs)
Expand Down Expand Up @@ -64,13 +64,18 @@ def __init__(
labels = [line[-1] for line in _lines]
# xmin, ymin, xmax, ymax
box_targets = np.array([list(map(int, line[:4])) for line in _lines], dtype=np_dtype)
if rotated_bbox:
if use_polygons:
# xmin, ymin, xmax, ymax -> 4 corner points: (xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)
box_targets = np.array([[coords[0] + (coords[2] - coords[0]) / 2,
coords[1] + (coords[3] - coords[1]) / 2,
(coords[2] - coords[0]),
(coords[3] - coords[1]), 0.0] for coords in box_targets], dtype=np_dtype)

box_targets = np.array(
[
[
[coords[0], coords[1]],
[coords[2], coords[1]],
[coords[2], coords[3]],
[coords[0], coords[3]],
] for coords in box_targets
], dtype=np_dtype
)
self.data.append((img_path, dict(boxes=box_targets, labels=labels)))

def __getitem__(self, index: int) -> Tuple[np.ndarray, Dict[str, Any]]:
Expand All @@ -81,8 +86,12 @@ def __getitem__(self, index: int) -> Tuple[np.ndarray, Dict[str, Any]]:

# Boxes
boxes = target['boxes'].copy()
boxes[..., [0, 2]] /= w
boxes[..., [1, 3]] /= h
if boxes.ndim == 3:
boxes[..., 0] /= w
boxes[..., 1] /= h
else:
boxes[..., [0, 2]] /= w
boxes[..., [1, 3]] /= h
boxes = boxes.clip(0, 1)
target['boxes'] = boxes

Expand Down
15 changes: 11 additions & 4 deletions doctr/datasets/iiit5k.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class IIIT5K(VisionDataset):

Args:
train: whether the subset should be the training one
rotated_bbox: whether polygons should be considered as rotated bounding box (instead of straight ones)
use_polygons: whether targets should be returned as polygons, i.e. arrays of (x, y) corner points (instead of straight bounding boxes)
**kwargs: keyword arguments from `VisionDataset`.
"""

Expand All @@ -38,7 +38,7 @@ class IIIT5K(VisionDataset):
def __init__(
self,
train: bool = True,
rotated_bbox: bool = False,
use_polygons: bool = False,
**kwargs: Any,
) -> None:

Expand All @@ -61,9 +61,16 @@ def __init__(
if not os.path.exists(os.path.join(tmp_root, _raw_path)):
raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, _raw_path)}")

if rotated_bbox:
if use_polygons:
# x, y, width, height -> 4 corner points: (x, y), (x + width, y), (x + width, y + height), (x, y + height)
box_targets = [[box[0] + box[2] / 2, box[1] + box[3] / 2, box[2], box[3], 0] for box in box_targets]
box_targets = [
[
[box[0], box[1]],
[box[0] + box[2], box[1]],
[box[0] + box[2], box[1] + box[3]],
[box[0], box[1] + box[3]],
] for box in box_targets
]
else:
# x, y, width, height -> xmin, ymin, xmax, ymax
box_targets = [[box[0], box[1], box[0] + box[2], box[1] + box[3]] for box in box_targets]
Expand Down
19 changes: 6 additions & 13 deletions doctr/datasets/sroie.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class SROIE(VisionDataset):

Args:
train: whether the subset should be the training one
rotated_bbox: whether polygons should be considered as rotated bounding box (instead of straight ones)
use_polygons: whether the annotated quadrangles should be kept as 4 (x, y) corner points (instead of being reduced to straight bounding boxes)
**kwargs: keyword arguments from `VisionDataset`.
"""

Expand All @@ -38,7 +38,7 @@ class SROIE(VisionDataset):
def __init__(
self,
train: bool = True,
rotated_bbox: bool = False,
use_polygons: bool = False,
**kwargs: Any,
) -> None:

Expand All @@ -65,17 +65,10 @@ def __init__(
coords = np.stack([np.array(list(map(int, row[:8])), dtype=np_dtype).reshape((4, 2))
for row in _rows], axis=0)

if rotated_bbox:
# x_center, y_center, w, h, alpha = 0
mins = coords.min(axis=1)
maxs = coords.max(axis=1)
box_targets = np.concatenate(
((mins + maxs) / 2, maxs - mins, np.zeros((coords.shape[0], 1))), axis=1)
else:
# xmin, ymin, xmax, ymax
box_targets = np.concatenate((coords.min(axis=1), coords.max(axis=1)), axis=1)

self.data.append((img_path, dict(boxes=box_targets, labels=labels)))
if not use_polygons:
coords = np.concatenate((coords.min(axis=1), coords.max(axis=1)), axis=1)

self.data.append((img_path, dict(boxes=coords, labels=labels)))

self.root = tmp_root

Expand Down
Loading