Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ICDAR2003 dataset integration #653

Merged
merged 26 commits into from
Nov 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
e7122a5
start synth
felixdittrich92 Nov 13, 2021
4fa0aff
cleanup
felixdittrich92 Nov 15, 2021
3ffdcc5
Merge branch 'mindee:main' into main
felixdittrich92 Nov 15, 2021
b74f06a
start synth
felixdittrich92 Nov 15, 2021
1a661e0
add synthtext
felixdittrich92 Nov 15, 2021
6270c93
add docu and tests
felixdittrich92 Nov 15, 2021
d74f148
apply code factor suggestions
felixdittrich92 Nov 15, 2021
9099e95
apply changes
felixdittrich92 Nov 15, 2021
23eca0d
Merge branch 'mindee:main' into main
felixdittrich92 Nov 15, 2021
7ba31e1
clean
felixdittrich92 Nov 15, 2021
02a8104
Merge branch 'mindee:main' into main
felixdittrich92 Nov 15, 2021
6955110
Merge branch 'mindee:main' into main
felixdittrich92 Nov 16, 2021
8fbeb30
Merge branch 'mindee:main' into main
felixdittrich92 Nov 16, 2021
7408935
Merge branch 'mindee:main' into main
felixdittrich92 Nov 17, 2021
a2b0fbc
Merge branch 'mindee:main' into main
felixdittrich92 Nov 19, 2021
a9cbd14
Merge branch 'mindee:main' into main
felixdittrich92 Nov 20, 2021
b245443
Merge branch 'mindee:main' into main
felixdittrich92 Nov 23, 2021
0cb2f7b
Merge branch 'mindee:main' into main
felixdittrich92 Nov 23, 2021
743c54a
Merge branch 'mindee:main' into main
felixdittrich92 Nov 25, 2021
1c1cbcb
Merge branch 'mindee:main' into main
felixdittrich92 Nov 25, 2021
cfbd898
Merge branch 'mindee:main' into main
felixdittrich92 Nov 30, 2021
2c764c2
start icdar2003
felixdittrich92 Nov 26, 2021
8ffe184
to relative
felixdittrich92 Nov 26, 2021
67128f9
up
felixdittrich92 Nov 26, 2021
2cee810
skip empty and to relative coords
felixdittrich92 Nov 29, 2021
247ef92
apply changes
felixdittrich92 Nov 30, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Here are all datasets that are available through docTR:
.. autoclass:: IIIT5K
.. autoclass:: SVT
.. autoclass:: SynthText
.. autoclass:: IC03


Data Loading
Expand Down
1 change: 1 addition & 0 deletions doctr/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .detection import *
from .doc_artefacts import *
from .funsd import *
from .ic03 import *
from .iiit5k import *
from .ocr import *
from .recognition import *
Expand Down
100 changes: 100 additions & 0 deletions doctr/datasets/ic03.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Copyright (C) 2021, Mindee.

# This program is licensed under the Apache License version 2.
# See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0.txt> for full license details.

import os
from typing import Any, Callable, Dict, List, Optional, Tuple

import defusedxml.ElementTree as ET
import numpy as np

from .datasets import VisionDataset

__all__ = ['IC03']


class IC03(VisionDataset):
    """ICDAR 2003 scene-text dataset, introduced in `"ICDAR 2003 Robust Reading Competitions: Entries, Results
    and Future Directions" <http://www.iapr-tc11.org/mediawiki/index.php?title=ICDAR_2003_Robust_Reading_Competitions>`_.

    Every sample stored in ``self.data`` is a pair ``(image name, target)`` where ``target`` is a dictionary
    holding ``boxes`` (a float32 array with coordinates relative to the image resolution) and ``labels``
    (the non-empty texts found under each annotated rectangle). Images without any rectangle are skipped.

    Example::
        >>> from doctr.datasets import IC03
        >>> train_set = IC03(train=True, download=True)
        >>> img, target = train_set[0]

    Args:
        train: whether the training subset should be loaded (the test subset otherwise)
        sample_transforms: composable transformations that will be applied to each image
        rotated_bbox: if True, boxes are (x_center, y_center, width, height, rotation);
            otherwise they are (x_min, y_min, x_max, y_max)
        **kwargs: keyword arguments from `VisionDataset`.
    """

    # Each subset is described by (archive URL, SHA256 of the archive, local file name)
    TRAIN = (
        'http://www.iapr-tc11.org/dataset/ICDAR2003_RobustReading/TrialTrain/scene.zip',
        '9d86df514eb09dd693fb0b8c671ef54a0cfe02e803b1bbef9fc676061502eb94',
        'ic03_train.zip',
    )
    TEST = (
        'http://www.iapr-tc11.org/dataset/ICDAR2003_RobustReading/TrialTest/scene.zip',
        'dbc4b5fd5d04616b8464a1b42ea22db351ee22c2546dd15ac35611857ea111f8',
        'ic03_test.zip',
    )

    def __init__(
        self,
        train: bool = True,
        sample_transforms: Optional[Callable[[Any], Any]] = None,
        rotated_bbox: bool = False,
        **kwargs: Any,
    ) -> None:

        url, sha256, file_name = self.TRAIN if train else self.TEST
        super().__init__(url, file_name, sha256, True, **kwargs)
        self.sample_transforms = sample_transforms
        self.train = train
        self.data: List[Tuple[str, Dict[str, Any]]] = []

        # The annotation file sits next to the images, inside a subset-specific folder
        subset_dir = os.path.join(self.root, 'SceneTrialTrain' if self.train else 'SceneTrialTest')
        annotations = ET.parse(os.path.join(subset_dir, 'words.xml')).getroot()

        for entry in annotations:
            # Each entry is expected to hold exactly three children, in this order
            name, resolution, rectangles = entry

            # Fail early if an annotated image is missing on disk
            img_path = os.path.join(subset_dir, name.text)
            if not os.path.exists(img_path):
                raise FileNotFoundError(f"unable to locate {img_path}")

            abs_boxes = []
            for rect in rectangles:
                x, y = float(rect.attrib['x']), float(rect.attrib['y'])
                width, height = float(rect.attrib['width']), float(rect.attrib['height'])
                if rotated_bbox:
                    # x_center, y_center, width, height, rotation
                    abs_boxes.append(
                        [x + width / 2, y + height / 2, width, height, float(rect.attrib['rotation'])]
                    )
                else:
                    # x_min, y_min, x_max, y_max
                    abs_boxes.append([x, y, x + width, y + height])

            # Images without any annotated rectangle are left out of the dataset
            if not abs_boxes:
                continue

            # Scale to relative coordinates: columns 0 & 2 follow the image width, columns 1 & 3 its
            # height (a fifth rotation column, when present, is left untouched)
            img_w, img_h = int(resolution.attrib['x']), int(resolution.attrib['y'])
            boxes = np.asarray(abs_boxes, dtype=np.float32)
            boxes[:, [0, 2]] /= img_w
            boxes[:, [1, 3]] /= img_h

            # Collect the non-empty text of every child element of every rectangle
            labels = [child.text for rect in rectangles for child in rect if child.text]

            self.data.append((name.text, {'boxes': boxes, 'labels': labels}))

        # From now on, image names resolve relative to the subset folder
        self.root = subset_dir

    def extra_repr(self) -> str:
        """Return the subset flag shown in the dataset representation."""
        return f"train={self.train}"
4 changes: 3 additions & 1 deletion doctr/datasets/svt.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,15 @@ def __init__(
raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, name.text)}")

if rotated_bbox:
# x_center, y_center, width, height, 0
_boxes = [
[float(rect.attrib['x']) + float(rect.attrib['width']) / 2,
float(rect.attrib['y']) + float(rect.attrib['height']) / 2,
float(rect.attrib['width']), float(rect.attrib['height'])]
float(rect.attrib['width']), float(rect.attrib['height']), 0.0]
for rect in rectangles
]
else:
# x_min, y_min, x_max, y_max
_boxes = [
[float(rect.attrib['x']), float(rect.attrib['y']),
float(rect.attrib['x']) + float(rect.attrib['width']),
Expand Down
2 changes: 2 additions & 0 deletions tests/pytorch/test_datasets_pt.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ def test_visiondataset():
['SVT', False, [512, 512], 249, False],
['SynthText', True, [512, 512], 27, True], # Actual set has 772875 samples
['SynthText', False, [512, 512], 3, False], # Actual set has 85875 samples
['IC03', True, [512, 512], 246, True],
['IC03', False, [512, 512], 249, False],
],
)
def test_dataset(dataset_name, train, input_size, size, rotate):
Expand Down
2 changes: 2 additions & 0 deletions tests/tensorflow/test_datasets_tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
['SVT', False, [512, 512], 249, False],
['SynthText', True, [512, 512], 27, True], # Actual set has 772875 samples
['SynthText', False, [512, 512], 3, False], # Actual set has 85875 samples
['IC03', True, [512, 512], 246, True],
['IC03', False, [512, 512], 249, False],
],
)
def test_dataset(dataset_name, train, input_size, size, rotate):
Expand Down