ICDAR2013 dataset integration #662

Merged (37 commits, Dec 10, 2021)

Commits:
e7122a5  start synth (felixdittrich92, Nov 13, 2021)
4fa0aff  cleanup (felixdittrich92, Nov 15, 2021)
3ffdcc5  Merge branch 'mindee:main' into main (felixdittrich92, Nov 15, 2021)
b74f06a  start synth (felixdittrich92, Nov 15, 2021)
1a661e0  add synthtext (felixdittrich92, Nov 15, 2021)
6270c93  add docu and tests (felixdittrich92, Nov 15, 2021)
d74f148  apply code factor suggestions (felixdittrich92, Nov 15, 2021)
9099e95  apply changes (felixdittrich92, Nov 15, 2021)
23eca0d  Merge branch 'mindee:main' into main (felixdittrich92, Nov 15, 2021)
7ba31e1  clean (felixdittrich92, Nov 15, 2021)
02a8104  Merge branch 'mindee:main' into main (felixdittrich92, Nov 15, 2021)
6955110  Merge branch 'mindee:main' into main (felixdittrich92, Nov 16, 2021)
8fbeb30  Merge branch 'mindee:main' into main (felixdittrich92, Nov 16, 2021)
7408935  Merge branch 'mindee:main' into main (felixdittrich92, Nov 17, 2021)
a2b0fbc  Merge branch 'mindee:main' into main (felixdittrich92, Nov 19, 2021)
a9cbd14  Merge branch 'mindee:main' into main (felixdittrich92, Nov 20, 2021)
b245443  Merge branch 'mindee:main' into main (felixdittrich92, Nov 23, 2021)
0cb2f7b  Merge branch 'mindee:main' into main (felixdittrich92, Nov 23, 2021)
743c54a  Merge branch 'mindee:main' into main (felixdittrich92, Nov 25, 2021)
1c1cbcb  Merge branch 'mindee:main' into main (felixdittrich92, Nov 25, 2021)
cfbd898  Merge branch 'mindee:main' into main (felixdittrich92, Nov 30, 2021)
8787f74  Merge branch 'mindee:main' into main (felixdittrich92, Nov 30, 2021)
47ed381  Merge branch 'mindee:main' into main (felixdittrich92, Nov 30, 2021)
eb7f59c  Merge branch 'mindee:main' into main (felixdittrich92, Dec 2, 2021)
efaa2c0  Merge branch 'mindee:main' into main (felixdittrich92, Dec 3, 2021)
f7288ca  Merge branch 'mindee:main' into main (felixdittrich92, Dec 3, 2021)
044f523  Merge branch 'mindee:main' into main (felixdittrich92, Dec 7, 2021)
9dc3983  Merge branch 'mindee:main' into main (felixdittrich92, Dec 7, 2021)
fc4c2d9  Merge branch 'mindee:main' into main (felixdittrich92, Dec 7, 2021)
78b8d27  Merge branch 'mindee:main' into main (felixdittrich92, Dec 8, 2021)
d26341b  Merge branch 'mindee:main' into main (felixdittrich92, Dec 9, 2021)
6338cac  Merge branch 'mindee:main' into main (felixdittrich92, Dec 10, 2021)
351e008  opening ic13 (felixdittrich92, Nov 30, 2021)
f046b63  opening ic13 (felixdittrich92, Nov 30, 2021)
ebb7a82  update tests (felixdittrich92, Nov 30, 2021)
dc4a033  change tests (felixdittrich92, Nov 30, 2021)
e2d15f2  apply changes (felixdittrich92, Dec 10, 2021)
1 change: 1 addition & 0 deletions docs/source/datasets.rst
@@ -23,6 +23,7 @@ Here are all datasets that are available through docTR:
.. autoclass:: SVT
.. autoclass:: SynthText
.. autoclass:: IC03
.. autoclass:: IC13


Data Loading
1 change: 1 addition & 0 deletions doctr/datasets/__init__.py
@@ -6,6 +6,7 @@
from .doc_artefacts import *
from .funsd import *
from .ic03 import *
from .ic13 import *
from .iiit5k import *
from .ocr import *
from .recognition import *
91 changes: 91 additions & 0 deletions doctr/datasets/ic13.py
@@ -0,0 +1,91 @@
# Copyright (C) 2021, Mindee.

# This program is licensed under the Apache License version 2.
# See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0.txt> for full license details.

import csv
import os
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple

import numpy as np

from .datasets import AbstractDataset

__all__ = ["IC13"]


class IC13(AbstractDataset):
"""IC13 dataset from `"ICDAR 2013 Robust Reading Competition" <https://rrc.cvc.uab.es/>`_.
Example::
>>> # NOTE: You need to download both image and label parts from Focused Scene Text challenge Task2.1 2013-2015.
>>> from doctr.datasets import IC13
>>> train_set = IC13(img_folder="/path/to/Challenge2_Training_Task12_Images",
>>> label_folder="/path/to/Challenge2_Training_Task1_GT")
>>> img, target = train_set[0]
>>> test_set = IC13(img_folder="/path/to/Challenge2_Test_Task12_Images",
>>> label_folder="/path/to/Challenge2_Test_Task1_GT")
>>> img, target = test_set[0]
Args:
img_folder: folder with all the images of the dataset
label_folder: folder with all annotation files for the images
sample_transforms: composable transformations that will be applied to each image
rotated_bbox: whether polygons should be considered as rotated bounding box (instead of straight ones)
"""

def __init__(
self,
img_folder: str,
label_folder: str,
sample_transforms: Optional[Callable[[Any], Any]] = None,
rotated_bbox: bool = False,
) -> None:
super().__init__(img_folder)
self.sample_transforms = sample_transforms

# File existence check
if not os.path.exists(label_folder) or not os.path.exists(img_folder):
raise FileNotFoundError(
f"unable to locate {label_folder if not os.path.exists(label_folder) else img_folder}")

self.data: List[Tuple[Path, Dict[str, Any]]] = []
np_dtype = np.float32

img_names = os.listdir(img_folder)

for img_name in img_names:

img_path = Path(img_folder, img_name)
label_path = Path(label_folder, "gt_" + Path(img_name).stem + ".txt")

            with open(label_path, newline='\n') as f:
                _lines = [
                    [val[:-1] if val.endswith(",") else val for val in row]
                    for row in csv.reader(f, delimiter=' ', quotechar="'")
                ]
            labels = [line[-1] for line in _lines]
            # xmin, ymin, xmax, ymax
            box_targets = np.array([list(map(int, line[:4])) for line in _lines], dtype=np_dtype)
            if rotated_bbox:
                # x_center, y_center, width, height, 0
                box_targets = np.array([[coords[0] + (coords[2] - coords[0]) / 2,
                                         coords[1] + (coords[3] - coords[1]) / 2,
                                         (coords[2] - coords[0]),
                                         (coords[3] - coords[1]), 0.0] for coords in box_targets], dtype=np_dtype)

            self.data.append((img_path, dict(boxes=box_targets, labels=labels)))

    def __getitem__(self, index: int) -> Tuple[np.ndarray, Dict[str, Any]]:
        img, target = self._read_sample(index)
        h, w = self._get_img_shape(img)
        if self.sample_transforms is not None:
            img = self.sample_transforms(img)

        # Boxes
        boxes = target['boxes'].copy()
        boxes[..., [0, 2]] /= w
        boxes[..., [1, 3]] /= h
        boxes = boxes.clip(0, 1)
        target['boxes'] = boxes

        return img, target
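
Note: to make the rotated_bbox branch above concrete, here is a small worked example of the same straight-to-rotated conversion with made-up coordinates (a standalone sketch, not part of the PR):

import numpy as np

# Straight box in absolute pixels: xmin, ymin, xmax, ymax
straight = np.array([100., 200., 300., 260.], dtype=np.float32)

xmin, ymin, xmax, ymax = straight
rotated = np.array([
    xmin + (xmax - xmin) / 2,  # x_center = 200.0
    ymin + (ymax - ymin) / 2,  # y_center = 230.0
    xmax - xmin,               # width    = 200.0
    ymax - ymin,               # height   = 60.0
    0.0,                       # angle: source boxes are axis-aligned, hence 0
], dtype=np.float32)
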
20 changes: 20 additions & 0 deletions tests/conftest.py
@@ -124,3 +124,23 @@ def mock_ocrdataset(tmpdir_factory, mock_image_stream):
            f.write(file.getbuffer())

    return str(image_folder), str(label_file)


@pytest.fixture(scope="session")
def mock_ic13(tmpdir_factory, mock_image_stream):
    file = BytesIO(mock_image_stream)
    image_folder = tmpdir_factory.mktemp("images")
    label_folder = tmpdir_factory.mktemp("labels")
    labels = ["1309, 2240, 1440, 2341, 'I'\n",
              "800, 2240, 1440, 2341, 'am'\n",
              "500, 2240, 1440, 2341, 'a'\n",
              "900, 2240, 1440, 2341, 'jedi'\n",
              "400, 2240, 1440, 2341, '!'"]
    for i in range(5):
        fn_l = label_folder.join(f"gt_mock_image_file_{i}.txt")
        with open(fn_l, 'w') as f:
            f.writelines(labels)
        fn_i = image_folder.join(f"mock_image_file_{i}.jpg")
        with open(fn_i, 'wb') as f:
            f.write(file.getbuffer())
    return str(image_folder), str(label_folder)
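
Note: for reference, this is how one of the mock label lines above is tokenized by the csv.reader call in IC13 (a standalone sketch of the same parsing steps, not code from the PR):

import csv
import io

line = "1309, 2240, 1440, 2341, 'I'\n"
row = next(csv.reader(io.StringIO(line), delimiter=' ', quotechar="'"))
# row == ['1309,', '2240,', '1440,', '2341,', 'I']  (quote characters stripped by the reader)
cleaned = [val[:-1] if val.endswith(",") else val for val in row]
# cleaned == ['1309', '2240', '1440', '2341', 'I']  (trailing commas removed)
box = list(map(int, cleaned[:4]))  # [1309, 2240, 1440, 2341] -> xmin, ymin, xmax, ymax
label = cleaned[-1]                # 'I'
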
31 changes: 31 additions & 0 deletions tests/pytorch/test_datasets_pt.py
@@ -201,3 +201,34 @@ def test_charactergenerator():
    assert isinstance(images, torch.Tensor) and images.shape == (2, 3, *input_size)
    assert isinstance(targets, torch.Tensor) and targets.shape == (2,)
    assert targets.dtype == torch.int64


@pytest.mark.parametrize(
    "size, rotate",
    [
        [5, True],  # Actual set has 229 train and 233 test samples
        [5, False],
    ],
)
def test_ic13_dataset(mock_ic13, size, rotate):
    input_size = (512, 512)
    ds = datasets.IC13(
        *mock_ic13,
        sample_transforms=Resize(input_size),
        rotated_bbox=rotate,
    )

    assert len(ds) == size
    img, target = ds[0]
    assert isinstance(img, torch.Tensor)
    assert img.shape[-2:] == input_size
    assert img.dtype == torch.float32
    assert isinstance(target, dict)
    assert isinstance(target['boxes'], np.ndarray) and np.all((target['boxes'] <= 1) & (target['boxes'] >= 0))
    assert isinstance(target['labels'], list) and all(isinstance(s, str) for s in target['labels'])

    loader = DataLoader(ds, batch_size=2, collate_fn=ds.collate_fn)
    images, targets = next(iter(loader))
    assert isinstance(images, torch.Tensor) and images.shape == (2, 3, *input_size)
    assert isinstance(targets, list) and all(isinstance(elt, dict) for elt in targets)
31 changes: 31 additions & 0 deletions tests/tensorflow/test_datasets_tf.py
@@ -187,3 +187,34 @@ def test_charactergenerator():
    assert isinstance(images, tf.Tensor) and images.shape == (2, *input_size, 3)
    assert isinstance(targets, tf.Tensor) and targets.shape == (2,)
    assert targets.dtype == tf.int32


@pytest.mark.parametrize(
    "size, rotate",
    [
        [5, True],  # Actual set has 229 train and 233 test samples
        [5, False],
    ],
)
def test_ic13_dataset(mock_ic13, size, rotate):
    input_size = (512, 512)
    ds = datasets.IC13(
        *mock_ic13,
        sample_transforms=Resize(input_size),
        rotated_bbox=rotate,
    )

    assert len(ds) == size
    img, target = ds[0]
    assert isinstance(img, tf.Tensor)
    assert img.shape[:2] == input_size
    assert img.dtype == tf.float32
    assert isinstance(target, dict)
    assert isinstance(target['boxes'], np.ndarray) and np.all((target['boxes'] <= 1) & (target['boxes'] >= 0))
    assert isinstance(target['labels'], list) and all(isinstance(s, str) for s in target['labels'])

    loader = DataLoader(ds, batch_size=2)
    images, targets = next(iter(loader))
    assert isinstance(images, tf.Tensor) and images.shape == (2, *input_size, 3)
    assert isinstance(targets, list) and all(isinstance(elt, dict) for elt in targets)
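
Note: since __getitem__ returns boxes normalized to the resized image, a consumer that needs pixel coordinates has to scale them back. A minimal NumPy sketch assuming the default straight-box layout [xmin, ymin, xmax, ymax] (the function name is illustrative, not part of the library):

import numpy as np

def denormalize_boxes(boxes: np.ndarray, height: int, width: int) -> np.ndarray:
    """Map relative [0, 1] box coordinates back to absolute pixels."""
    abs_boxes = boxes.copy()
    abs_boxes[..., [0, 2]] *= width   # xmin, xmax
    abs_boxes[..., [1, 3]] *= height  # ymin, ymax
    return abs_boxes

# e.g. with the (512, 512) resize used in the tests:
# abs_boxes = denormalize_boxes(target['boxes'], 512, 512)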