Commit eb7f59c

Merge branch 'mindee:main' into main
felixdittrich92 authored Dec 2, 2021
2 parents 47ed381 + 7244d43 commit eb7f59c
Showing 11 changed files with 22 additions and 28 deletions.
6 changes: 4 additions & 2 deletions doctr/models/builder.py
@@ -40,7 +40,8 @@ def __init__(
         self.paragraph_break = paragraph_break
         self.export_as_straight_boxes = export_as_straight_boxes

-    def _sort_boxes(self, boxes: np.ndarray) -> np.ndarray:
+    @staticmethod
+    def _sort_boxes(boxes: np.ndarray) -> np.ndarray:
         """Sort bounding boxes from top to bottom, left to right
         Args:
@@ -144,7 +145,8 @@ def _resolve_lines(self, boxes: np.ndarray) -> List[List[int]]:

         return lines

-    def _resolve_blocks(self, boxes: np.ndarray, lines: List[List[int]]) -> List[List[List[int]]]:
+    @staticmethod
+    def _resolve_blocks(boxes: np.ndarray, lines: List[List[int]]) -> List[List[List[int]]]:
         """Order lines to group them in blocks
         Args:
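Neither helper reads any instance attribute, so both are promoted to staticmethods. A minimal sketch of the pattern (the sorting body below is a placeholder, not the library's actual logic):

```python
import numpy as np


class DocumentBuilder:
    @staticmethod
    def _sort_boxes(boxes: np.ndarray) -> np.ndarray:
        # Placeholder: order boxes by ymin, then xmin (top-to-bottom, left-to-right)
        return boxes[np.lexsort((boxes[:, 0], boxes[:, 1]))]


# A staticmethod is callable on the class or an instance; no self is involved
boxes = np.array([[0.5, 0.6, 0.7, 0.8], [0.1, 0.1, 0.2, 0.2]])
sorted_boxes = DocumentBuilder._sort_boxes(boxes)
```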
2 changes: 1 addition & 1 deletion doctr/models/recognition/master/pytorch.py
@@ -217,8 +217,8 @@ def make_mask(self, target: torch.Tensor) -> torch.Tensor:
         combined_mask = target_padding_mask | look_ahead_mask
         return torch.tile(combined_mask.permute(1, 0, 2), (self.num_heads, 1, 1))

+    @staticmethod
     def compute_loss(
-        self,
         model_output: torch.Tensor,
         gt: torch.Tensor,
         seq_len: torch.Tensor,
2 changes: 1 addition & 1 deletion doctr/models/recognition/master/tensorflow.py
@@ -228,8 +228,8 @@ def make_mask(self, target: tf.Tensor) -> tf.Tensor:
         combined_mask = tf.maximum(target_padding_mask, look_ahead_mask)
         return combined_mask

+    @staticmethod
     def compute_loss(
-        self,
         model_output: tf.Tensor,
         gt: tf.Tensor,
         seq_len: List[int],
2 changes: 1 addition & 1 deletion doctr/models/recognition/sar/pytorch.py
@@ -206,8 +206,8 @@ def forward(

         return out

+    @staticmethod
     def compute_loss(
-        self,
         model_output: torch.Tensor,
         gt: torch.Tensor,
         seq_len: torch.Tensor,
2 changes: 1 addition & 1 deletion doctr/models/recognition/sar/tensorflow.py
@@ -213,8 +213,8 @@ def __init__(

         self.postprocessor = SARPostProcessor(vocab=vocab)

+    @staticmethod
     def compute_loss(
-        self,
         model_output: tf.Tensor,
         gt: tf.Tensor,
         seq_len: tf.Tensor,
6 changes: 3 additions & 3 deletions doctr/utils/metrics.py
@@ -345,7 +345,7 @@ class LocalizationConfusion:
     .. math::
         \forall Y \in \mathcal{B}^N, \forall X \in \mathcal{B}^M, \\
         Recall(X, Y) = \frac{1}{N} \sum\limits_{i=1}^N g_{X}(Y_i) \\
-        Precision(X, Y) = \frac{1}{M} \sum\limits_{i=1}^N g_{X}(Y_i) \\
+        Precision(X, Y) = \frac{1}{M} \sum\limits_{i=1}^M g_{X}(Y_i) \\
         meanIoU(X, Y) = \frac{1}{M} \sum\limits_{i=1}^M \max\limits_{j \in [1, N]} IoU(X_i, Y_j)

     with the function :math:`IoU(x, y)` being the Intersection over Union between bounding boxes :math:`x` and
@@ -442,7 +442,7 @@ class OCRMetric:
         \forall (B, L) \in \mathcal{B}^N \times \mathcal{L}^N,
         \forall (\hat{B}, \hat{L}) \in \mathcal{B}^M \times \mathcal{L}^M, \\
         Recall(B, \hat{B}, L, \hat{L}) = \frac{1}{N} \sum\limits_{i=1}^N h_{B,L}(\hat{B}_i, \hat{L}_i) \\
-        Precision(B, \hat{B}, L, \hat{L}) = \frac{1}{M} \sum\limits_{i=1}^N h_{B,L}(\hat{B}_i, \hat{L}_i) \\
+        Precision(B, \hat{B}, L, \hat{L}) = \frac{1}{M} \sum\limits_{i=1}^M h_{B,L}(\hat{B}_i, \hat{L}_i) \\
         meanIoU(B, \hat{B}) = \frac{1}{M} \sum\limits_{i=1}^M \max\limits_{j \in [1, N]} IoU(\hat{B}_i, B_j)

     with the function :math:`IoU(x, y)` being the Intersection over Union between bounding boxes :math:`x` and
@@ -572,7 +572,7 @@ class DetectionMetric:
         \forall (B, C) \in \mathcal{B}^N \times \mathcal{C}^N,
         \forall (\hat{B}, \hat{C}) \in \mathcal{B}^M \times \mathcal{C}^M, \\
         Recall(B, \hat{B}, C, \hat{C}) = \frac{1}{N} \sum\limits_{i=1}^N h_{B,C}(\hat{B}_i, \hat{C}_i) \\
-        Precision(B, \hat{B}, C, \hat{C}) = \frac{1}{M} \sum\limits_{i=1}^N h_{B,C}(\hat{B}_i, \hat{C}_i) \\
+        Precision(B, \hat{B}, C, \hat{C}) = \frac{1}{M} \sum\limits_{i=1}^M h_{B,C}(\hat{B}_i, \hat{C}_i) \\
         meanIoU(B, \hat{B}) = \frac{1}{M} \sum\limits_{i=1}^M \max\limits_{j \in [1, N]} IoU(\hat{B}_i, B_j)

     with the function :math:`IoU(x, y)` being the Intersection over Union between bounding boxes :math:`x` and
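These docstring fixes align the summation bound with the normalization: recall averages the match indicator over the N ground truths, while precision averages it over the M predictions, so its sum must run to M rather than N. A rough sketch of the corrected quantities, computed from a pairwise IoU matrix (a simplification; the actual metrics additionally enforce a one-to-one assignment between boxes):

```python
import numpy as np


def localization_scores(iou_matrix: np.ndarray, thresh: float = 0.5):
    """iou_matrix has shape (N ground truths, M predictions)."""
    recall = (iou_matrix.max(axis=1) >= thresh).mean()      # fraction of the N ground truths matched
    precision = (iou_matrix.max(axis=0) >= thresh).mean()   # fraction of the M predictions matched
    mean_iou = iou_matrix.max(axis=0).mean()                 # best IoU per prediction, averaged over M
    return recall, precision, mean_iou
```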
7 changes: 2 additions & 5 deletions references/detection/train_pytorch.py
@@ -37,8 +37,7 @@ def fit_one_epoch(model, train_loader, batch_transforms, optimizer, scheduler, m
     model.train()
     train_iter = iter(train_loader)
     # Iterate over the batches of the dataset
-    for _ in progress_bar(range(len(train_loader)), parent=mb):
-        images, targets = next(train_iter)
+    for images, targets in progress_bar(train_iter, parent=mb):

         if torch.cuda.is_available():
             images = images.cuda()
@@ -112,7 +111,6 @@ def main(args):
         img_folder=os.path.join(args.val_path, 'images'),
         label_path=os.path.join(args.val_path, 'labels.json'),
         sample_transforms=T.Resize((args.input_size, args.input_size)),
-        rotated_bbox=args.rotation
     )
     val_loader = DataLoader(
         val_set,
@@ -131,7 +129,7 @@ def main(args):
     batch_transforms = Normalize(mean=(0.798, 0.785, 0.772), std=(0.264, 0.2749, 0.287))

     # Load doctr model
-    model = detection.__dict__[args.arch](pretrained=args.pretrained)
+    model = detection.__dict__[args.arch](pretrained=args.pretrained, assume_straight_pages=not args.rotation)

     # Resume weights
     if isinstance(args.resume, str):
@@ -175,7 +173,6 @@ def main(args):
             T.RandomApply(T.ColorInversion(), .1),
             ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.02),
         ]),
-        rotated_bbox=args.rotation
     )

     train_loader = DataLoader(
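Across the reference training scripts, the batch loop no longer pairs range(len(loader)) with manual next() calls; the iterable is handed to progress_bar directly and unpacked in the for statement. A condensed PyTorch sketch of the resulting loop, with loss handling and scheduler steps simplified relative to the actual script:

```python
import torch
from fastprogress.fastprogress import progress_bar


def fit_one_epoch(model, train_loader, optimizer, mb):
    model.train()
    # progress_bar wraps the loader, so batches are unpacked directly in the for statement
    for images, targets in progress_bar(train_loader, parent=mb):
        if torch.cuda.is_available():
            images = images.cuda()
        optimizer.zero_grad()
        loss = model(images, targets)['loss']  # assumes the model returns a dict with a 'loss' entry
        loss.backward()
        optimizer.step()
```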
8 changes: 3 additions & 5 deletions references/detection/train_tensorflow.py
@@ -32,8 +32,7 @@
 def fit_one_epoch(model, train_loader, batch_transforms, optimizer, mb):
     train_iter = iter(train_loader)
     # Iterate over the batches of the dataset
-    for batch_step in progress_bar(range(train_loader.num_batches), parent=mb):
-        images, targets = next(train_iter)
+    for images, targets in progress_bar(train_iter, parent=mb):

         images = batch_transforms(images)

@@ -80,7 +79,6 @@ def main(args):
         img_folder=os.path.join(args.val_path, 'images'),
         label_path=os.path.join(args.val_path, 'labels.json'),
         sample_transforms=T.Resize((args.input_size, args.input_size)),
-        rotated_bbox=args.rotation
     )
     val_loader = DataLoader(val_set, batch_size=args.batch_size, shuffle=False, drop_last=False, workers=args.workers)
     print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
@@ -95,7 +93,8 @@ def main(args):
     # Load doctr model
     model = detection.__dict__[args.arch](
         pretrained=args.pretrained,
-        input_shape=(args.input_size, args.input_size, 3)
+        input_shape=(args.input_size, args.input_size, 3),
+        assume_straight_pages=not args.rotation,
     )

     # Resume weights
@@ -126,7 +125,6 @@ def main(args):
             T.RandomContrast(.3),
             T.RandomBrightness(.3),
         ]),
-        rotated_bbox=args.rotation
     )
     train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, drop_last=True, workers=args.workers)
     print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
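The other change in both detection scripts moves rotation handling from the dataset to the model: the rotated_bbox=args.rotation dataset flag is dropped and the detector is built with assume_straight_pages=not args.rotation instead. A hedged sketch of the TensorFlow construction, with db_resnet50 used only as an example architecture and a local variable standing in for args.rotation:

```python
from doctr.models import detection

rotation = True  # stand-in for args.rotation
model = detection.db_resnet50(
    pretrained=False,
    input_shape=(1024, 1024, 3),
    assume_straight_pages=not rotation,  # rotated boxes are now handled by the model, not the dataset
)
```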
9 changes: 4 additions & 5 deletions references/obj_detection/train_pytorch.py
@@ -27,9 +27,9 @@
 def convert_to_abs_coords(targets, img_shape):
     height, width = img_shape[-2:]

-    for idx in range(len(targets)):
-        targets[idx]['boxes'][:, 0::2] = (targets[idx]['boxes'][:, 0::2] * width).round()
-        targets[idx]['boxes'][:, 1::2] = (targets[idx]['boxes'][:, 1::2] * height).round()
+    for idx, t in enumerate(targets):
+        targets[idx]['boxes'][:, 0::2] = (t['boxes'][:, 0::2] * width).round()
+        targets[idx]['boxes'][:, 1::2] = (t['boxes'][:, 1::2] * height).round()

     targets = [{
         "boxes": torch.from_numpy(t['boxes']).to(dtype=torch.float32),
@@ -44,8 +44,7 @@ def fit_one_epoch(model, train_loader, optimizer, scheduler, mb, ):
     model.train()
     train_iter = iter(train_loader)
     # Iterate over the batches of the dataset
-    for _ in progress_bar(range(len(train_loader)), parent=mb):
-        images, targets = next(train_iter)
+    for images, targets in progress_bar(train_iter, parent=mb):
        optimizer.zero_grad()
        targets = convert_to_abs_coords(targets, images.shape)
        if torch.cuda.is_available():
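In the object-detection script, convert_to_abs_coords now iterates with enumerate instead of indexing targets repeatedly; the index is kept only for the in-place assignment. A condensed sketch of the loop portion (the subsequent conversion to torch tensors is unchanged and omitted here):

```python
def convert_to_abs_coords(targets, img_shape):
    """Scale relative box coordinates in-place to absolute pixel values (loop portion only)."""
    height, width = img_shape[-2:]
    for idx, t in enumerate(targets):
        # enumerate yields the element alongside its index, avoiding targets[idx] on the right-hand side
        targets[idx]['boxes'][:, 0::2] = (t['boxes'][:, 0::2] * width).round()
        targets[idx]['boxes'][:, 1::2] = (t['boxes'][:, 1::2] * height).round()
    return targets
```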
3 changes: 1 addition & 2 deletions references/recognition/train_pytorch.py
@@ -38,8 +38,7 @@ def fit_one_epoch(model, train_loader, batch_transforms, optimizer, scheduler, m
     model.train()
     train_iter = iter(train_loader)
     # Iterate over the batches of the dataset
-    for _ in progress_bar(range(len(train_loader)), parent=mb):
-        images, targets = next(train_iter)
+    for images, targets in progress_bar(train_iter, parent=mb):

         if torch.cuda.is_available():
             images = images.cuda()
3 changes: 1 addition & 2 deletions references/recognition/train_tensorflow.py
@@ -33,8 +33,7 @@
 def fit_one_epoch(model, train_loader, batch_transforms, optimizer, mb):
     train_iter = iter(train_loader)
     # Iterate over the batches of the dataset
-    for batch_step in progress_bar(range(train_loader.num_batches), parent=mb):
-        images, targets = next(train_iter)
+    for images, targets in progress_bar(train_iter, parent=mb):

         images = batch_transforms(images)
