Commit eb7f59c

Merge branch 'mindee:main' into main
felixdittrich92 authored Dec 2, 2021
2 parents 47ed381 + 7244d43 commit eb7f59c
Showing 11 changed files with 22 additions and 28 deletions.
6 changes: 4 additions & 2 deletions doctr/models/builder.py
@@ -40,7 +40,8 @@ def __init__(
         self.paragraph_break = paragraph_break
         self.export_as_straight_boxes = export_as_straight_boxes

-    def _sort_boxes(self, boxes: np.ndarray) -> np.ndarray:
+    @staticmethod
+    def _sort_boxes(boxes: np.ndarray) -> np.ndarray:
         """Sort bounding boxes from top to bottom, left to right
         Args:
@@ -144,7 +145,8 @@ def _resolve_lines(self, boxes: np.ndarray) -> List[List[int]]:

         return lines

-    def _resolve_blocks(self, boxes: np.ndarray, lines: List[List[int]]) -> List[List[List[int]]]:
+    @staticmethod
+    def _resolve_blocks(boxes: np.ndarray, lines: List[List[int]]) -> List[List[List[int]]]:
         """Order lines to group them in blocks
         Args:
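Neither helper reads any instance attribute, so both are promoted to staticmethods. A minimal sketch of the pattern (the sorting body below is a placeholder, not the library's actual logic):

```python
import numpy as np


class DocumentBuilder:
    @staticmethod
    def _sort_boxes(boxes: np.ndarray) -> np.ndarray:
        # Placeholder: order boxes by ymin, then xmin (top-to-bottom, left-to-right)
        return boxes[np.lexsort((boxes[:, 0], boxes[:, 1]))]


# A staticmethod is callable on the class or an instance; no self is involved
boxes = np.array([[0.5, 0.6, 0.7, 0.8], [0.1, 0.1, 0.2, 0.2]])
sorted_boxes = DocumentBuilder._sort_boxes(boxes)
```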
2 changes: 1 addition & 1 deletion doctr/models/recognition/master/pytorch.py
@@ -217,8 +217,8 @@ def make_mask(self, target: torch.Tensor) -> torch.Tensor:
         combined_mask = target_padding_mask | look_ahead_mask
         return torch.tile(combined_mask.permute(1, 0, 2), (self.num_heads, 1, 1))

+    @staticmethod
     def compute_loss(
-        self,
         model_output: torch.Tensor,
         gt: torch.Tensor,
         seq_len: torch.Tensor,
2 changes: 1 addition & 1 deletion doctr/models/recognition/master/tensorflow.py
@@ -228,8 +228,8 @@ def make_mask(self, target: tf.Tensor) -> tf.Tensor:
         combined_mask = tf.maximum(target_padding_mask, look_ahead_mask)
         return combined_mask

+    @staticmethod
     def compute_loss(
-        self,
         model_output: tf.Tensor,
         gt: tf.Tensor,
         seq_len: List[int],
2 changes: 1 addition & 1 deletion doctr/models/recognition/sar/pytorch.py
@@ -206,8 +206,8 @@ def forward(

         return out

+    @staticmethod
     def compute_loss(
-        self,
         model_output: torch.Tensor,
         gt: torch.Tensor,
         seq_len: torch.Tensor,
2 changes: 1 addition & 1 deletion doctr/models/recognition/sar/tensorflow.py
@@ -213,8 +213,8 @@ def __init__(

         self.postprocessor = SARPostProcessor(vocab=vocab)

+    @staticmethod
     def compute_loss(
-        self,
         model_output: tf.Tensor,
         gt: tf.Tensor,
         seq_len: tf.Tensor,
6 changes: 3 additions & 3 deletions doctr/utils/metrics.py
@@ -345,7 +345,7 @@ class LocalizationConfusion:
     .. math::
         \forall Y \in \mathcal{B}^N, \forall X \in \mathcal{B}^M, \\
         Recall(X, Y) = \frac{1}{N} \sum\limits_{i=1}^N g_{X}(Y_i) \\
-        Precision(X, Y) = \frac{1}{M} \sum\limits_{i=1}^N g_{X}(Y_i) \\
+        Precision(X, Y) = \frac{1}{M} \sum\limits_{i=1}^M g_{X}(Y_i) \\
         meanIoU(X, Y) = \frac{1}{M} \sum\limits_{i=1}^M \max\limits_{j \in [1, N]} IoU(X_i, Y_j)

     with the function :math:`IoU(x, y)` being the Intersection over Union between bounding boxes :math:`x` and
@@ -442,7 +442,7 @@ class OCRMetric:
         \forall (B, L) \in \mathcal{B}^N \times \mathcal{L}^N,
         \forall (\hat{B}, \hat{L}) \in \mathcal{B}^M \times \mathcal{L}^M, \\
         Recall(B, \hat{B}, L, \hat{L}) = \frac{1}{N} \sum\limits_{i=1}^N h_{B,L}(\hat{B}_i, \hat{L}_i) \\
-        Precision(B, \hat{B}, L, \hat{L}) = \frac{1}{M} \sum\limits_{i=1}^N h_{B,L}(\hat{B}_i, \hat{L}_i) \\
+        Precision(B, \hat{B}, L, \hat{L}) = \frac{1}{M} \sum\limits_{i=1}^M h_{B,L}(\hat{B}_i, \hat{L}_i) \\
         meanIoU(B, \hat{B}) = \frac{1}{M} \sum\limits_{i=1}^M \max\limits_{j \in [1, N]} IoU(\hat{B}_i, B_j)

     with the function :math:`IoU(x, y)` being the Intersection over Union between bounding boxes :math:`x` and
@@ -572,7 +572,7 @@ class DetectionMetric:
         \forall (B, C) \in \mathcal{B}^N \times \mathcal{C}^N,
         \forall (\hat{B}, \hat{C}) \in \mathcal{B}^M \times \mathcal{C}^M, \\
         Recall(B, \hat{B}, C, \hat{C}) = \frac{1}{N} \sum\limits_{i=1}^N h_{B,C}(\hat{B}_i, \hat{C}_i) \\
-        Precision(B, \hat{B}, C, \hat{C}) = \frac{1}{M} \sum\limits_{i=1}^N h_{B,C}(\hat{B}_i, \hat{C}_i) \\
+        Precision(B, \hat{B}, C, \hat{C}) = \frac{1}{M} \sum\limits_{i=1}^M h_{B,C}(\hat{B}_i, \hat{C}_i) \\
         meanIoU(B, \hat{B}) = \frac{1}{M} \sum\limits_{i=1}^M \max\limits_{j \in [1, N]} IoU(\hat{B}_i, B_j)

     with the function :math:`IoU(x, y)` being the Intersection over Union between bounding boxes :math:`x` and
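These docstring fixes align the summation bound with the normalization: recall averages the match indicator over the N ground truths, while precision averages it over the M predictions, so its sum must run to M rather than N. A rough sketch of the corrected quantities, computed from a pairwise IoU matrix (a simplification; the actual metrics additionally enforce a one-to-one assignment between boxes):

```python
import numpy as np


def localization_scores(iou_matrix: np.ndarray, thresh: float = 0.5):
    """iou_matrix has shape (N ground truths, M predictions)."""
    recall = (iou_matrix.max(axis=1) >= thresh).mean()      # fraction of the N ground truths matched
    precision = (iou_matrix.max(axis=0) >= thresh).mean()   # fraction of the M predictions matched
    mean_iou = iou_matrix.max(axis=0).mean()                 # best IoU per prediction, averaged over M
    return recall, precision, mean_iou
```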
7 changes: 2 additions & 5 deletions references/detection/train_pytorch.py
@@ -37,8 +37,7 @@ def fit_one_epoch(model, train_loader, batch_transforms, optimizer, scheduler, m
     model.train()
     train_iter = iter(train_loader)
     # Iterate over the batches of the dataset
-    for _ in progress_bar(range(len(train_loader)), parent=mb):
-        images, targets = next(train_iter)
+    for images, targets in progress_bar(train_iter, parent=mb):

         if torch.cuda.is_available():
             images = images.cuda()
@@ -112,7 +111,6 @@ def main(args):
         img_folder=os.path.join(args.val_path, 'images'),
         label_path=os.path.join(args.val_path, 'labels.json'),
         sample_transforms=T.Resize((args.input_size, args.input_size)),
-        rotated_bbox=args.rotation
     )
     val_loader = DataLoader(
         val_set,
@@ -131,7 +129,7 @@ def main(args):
     batch_transforms = Normalize(mean=(0.798, 0.785, 0.772), std=(0.264, 0.2749, 0.287))

     # Load doctr model
-    model = detection.__dict__[args.arch](pretrained=args.pretrained)
+    model = detection.__dict__[args.arch](pretrained=args.pretrained, assume_straight_pages=not args.rotation)

     # Resume weights
     if isinstance(args.resume, str):
@@ -175,7 +173,6 @@ def main(args):
             T.RandomApply(T.ColorInversion(), .1),
             ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.02),
         ]),
-        rotated_bbox=args.rotation
     )

     train_loader = DataLoader(
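Across the reference training scripts, the batch loop no longer pairs range(len(loader)) with manual next() calls; the iterable is handed to progress_bar directly and unpacked in the for statement. A condensed PyTorch sketch of the resulting loop, with loss handling and scheduler steps simplified relative to the actual script:

```python
import torch
from fastprogress.fastprogress import progress_bar


def fit_one_epoch(model, train_loader, optimizer, mb):
    model.train()
    # progress_bar wraps the loader, so batches are unpacked directly in the for statement
    for images, targets in progress_bar(train_loader, parent=mb):
        if torch.cuda.is_available():
            images = images.cuda()
        optimizer.zero_grad()
        loss = model(images, targets)['loss']  # assumes the model returns a dict with a 'loss' entry
        loss.backward()
        optimizer.step()
```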
8 changes: 3 additions & 5 deletions references/detection/train_tensorflow.py
@@ -32,8 +32,7 @@
 def fit_one_epoch(model, train_loader, batch_transforms, optimizer, mb):
     train_iter = iter(train_loader)
     # Iterate over the batches of the dataset
-    for batch_step in progress_bar(range(train_loader.num_batches), parent=mb):
-        images, targets = next(train_iter)
+    for images, targets in progress_bar(train_iter, parent=mb):

         images = batch_transforms(images)

@@ -80,7 +79,6 @@ def main(args):
         img_folder=os.path.join(args.val_path, 'images'),
         label_path=os.path.join(args.val_path, 'labels.json'),
         sample_transforms=T.Resize((args.input_size, args.input_size)),
-        rotated_bbox=args.rotation
     )
     val_loader = DataLoader(val_set, batch_size=args.batch_size, shuffle=False, drop_last=False, workers=args.workers)
     print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
@@ -95,7 +93,8 @@ def main(args):
     # Load doctr model
     model = detection.__dict__[args.arch](
         pretrained=args.pretrained,
-        input_shape=(args.input_size, args.input_size, 3)
+        input_shape=(args.input_size, args.input_size, 3),
+        assume_straight_pages=not args.rotation,
     )

     # Resume weights
@@ -126,7 +125,6 @@ def main(args):
             T.RandomContrast(.3),
             T.RandomBrightness(.3),
         ]),
-        rotated_bbox=args.rotation
     )
     train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, drop_last=True, workers=args.workers)
     print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
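The other change in both detection scripts moves rotation handling from the dataset to the model: the rotated_bbox=args.rotation dataset flag is dropped and the detector is built with assume_straight_pages=not args.rotation instead. A hedged sketch of the TensorFlow construction, with db_resnet50 used only as an example architecture and a local variable standing in for args.rotation:

```python
from doctr.models import detection

rotation = True  # stand-in for args.rotation
model = detection.db_resnet50(
    pretrained=False,
    input_shape=(1024, 1024, 3),
    assume_straight_pages=not rotation,  # rotated boxes are now handled by the model, not the dataset
)
```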
9 changes: 4 additions & 5 deletions references/obj_detection/train_pytorch.py
@@ -27,9 +27,9 @@
 def convert_to_abs_coords(targets, img_shape):
     height, width = img_shape[-2:]

-    for idx in range(len(targets)):
-        targets[idx]['boxes'][:, 0::2] = (targets[idx]['boxes'][:, 0::2] * width).round()
-        targets[idx]['boxes'][:, 1::2] = (targets[idx]['boxes'][:, 1::2] * height).round()
+    for idx, t in enumerate(targets):
+        targets[idx]['boxes'][:, 0::2] = (t['boxes'][:, 0::2] * width).round()
+        targets[idx]['boxes'][:, 1::2] = (t['boxes'][:, 1::2] * height).round()

     targets = [{
         "boxes": torch.from_numpy(t['boxes']).to(dtype=torch.float32),
@@ -44,8 +44,7 @@ def fit_one_epoch(model, train_loader, optimizer, scheduler, mb, ):
     model.train()
     train_iter = iter(train_loader)
     # Iterate over the batches of the dataset
-    for _ in progress_bar(range(len(train_loader)), parent=mb):
-        images, targets = next(train_iter)
+    for images, targets in progress_bar(train_iter, parent=mb):
        optimizer.zero_grad()
        targets = convert_to_abs_coords(targets, images.shape)
        if torch.cuda.is_available():
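In the object-detection script, convert_to_abs_coords now iterates with enumerate instead of indexing targets repeatedly; the index is kept only for the in-place assignment. A condensed sketch of the loop portion (the subsequent conversion to torch tensors is unchanged and omitted here):

```python
def convert_to_abs_coords(targets, img_shape):
    """Scale relative box coordinates in-place to absolute pixel values (loop portion only)."""
    height, width = img_shape[-2:]
    for idx, t in enumerate(targets):
        # enumerate yields the element alongside its index, avoiding targets[idx] on the right-hand side
        targets[idx]['boxes'][:, 0::2] = (t['boxes'][:, 0::2] * width).round()
        targets[idx]['boxes'][:, 1::2] = (t['boxes'][:, 1::2] * height).round()
    return targets
```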
3 changes: 1 addition & 2 deletions references/recognition/train_pytorch.py
@@ -38,8 +38,7 @@ def fit_one_epoch(model, train_loader, batch_transforms, optimizer, scheduler, m
     model.train()
     train_iter = iter(train_loader)
     # Iterate over the batches of the dataset
-    for _ in progress_bar(range(len(train_loader)), parent=mb):
-        images, targets = next(train_iter)
+    for images, targets in progress_bar(train_iter, parent=mb):

         if torch.cuda.is_available():
             images = images.cuda()
3 changes: 1 addition & 2 deletions references/recognition/train_tensorflow.py
@@ -33,8 +33,7 @@
 def fit_one_epoch(model, train_loader, batch_transforms, optimizer, mb):
     train_iter = iter(train_loader)
     # Iterate over the batches of the dataset
-    for batch_step in progress_bar(range(train_loader.num_batches), parent=mb):
-        images, targets = next(train_iter)
+    for images, targets in progress_bar(train_iter, parent=mb):

         images = batch_transforms(images)
