Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[API] update api for multi file and pdf support #1522

Merged
merged 11 commits into from
Apr 11, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
update mypy + tests
felixdittrich92 committed Apr 10, 2024
commit 0525b69b0e34dfb4da9af8fa1ea7a7cc955de918
6 changes: 3 additions & 3 deletions api/tests/routes/test_ocr.py
Original file line number Diff line number Diff line change
@@ -19,9 +19,9 @@ async def test_perform_ocr(test_app_asyncio, mock_detection_image, mock_txt_file
# Check that IoU with GT is reasonable
assert isinstance(json_response, list) and len(json_response) == 2
first_pred = json_response[0] # it's enough to test for the first file because the same image is used twice
assert isinstance(first_pred, dict) and len(first_pred["items"]) == gt_boxes.shape[0]
pred_boxes = np.array([elt["box"] for elt in first_pred["items"]])
pred_labels = np.array([elt["value"] for elt in first_pred["items"]])
assert isinstance(first_pred, dict) and len(first_pred["items"]["blocks"]["lines"]["words"]) == gt_boxes.shape[0]
pred_boxes = np.array([elt["geometry"] for elt in first_pred["items"]["blocks"]["lines"]["words"]])
pred_labels = np.array([elt["value"] for elt in first_pred["items"]["blocks"]["lines"]["words"]])
iou_mat = box_iou(gt_boxes, pred_boxes)
gt_idxs, pred_idxs = linear_sum_assignment(-iou_mat)
is_kept = iou_mat[gt_idxs, pred_idxs] >= 0.8
11 changes: 6 additions & 5 deletions doctr/datasets/generator/base.py
Original file line number Diff line number Diff line change
@@ -20,7 +20,7 @@ def synthesize_text_img(
font_family: Optional[str] = None,
background_color: Optional[Tuple[int, int, int]] = None,
text_color: Optional[Tuple[int, int, int]] = None,
) -> Image:
) -> Image.Image:
"""Generate a synthetic text image

Args:
@@ -81,7 +81,7 @@ def __init__(
self._data: List[Image.Image] = []
if cache_samples:
self._data = [
(synthesize_text_img(char, font_family=font), idx)
(synthesize_text_img(char, font_family=font), idx) # type: ignore[misc]
for idx, char in enumerate(self.vocab)
for font in self.font_family
]
@@ -93,7 +93,7 @@ def _read_sample(self, index: int) -> Tuple[Any, int]:
# Samples are already cached
if len(self._data) > 0:
idx = index % len(self._data)
pil_img, target = self._data[idx]
pil_img, target = self._data[idx] # type: ignore[misc]
else:
target = index % len(self.vocab)
pil_img = synthesize_text_img(self.vocab[target], font_family=random.choice(self.font_family))
@@ -132,7 +132,8 @@ def __init__(
if cache_samples:
_words = [self._generate_string(*self.wordlen_range) for _ in range(num_samples)]
self._data = [
(synthesize_text_img(text, font_family=random.choice(self.font_family)), text) for text in _words
(synthesize_text_img(text, font_family=random.choice(self.font_family)), text) # type: ignore[misc]
for text in _words
]

def _generate_string(self, min_chars: int, max_chars: int) -> str:
@@ -145,7 +146,7 @@ def __len__(self) -> int:
def _read_sample(self, index: int) -> Tuple[Any, str]:
# Samples are already cached
if len(self._data) > 0:
pil_img, target = self._data[index]
pil_img, target = self._data[index] # type: ignore[misc]
else:
target = self._generate_string(*self.wordlen_range)
pil_img = synthesize_text_img(target, font_family=random.choice(self.font_family))
2 changes: 1 addition & 1 deletion doctr/io/image/pytorch.py
Original file line number Diff line number Diff line change
@@ -16,7 +16,7 @@
__all__ = ["tensor_from_pil", "read_img_as_tensor", "decode_img_as_tensor", "tensor_from_numpy", "get_img_shape"]


def tensor_from_pil(pil_img: Image, dtype: torch.dtype = torch.float32) -> torch.Tensor:
def tensor_from_pil(pil_img: Image.Image, dtype: torch.dtype = torch.float32) -> torch.Tensor:
"""Convert a PIL Image to a PyTorch tensor

Args:
2 changes: 1 addition & 1 deletion doctr/io/image/tensorflow.py
Original file line number Diff line number Diff line change
@@ -15,7 +15,7 @@
__all__ = ["tensor_from_pil", "read_img_as_tensor", "decode_img_as_tensor", "tensor_from_numpy", "get_img_shape"]


def tensor_from_pil(pil_img: Image, dtype: tf.dtypes.DType = tf.float32) -> tf.Tensor:
def tensor_from_pil(pil_img: Image.Image, dtype: tf.dtypes.DType = tf.float32) -> tf.Tensor:
"""Convert a PIL Image to a TensorFlow tensor

Args:
Loading
Oops, something went wrong.