[misc & build] replace isort pydocstyle and black with ruff (#1379)

mindee · Nov 15, 2023 · commit 6d92df5 · 1 parent e00cc79

Showing 153 changed files with 969 additions and 478 deletions.
diff --git a/.github/verify_pr_labels.py b/.github/verify_pr_labels.py
@@ -3,8 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-"""
-Borrowed & adapted from https://github.com/pytorch/vision/blob/main/.github/process_commit.py
+"""Borrowed & adapted from https://github.com/pytorch/vision/blob/main/.github/process_commit.py
 This script finds the merger responsible for labeling a PR by a commit SHA. It is used by the workflow in
 '.github/workflows/pr-labels.yml'. If there exists no PR associated with the commit or the PR is properly labeled,
 this script is a no-op.

diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml
@@ -26,45 +26,6 @@ jobs:
           ruff --version
           ruff check --diff .
 
-  black:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        os: [ubuntu-latest]
-        python: ["3.8"]
-    steps:
-      - uses: actions/checkout@v3
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python }}
-          architecture: x64
-      - name: Run black
-        run: |
-          pip install black
-          black --version
-          black --check --diff .
-
-  isort:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        os: [ubuntu-latest]
-        python: ["3.8"]
-    steps:
-      - uses: actions/checkout@v3
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python }}
-          architecture: x64
-      - name: Run isort
-        run: |
-          pip install isort
-          isort --version
-          isort .
-          if [ -n "$(git status --porcelain --untracked-files=no)" ]; then exit 1; else echo "All clear"; fi
-
   mypy:
     runs-on: ${{ matrix.os }}
     strategy:
@@ -92,22 +53,3 @@ jobs:
         run: |
           mypy --version
           mypy
-
-  pydocstyle:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        os: [ubuntu-latest]
-        python: ["3.8"]
-    steps:
-      - uses: actions/checkout@v3
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python }}
-          architecture: x64
-      - name: Run pydocstyle
-        run: |
-          pip install pydocstyle[toml]
-          pydocstyle --version
-          pydocstyle
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.5.0
     hooks:
       - id: check-ast
       - id: check-yaml
@@ -15,15 +15,9 @@ repos:
       - id: check-merge-conflict
       - id: no-commit-to-branch
         args: ['--branch', 'main']
-  - repo: https://github.com/psf/black
-    rev: 23.1.0
-    hooks:
-      - id: black
-  - repo: https://github.com/pycqa/isort
-    rev: 5.12.0
-    hooks:
-      - id: isort
-  - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: 'v0.0.260'
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.1.5
     hooks:
       - id: ruff
+        args: [ --fix ]
+      - id: ruff-format
diff --git a/Makefile b/Makefile
@@ -1,17 +1,13 @@
 .PHONY: quality style test test-common test-tf test-torch docs-single-version docs
 # this target runs checks on all files
 quality:
-	isort . -c
 	ruff check .
-	black --check .
 	mypy doctr/
-	pydocstyle doctr/
 
 # this target runs checks on all files and potentially modifies some of them
 style:
-	isort .
-	black .
 	ruff --fix .
+	ruff format .
 
 # Run tests for the library
 test:

diff --git a/demo/app.py b/demo/app.py
@@ -14,7 +14,6 @@
 
 if is_tf_available():
     import tensorflow as tf
-
     from backend.tensorflow import DET_ARCHS, RECO_ARCHS, forward_image, load_predictor
 
     if any(tf.config.experimental.list_physical_devices("gpu")):
@@ -24,15 +23,13 @@
 
 else:
     import torch
-
     from backend.pytorch import DET_ARCHS, RECO_ARCHS, forward_image, load_predictor
 
     forward_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
 def main(det_archs, reco_archs):
     """Build a streamlit layout"""
-
     # Wide mode
     st.set_page_config(layout="wide")
 

diff --git a/demo/backend/pytorch.py b/demo/backend/pytorch.py
@@ -30,21 +30,37 @@
 ]
 
 
-def load_predictor(det_arch: str, reco_arch: str, device) -> OCRPredictor:
-    """
+def load_predictor(det_arch: str, reco_arch: str, device: torch.device) -> OCRPredictor:
+    """Load a predictor from doctr.models
+
     Args:
-        device is torch.device
+    ----
+        det_arch: detection architecture
+        reco_arch: recognition architecture
+        device: torch.device, the device to load the predictor on
+
+    Returns:
+    -------
+        instance of OCRPredictor
     """
     predictor = ocr_predictor(
         det_arch, reco_arch, pretrained=True, assume_straight_pages=("rotation" not in det_arch)
     ).to(device)
     return predictor
 
 
-def forward_image(predictor: OCRPredictor, image: np.ndarray, device) -> np.ndarray:
-    """
+def forward_image(predictor: OCRPredictor, image: np.ndarray, device: torch.device) -> np.ndarray:
+    """Forward an image through the predictor
+
     Args:
-        device is torch.device
+    ----
+        predictor: instance of OCRPredictor
+        image: image to process
+        device: torch.device, the device to process the image on
+
+    Returns:
+    -------
+        segmentation map
     """
     with torch.no_grad():
         processed_batches = predictor.det_predictor.pre_processor([image])

diff --git a/demo/backend/tensorflow.py b/demo/backend/tensorflow.py
@@ -29,10 +29,18 @@
 ]
 
 
-def load_predictor(det_arch: str, reco_arch: str, device) -> OCRPredictor:
-    """
+def load_predictor(det_arch: str, reco_arch: str, device: tf.device) -> OCRPredictor:
+    """Load a predictor from doctr.models
+
     Args:
-        device is tf.device
+    ----
+        det_arch: detection architecture
+        reco_arch: recognition architecture
+        device: tf.device, the device to load the predictor on
+
+    Returns:
+    -------
+        instance of OCRPredictor
     """
     with device:
         predictor = ocr_predictor(
@@ -41,10 +49,18 @@ def load_predictor(det_arch: str, reco_arch: str, device) -> OCRPredictor:
     return predictor
 
 
-def forward_image(predictor: OCRPredictor, image: np.ndarray, device) -> np.ndarray:
-    """
+def forward_image(predictor: OCRPredictor, image: np.ndarray, device: tf.device) -> np.ndarray:
+    """Forward an image through the predictor
+
     Args:
-        device is tf.device
+    ----
+        predictor: instance of OCRPredictor
+        image: image to process as numpy array
+        device: tf.device, the device to process the image on
+
+    Returns:
+    -------
+        segmentation map
     """
     with device:
         processed_batches = predictor.det_predictor.pre_processor([image])

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -134,9 +134,7 @@ def add_ga_javascript(app, pagename, templatename, context, doctree):
   gtag('js', new Date());
   gtag('config', '{0}');
 </script>
-    """.format(
-        app.config.googleanalytics_id
-    )
+    """.format(app.config.googleanalytics_id)
     context["metatags"] = metatags
 
 

diff --git a/docs/source/using_doctr/using_models.rst b/docs/source/using_doctr/using_models.rst
@@ -323,7 +323,7 @@ You can also export them as a nested dict, more appropriate for JSON format::
 
   json_output = result.export()
 
-For reference, here is the JSON export for the same `Document` as above::
+For reference, here is the export for the same `Document` as above::
 
   {
     'pages': [

diff --git a/doctr/datasets/cord.py b/doctr/datasets/cord.py
@@ -29,6 +29,7 @@ class CORD(VisionDataset):
     >>> img, target = train_set[0]
 
     Args:
+    ----
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task

diff --git a/doctr/datasets/datasets/base.py b/doctr/datasets/datasets/base.py
@@ -59,7 +59,7 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]:
             # Conditions to assess it is detection model with multiple classes and avoid confusion with other tasks.
             if (
                 isinstance(target, dict)
-                and all([isinstance(item, np.ndarray) for item in target.values()])
+                and all(isinstance(item, np.ndarray) for item in target.values())
                 and set(target.keys()) != {"boxes", "labels"}  # avoid confusion with obj detection target
             ):
                 img_transformed = _copy_tensor(img)
@@ -82,6 +82,7 @@ class _VisionDataset(_AbstractDataset):
     """Implements an abstract dataset
 
     Args:
+    ----
         url: URL of the dataset
         file_name: name of the file once downloaded
         file_hash: expected SHA256 of the file

diff --git a/doctr/datasets/datasets/pytorch.py b/doctr/datasets/datasets/pytorch.py
@@ -18,6 +18,8 @@
 
 
 class AbstractDataset(_AbstractDataset):
+    """Abstract class for all datasets"""
+
     def _read_sample(self, index: int) -> Tuple[torch.Tensor, Any]:
         img_name, target = self.data[index]
 
@@ -53,5 +55,5 @@ def collate_fn(samples: List[Tuple[torch.Tensor, Any]]) -> Tuple[torch.Tensor, L
         return images, list(targets)
 
 
-class VisionDataset(AbstractDataset, _VisionDataset):
+class VisionDataset(AbstractDataset, _VisionDataset):  # noqa: D101
     pass
diff --git a/doctr/datasets/datasets/tensorflow.py b/doctr/datasets/datasets/tensorflow.py
@@ -18,6 +18,8 @@
 
 
 class AbstractDataset(_AbstractDataset):
+    """Abstract class for all datasets"""
+
     def _read_sample(self, index: int) -> Tuple[tf.Tensor, Any]:
         img_name, target = self.data[index]
 
@@ -53,5 +55,5 @@ def collate_fn(samples: List[Tuple[tf.Tensor, Any]]) -> Tuple[tf.Tensor, List[An
         return images, list(targets)
 
 
-class VisionDataset(AbstractDataset, _VisionDataset):
+class VisionDataset(AbstractDataset, _VisionDataset):  # noqa: D101
     pass
diff --git a/doctr/datasets/detection.py b/doctr/datasets/detection.py
@@ -26,6 +26,7 @@ class DetectionDataset(AbstractDataset):
     >>> img, target = train_set[0]
 
     Args:
+    ----
         img_folder: folder with all the images of the dataset
         label_path: path to the annotations of each image
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
@@ -66,14 +67,16 @@ def __init__(
     def format_polygons(
         self, polygons: Union[List, Dict], use_polygons: bool, np_dtype: Type
     ) -> Tuple[np.ndarray, List[str]]:
-        """format polygons into an array
+        """Format polygons into an array
 
         Args:
+        ----
             polygons: the bounding boxes
             use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
             np_dtype: dtype of array
 
         Returns:
+        -------
             geoms: bounding boxes as np array
             polygons_classes: list of classes for each bounding box
         """
@@ -92,4 +95,4 @@ def format_polygons(
 
     @property
     def class_names(self):
-        return sorted(list(set(self._class_names)))
+        return sorted(set(self._class_names))
diff --git a/doctr/datasets/doc_artefacts.py b/doctr/datasets/doc_artefacts.py
@@ -26,6 +26,7 @@ class DocArtefacts(VisionDataset):
     >>> img, target = train_set[0]
 
     Args:
+    ----
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         **kwargs: keyword arguments from `VisionDataset`.

diff --git a/doctr/datasets/funsd.py b/doctr/datasets/funsd.py
@@ -29,6 +29,7 @@ class FUNSD(VisionDataset):
     >>> img, target = train_set[0]
 
     Args:
+    ----
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
@@ -81,7 +82,7 @@ def __init__(
             text_targets, box_targets = zip(*_targets)
             if use_polygons:
                 # xmin, ymin, xmax, ymax -> (x, y) coordinates of top left, top right, bottom right, bottom left corners
-                box_targets = [
+                box_targets = [  # type: ignore[assignment]
                     [
                         [box[0], box[1]],
                         [box[2], box[1]],

diff --git a/doctr/datasets/generator/base.py b/doctr/datasets/generator/base.py
@@ -24,16 +24,17 @@ def synthesize_text_img(
     """Generate a synthetic text image
 
     Args:
+    ----
         text: the text to render as an image
         font_size: the size of the font
         font_family: the font family (has to be installed on your system)
         background_color: background color of the final image
         text_color: text color on the final image
 
     Returns:
+    -------
         PIL image of the text
     """
-
     background_color = (0, 0, 0) if background_color is None else background_color
     text_color = (255, 255, 255) if text_color is None else text_color