diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 485e9c68d4..afbff36725 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -11,7 +11,7 @@ Everything you need to know to contribute efficiently to the project. - [docs](https://github.com/mindee/doctr/blob/main/docs) - Library documentation building - [scripts](https://github.com/mindee/doctr/blob/main/scripts) - Example scripts - [references](https://github.com/mindee/doctr/blob/main/references) - Reference training scripts -- [demo](https://github.com/mindee/doctr/blob/main/demo) - Small demo app to showcase docTR capabilities +- [demo](https://github.com/mindee/doctr/blob/main/demo) - Small demo app to showcase docTR capabilities - [api](https://github.com/mindee/doctr/blob/main/api) - A minimal template to deploy a REST API with docTR @@ -30,7 +30,7 @@ As a contributor, you will only have to ensure coverage of your code by adding a ### Feature requests & bug report -Whether you encountered a problem, or you have a feature suggestion, your input has value and can be used by contributors to reference it in their developments. For this purpose, we advise you to use Github [issues](https://github.com/mindee/doctr/issues). +Whether you encountered a problem or you have a feature suggestion, your input has value and can be referenced by other contributors in their developments. For this purpose, we advise you to use GitHub [issues](https://github.com/mindee/doctr/issues). First, check whether the topic wasn't already covered in an open / closed issue. If not, feel free to open a new one! When doing so, use issue templates whenever possible and provide enough information for other contributors to jump in. diff --git a/README.md b/README.md index 43ada64b71..feac62bff1 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ plt.imshow(synthetic_pages[0]); plt.axis('off'); plt.show() ![Synthesis sample](https://github.com/mindee/doctr/releases/download/v0.3.1/synthesized_sample.png) -The `ocr_predictor` returns a `Document` object with a nested structure (with `Page`, `Block`, `Line`, `Word`, `Artefact`). +The `ocr_predictor` returns a `Document` object with a nested structure (with `Page`, `Block`, `Line`, `Word`, `Artefact`). To get a better understanding of our document model, check our [documentation](https://mindee.github.io/doctr/io.html#document-structure): You can also export them as a nested dict, more appropriate for JSON format: @@ -104,7 +104,7 @@ For examples & further details about the export format, please refer to [this se ### Prerequisites -Python 3.6 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR. +Python 3.6 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR. Since we use [weasyprint](https://weasyprint.readthedocs.io/), you will need extra dependencies if you are not running Linux.
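For readers skimming the patch, here is a minimal sketch of the README flow touched above, from loading a document to the nested-dict export. This is a hedged example, not part of the patch: it assumes the `DocumentFile` class defined in `doctr/io/reader.py` below and the public `ocr_predictor` / `Document.export()` API this PR documents.

    >>> from doctr.io import DocumentFile
    >>> from doctr.models import ocr_predictor
    >>> model = ocr_predictor(pretrained=True)
    >>> # DocumentFile.from_pdf returns a list of pages as numpy arrays (cf. doctr/io/reader.py)
    >>> doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
    >>> result = model(doc)  # Document object (Page > Block > Line > Word > Artefact)
    >>> json_export = result.export()  # nested dict, ready for JSON serialization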
diff --git a/docs/requirements.txt b/docs/requirements.txt index 310c98c486..fe70aaa412 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,7 +1,8 @@ -sphinx<3.5.0 -sphinx-rtd-theme==0.4.3 +sphinx>=4.0.0 sphinxemoji>=0.1.8 sphinx-copybutton>=0.3.1 docutils<0.18 recommonmark>=0.7.1 sphinx-markdown-tables>=0.0.15 +sphinx-tabs>=3.3.0 +furo>=2022.3.4 diff --git a/docs/source/_static/css/mindee.css b/docs/source/_static/css/mindee.css index a17612f34c..3518282bd8 100644 --- a/docs/source/_static/css/mindee.css +++ b/docs/source/_static/css/mindee.css @@ -34,10 +34,14 @@ display: block; } -.wy-nav-content { - max-width: 1000px !important; -} h1, h2, h3, h4, h5, .caption-text { font-family: "Helvetica Neue", Arial, sans-serif; } + +/* Github button */ + +.github-repo { + display: flex; + justify-content: center; +} diff --git a/docs/source/_static/js/custom.js b/docs/source/_static/js/custom.js index 2a0a4ec4b0..2092e480ea 100644 --- a/docs/source/_static/js/custom.js +++ b/docs/source/_static/js/custom.js @@ -28,7 +28,7 @@ function addGithubButton() { `; - document.querySelector(".wy-side-nav-search .icon-home").insertAdjacentHTML('afterend', div); + document.querySelector(".sidebar-brand").insertAdjacentHTML('afterend', div); } function addVersionControl() { @@ -85,7 +85,7 @@ function addVersionControl() { div.style.display = 'block'; div.style.textAlign = 'center'; - const scrollDiv = document.querySelector(".wy-side-nav-search"); + const scrollDiv = document.querySelector(".sidebar-brand"); scrollDiv.insertBefore(div, scrollDiv.children[1]); } diff --git a/docs/source/conf.py b/docs/source/conf.py index 1ce4609a6b..59f6447292 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -14,8 +14,6 @@ import sys from datetime import datetime -import sphinx_rtd_theme - sys.path.insert(0, os.path.abspath('../..')) import doctr @@ -48,6 +46,7 @@ 'sphinx_copybutton', 'recommonmark', 'sphinx_markdown_tables', + 'sphinx_tabs.tabs', ] intersphinx_mapping = { @@ -67,7 +66,8 @@ # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = 'friendly' +pygments_dark_style = "monokai" highlight_language = 'python3' # -- Options for HTML output ------------------------------------------------- @@ -75,18 +75,31 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] +html_theme = 'furo' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. 
# + html_theme_options = { - 'collapse_navigation': False, - 'display_version': False, - 'logo_only': False, - 'analytics_id': 'G-40DVRMX8T4', + "sidebar_hide_name": True, + "navigation_with_keys": True, + "light_css_variables": { + "color-sidebar-background": "#082747", + "color-sidebar-background-border": "#082747", + "color-sidebar-caption-text": "white", + "color-sidebar-link-text--top-level": "white", + "color-sidebar-link-text": "white", + "sidebar-caption-font-size": "normal", + "color-sidebar-item-background--hover": " #5dade2", + }, + "dark_css_variables": { + "color-sidebar-background": "#1a1c1e", + "color-sidebar-background-border": "#1a1c1e", + "color-sidebar-caption-text": "white", + "color-sidebar-link-text--top-level": "white", + }, } html_logo = '_static/images/Logo-docTR-white.png' diff --git a/docs/source/changelog.rst b/docs/source/development/changelog.rst similarity index 100% rename from docs/source/changelog.rst rename to docs/source/development/changelog.rst diff --git a/docs/source/development/code_of_conduct.md b/docs/source/development/code_of_conduct.md new file mode 120000 index 0000000000..5d525cd1fd --- /dev/null +++ b/docs/source/development/code_of_conduct.md @@ -0,0 +1 @@ +../../../CODE_OF_CONDUCT.md \ No newline at end of file diff --git a/docs/source/development/contributing.md b/docs/source/development/contributing.md new file mode 120000 index 0000000000..c97564d93a --- /dev/null +++ b/docs/source/development/contributing.md @@ -0,0 +1 @@ +../../../CONTRIBUTING.md \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index 2be367403c..75f8973137 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -30,7 +30,7 @@ Main Features :caption: Getting started :hidden: - installing + install/installing notebooks @@ -60,6 +60,7 @@ Supported datasets * SVHN from `"Reading Digits in Natural Images with Unsupervised Feature Learning" `_. * IC03 from `ICDAR 2003 `_. * IC13 from `ICDAR 2013 `_. + * IMGUR5K from `"TextStyleBrush: Transfer of Text Aesthetics from a Single Example" `_. .. toctree:: @@ -67,8 +68,8 @@ Supported datasets :caption: Using docTR :hidden: - using_models - using_model_export + models/using_models + models/using_model_export .. toctree:: @@ -76,11 +77,20 @@ Supported datasets :caption: Package Reference :hidden: - datasets - io - models - transforms - utils + package_reference/datasets + package_reference/io + package_reference/models + package_reference/transforms + package_reference/utils + + +.. toctree:: + :maxdepth: 2 + :caption: Development + :hidden: + + development/contributing + development/code_of_conduct .. toctree:: @@ -88,4 +98,4 @@ Supported datasets :caption: Notes :hidden: - changelog + development/changelog diff --git a/docs/source/installing.rst b/docs/source/install/installing.rst similarity index 76% rename from docs/source/installing.rst rename to docs/source/install/installing.rst index 8197df660d..002c168c48 100644 --- a/docs/source/installing.rst +++ b/docs/source/install/installing.rst @@ -5,7 +5,6 @@ Installation This library requires `Python `_ 3.6 or higher. - Prerequisites ============= @@ -24,7 +23,6 @@ For MacOS users, you can install them using `Homebrew `_ as fo For Windows users, those dependencies are included in GTK. You can find the latest installer over `here `_. 
- Via Python Package ================== @@ -37,13 +35,19 @@ Install the last stable release of the package using `pip `_ -* `db_resnet50 `_ -* `db_mobilenet_v3_large `_ - -We also provide 2 models working with any kind of rotated documents: -* `linknet_resnet18_rotation `_ -* `db_resnet50_rotation `_ - -For a comprehensive comparison, we have compiled a detailed benchmark on publicly available datasets: - - -+------------------------------------------------------------------+----------------------------+----------------------------+---------+ -| | FUNSD | CORD | | -+=================================+=================+==============+============+===============+============+===============+=========+ -| **Architecture** | **Input shape** | **# params** | **Recall** | **Precision** | **Recall** | **Precision** | **FPS** | -+---------------------------------+-----------------+--------------+------------+---------------+------------+---------------+---------+ -| db_resnet50 | (1024, 1024, 3) | 25.2 M | 82.14 | 87.64 | 92.49 | 89.66 | 2.1 | -+---------------------------------+-----------------+--------------+------------+---------------+------------+---------------+---------+ -| db_mobilenet_v3_large | (1024, 1024, 3) | 4.2 M | 79.35 | 84.03 | 81.14 | 66.85 | | -+---------------------------------+-----------------+--------------+------------+---------------+------------+---------------+---------+ - - -All text detection models above have been evaluated using both the training and evaluation sets of FUNSD and CORD (cf. :ref:`datasets`). -Explanations about the metrics being used are available in :ref:`metrics`. - -*Disclaimer: both FUNSD subsets combined have 199 pages which might not be representative enough of the model capabilities* - -FPS (Frames per second) is computed after a warmup phase of 100 tensors (where the batch size is 1), by measuring the average number of processed tensors per second over 1000 samples. Those results were obtained on a `c5.x12large `_ AWS instance (CPU Xeon Platinum 8275L). - - -Detection predictors -^^^^^^^^^^^^^^^^^^^^ - -`detection_predictor `_ wraps your detection model to make it easily useable with your favorite deep learning framework seamlessly. - - >>> import numpy as np - >>> from doctr.models import detection_predictor - >>> predictor = detection_predictor('db_resnet50') - >>> dummy_img = (255 * np.random.rand(800, 600, 3)).astype(np.uint8) - >>> out = model([dummy_img]) - -You can pass specific boolean arguments to the predictor: - -* `assume_straight_pages`: if you work with straight documents only, it will fit straight bounding boxes to the text areas. -* `preserve_aspect_ratio`: if you want to preserve the aspect ratio of your documents while resizing before sending them to the model. -* `symmetric_pad`: if you choose to preserve the aspect ratio, it will pad the image symmetrically and not from the bottom-right. - -For instance, this snippet will instantiates a detection predictor able to detect text on rotated documents while preserving the aspect ratio: - - >>> from doctr.models import detection_predictor - >>> predictor = detection_predictor('db_resnet50_rotation', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True) - -NB: for the moment, `db_resnet50_rotation` is pretrained in Pytorch only and `linknet_resnet18_rotation` in Tensorflow only. - - -Text Recognition ----------------- - -The task consists of transcribing the character sequence in a given image. 
- - -Available architectures -^^^^^^^^^^^^^^^^^^^^^^^ - -The following architectures are currently supported: - -* `crnn_vgg16_bn `_ -* `crnn_mobilenet_v3_small `_ -* `crnn_mobilenet_v3_large `_ -* `sar_resnet31 `_ -* `master `_ - - -For a comprehensive comparison, we have compiled a detailed benchmark on publicly available datasets: - - -.. list-table:: Text recognition model zoo - :header-rows: 1 - - * - Architecture - - Input shape - - # params - - FUNSD - - CORD - - FPS - * - crnn_vgg16_bn - - (32, 128, 3) - - 15.8M - - 87.18 - - 92.93 - - 12.8 - * - crnn_mobilenet_v3_small - - (32, 128, 3) - - 2.1M - - 86.21 - - 90.56 - - - * - crnn_mobilenet_v3_large - - (32, 128, 3) - - 4.5M - - 86.95 - - 92.03 - - - * - sar_resnet31 - - (32, 128, 3) - - 56.2M - - **87.70** - - **93.41** - - 2.7 - * - master - - (32, 128, 3) - - 67.7M - - 87.62 - - 93.27 - - - -All text recognition models above have been evaluated using both the training and evaluation sets of FUNSD and CORD (cf. :ref:`datasets`). -Explanations about the metric being used (exact match) are available in :ref:`metrics`. - -While most of our recognition models were trained on our french vocab (cf. :ref:`vocabs`), you can easily access the vocab of any model as follows: - - >>> from doctr.models import recognition_predictor - >>> predictor = recognition_predictor('crnn_vgg16_bn') - >>> print(predictor.model.cfg['vocab']) - - -*Disclaimer: both FUNSD subsets combine have 30595 word-level crops which might not be representative enough of the model capabilities* - -FPS (Frames per second) is computed after a warmup phase of 100 tensors (where the batch size is 1), by measuring the average number of processed tensors per second over 1000 samples. Those results were obtained on a `c5.x12large `_ AWS instance (CPU Xeon Platinum 8275L). - - -Recognition predictors -^^^^^^^^^^^^^^^^^^^^^^ -`recognition_predictor `_ wraps your recognition model to make it easily useable with your favorite deep learning framework seamlessly. - - >>> import numpy as np - >>> from doctr.models import recognition_predictor - >>> predictor = recognition_predictor('crnn_vgg16_bn') - >>> dummy_img = (255 * np.random.rand(50, 150, 3)).astype(np.uint8) - >>> out = model([dummy_img]) - - End-to-End OCR -------------- @@ -360,4 +192,4 @@ For reference, here is a sample XML byte string output::

- \ No newline at end of file + diff --git a/docs/source/models/using_models.rst b/docs/source/models/using_models.rst new file mode 100644 index 0000000000..68fa54c248 --- /dev/null +++ b/docs/source/models/using_models.rst @@ -0,0 +1,20 @@ +Choosing the right model +======================== + +The full Optical Character Recognition task can be seen as two consecutive tasks: text detection and text recognition. +Whether performed at once or separately, each task corresponds to a type of deep learning architecture. + +.. currentmodule:: doctr.models + +For a given task, docTR provides a Predictor, which is composed of 2 components: + +* PreProcessor: a module in charge of making inputs directly usable by the deep learning model. +* Model: a deep learning model, implemented with all supported deep learning backends (TensorFlow & PyTorch) along with its specific post-processor to make outputs structured and reusable. + +.. toctree:: + :maxdepth: 3 + + using_models.text_detection + using_models.text_recognition + using_models.end_to_end + diff --git a/docs/source/models/using_models.text_detection.rst b/docs/source/models/using_models.text_detection.rst new file mode 100644 index 0000000000..f96e74616a --- /dev/null +++ b/docs/source/models/using_models.text_detection.rst @@ -0,0 +1,65 @@ +Text Detection +-------------- + +The task consists of localizing textual elements in a given image. +While those text elements can represent many things, in docTR, we will consider uninterrupted character sequences (words). Additionally, the localization can take several forms: from straight bounding boxes (delimited by the 2D coordinates of the top-left and bottom-right corner), to polygons, or binary segmentation (flagging which pixels belong to this element, and which don't). +Our latest detection models work with rotated and skewed documents! + +Available architectures +^^^^^^^^^^^^^^^^^^^^^^^ + +The following architectures are currently supported: + +* `linknet_resnet18 `_ +* `db_resnet50 `_ +* `db_mobilenet_v3_large `_ + +We also provide 2 models working with any kind of rotated documents: + +* `linknet_resnet18_rotation `_ +* `db_resnet50_rotation `_ + +For a comprehensive comparison, we have compiled a detailed benchmark on publicly available datasets: + + ++------------------------------------------------------------------+----------------------------+----------------------------+---------+ +| | FUNSD | CORD | | ++=================================+=================+==============+============+===============+============+===============+=========+ +| **Architecture** | **Input shape** | **# params** | **Recall** | **Precision** | **Recall** | **Precision** | **FPS** | ++---------------------------------+-----------------+--------------+------------+---------------+------------+---------------+---------+ +| db_resnet50 | (1024, 1024, 3) | 25.2 M | 82.14 | 87.64 | 92.49 | 89.66 | 2.1 | ++---------------------------------+-----------------+--------------+------------+---------------+------------+---------------+---------+ +| db_mobilenet_v3_large | (1024, 1024, 3) | 4.2 M | 79.35 | 84.03 | 81.14 | 66.85 | | ++---------------------------------+-----------------+--------------+------------+---------------+------------+---------------+---------+ + + +All text detection models above have been evaluated using both the training and evaluation sets of FUNSD and CORD (cf. :ref:`datasets`). +Explanations about the metrics being used are available in :ref:`metrics`.
+ +*Disclaimer: both FUNSD subsets combined have 199 pages which might not be representative enough of the model capabilities* + +FPS (Frames per second) is computed after a warmup phase of 100 tensors (where the batch size is 1), by measuring the average number of processed tensors per second over 1000 samples. Those results were obtained on a `c5.x12large `_ AWS instance (CPU Xeon Platinum 8275L). + +Detection predictors +^^^^^^^^^^^^^^^^^^^^ + +`detection_predictor `_ wraps your detection model to make it easily usable with your favorite deep learning framework. + + >>> import numpy as np + >>> from doctr.models import detection_predictor + >>> predictor = detection_predictor('db_resnet50') + >>> dummy_img = (255 * np.random.rand(800, 600, 3)).astype(np.uint8) + >>> out = predictor([dummy_img]) + +You can pass specific boolean arguments to the predictor: + +* `assume_straight_pages`: if you work with straight documents only, it will fit straight bounding boxes to the text areas. +* `preserve_aspect_ratio`: if you want to preserve the aspect ratio of your documents while resizing before sending them to the model. +* `symmetric_pad`: if you choose to preserve the aspect ratio, it will pad the image symmetrically and not from the bottom-right. + +For instance, this snippet will instantiate a detection predictor able to detect text on rotated documents while preserving the aspect ratio: + + >>> from doctr.models import detection_predictor + >>> predictor = detection_predictor('db_resnet50_rotation', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True) + +NB: for the moment, `db_resnet50_rotation` is pretrained in PyTorch only and `linknet_resnet18_rotation` in TensorFlow only. \ No newline at end of file diff --git a/docs/source/models/using_models.text_recognition.rst b/docs/source/models/using_models.text_recognition.rst new file mode 100644 index 0000000000..e9e2cbd32d --- /dev/null +++ b/docs/source/models/using_models.text_recognition.rst @@ -0,0 +1,85 @@ +Text Recognition +---------------- + +The task consists of transcribing the character sequence in a given image. + + +Available architectures +^^^^^^^^^^^^^^^^^^^^^^^ + +The following architectures are currently supported: + +* `crnn_vgg16_bn `_ +* `crnn_mobilenet_v3_small `_ +* `crnn_mobilenet_v3_large `_ +* `sar_resnet31 `_ +* `master `_ + + +For a comprehensive comparison, we have compiled a detailed benchmark on publicly available datasets: + + +.. list-table:: Text recognition model zoo + :header-rows: 1 + + * - Architecture + - Input shape + - # params + - FUNSD + - CORD + - FPS + * - crnn_vgg16_bn + - (32, 128, 3) + - 15.8M + - 87.18 + - 92.93 + - 12.8 + * - crnn_mobilenet_v3_small + - (32, 128, 3) + - 2.1M + - 86.21 + - 90.56 + - + * - crnn_mobilenet_v3_large + - (32, 128, 3) + - 4.5M + - 86.95 + - 92.03 + - + * - sar_resnet31 + - (32, 128, 3) + - 56.2M + - **87.70** + - **93.41** + - 2.7 + * - master + - (32, 128, 3) + - 67.7M + - 87.62 + - 93.27 + - + +All text recognition models above have been evaluated using both the training and evaluation sets of FUNSD and CORD (cf. :ref:`datasets`). +Explanations about the metric being used (exact match) are available in :ref:`metrics`. + +While most of our recognition models were trained on our French vocab (cf. 
:ref:`vocabs`), you can easily access the vocab of any model as follows: + + >>> from doctr.models import recognition_predictor + >>> predictor = recognition_predictor('crnn_vgg16_bn') + >>> print(predictor.model.cfg['vocab']) + + +*Disclaimer: both FUNSD subsets combined have 30595 word-level crops which might not be representative enough of the model capabilities* + +FPS (Frames per second) is computed after a warmup phase of 100 tensors (where the batch size is 1), by measuring the average number of processed tensors per second over 1000 samples. Those results were obtained on a `c5.x12large `_ AWS instance (CPU Xeon Platinum 8275L). + + +Recognition predictors +^^^^^^^^^^^^^^^^^^^^^^ +`recognition_predictor `_ wraps your recognition model to make it easily usable with your favorite deep learning framework. + + >>> import numpy as np + >>> from doctr.models import recognition_predictor + >>> predictor = recognition_predictor('crnn_vgg16_bn') + >>> dummy_img = (255 * np.random.rand(50, 150, 3)).astype(np.uint8) + >>> out = predictor([dummy_img]) \ No newline at end of file diff --git a/docs/source/package_reference/dataset_images/artefacts-grid.png b/docs/source/package_reference/dataset_images/artefacts-grid.png new file mode 100644 index 0000000000..76dfc5685c Binary files /dev/null and b/docs/source/package_reference/dataset_images/artefacts-grid.png differ diff --git a/docs/source/package_reference/dataset_images/cord-grid.png b/docs/source/package_reference/dataset_images/cord-grid.png new file mode 100644 index 0000000000..de143efc26 Binary files /dev/null and b/docs/source/package_reference/dataset_images/cord-grid.png differ diff --git a/docs/source/package_reference/dataset_images/funsd-grid.png b/docs/source/package_reference/dataset_images/funsd-grid.png new file mode 100644 index 0000000000..edbd23c3d8 Binary files /dev/null and b/docs/source/package_reference/dataset_images/funsd-grid.png differ diff --git a/docs/source/package_reference/dataset_images/ic03-grid.png b/docs/source/package_reference/dataset_images/ic03-grid.png new file mode 100644 index 0000000000..9c0a3bf6e1 Binary files /dev/null and b/docs/source/package_reference/dataset_images/ic03-grid.png differ diff --git a/docs/source/package_reference/dataset_images/ic13-grid.png b/docs/source/package_reference/dataset_images/ic13-grid.png new file mode 100644 index 0000000000..22170eab0c Binary files /dev/null and b/docs/source/package_reference/dataset_images/ic13-grid.png differ diff --git a/docs/source/package_reference/dataset_images/iiit5k-grid.png b/docs/source/package_reference/dataset_images/iiit5k-grid.png new file mode 100644 index 0000000000..0cf7a3bcbc Binary files /dev/null and b/docs/source/package_reference/dataset_images/iiit5k-grid.png differ diff --git a/docs/source/package_reference/dataset_images/imgur5k-grid.png b/docs/source/package_reference/dataset_images/imgur5k-grid.png new file mode 100644 index 0000000000..f7057d9c2e Binary files /dev/null and b/docs/source/package_reference/dataset_images/imgur5k-grid.png differ diff --git a/docs/source/package_reference/dataset_images/sroie-grid.png b/docs/source/package_reference/dataset_images/sroie-grid.png new file mode 100644 index 0000000000..9f1dce97d4 Binary files /dev/null and b/docs/source/package_reference/dataset_images/sroie-grid.png differ diff --git a/docs/source/package_reference/dataset_images/svhn-grid.png b/docs/source/package_reference/dataset_images/svhn-grid.png new file mode 100644 index 0000000000..b27a3864d9 Binary files
/dev/null and b/docs/source/package_reference/dataset_images/svhn-grid.png differ diff --git a/docs/source/package_reference/dataset_images/svt-grid.png b/docs/source/package_reference/dataset_images/svt-grid.png new file mode 100644 index 0000000000..978c396aa6 Binary files /dev/null and b/docs/source/package_reference/dataset_images/svt-grid.png differ diff --git a/docs/source/package_reference/dataset_images/synthtext-grid.png b/docs/source/package_reference/dataset_images/synthtext-grid.png new file mode 100644 index 0000000000..b358cd17f2 Binary files /dev/null and b/docs/source/package_reference/dataset_images/synthtext-grid.png differ diff --git a/docs/source/datasets.rst b/docs/source/package_reference/datasets.rst similarity index 100% rename from docs/source/datasets.rst rename to docs/source/package_reference/datasets.rst diff --git a/docs/source/io.rst b/docs/source/package_reference/io.rst similarity index 100% rename from docs/source/io.rst rename to docs/source/package_reference/io.rst diff --git a/docs/source/models.rst b/docs/source/package_reference/models.rst similarity index 98% rename from docs/source/models.rst rename to docs/source/package_reference/models.rst index 04b1b78acd..485b36ebd4 100644 --- a/docs/source/models.rst +++ b/docs/source/package_reference/models.rst @@ -5,7 +5,7 @@ doctr.models doctr.models.classification ----------------------- +--------------------------- .. autofunction:: doctr.models.classification.vgg16_bn_r diff --git a/docs/source/transforms.rst b/docs/source/package_reference/transforms.rst similarity index 100% rename from docs/source/transforms.rst rename to docs/source/package_reference/transforms.rst diff --git a/docs/source/utils.rst b/docs/source/package_reference/utils.rst similarity index 100% rename from docs/source/utils.rst rename to docs/source/package_reference/utils.rst diff --git a/doctr/datasets/cord.py b/doctr/datasets/cord.py index 6740913d63..5c6d9410fe 100644 --- a/doctr/datasets/cord.py +++ b/doctr/datasets/cord.py @@ -20,10 +20,12 @@ class CORD(VisionDataset): """CORD dataset from `"CORD: A Consolidated Receipt Dataset forPost-OCR Parsing" `_. - Example:: - >>> from doctr.datasets import CORD - >>> train_set = CORD(train=True, download=True) - >>> img, target = train_set[0] + .. 
image:: dataset_images/cord-grid.png + :align: center + + >>> from doctr.datasets import CORD + >>> train_set = CORD(train=True, download=True) + >>> img, target = train_set[0] Args: train: whether the subset should be the training one diff --git a/doctr/datasets/detection.py b/doctr/datasets/detection.py index 80f64c63e6..fb36b9d7b7 100644 --- a/doctr/datasets/detection.py +++ b/doctr/datasets/detection.py @@ -20,10 +20,10 @@ class DetectionDataset(AbstractDataset): """Implements a text detection dataset - Example:: - >>> from doctr.datasets import DetectionDataset - >>> train_set = DetectionDataset(img_folder="/path/to/images", label_path="/path/to/labels.json") - >>> img, target = train_set[0] + >>> from doctr.datasets import DetectionDataset + >>> train_set = DetectionDataset(img_folder="/path/to/images", + >>> label_path="/path/to/labels.json") + >>> img, target = train_set[0] Args: img_folder: folder with all the images of the dataset diff --git a/doctr/datasets/doc_artefacts.py b/doctr/datasets/doc_artefacts.py index 044af0b93a..bb36f252d3 100644 --- a/doctr/datasets/doc_artefacts.py +++ b/doctr/datasets/doc_artefacts.py @@ -18,10 +18,13 @@ class DocArtefacts(VisionDataset): """Object detection dataset for non-textual elements in documents. The dataset includes a variety of synthetic document pages with non-textual elements. - Example:: - >>> from doctr.datasets import DocArtefacts - >>> train_set = DocArtefacts(download=True) - >>> img, target = train_set[0] + .. image:: dataset_images/artefacts-grid.png + :align: center + + + >>> from doctr.datasets import DocArtefacts + >>> train_set = DocArtefacts(train=True, download=True) + >>> img, target = train_set[0] Args: train: whether the subset should be the training one diff --git a/doctr/datasets/funsd.py b/doctr/datasets/funsd.py index b0de69a7d0..2bac1e8d8e 100644 --- a/doctr/datasets/funsd.py +++ b/doctr/datasets/funsd.py @@ -20,10 +20,12 @@ class FUNSD(VisionDataset): """FUNSD dataset from `"FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents" `_. - Example:: - >>> from doctr.datasets import FUNSD - >>> train_set = FUNSD(train=True, download=True) - >>> img, target = train_set[0] + .. 
image:: dataset_images/funsd-grid.png + :align: center + + >>> from doctr.datasets import FUNSD + >>> train_set = FUNSD(train=True, download=True) + >>> img, target = train_set[0] Args: train: whether the subset should be the training one diff --git a/doctr/datasets/generator/pytorch.py b/doctr/datasets/generator/pytorch.py index f0e4141d5d..b1b7eafd6d 100644 --- a/doctr/datasets/generator/pytorch.py +++ b/doctr/datasets/generator/pytorch.py @@ -13,10 +13,9 @@ class CharacterGenerator(_CharacterGenerator): """Implements a character image generation dataset - Example:: - >>> from doctr.datasets import CharacterGenerator - >>> ds = CharacterGenerator(vocab='abdef') - >>> img, target = ds[0] + >>> from doctr.datasets import CharacterGenerator + >>> ds = CharacterGenerator(vocab='abdef') + >>> img, target = ds[0] Args: vocab: vocabulary to take the character from @@ -35,10 +34,9 @@ def __init__(self, *args, **kwargs) -> None: class WordGenerator(_WordGenerator): """Implements a character image generation dataset - Example:: - >>> from doctr.datasets import WordGenerator - >>> ds = WordGenerator(vocab='abdef') - >>> img, target = ds[0] + >>> from doctr.datasets import WordGenerator + >>> ds = WordGenerator(vocab='abdef') + >>> img, target = ds[0] Args: vocab: vocabulary to take the character from diff --git a/doctr/datasets/generator/tensorflow.py b/doctr/datasets/generator/tensorflow.py index bb6d09c081..c6bac8f848 100644 --- a/doctr/datasets/generator/tensorflow.py +++ b/doctr/datasets/generator/tensorflow.py @@ -13,10 +13,9 @@ class CharacterGenerator(_CharacterGenerator): """Implements a character image generation dataset - Example:: - >>> from doctr.datasets import CharacterGenerator - >>> ds = CharacterGenerator(vocab='abdef') - >>> img, target = ds[0] + >>> from doctr.datasets import CharacterGenerator + >>> ds = CharacterGenerator(vocab='abdef') + >>> img, target = ds[0] Args: vocab: vocabulary to take the character from diff --git a/doctr/datasets/ic03.py b/doctr/datasets/ic03.py index 2073604e21..acf5cf7de4 100644 --- a/doctr/datasets/ic03.py +++ b/doctr/datasets/ic03.py @@ -18,10 +18,12 @@ class IC03(VisionDataset): """IC03 dataset from `"ICDAR 2003 Robust Reading Competitions: Entries, Results and Future Directions" `_. - Example:: - >>> from doctr.datasets import IC03 - >>> train_set = IC03(train=True, download=True) - >>> img, target = train_set[0] + .. image:: dataset_images/ic03-grid.png + :align: center + + >>> from doctr.datasets import IC03 + >>> train_set = IC03(train=True, download=True) + >>> img, target = train_set[0] Args: train: whether the subset should be the training one diff --git a/doctr/datasets/ic13.py b/doctr/datasets/ic13.py index 404dfaee17..0da4081ef8 100644 --- a/doctr/datasets/ic13.py +++ b/doctr/datasets/ic13.py @@ -19,15 +19,17 @@ class IC13(AbstractDataset): """IC13 dataset from `"ICDAR 2013 Robust Reading Competition" `_. - Example:: - >>> # NOTE: You need to download both image and label parts from Focused Scene Text challenge Task2.1 2013-2015. - >>> from doctr.datasets import IC13 - >>> train_set = IC13(img_folder="/path/to/Challenge2_Training_Task12_Images", - >>> label_folder="/path/to/Challenge2_Training_Task1_GT") - >>> img, target = train_set[0] - >>> test_set = IC13(img_folder="/path/to/Challenge2_Test_Task12_Images", - >>> label_folder="/path/to/Challenge2_Test_Task1_GT") - >>> img, target = test_set[0] + .. 
image:: dataset_images/ic13-grid.png + :align: center + + >>> # NOTE: You need to download both image and label parts from Focused Scene Text challenge Task2.1 2013-2015. + >>> from doctr.datasets import IC13 + >>> train_set = IC13(img_folder="/path/to/Challenge2_Training_Task12_Images", + >>> label_folder="/path/to/Challenge2_Training_Task1_GT") + >>> img, target = train_set[0] + >>> test_set = IC13(img_folder="/path/to/Challenge2_Test_Task12_Images", + >>> label_folder="/path/to/Challenge2_Test_Task1_GT") + >>> img, target = test_set[0] Args: img_folder: folder with all the images of the dataset diff --git a/doctr/datasets/iiit5k.py b/doctr/datasets/iiit5k.py index 55cb1e91fc..c783fd16c7 100644 --- a/doctr/datasets/iiit5k.py +++ b/doctr/datasets/iiit5k.py @@ -21,11 +21,13 @@ class IIIT5K(VisionDataset): `"BMVC 2012 Scene Text Recognition using Higher Order Language Priors" `_. - Example:: - >>> # NOTE: this dataset is for character-level localization - >>> from doctr.datasets import IIIT5K - >>> train_set = IIIT5K(train=True, download=True) - >>> img, target = train_set[0] + .. image:: dataset_images/iiit5k-grid.png + :align: center + + >>> # NOTE: this dataset is for character-level localization + >>> from doctr.datasets import IIIT5K + >>> train_set = IIIT5K(train=True, download=True) + >>> img, target = train_set[0] Args: train: whether the subset should be the training one diff --git a/doctr/datasets/imgur5k.py b/doctr/datasets/imgur5k.py index c75d83d408..e89405216a 100644 --- a/doctr/datasets/imgur5k.py +++ b/doctr/datasets/imgur5k.py @@ -20,17 +20,21 @@ class IMGUR5K(AbstractDataset): """IMGUR5K dataset from `"TextStyleBrush: Transfer of Text Aesthetics from a Single Example" `_ | - `"repository" `_. - - Example:: - >>> # NOTE: You need to download/generate the dataset from the repository. - >>> from doctr.datasets import IMGUR5K - >>> train_set = IMGUR5K(train=True, img_folder="/path/to/IMGUR5K-Handwriting-Dataset/images", - >>> label_path="/path/to/IMGUR5K-Handwriting-Dataset/dataset_info/imgur5k_annotations.json") - >>> img, target = train_set[0] - >>> test_set = IMGUR5K(train=False, img_folder="/path/to/IMGUR5K-Handwriting-Dataset/images", - >>> label_path="/path/to/IMGUR5K-Handwriting-Dataset/dataset_info/imgur5k_annotations.json") - >>> img, target = test_set[0] + `repository `_. + + .. image:: dataset_images/imgur5k-grid.png + :align: center + :width: 630 + :height: 400 + + >>> # NOTE: You need to download/generate the dataset from the repository. 
+ >>> from doctr.datasets import IMGUR5K + >>> train_set = IMGUR5K(train=True, img_folder="/path/to/IMGUR5K-Handwriting-Dataset/images", + >>> label_path="/path/to/IMGUR5K-Handwriting-Dataset/dataset_info/imgur5k_annotations.json") + >>> img, target = train_set[0] + >>> test_set = IMGUR5K(train=False, img_folder="/path/to/IMGUR5K-Handwriting-Dataset/images", + >>> label_path="/path/to/IMGUR5K-Handwriting-Dataset/dataset_info/imgur5k_annotations.json") + >>> img, target = test_set[0] Args: img_folder: folder with all the images of the dataset diff --git a/doctr/datasets/loader.py b/doctr/datasets/loader.py index a435bc345f..d3abe340eb 100644 --- a/doctr/datasets/loader.py +++ b/doctr/datasets/loader.py @@ -34,12 +34,11 @@ def default_collate(samples): class DataLoader: """Implements a dataset wrapper for fast data loading - Example:: - >>> from doctr.datasets import FUNSD, DataLoader - >>> train_set = CORD(train=True, download=True) - >>> train_loader = DataLoader(train_set, batch_size=32) - >>> train_iter = iter(train_loader) - >>> images, targets = next(train_iter) + >>> from doctr.datasets import CORD, DataLoader + >>> train_set = CORD(train=True, download=True) + >>> train_loader = DataLoader(train_set, batch_size=32) + >>> train_iter = iter(train_loader) + >>> images, targets = next(train_iter) Args: dataset: the dataset diff --git a/doctr/datasets/recognition.py b/doctr/datasets/recognition.py index 5c9be584d5..e0ba87254d 100644 --- a/doctr/datasets/recognition.py +++ b/doctr/datasets/recognition.py @@ -16,10 +16,10 @@ class RecognitionDataset(AbstractDataset): """Dataset implementation for text recognition tasks - Example:: - >>> from doctr.datasets import RecognitionDataset - >>> train_set = RecognitionDataset(img_folder="/path/to/images", labels_path="/path/to/labels.json") - >>> img, target = train_set[0] + >>> from doctr.datasets import RecognitionDataset + >>> train_set = RecognitionDataset(img_folder="/path/to/images", + >>> labels_path="/path/to/labels.json") + >>> img, target = train_set[0] Args: img_folder: path to the images folder diff --git a/doctr/datasets/sroie.py b/doctr/datasets/sroie.py index 95e94e69b5..268eee3e30 100644 --- a/doctr/datasets/sroie.py +++ b/doctr/datasets/sroie.py @@ -20,10 +20,12 @@ class SROIE(VisionDataset): """SROIE dataset from `"ICDAR2019 Competition on Scanned Receipt OCR and Information Extraction" `_. - Example:: - >>> from doctr.datasets import SROIE - >>> train_set = SROIE(train=True, download=True) - >>> img, target = train_set[0] + .. image:: dataset_images/sroie-grid.png + :align: center + + >>> from doctr.datasets import SROIE + >>> train_set = SROIE(train=True, download=True) + >>> img, target = train_set[0] Args: train: whether the subset should be the training one diff --git a/doctr/datasets/svhn.py b/doctr/datasets/svhn.py index 3a0096f652..472e24fc40 100644 --- a/doctr/datasets/svhn.py +++ b/doctr/datasets/svhn.py @@ -20,10 +20,12 @@ class SVHN(VisionDataset): """SVHN dataset from `"The Street View House Numbers (SVHN) Dataset" `_. - Example:: - >>> from doctr.datasets import SVHN - >>> train_set = SVHN(train=True, download=True) - >>> img, target = train_set[0] + .. 
image:: dataset_images/svhn-grid.png + :align: center + + >>> from doctr.datasets import SVHN + >>> train_set = SVHN(train=True, download=True) + >>> img, target = train_set[0] Args: train: whether the subset should be the training one diff --git a/doctr/datasets/svt.py b/doctr/datasets/svt.py index 65d5455723..266949475d 100644 --- a/doctr/datasets/svt.py +++ b/doctr/datasets/svt.py @@ -18,10 +18,12 @@ class SVT(VisionDataset): """SVT dataset from `"The Street View Text Dataset - UCSD Computer Vision" `_. - Example:: - >>> from doctr.datasets import SVT - >>> train_set = SVT(train=True, download=True) - >>> img, target = train_set[0] + .. image:: dataset_images/svt-grid.png + :align: center + + >>> from doctr.datasets import SVT + >>> train_set = SVT(train=True, download=True) + >>> img, target = train_set[0] Args: train: whether the subset should be the training one diff --git a/doctr/datasets/synthtext.py b/doctr/datasets/synthtext.py index f83f75a743..7c9699ce5b 100644 --- a/doctr/datasets/synthtext.py +++ b/doctr/datasets/synthtext.py @@ -21,10 +21,12 @@ class SynthText(VisionDataset): `_ | `"repository" `_ | `"website" `_. - Example:: - >>> from doctr.datasets import SynthText - >>> train_set = SynthText(train=True, download=True) - >>> img, target = train_set[0] + .. image:: dataset_images/synthtext-grid.png + :align: center + + >>> from doctr.datasets import SynthText + >>> train_set = SynthText(train=True, download=True) + >>> img, target = train_set[0] Args: train: whether the subset should be the training one diff --git a/doctr/io/html.py b/doctr/io/html.py index 0ae81888e9..8e464bbd4b 100644 --- a/doctr/io/html.py +++ b/doctr/io/html.py @@ -13,9 +13,8 @@ def read_html(url: str, **kwargs: Any) -> bytes: """Read a PDF file and convert it into an image in numpy format - Example:: - >>> from doctr.documents import read_html - >>> doc = read_html("https://www.yoursite.com") + >>> from doctr.documents import read_html + >>> doc = read_html("https://www.yoursite.com") Args: url: URL of the target web page diff --git a/doctr/io/image/base.py b/doctr/io/image/base.py index 14a8856f73..7d4db51970 100644 --- a/doctr/io/image/base.py +++ b/doctr/io/image/base.py @@ -21,9 +21,8 @@ def read_img_as_numpy( ) -> np.ndarray: """Read an image file into numpy format - Example:: - >>> from doctr.documents import read_img - >>> page = read_img("path/to/your/doc.jpg") + >>> from doctr.documents import read_img + >>> page = read_img("path/to/your/doc.jpg") Args: file: the path to the image file diff --git a/doctr/io/pdf.py b/doctr/io/pdf.py index fcbbfcd3ee..7f3066a19c 100644 --- a/doctr/io/pdf.py +++ b/doctr/io/pdf.py @@ -18,9 +18,8 @@ def read_pdf(file: AbstractFile, scale: float = 2, **kwargs: Any) -> List[np.ndarray]: """Read a PDF file and convert it into an image in numpy format - Example:: - >>> from doctr.documents import read_pdf - >>> doc = read_pdf("path/to/your/doc.pdf") + >>> from doctr.documents import read_pdf + >>> doc = read_pdf("path/to/your/doc.pdf") Args: file: the path to the PDF file diff --git a/doctr/io/reader.py b/doctr/io/reader.py index d76169a719..9c001685d3 100644 --- a/doctr/io/reader.py +++ b/doctr/io/reader.py @@ -24,9 +24,8 @@ class DocumentFile: def from_pdf(cls, file: AbstractFile, **kwargs) -> List[np.ndarray]: """Read a PDF file - Example:: - >>> from doctr.documents import DocumentFile - >>> doc = DocumentFile.from_pdf("path/to/your/doc.pdf") + >>> from doctr.documents import DocumentFile + >>> doc = DocumentFile.from_pdf("path/to/your/doc.pdf") Args: file: the path to 
the PDF file or a binary stream @@ -40,9 +39,8 @@ def from_pdf(cls, file: AbstractFile, **kwargs) -> List[np.ndarray]: def from_url(cls, url: str, **kwargs) -> List[np.ndarray]: """Interpret a web page as a PDF document - Example:: - >>> from doctr.documents import DocumentFile - >>> doc = DocumentFile.from_url("https://www.yoursite.com") + >>> from doctr.documents import DocumentFile + >>> doc = DocumentFile.from_url("https://www.yoursite.com") Args: url: the URL of the target web page @@ -56,9 +54,8 @@ def from_url(cls, url: str, **kwargs) -> List[np.ndarray]: def from_images(cls, files: Union[Sequence[AbstractFile], AbstractFile], **kwargs) -> List[np.ndarray]: """Read an image file (or a collection of image files) and convert it into an image in numpy format - Example:: - >>> from doctr.documents import DocumentFile - >>> pages = DocumentFile.from_images(["path/to/your/page1.png", "path/to/your/page2.png"]) + >>> from doctr.documents import DocumentFile + >>> pages = DocumentFile.from_images(["path/to/your/page1.png", "path/to/your/page2.png"]) Args: files: the path to the image file or a binary stream, or a collection of those diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py index 6620cf6f01..f507215478 100644 --- a/doctr/models/_utils.py +++ b/doctr/models/_utils.py @@ -96,13 +96,12 @@ def extract_rcrops( def get_max_width_length_ratio(contour: np.ndarray) -> float: - """ - Get the maximum shape ratio of a contour. + """Get the maximum shape ratio of a contour. + Args: contour: the contour from cv2.findContour Returns: the maximum shape ratio - """ _, (w, h), _ = cv2.minAreaRect(contour) return max(w / h, h / w) @@ -112,13 +111,13 @@ def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_li """Estimate the angle of the general document orientation based on the lines of the document and the assumption that they should be horizontal. - Args: - img: the img to analyze - n_ct: the number of contours used for the orientation estimation - ratio_threshold_for_lines: this is the ratio w/h used to discriminates lines - Returns: - the angle of the general document orientation - """ + Args: + img: the image to analyze + n_ct: the number of contours used for the orientation estimation + ratio_threshold_for_lines: this is the ratio w/h used to discriminate lines + Returns: + the angle of the general document orientation + """ gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) gray_img = cv2.medianBlur(gray_img, 5) thresh = cv2.threshold(gray_img, thresh=0, maxval=255, type=cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1] diff --git a/doctr/models/classification/magc_resnet/pytorch.py b/doctr/models/classification/magc_resnet/pytorch.py index 75e302949c..461a9b9126 100644 --- a/doctr/models/classification/magc_resnet/pytorch.py +++ b/doctr/models/classification/magc_resnet/pytorch.py @@ -135,12 +135,27 @@ def magc_resnet31(pretrained: bool = False, **kwargs: Any) -> ResNet: `"MASTER: Multi-Aspect Non-local Network for Scene Text Recognition", `_. - Example:: - >>> import torch - >>> from doctr.models import magc_resnet31 - >>> model = magc_resnet31(pretrained=False) - >>> input_tensor = torch.rand((1, 3, 224, 224), dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import magc_resnet31 + >>> model = magc_resnet31(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 224, 224, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. 
tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import magc_resnet31 + >>> model = magc_resnet31(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 224, 224), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained diff --git a/doctr/models/classification/magc_resnet/tensorflow.py b/doctr/models/classification/magc_resnet/tensorflow.py index 8062d348fd..73df986db1 100644 --- a/doctr/models/classification/magc_resnet/tensorflow.py +++ b/doctr/models/classification/magc_resnet/tensorflow.py @@ -167,12 +167,27 @@ def magc_resnet31(pretrained: bool = False, **kwargs: Any) -> ResNet: `"MASTER: Multi-Aspect Non-local Network for Scene Text Recognition", `_. - Example:: - >>> import torch - >>> from doctr.models import magc_resnet31 - >>> model = magc_resnet31(pretrained=False) - >>> input_tensor = torch.rand((1, 3, 224, 224), dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import magc_resnet31 + >>> model = magc_resnet31(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 224, 224, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import magc_resnet31 + >>> model = magc_resnet31(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 224, 224), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained diff --git a/doctr/models/classification/mobilenet/pytorch.py b/doctr/models/classification/mobilenet/pytorch.py index f2a89672d5..1c86c29791 100644 --- a/doctr/models/classification/mobilenet/pytorch.py +++ b/doctr/models/classification/mobilenet/pytorch.py @@ -91,12 +91,27 @@ def mobilenet_v3_small(pretrained: bool = False, **kwargs: Any) -> mobilenetv3.M `"Searching for MobileNetV3", `_. - Example:: - >>> import torch - >>> from doctr.models import mobilenet_v3_small - >>> model = mobilenetv3_small(pretrained=False) - >>> input_tensor = torch.rand((1, 3, 32, 32), dtype=torch.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import mobilenet_v3_small + >>> model = mobilenet_v3_small(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import mobilenet_v3_small + >>> model = mobilenet_v3_small(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained @@ -113,12 +128,27 @@ def mobilenet_v3_small_r(pretrained: bool = False, **kwargs: Any) -> mobilenetv3 `"Searching for MobileNetV3", `_, with rectangular pooling. - Example:: - >>> import torch - >>> from doctr.models import mobilenet_v3_small_r - >>> model = mobilenet_v3_small_r(pretrained=False) - >>> input_tensor = torch.rand((1, 3, 32, 32), dtype=torch.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. 
code:: python + + >>> import tensorflow as tf + >>> from doctr.models import mobilenet_v3_small_r + >>> model = mobilenet_v3_small_r(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import mobilenet_v3_small_r + >>> model = mobilenet_v3_small_r(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained @@ -140,12 +170,27 @@ def mobilenet_v3_large(pretrained: bool = False, **kwargs: Any) -> mobilenetv3.M `"Searching for MobileNetV3", `_. - Example:: - >>> import torch - >>> from doctr.models import mobilenetv3_large - >>> model = mobilenetv3_large(pretrained=False) - >>> input_tensor = torch.rand((1, 3, 32, 32), dtype=torch.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import mobilenet_v3_large + >>> model = mobilenet_v3_large(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import mobilenet_v3_large + >>> model = mobilenet_v3_large(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained @@ -161,12 +206,27 @@ def mobilenet_v3_large_r(pretrained: bool = False, **kwargs: Any) -> mobilenetv3 `"Searching for MobileNetV3", `_, with rectangular pooling. - Example:: - >>> import torch - >>> from doctr.models import mobilenet_v3_large_r - >>> model = mobilenet_v3_large_r(pretrained=False) - >>> input_tensor = torch.rand((1, 3, 32, 32), dtype=torch.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import mobilenet_v3_large_r + >>> model = mobilenet_v3_large_r(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import mobilenet_v3_large_r + >>> model = mobilenet_v3_large_r(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained @@ -187,12 +247,27 @@ def mobilenet_v3_small_orientation(pretrained: bool = False, **kwargs: Any) -> m `"Searching for MobileNetV3", `_. - Example:: - >>> import tensorflow as tf - >>> from doctr.models import mobilenet_v3_small_orientation - >>> model = mobilenet_v3_small_orientation(pretrained=False) - >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import mobilenet_v3_small_orientation + >>> model = mobilenet_v3_small_orientation(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. 
code:: python + + >>> import torch + >>> from doctr.models import mobilenet_v3_small_orientation + >>> model = mobilenet_v3_small_orientation(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained diff --git a/doctr/models/classification/mobilenet/tensorflow.py b/doctr/models/classification/mobilenet/tensorflow.py index d1328327a3..9aecde895a 100644 --- a/doctr/models/classification/mobilenet/tensorflow.py +++ b/doctr/models/classification/mobilenet/tensorflow.py @@ -282,12 +282,27 @@ def mobilenet_v3_small(pretrained: bool = False, **kwargs: Any) -> MobileNetV3: `"Searching for MobileNetV3", `_. - Example:: - >>> import tensorflow as tf - >>> from doctr.models import mobilenetv3_large - >>> model = mobilenetv3_small(pretrained=False) - >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import mobilenet_v3_small + >>> model = mobilenet_v3_small(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import mobilenet_v3_small + >>> model = mobilenet_v3_small(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained @@ -304,12 +319,27 @@ def mobilenet_v3_small_r(pretrained: bool = False, **kwargs: Any) -> MobileNetV3 `"Searching for MobileNetV3", `_, with rectangular pooling. - Example:: - >>> import tensorflow as tf - >>> from doctr.models import mobilenet_v3_small_r - >>> model = mobilenet_v3_small_r(pretrained=False) - >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import mobilenet_v3_small_r + >>> model = mobilenet_v3_small_r(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import mobilenet_v3_small_r + >>> model = mobilenet_v3_small_r(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained @@ -326,12 +356,27 @@ def mobilenet_v3_large(pretrained: bool = False, **kwargs: Any) -> MobileNetV3: `"Searching for MobileNetV3", `_. - Example:: - >>> import tensorflow as tf - >>> from doctr.models import mobilenetv3_large - >>> model = mobilenetv3_large(pretrained=False) - >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import mobilenet_v3_large + >>> model = mobilenet_v3_large(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. 
code:: python + + >>> import torch + >>> from doctr.models import mobilenet_v3_large + >>> model = mobilenet_v3_large(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained @@ -347,12 +392,27 @@ def mobilenet_v3_large_r(pretrained: bool = False, **kwargs: Any) -> MobileNetV3 `"Searching for MobileNetV3", `_. - Example:: - >>> import tensorflow as tf - >>> from doctr.models import mobilenet_v3_large_r - >>> model = mobilenet_v3_large_r(pretrained=False) - >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import mobilenet_v3_large_r + >>> model = mobilenet_v3_large_r(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import mobilenet_v3_large_r + >>> model = mobilenet_v3_large_r(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained @@ -368,12 +428,27 @@ def mobilenet_v3_small_orientation(pretrained: bool = False, **kwargs: Any) -> M `"Searching for MobileNetV3", `_. - Example:: - >>> import tensorflow as tf - >>> from doctr.models import mobilenet_v3_small_orientation - >>> model = mobilenet_v3_small_orientation(pretrained=False) - >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import mobilenet_v3_small_orientation + >>> model = mobilenet_v3_small_orientation(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import mobilenet_v3_small_orientation + >>> model = mobilenet_v3_small_orientation(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained diff --git a/doctr/models/classification/resnet/pytorch.py b/doctr/models/classification/resnet/pytorch.py index b77510cd3d..269c62f577 100644 --- a/doctr/models/classification/resnet/pytorch.py +++ b/doctr/models/classification/resnet/pytorch.py @@ -192,12 +192,27 @@ def resnet18(pretrained: bool = False, **kwargs: Any) -> TVResNet: """ResNet-18 architecture as described in `"Deep Residual Learning for Image Recognition", `_. - Example:: - >>> import torch - >>> from doctr.models import resnet18 - >>> model = resnet18(pretrained=False) - >>> input_tensor = torch.rand((1, 3, 224, 224), dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import resnet18 + >>> model = resnet18(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. 
code:: python + + >>> import torch + >>> from doctr.models import resnet18 + >>> model = resnet18(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained @@ -214,12 +229,27 @@ def resnet31(pretrained: bool = False, **kwargs: Any) -> ResNet: `"Show, Attend and Read:A Simple and Strong Baseline for Irregular Text Recognition", `_. Downsizing: (H, W) --> (H/8, W/4) - Example:: - >>> import torch - >>> from doctr.models import resnet31 - >>> model = resnet31(pretrained=False) - >>> input_tensor = torch.rand((1, 3, 224, 224), dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import resnet31 + >>> model = resnet31(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import resnet31 + >>> model = resnet31(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained @@ -246,12 +276,27 @@ def resnet34(pretrained: bool = False, **kwargs: Any) -> TVResNet: """ResNet-34 architecture as described in `"Deep Residual Learning for Image Recognition", `_. - Example:: - >>> import torch - >>> from doctr.models import resnet34 - >>> model = resnet34(pretrained=False) - >>> input_tensor = torch.rand((1, 3, 224, 224), dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import resnet34 + >>> model = resnet34(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import resnet34 + >>> model = resnet34(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained @@ -267,12 +312,27 @@ def resnet34_wide(pretrained: bool = False, **kwargs: Any) -> ResNet: """ResNet-34 architecture as described in `"Deep Residual Learning for Image Recognition", `_ with twice as many output channels. - Example:: - >>> import torch - >>> from doctr.models import resnet34_wide - >>> model = resnet34_wide(pretrained=False) - >>> input_tensor = torch.rand((1, 3, 224, 224), dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import resnet34_wide + >>> model = resnet34_wide(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import resnet34_wide + >>> model = resnet34_wide(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained @@ -299,12 +359,27 @@ def resnet50(pretrained: bool = False, **kwargs: Any) -> TVResNet: """ResNet-50 architecture as described in `"Deep Residual Learning for Image Recognition", `_. 
- Example:: - >>> import torch - >>> from doctr.models import resnet50 - >>> model = resnet50(pretrained=False) - >>> input_tensor = torch.rand((1, 3, 224, 224), dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import resnet50 + >>> model = resnet50(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import resnet50 + >>> model = resnet50(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained diff --git a/doctr/models/classification/resnet/tensorflow.py b/doctr/models/classification/resnet/tensorflow.py index 6a17943ec3..c6d2a5bc0a 100644 --- a/doctr/models/classification/resnet/tensorflow.py +++ b/doctr/models/classification/resnet/tensorflow.py @@ -227,12 +227,27 @@ def resnet18(pretrained: bool = False, **kwargs: Any) -> ResNet: """Resnet-18 architecture as described in `"Deep Residual Learning for Image Recognition", `_. - Example:: - >>> import tensorflow as tf - >>> from doctr.models import resnet18 - >>> model = resnet18(pretrained=False) - >>> input_tensor = tf.random.uniform(shape=[1, 224, 224, 3], maxval=1, dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import resnet18 + >>> model = resnet18(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import resnet18 + >>> model = resnet18(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained @@ -259,12 +274,27 @@ def resnet31(pretrained: bool = False, **kwargs: Any) -> ResNet: `"Show, Attend and Read:A Simple and Strong Baseline for Irregular Text Recognition", `_. Downsizing: (H, W) --> (H/8, W/4) - Example:: - >>> import tensorflow as tf - >>> from doctr.models import resnet31 - >>> model = resnet31(pretrained=False) - >>> input_tensor = tf.random.uniform(shape=[1, 224, 224, 3], maxval=1, dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import resnet31 + >>> model = resnet31(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import resnet31 + >>> model = resnet31(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained @@ -291,12 +321,27 @@ def resnet34(pretrained: bool = False, **kwargs: Any) -> ResNet: """Resnet-34 architecture as described in `"Deep Residual Learning for Image Recognition", `_. 
- Example:: - >>> import tensorflow as tf - >>> from doctr.models import resnet34 - >>> model = resnet34(pretrained=False) - >>> input_tensor = tf.random.uniform(shape=[1, 224, 224, 3], maxval=1, dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import resnet34 + >>> model = resnet34(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import resnet34 + >>> model = resnet34(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained @@ -322,12 +367,27 @@ def resnet50(pretrained: bool = False, **kwargs: Any) -> ResNet: """Resnet-50 architecture as described in `"Deep Residual Learning for Image Recognition", `_. - Example:: - >>> import tensorflow as tf - >>> from doctr.models import resnet50 - >>> model = resnet50(pretrained=False) - >>> input_tensor = tf.random.uniform(shape=[1, 224, 224, 3], maxval=1, dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import resnet50 + >>> model = resnet50(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import resnet50 + >>> model = resnet50(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained @@ -359,12 +419,27 @@ def resnet34_wide(pretrained: bool = False, **kwargs: Any) -> ResNet: """Resnet-34 architecture as described in `"Deep Residual Learning for Image Recognition", `_ with twice as many output channels for each stage. - Example:: - >>> import tensorflow as tf - >>> from doctr.models import resnet34_wide - >>> model = resnet34_wide(pretrained=False) - >>> input_tensor = tf.random.uniform(shape=[1, 224, 224, 3], maxval=1, dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import resnet34_wide + >>> model = resnet34_wide(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import resnet34_wide + >>> model = resnet34_wide(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained: boolean, True if model is pretrained diff --git a/doctr/models/classification/vgg/pytorch.py b/doctr/models/classification/vgg/pytorch.py index abf133e687..721a5d1760 100644 --- a/doctr/models/classification/vgg/pytorch.py +++ b/doctr/models/classification/vgg/pytorch.py @@ -58,12 +58,27 @@ def vgg16_bn_r(pretrained: bool = False, **kwargs: Any) -> tv_vgg.VGG: `_, modified by adding batch normalization, rectangular pooling and a simpler classification head. 
-    Example::
-        >>> import torch
-        >>> from doctr.models import vgg16_bn_r
-        >>> model = vgg16_bn_r(pretrained=False)
-        >>> input_tensor = torch.rand((1, 3, 224, 224), dtype=torch.float32)
-        >>> out = model(input_tensor)
+    .. tabs::
+
+        .. tab:: TensorFlow
+
+            .. code:: python
+
+                >>> import tensorflow as tf
+                >>> from doctr.models import vgg16_bn_r
+                >>> model = vgg16_bn_r(pretrained=False)
+                >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32)
+                >>> out = model(input_tensor)
+
+        .. tab:: PyTorch
+
+            .. code:: python
+
+                >>> import torch
+                >>> from doctr.models import vgg16_bn_r
+                >>> model = vgg16_bn_r(pretrained=False)
+                >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32)
+                >>> out = model(input_tensor)

     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet
diff --git a/doctr/models/classification/vgg/tensorflow.py b/doctr/models/classification/vgg/tensorflow.py
index 39e4f940ea..82c2a6123b 100644
--- a/doctr/models/classification/vgg/tensorflow.py
+++ b/doctr/models/classification/vgg/tensorflow.py
@@ -91,12 +91,27 @@ def vgg16_bn_r(pretrained: bool = False, **kwargs: Any) -> VGG:
     `_, modified by adding batch normalization, rectangular pooling and a simpler classification head.

-    Example::
-        >>> import tensorflow as tf
-        >>> from doctr.models import vgg16_bn_r
-        >>> model = vgg16_bn_r(pretrained=False)
-        >>> input_tensor = tf.random.uniform(shape=[1, 224, 224, 3], maxval=1, dtype=tf.float32)
-        >>> out = model(input_tensor)
+    .. tabs::
+
+        .. tab:: TensorFlow
+
+            .. code:: python
+
+                >>> import tensorflow as tf
+                >>> from doctr.models import vgg16_bn_r
+                >>> model = vgg16_bn_r(pretrained=False)
+                >>> input_tensor = tf.random.uniform(shape=[1, 512, 512, 3], maxval=1, dtype=tf.float32)
+                >>> out = model(input_tensor)
+
+        .. tab:: PyTorch
+
+            .. code:: python
+
+                >>> import torch
+                >>> from doctr.models import vgg16_bn_r
+                >>> model = vgg16_bn_r(pretrained=False)
+                >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32)
+                >>> out = model(input_tensor)

     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet
diff --git a/doctr/models/classification/zoo.py b/doctr/models/classification/zoo.py
index 8450129a14..5078a2c458 100644
--- a/doctr/models/classification/zoo.py
+++ b/doctr/models/classification/zoo.py
@@ -49,12 +49,11 @@ def crop_orientation_predictor(
 ) -> CropOrientationPredictor:
     """Orientation classification architecture.

-    Example::
-        >>> import numpy as np
-        >>> from doctr.models import crop_orientation_predictor
-        >>> model = crop_orientation_predictor(arch='classif_mobilenet_v3_small', pretrained=True)
-        >>> input_crop = (255 * np.random.rand(600, 800, 3)).astype(np.uint8)
-        >>> out = model([input_crop])
+    >>> import numpy as np
+    >>> from doctr.models import crop_orientation_predictor
+    >>> model = crop_orientation_predictor(arch='mobilenet_v3_small_orientation', pretrained=True)
+    >>> input_crop = (255 * np.random.rand(600, 800, 3)).astype(np.uint8)
+    >>> out = model([input_crop])

     Args:
         arch: name of the architecture to use (e.g.
'mobilenet_v3_small') diff --git a/doctr/models/detection/differentiable_binarization/pytorch.py b/doctr/models/detection/differentiable_binarization/pytorch.py index dbe49b0be9..f64fd17f08 100644 --- a/doctr/models/detection/differentiable_binarization/pytorch.py +++ b/doctr/models/detection/differentiable_binarization/pytorch.py @@ -291,12 +291,17 @@ def db_resnet34(pretrained: bool = False, **kwargs: Any) -> DBNet: """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization" `_, using a ResNet-34 backbone. - Example:: - >>> import torch - >>> from doctr.models import db_resnet34 - >>> model = db_resnet34(pretrained=True) - >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import db_resnet34 + >>> model = db_resnet34(pretrained=True) + >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained (bool): If True, returns a model pre-trained on our text detection dataset @@ -319,12 +324,27 @@ def db_resnet50(pretrained: bool = False, **kwargs: Any) -> DBNet: """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization" `_, using a ResNet-50 backbone. - Example:: - >>> import torch - >>> from doctr.models import db_resnet50 - >>> model = db_resnet50(pretrained=True) - >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import db_resnet50 + >>> model = db_resnet50(pretrained=True) + >>> input_tensor = tf.random.uniform(shape=[1, 1024, 1024, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import db_resnet50 + >>> model = db_resnet50(pretrained=True) + >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained (bool): If True, returns a model pre-trained on our text detection dataset @@ -347,12 +367,27 @@ def db_mobilenet_v3_large(pretrained: bool = False, **kwargs: Any) -> DBNet: """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization" `_, using a MobileNet V3 Large backbone. - Example:: - >>> import torch - >>> from doctr.models import db_mobilenet_v3_large - >>> model = db_mobilenet_v3_large(pretrained=True) - >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import db_mobilenet_v3_large + >>> model = db_mobilenet_v3_large(pretrained=True) + >>> input_tensor = tf.random.uniform(shape=[1, 1024, 1024, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import db_mobilenet_v3_large + >>> model = db_mobilenet_v3_large(pretrained=True) + >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained (bool): If True, returns a model pre-trained on our text detection dataset @@ -376,12 +411,17 @@ def db_resnet50_rotation(pretrained: bool = False, **kwargs: Any) -> DBNet: `_, using a ResNet-50 backbone. 
This model is trained with rotated documents - Example:: - >>> import torch - >>> from doctr.models import db_resnet50_rotation - >>> model = db_resnet50_rotation(pretrained=True) - >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import db_resnet50_rotation + >>> model = db_resnet50_rotation(pretrained=True) + >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained (bool): If True, returns a model pre-trained on our text detection dataset diff --git a/doctr/models/detection/differentiable_binarization/tensorflow.py b/doctr/models/detection/differentiable_binarization/tensorflow.py index 184f84cb59..fb521bc51f 100644 --- a/doctr/models/detection/differentiable_binarization/tensorflow.py +++ b/doctr/models/detection/differentiable_binarization/tensorflow.py @@ -321,12 +321,27 @@ def db_resnet50(pretrained: bool = False, **kwargs: Any) -> DBNet: """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization" `_, using a ResNet-50 backbone. - Example:: - >>> import tensorflow as tf - >>> from doctr.models import db_resnet50 - >>> model = db_resnet50(pretrained=True) - >>> input_tensor = tf.random.uniform(shape=[1, 1024, 1024, 3], maxval=1, dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import db_resnet50 + >>> model = db_resnet50(pretrained=True) + >>> input_tensor = tf.random.uniform(shape=[1, 1024, 1024, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import db_resnet50 + >>> model = db_resnet50(pretrained=True) + >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained (bool): If True, returns a model pre-trained on our text detection dataset @@ -348,12 +363,27 @@ def db_mobilenet_v3_large(pretrained: bool = False, **kwargs: Any) -> DBNet: """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization" `_, using a mobilenet v3 large backbone. - Example:: - >>> import tensorflow as tf - >>> from doctr.models import db_mobilenet_v3_large - >>> model = db_mobilenet_v3_large(pretrained=True) - >>> input_tensor = tf.random.uniform(shape=[1, 1024, 1024, 3], maxval=1, dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import db_mobilenet_v3_large + >>> model = db_mobilenet_v3_large(pretrained=True) + >>> input_tensor = tf.random.uniform(shape=[1, 1024, 1024, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. 
code:: python + + >>> import torch + >>> from doctr.models import db_mobilenet_v3_large + >>> model = db_mobilenet_v3_large(pretrained=True) + >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32) + >>> out = model(input_tensor) Args: pretrained (bool): If True, returns a model pre-trained on our text detection dataset diff --git a/doctr/models/detection/linknet/pytorch.py b/doctr/models/detection/linknet/pytorch.py index 048c370391..bef1718408 100644 --- a/doctr/models/detection/linknet/pytorch.py +++ b/doctr/models/detection/linknet/pytorch.py @@ -248,12 +248,27 @@ def linknet_resnet18(pretrained: bool = False, **kwargs: Any) -> LinkNet: """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation" `_. - Example:: - >>> import torch - >>> from doctr.models import linknet_resnet18 - >>> model = linknet_resnet18(pretrained=True).eval() - >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32) - >>> with torch.no_grad(): out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import linknet_resnet18 + >>> model = linknet_resnet18(pretrained=True) + >>> input_tensor = tf.random.uniform(shape=[1, 1024, 1024, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import linknet_resnet18 + >>> model = linknet_resnet18(pretrained=True).eval() + >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32) + >>> with torch.no_grad(): out = model(input_tensor) Args: pretrained (bool): If True, returns a model pre-trained on our text detection dataset @@ -269,12 +284,27 @@ def linknet_resnet34(pretrained: bool = False, **kwargs: Any) -> LinkNet: """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation" `_. - Example:: - >>> import torch - >>> from doctr.models import linknet_resnet34 - >>> model = linknet_resnet34(pretrained=True).eval() - >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32) - >>> with torch.no_grad(): out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import linknet_resnet34 + >>> model = linknet_resnet34(pretrained=True) + >>> input_tensor = tf.random.uniform(shape=[1, 1024, 1024, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import linknet_resnet34 + >>> model = linknet_resnet34(pretrained=True).eval() + >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32) + >>> with torch.no_grad(): out = model(input_tensor) Args: pretrained (bool): If True, returns a model pre-trained on our text detection dataset @@ -290,12 +320,27 @@ def linknet_resnet50(pretrained: bool = False, **kwargs: Any) -> LinkNet: """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation" `_. - Example:: - >>> import torch - >>> from doctr.models import linknet_resnet50 - >>> model = linknet_resnet50(pretrained=True).eval() - >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32) - >>> with torch.no_grad(): out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. 
code:: python
+
+                >>> import tensorflow as tf
+                >>> from doctr.models import linknet_resnet50
+                >>> model = linknet_resnet50(pretrained=True)
+                >>> input_tensor = tf.random.uniform(shape=[1, 1024, 1024, 3], maxval=1, dtype=tf.float32)
+                >>> out = model(input_tensor)
+
+        .. tab:: PyTorch
+
+            .. code:: python
+
+                >>> import torch
+                >>> from doctr.models import linknet_resnet50
+                >>> model = linknet_resnet50(pretrained=True).eval()
+                >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32)
+                >>> with torch.no_grad(): out = model(input_tensor)

     Args:
         pretrained (bool): If True, returns a model pre-trained on our text detection dataset
diff --git a/doctr/models/detection/linknet/tensorflow.py b/doctr/models/detection/linknet/tensorflow.py
index 22797dc2d8..285c25c545 100644
--- a/doctr/models/detection/linknet/tensorflow.py
+++ b/doctr/models/detection/linknet/tensorflow.py
@@ -269,12 +269,27 @@ def linknet_resnet18(pretrained: bool = False, **kwargs: Any) -> LinkNet:
     """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation" `_.

-    Example::
-        >>> import tensorflow as tf
-        >>> from doctr.models import linknet_resnet18
-        >>> model = linknet_resnet18(pretrained=True)
-        >>> input_tensor = tf.random.uniform(shape=[1, 1024, 1024, 3], maxval=1, dtype=tf.float32)
-        >>> out = model(input_tensor)
+    .. tabs::
+
+        .. tab:: TensorFlow
+
+            .. code:: python
+
+                >>> import tensorflow as tf
+                >>> from doctr.models import linknet_resnet18
+                >>> model = linknet_resnet18(pretrained=True)
+                >>> input_tensor = tf.random.uniform(shape=[1, 1024, 1024, 3], maxval=1, dtype=tf.float32)
+                >>> out = model(input_tensor)
+
+        .. tab:: PyTorch
+
+            .. code:: python
+
+                >>> import torch
+                >>> from doctr.models import linknet_resnet18
+                >>> model = linknet_resnet18(pretrained=True).eval()
+                >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32)
+                >>> with torch.no_grad(): out = model(input_tensor)

     Args:
         pretrained (bool): If True, returns a model pre-trained on our text detection dataset
@@ -296,12 +311,17 @@ def linknet_resnet18_rotation(pretrained: bool = False, **kwargs: Any) -> LinkNe
     """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation" `_.

-    Example::
-        >>> import tensorflow as tf
-        >>> from doctr.models import linknet_resnet18
-        >>> model = linknet_resnet18(pretrained=True)
-        >>> input_tensor = tf.random.uniform(shape=[1, 1024, 1024, 3], maxval=1, dtype=tf.float32)
-        >>> out = model(input_tensor)
+    .. tabs::
+
+        .. tab:: TensorFlow
+
+            .. code:: python
+
+                >>> import tensorflow as tf
+                >>> from doctr.models import linknet_resnet18_rotation
+                >>> model = linknet_resnet18_rotation(pretrained=True)
+                >>> input_tensor = tf.random.uniform(shape=[1, 1024, 1024, 3], maxval=1, dtype=tf.float32)
+                >>> out = model(input_tensor)

     Args:
         pretrained (bool): If True, returns a model pre-trained on our text detection dataset
@@ -323,12 +343,27 @@ def linknet_resnet34(pretrained: bool = False, **kwargs: Any) -> LinkNet:
     """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation" `_.

-    Example::
-        >>> import tensorflow as tf
-        >>> from doctr.models import linknet_resnet34
-        >>> model = linknet_resnet34(pretrained=True)
-        >>> input_tensor = tf.random.uniform(shape=[1, 1024, 1024, 3], maxval=1, dtype=tf.float32)
-        >>> out = model(input_tensor)
+    .. tabs::
+
+        .. tab:: TensorFlow
+
+            ..
code:: python + + >>> import tensorflow as tf + >>> from doctr.models import linknet_resnet34 + >>> model = linknet_resnet34(pretrained=True) + >>> input_tensor = tf.random.uniform(shape=[1, 1024, 1024, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import linknet_resnet34 + >>> model = linknet_resnet34(pretrained=True).eval() + >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32) + >>> with torch.no_grad(): out = model(input_tensor) Args: pretrained (bool): If True, returns a model pre-trained on our text detection dataset @@ -350,12 +385,27 @@ def linknet_resnet50(pretrained: bool = False, **kwargs: Any) -> LinkNet: """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation" `_. - Example:: - >>> import tensorflow as tf - >>> from doctr.models import linknet_resnet50 - >>> model = linknet_resnet50(pretrained=True) - >>> input_tensor = tf.random.uniform(shape=[1, 1024, 1024, 3], maxval=1, dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import linknet_resnet50 + >>> model = linknet_resnet50(pretrained=True) + >>> input_tensor = tf.random.uniform(shape=[1, 1024, 1024, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import linknet_resnet50 + >>> model = linknet_resnet50(pretrained=True).eval() + >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32) + >>> with torch.no_grad(): out = model(input_tensor) Args: pretrained (bool): If True, returns a model pre-trained on our text detection dataset diff --git a/doctr/models/detection/zoo.py b/doctr/models/detection/zoo.py index d4244d869f..3b286422c0 100644 --- a/doctr/models/detection/zoo.py +++ b/doctr/models/detection/zoo.py @@ -57,12 +57,11 @@ def detection_predictor( ) -> DetectionPredictor: """Text detection architecture. - Example:: - >>> import numpy as np - >>> from doctr.models import detection_predictor - >>> model = detection_predictor(arch='db_resnet50', pretrained=True) - >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8) - >>> out = model([input_page]) + >>> import numpy as np + >>> from doctr.models import detection_predictor + >>> model = detection_predictor(arch='db_resnet50', pretrained=True) + >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8) + >>> out = model([input_page]) Args: arch: name of the architecture to use (e.g. 'db_resnet50') diff --git a/doctr/models/obj_detection/faster_rcnn/pytorch.py b/doctr/models/obj_detection/faster_rcnn/pytorch.py index 03da09a230..9f494d3a26 100644 --- a/doctr/models/obj_detection/faster_rcnn/pytorch.py +++ b/doctr/models/obj_detection/faster_rcnn/pytorch.py @@ -60,12 +60,17 @@ def fasterrcnn_mobilenet_v3_large_fpn(pretrained: bool = False, **kwargs: Any) - """Faster-RCNN architecture with a MobileNet V3 backbone as described in `"Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks" `_. - Example:: - >>> import torch - >>> from doctr.models.obj_detection import fasterrcnn_mobilenet_v3_large_fpn - >>> model = fasterrcnn_mobilenet_v3_large_fpn(pretrained=True).eval() - >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32) - >>> with torch.no_grad(): out = model(input_tensor) + .. tabs:: + + .. 
tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models.obj_detection import fasterrcnn_mobilenet_v3_large_fpn + >>> model = fasterrcnn_mobilenet_v3_large_fpn(pretrained=True).eval() + >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32) + >>> with torch.no_grad(): out = model(input_tensor) Args: pretrained (bool): If True, returns a model pre-trained on our object detection dataset diff --git a/doctr/models/recognition/crnn/pytorch.py b/doctr/models/recognition/crnn/pytorch.py index 693a963c33..46477da814 100644 --- a/doctr/models/recognition/crnn/pytorch.py +++ b/doctr/models/recognition/crnn/pytorch.py @@ -249,12 +249,27 @@ def crnn_vgg16_bn(pretrained: bool = False, **kwargs: Any) -> CRNN: """CRNN with a VGG-16 backbone as described in `"An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition" `_. - Example:: - >>> import torch - >>> from doctr.models import crnn_vgg16_bn - >>> model = crnn_vgg16_bn(pretrained=True) - >>> input_tensor = torch.rand(1, 3, 32, 128) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import crnn_vgg16_bn + >>> model = crnn_vgg16_bn(pretrained=True) + >>> input_tensor = tf.random.uniform(shape=[1, 32, 128, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import crnn_vgg16_bn + >>> model = crnn_vgg16_bn(pretrained=True) + >>> input_tensor = torch.rand(1, 3, 32, 128) + >>> out = model(input_tensor) Args: pretrained (bool): If True, returns a model pre-trained on our text recognition dataset @@ -270,12 +285,27 @@ def crnn_mobilenet_v3_small(pretrained: bool = False, **kwargs: Any) -> CRNN: """CRNN with a MobileNet V3 Small backbone as described in `"An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition" `_. - Example:: - >>> import torch - >>> from doctr.models import crnn_mobilenet_v3_small - >>> model = crnn_mobilenet_v3_small(pretrained=True) - >>> input_tensor = torch.rand(1, 3, 32, 128) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import crnn_mobilenet_v3_small + >>> model = crnn_mobilenet_v3_small(pretrained=True) + >>> input_tensor = tf.random.uniform(shape=[1, 32, 128, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import crnn_mobilenet_v3_small + >>> model = crnn_mobilenet_v3_small(pretrained=True) + >>> input_tensor = torch.rand(1, 3, 32, 128) + >>> out = model(input_tensor) Args: pretrained (bool): If True, returns a model pre-trained on our text recognition dataset @@ -291,12 +321,27 @@ def crnn_mobilenet_v3_large(pretrained: bool = False, **kwargs: Any) -> CRNN: """CRNN with a MobileNet V3 Large backbone as described in `"An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition" `_. - Example:: - >>> import torch - >>> from doctr.models import crnn_mobilenet_v3_large - >>> model = crnn_mobilenet_v3_large(pretrained=True) - >>> input_tensor = torch.rand(1, 3, 32, 128) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. 
code:: python + + >>> import tensorflow as tf + >>> from doctr.models import crnn_mobilenet_v3_large + >>> model = crnn_mobilenet_v3_large(pretrained=True) + >>> input_tensor = tf.random.uniform(shape=[1, 32, 128, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import crnn_mobilenet_v3_large + >>> model = crnn_mobilenet_v3_large(pretrained=True) + >>> input_tensor = torch.rand(1, 3, 32, 128) + >>> out = model(input_tensor) Args: pretrained (bool): If True, returns a model pre-trained on our text recognition dataset diff --git a/doctr/models/recognition/crnn/tensorflow.py b/doctr/models/recognition/crnn/tensorflow.py index 89b44f2a8e..22070e8f2f 100644 --- a/doctr/models/recognition/crnn/tensorflow.py +++ b/doctr/models/recognition/crnn/tensorflow.py @@ -219,12 +219,27 @@ def crnn_vgg16_bn(pretrained: bool = False, **kwargs: Any) -> CRNN: """CRNN with a VGG-16 backbone as described in `"An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition" `_. - Example:: - >>> import tensorflow as tf - >>> from doctr.models import crnn_vgg16_bn - >>> model = crnn_vgg16_bn(pretrained=True) - >>> input_tensor = tf.random.uniform(shape=[1, 32, 128, 3], maxval=1, dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import crnn_vgg16_bn + >>> model = crnn_vgg16_bn(pretrained=True) + >>> input_tensor = tf.random.uniform(shape=[1, 32, 128, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import crnn_vgg16_bn + >>> model = crnn_vgg16_bn(pretrained=True) + >>> input_tensor = torch.rand(1, 3, 32, 128) + >>> out = model(input_tensor) Args: pretrained (bool): If True, returns a model pre-trained on our text recognition dataset @@ -240,12 +255,27 @@ def crnn_mobilenet_v3_small(pretrained: bool = False, **kwargs: Any) -> CRNN: """CRNN with a MobileNet V3 Small backbone as described in `"An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition" `_. - Example:: - >>> import tensorflow as tf - >>> from doctr.models import crnn_mobilenet_v3_small - >>> model = crnn_mobilenet_v3_small(pretrained=True) - >>> input_tensor = tf.random.uniform(shape=[1, 32, 128, 3], maxval=1, dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import crnn_mobilenet_v3_small + >>> model = crnn_mobilenet_v3_small(pretrained=True) + >>> input_tensor = tf.random.uniform(shape=[1, 32, 128, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import crnn_mobilenet_v3_small + >>> model = crnn_mobilenet_v3_small(pretrained=True) + >>> input_tensor = torch.rand(1, 3, 32, 128) + >>> out = model(input_tensor) Args: pretrained (bool): If True, returns a model pre-trained on our text recognition dataset @@ -261,12 +291,27 @@ def crnn_mobilenet_v3_large(pretrained: bool = False, **kwargs: Any) -> CRNN: """CRNN with a MobileNet V3 Large backbone as described in `"An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition" `_. 
- Example:: - >>> import tensorflow as tf - >>> from doctr.models import crnn_mobilenet_v3_large - >>> model = crnn_mobilenet_v3_large(pretrained=True) - >>> input_tensor = tf.random.uniform(shape=[1, 32, 128, 3], maxval=1, dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import crnn_mobilenet_v3_large + >>> model = crnn_mobilenet_v3_large(pretrained=True) + >>> input_tensor = tf.random.uniform(shape=[1, 32, 128, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import crnn_mobilenet_v3_large + >>> model = crnn_mobilenet_v3_large(pretrained=True) + >>> input_tensor = torch.rand(1, 3, 32, 128) + >>> out = model(input_tensor) Args: pretrained (bool): If True, returns a model pre-trained on our text recognition dataset diff --git a/doctr/models/recognition/master/pytorch.py b/doctr/models/recognition/master/pytorch.py index 6b9b37d005..f5875efe11 100644 --- a/doctr/models/recognition/master/pytorch.py +++ b/doctr/models/recognition/master/pytorch.py @@ -280,14 +280,32 @@ def _master( def master(pretrained: bool = False, **kwargs: Any) -> MASTER: """MASTER as described in paper: `_. - Example:: - >>> import torch - >>> from doctr.models import master - >>> model = master(pretrained=False) - >>> input_tensor = torch.rand((1, 3, 48, 160)) - >>> out = model(input_tensor) + + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import master + >>> model = master(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 48, 160, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. code:: python + + >>> import torch + >>> from doctr.models import master + >>> model = master(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 48, 160)) + >>> out = model(input_tensor) + Args: pretrained (bool): If True, returns a model pre-trained on our text recognition dataset + Returns: text recognition architecture """ diff --git a/doctr/models/recognition/master/tensorflow.py b/doctr/models/recognition/master/tensorflow.py index 30a1ccc662..c109fb2ad0 100644 --- a/doctr/models/recognition/master/tensorflow.py +++ b/doctr/models/recognition/master/tensorflow.py @@ -283,12 +283,27 @@ def _master( def master(pretrained: bool = False, **kwargs: Any) -> MASTER: """MASTER as described in paper: `_. - Example:: - >>> import tensorflow as tf - >>> from doctr.models import master - >>> model = master(pretrained=False) - >>> input_tensor = tf.random.uniform(shape=[1, 48, 160, 3], maxval=1, dtype=tf.float32) - >>> out = model(input_tensor) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> import tensorflow as tf + >>> from doctr.models import master + >>> model = master(pretrained=False) + >>> input_tensor = tf.random.uniform(shape=[1, 48, 160, 3], maxval=1, dtype=tf.float32) + >>> out = model(input_tensor) + + .. tab:: PyTorch + + .. 
code:: python
+
+                >>> import torch
+                >>> from doctr.models import master
+                >>> model = master(pretrained=False)
+                >>> input_tensor = torch.rand((1, 3, 48, 160))
+                >>> out = model(input_tensor)

     Args:
         pretrained (bool): If True, returns a model pre-trained on our text recognition dataset
diff --git a/doctr/models/recognition/sar/pytorch.py b/doctr/models/recognition/sar/pytorch.py
index 0f818948c4..0469700e39 100644
--- a/doctr/models/recognition/sar/pytorch.py
+++ b/doctr/models/recognition/sar/pytorch.py
@@ -332,12 +332,27 @@ def sar_resnet31(pretrained: bool = False, **kwargs: Any) -> SAR:
     """SAR with a resnet-31 feature extractor as described in `"Show, Attend and Read:A Simple and Strong Baseline for Irregular Text Recognition" `_.

-    Example:
-        >>> import torch
-        >>> from doctr.models import sar_resnet31
-        >>> model = sar_resnet31(pretrained=False)
-        >>> input_tensor = torch.rand((1, 3, 32, 128))
-        >>> out = model(input_tensor)
+    .. tabs::
+
+        .. tab:: TensorFlow
+
+            .. code:: python
+
+                >>> import tensorflow as tf
+                >>> from doctr.models import sar_resnet31
+                >>> model = sar_resnet31(pretrained=False)
+                >>> input_tensor = tf.random.uniform(shape=[1, 64, 256, 3], maxval=1, dtype=tf.float32)
+                >>> out = model(input_tensor)
+
+        .. tab:: PyTorch
+
+            .. code:: python
+
+                >>> import torch
+                >>> from doctr.models import sar_resnet31
+                >>> model = sar_resnet31(pretrained=False)
+                >>> input_tensor = torch.rand((1, 3, 32, 128))
+                >>> out = model(input_tensor)

     Args:
         pretrained (bool): If True, returns a model pre-trained on our text recognition dataset
diff --git a/doctr/models/recognition/sar/tensorflow.py b/doctr/models/recognition/sar/tensorflow.py
index f8d641752d..f60a2c3cbf 100644
--- a/doctr/models/recognition/sar/tensorflow.py
+++ b/doctr/models/recognition/sar/tensorflow.py
@@ -344,12 +344,27 @@ def sar_resnet31(pretrained: bool = False, **kwargs: Any) -> SAR:
     """SAR with a resnet-31 feature extractor as described in `"Show, Attend and Read:A Simple and Strong Baseline for Irregular Text Recognition" `_.

-    Example:
-        >>> import tensorflow as tf
-        >>> from doctr.models import sar_resnet31
-        >>> model = sar_resnet31(pretrained=False)
-        >>> input_tensor = tf.random.uniform(shape=[1, 64, 256, 3], maxval=1, dtype=tf.float32)
-        >>> out = model(input_tensor)
+    .. tabs::
+
+        .. tab:: TensorFlow
+
+            .. code:: python
+
+                >>> import tensorflow as tf
+                >>> from doctr.models import sar_resnet31
+                >>> model = sar_resnet31(pretrained=False)
+                >>> input_tensor = tf.random.uniform(shape=[1, 64, 256, 3], maxval=1, dtype=tf.float32)
+                >>> out = model(input_tensor)
+
+        .. tab:: PyTorch
+
+            .. code:: python
+
+                >>> import torch
+                >>> from doctr.models import sar_resnet31
+                >>> model = sar_resnet31(pretrained=False)
+                >>> input_tensor = torch.rand((1, 3, 32, 128))
+                >>> out = model(input_tensor)

     Args:
         pretrained (bool): If True, returns a model pre-trained on our text recognition dataset
diff --git a/doctr/models/recognition/utils.py b/doctr/models/recognition/utils.py
index d5bf5cc883..e8b80d0b1b 100644
--- a/doctr/models/recognition/utils.py
+++ b/doctr/models/recognition/utils.py
@@ -13,6 +13,12 @@ def merge_strings(a: str, b: str, dil_factor: float) -> str:
     """Merges 2 character sequences in the best way to maximize the alignment of their overlapping characters.

+    >>> from doctr.models.recognition.utils import merge_strings
+    >>> merge_strings('abcd', 'cdefgh', 1.4)
+    'abcdefgh'
+    >>> merge_strings('abcdi', 'cdefgh', 1.4)
+    'abcdefgh'
+
     Args:
         a: first char seq, suffix should be similar to b's prefix.
         b: second char seq, prefix should be similar to a's suffix.
@@ -21,13 +27,6 @@ def merge_strings(a: str, b: str, dil_factor: float) -> str:
     Returns:
         A merged character sequence.
-
-    Example::
-        >>> from doctr.model.recognition.utils import merge_sequences
-        >>> merge_sequences('abcd', 'cdefgh', 1.4)
-        'abcdefgh'
-        >>> merge_sequences('abcdi', 'cdefgh', 1.4)
-        'abcdefgh'
     """
     seq_len = min(len(a), len(b))
     if seq_len == 0:  # One sequence is empty, return the other
@@ -62,6 +61,10 @@ def merge_multi_strings(seq_list: List[str], dil_factor: float) -> str:
     """Recursively merges consecutive string sequences with overlapping characters.

+    >>> from doctr.models.recognition.utils import merge_multi_strings
+    >>> merge_multi_strings(['abc', 'bcdef', 'difghi', 'aijkl'], 1.4)
+    'abcdefghijkl'
+
     Args:
         seq_list: list of sequences to merge. Sequences need to be ordered from left to right.
         dil_factor: dilation factor of the boxes to overlap, should be > 1. This parameter is
@@ -69,11 +72,6 @@ def merge_multi_strings(seq_list: List[str], dil_factor: float) -> str:
     Returns:
         A merged character sequence
-
-    Example::
-        >>> from doctr.model.recognition.utils import merge_multi_sequences
-        >>> merge_multi_sequences(['abc', 'bcdef', 'difghi', 'aijkl'], 1.4)
-        'abcdefghijkl'
     """
     def _recursive_merge(a: str, seq_list: List[str], dil_factor: float) -> str:
         # Recursive version of compute_overlap
diff --git a/doctr/models/recognition/zoo.py b/doctr/models/recognition/zoo.py
index ce697b6f58..f5b7cd72a4 100644
--- a/doctr/models/recognition/zoo.py
+++ b/doctr/models/recognition/zoo.py
@@ -38,12 +38,11 @@ def _predictor(arch: str, pretrained: bool, **kwargs: Any) -> RecognitionPredict
 def recognition_predictor(arch: str = 'crnn_vgg16_bn', pretrained: bool = False, **kwargs: Any) -> RecognitionPredictor:
     """Text recognition architecture.

-    Example::
-        >>> import numpy as np
-        >>> from doctr.models import recognition_predictor
-        >>> model = recognition_predictor(pretrained=True)
-        >>> input_page = (255 * np.random.rand(32, 128, 3)).astype(np.uint8)
-        >>> out = model([input_page])
+    >>> import numpy as np
+    >>> from doctr.models import recognition_predictor
+    >>> model = recognition_predictor(pretrained=True)
+    >>> input_page = (255 * np.random.rand(32, 128, 3)).astype(np.uint8)
+    >>> out = model([input_page])

     Args:
         arch: name of the architecture to use (e.g. 'crnn_vgg16_bn')
diff --git a/doctr/models/utils/pytorch.py b/doctr/models/utils/pytorch.py
index d1e084883b..9fd555b1f1 100644
--- a/doctr/models/utils/pytorch.py
+++ b/doctr/models/utils/pytorch.py
@@ -23,9 +23,8 @@ def load_pretrained_params(
 ) -> None:
     """Load a set of parameters onto a model

-    Example::
-        >>> from doctr.models import load_pretrained_params
-        >>> load_pretrained_params(model, "https://yoursource.com/yourcheckpoint-yourhash.zip")
+    >>> from doctr.models import load_pretrained_params
+    >>> load_pretrained_params(model, "https://yoursource.com/yourcheckpoint-yourhash.zip")

     Args:
         model: the keras model to be loaded
@@ -55,10 +54,23 @@ def conv_sequence_pt(
 ) -> List[nn.Module]:
     """Builds a convolutional-based layer sequence

-    Example::
-        >>> from doctr.models import conv_sequence
-        >>> from torch.nn import Sequential
-        >>> module = Sequential(conv_sequence(3, 32, True, True, kernel_size=3))
+    .. tabs::
+
+        .. tab:: TensorFlow
+
+            ..
code:: python + + >>> from doctr.models import conv_sequence + >>> from tensorflow.keras import Sequential + >>> module = Sequential(conv_sequence(32, 'relu', True, kernel_size=3, input_shape=[224, 224, 3])) + + .. tab:: PyTorch + + .. code:: python + + >>> from doctr.models import conv_sequence + >>> from torch.nn import Sequential + >>> module = Sequential(conv_sequence(3, 32, True, True, kernel_size=3)) Args: out_channels: number of output channels diff --git a/doctr/models/utils/tensorflow.py b/doctr/models/utils/tensorflow.py index 1205336f78..c8e4552f69 100644 --- a/doctr/models/utils/tensorflow.py +++ b/doctr/models/utils/tensorflow.py @@ -28,9 +28,8 @@ def load_pretrained_params( ) -> None: """Load a set of parameters onto a model - Example:: - >>> from doctr.models import load_pretrained_params - >>> load_pretrained_params(model, "https://yoursource.com/yourcheckpoint-yourhash.zip") + >>> from doctr.models import load_pretrained_params + >>> load_pretrained_params(model, "https://yoursource.com/yourcheckpoint-yourhash.zip") Args: model: the keras model to be loaded @@ -65,10 +64,23 @@ def conv_sequence( ) -> List[layers.Layer]: """Builds a convolutional-based layer sequence - Example:: - >>> from doctr.models import conv_sequence - >>> from tensorflow.keras import Sequential - >>> module = Sequential(conv_sequence(32, 'relu', True, kernel_size=3, input_shape=[224, 224, 3])) + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> from doctr.models import conv_sequence + >>> from tensorflow.keras import Sequential + >>> module = Sequential(conv_sequence(32, 'relu', True, kernel_size=3, input_shape=[224, 224, 3])) + + .. tab:: PyTorch + + .. code:: python + + >>> from doctr.models import conv_sequence + >>> from torch.nn import Sequential + >>> module = Sequential(conv_sequence(3, 32, True, True, kernel_size=3)) Args: out_channels: number of output channels @@ -101,11 +113,10 @@ def conv_sequence( class IntermediateLayerGetter(Model): """Implements an intermediate layer getter - Example:: - >>> from doctr.models import IntermediateLayerGetter - >>> from tensorflow.keras.applications import ResNet50 - >>> target_layers = ["conv2_block3_out", "conv3_block4_out", "conv4_block6_out", "conv5_block3_out"] - >>> feat_extractor = IntermediateLayerGetter(ResNet50(include_top=False, pooling=False), target_layers) + >>> from doctr.models import IntermediateLayerGetter + >>> from tensorflow.keras.applications import ResNet50 + >>> target_layers = ["conv2_block3_out", "conv3_block4_out", "conv4_block6_out", "conv5_block3_out"] + >>> feat_extractor = IntermediateLayerGetter(ResNet50(include_top=False, pooling=False), target_layers) Args: model: the model to extract feature maps from diff --git a/doctr/models/zoo.py b/doctr/models/zoo.py index 2973315ad7..bfd81825b3 100644 --- a/doctr/models/zoo.py +++ b/doctr/models/zoo.py @@ -59,12 +59,11 @@ def ocr_predictor( ) -> OCRPredictor: """End-to-end OCR architecture using one model for localization, and another for text recognition. 
- Example:: - >>> import numpy as np - >>> from doctr.models import ocr_predictor - >>> model = ocr_predictor('db_resnet50', 'crnn_vgg16_bn', pretrained=True) - >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8) - >>> out = model([input_page]) + >>> import numpy as np + >>> from doctr.models import ocr_predictor + >>> model = ocr_predictor('db_resnet50', 'crnn_vgg16_bn', pretrained=True) + >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8) + >>> out = model([input_page]) Args: det_arch: name of the detection architecture to use (e.g. 'db_resnet50', 'db_mobilenet_v3_large') diff --git a/doctr/transforms/modules/base.py b/doctr/transforms/modules/base.py index 81fcb568ac..3731f29f03 100644 --- a/doctr/transforms/modules/base.py +++ b/doctr/transforms/modules/base.py @@ -18,12 +18,29 @@ class SampleCompose(NestedObject): """Implements a wrapper that will apply transformations sequentially on both image and target - Example:: - >>> from doctr.transforms import SampleCompose, ImageTransform, ColorInversion, RandomRotate - >>> import tensorflow as tf - >>> import numpy as np - >>> transfos = SampleCompose([ImageTransform(ColorInversion((32, 32))), RandomRotate(30)]) - >>> out, out_boxes = transfos(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1), np.zeros((2, 4))) + + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> from doctr.transforms import SampleCompose, ImageTransform, ColorInversion, RandomRotate + >>> import tensorflow as tf + >>> import numpy as np + >>> transfo = SampleCompose([ImageTransform(ColorInversion((32, 32))), RandomRotate(30)]) + >>> out, out_boxes = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1), np.zeros((2, 4))) + + .. tab:: PyTorch + + .. code:: python + + >>> import numpy as np + >>> import torch + >>> from doctr.transforms import SampleCompose, ImageTransform, ColorInversion, RandomRotate + >>> transfos = SampleCompose([ImageTransform(ColorInversion((32, 32))), RandomRotate(30)]) + >>> out, out_boxes = transfos(torch.rand(8, 64, 64, 3), np.zeros((2, 4))) + Args: transforms: list of transformation modules """ @@ -42,11 +59,27 @@ def __call__(self, x: Any, target: Any) -> Tuple[Any, Any]: class ImageTransform(NestedObject): """Implements a transform wrapper to turn an image-only transformation into an image+target transform - Example:: - >>> from doctr.transforms import ImageTransform, ColorInversion - >>> import tensorflow as tf - >>> transfo = ImageTransform(ColorInversion((32, 32))) - >>> out, _ = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1), None) + + .. tabs:: + + .. tab:: TensorFlow + + .. code:: python + + >>> from doctr.transforms import ImageTransform, ColorInversion + >>> import tensorflow as tf + >>> transfo = ImageTransform(ColorInversion((32, 32))) + >>> out, _ = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1), None) + + .. tab:: PyTorch + + .. 
code:: python
+
+                >>> import torch
+                >>> from doctr.transforms import ImageTransform, ColorInversion
+                >>> transfo = ImageTransform(ColorInversion((32, 32)))
+                >>> out, _ = transfo(torch.rand(8, 64, 64, 3), None)
+
     Args:
         transform: the image transformation module to wrap
     """
@@ -65,11 +98,25 @@ class ColorInversion(NestedObject):
     """Applies the following tranformation to a tensor (image or batch of images):
     convert to grayscale, colorize (shift 0-values randomly), and then invert colors

-    Example::
-        >>> from doctr.transforms import Normalize
-        >>> import tensorflow as tf
-        >>> transfo = ColorInversion(min_val=0.6)
-        >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
+    .. tabs::
+
+        .. tab:: TensorFlow
+
+            .. code:: python
+
+                >>> from doctr.transforms import ColorInversion
+                >>> import tensorflow as tf
+                >>> transfo = ColorInversion(min_val=0.6)
+                >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
+
+        .. tab:: PyTorch
+
+            .. code:: python
+
+                >>> import torch
+                >>> from doctr.transforms import ColorInversion
+                >>> transfo = ColorInversion(min_val=0.6)
+                >>> out = transfo(torch.rand(8, 64, 64, 3))

     Args:
         min_val: range [min_val, 1] to colorize RGB pixels
@@ -87,11 +134,25 @@ def __call__(self, img: Any) -> Any:
 class OneOf(NestedObject):
     """Randomly apply one of the input transformations

-    Example::
-        >>> from doctr.transforms import Normalize
-        >>> import tensorflow as tf
-        >>> transfo = OneOf([JpegQuality(), Gamma()])
-        >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
+    .. tabs::
+
+        .. tab:: TensorFlow
+
+            .. code:: python
+
+                >>> from doctr.transforms import OneOf, RandomGamma, RandomJpegQuality
+                >>> import tensorflow as tf
+                >>> transfo = OneOf([RandomJpegQuality(), RandomGamma()])
+                >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
+
+        .. tab:: PyTorch
+
+            .. code:: python
+
+                >>> import torch
+                >>> from doctr.transforms import GaussianNoise, OneOf, RandomShadow
+                >>> transfo = OneOf([GaussianNoise(0., 1.), RandomShadow()])
+                >>> out = transfo(torch.rand(1, 64, 64, 3))

     Args:
         transforms: list of transformations, one only will be picked
@@ -112,11 +173,25 @@ def __call__(self, img: Any) -> Any:
 class RandomApply(NestedObject):
     """Apply with a probability p the input transformation

-    Example::
-        >>> from doctr.transforms import Normalize
-        >>> import tensorflow as tf
-        >>> transfo = RandomApply(Gamma(), p=.5)
-        >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
+    .. tabs::
+
+        .. tab:: TensorFlow
+
+            .. code:: python
+
+                >>> from doctr.transforms import RandomApply, RandomGamma
+                >>> import tensorflow as tf
+                >>> transfo = RandomApply(RandomGamma(), p=.5)
+                >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
+
+        .. tab:: PyTorch
+
+            .. code:: python
+
+                >>> import torch
+                >>> from doctr.transforms import GaussianNoise, RandomApply
+                >>> transfo = RandomApply(GaussianNoise(0., 1.), p=.5)
+                >>> out = transfo(torch.rand(1, 64, 64, 3))

     Args:
         transform: transformation to apply
diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py
index d5bacf1015..2b1274cf38 100644
--- a/doctr/transforms/modules/pytorch.py
+++ b/doctr/transforms/modules/pytorch.py
@@ -116,16 +116,15 @@ def __repr__(self) -> str:
 class GaussianNoise(torch.nn.Module):
     """Adds Gaussian Noise to the input tensor

-    Example::
-        >>> from doctr.transforms import GaussianNoise
-        >>> import torch
-        >>> transfo = GaussianNoise(0., 1.)
- >>> out = transfo(torch.rand((3, 224, 224))) - - Args: - mean : mean of the gaussian distribution - std : std of the gaussian distribution - """ + >>> from doctr.transforms import GaussianNoise + >>> import torch + >>> transfo = GaussianNoise(0., 1.) + >>> out = transfo(torch.rand((3, 224, 224))) + + Args: + mean : mean of the gaussian distribution + std : std of the gaussian distribution + """ def __init__(self, mean: float = 0., std: float = 1.) -> None: super().__init__() self.std = std @@ -166,6 +165,7 @@ def forward( Args: img: Image to be flipped. target: Dictionary with boxes (in relative coordinates of shape (N, 4)) and labels as keys + Returns: Tuple of PIL Image or Tensor and target """ @@ -181,15 +181,15 @@ def forward( class RandomShadow(torch.nn.Module): """Adds random shade to the input image - Example:: - >>> from doctr.transforms import RandomShadow - >>> import tensorflow as tf - >>> transfo = RandomShadow(0., 1.) - >>> out = transfo(torch.rand((3, 64, 64))) + >>> from doctr.transforms import RandomShadow + >>> import tensorflow as tf + >>> transfo = RandomShadow(0., 1.) + >>> out = transfo(torch.rand((3, 64, 64))) + + Args: + opacity_range : minimum and maximum opacity of the shade + """ - Args: - opacity_range : minimum and maximum opacity of the shade - """ def __init__(self, opacity_range: Tuple[float, float] = None) -> None: super().__init__() self.opacity_range = opacity_range if isinstance(opacity_range, tuple) else (.2, .8) diff --git a/doctr/transforms/modules/tensorflow.py b/doctr/transforms/modules/tensorflow.py index 0e01176939..ca2bdf7458 100644 --- a/doctr/transforms/modules/tensorflow.py +++ b/doctr/transforms/modules/tensorflow.py @@ -22,11 +22,10 @@ class Compose(NestedObject): """Implements a wrapper that will apply transformations sequentially - Example:: - >>> from doctr.transforms import Compose, Resize - >>> import tensorflow as tf - >>> transfos = Compose([Resize((32, 32))]) - >>> out = transfos(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1)) + >>> from doctr.transforms import Compose, Resize + >>> import tensorflow as tf + >>> transfos = Compose([Resize((32, 32))]) + >>> out = transfos(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1)) Args: transforms: list of transformation modules @@ -47,11 +46,10 @@ def __call__(self, x: Any) -> Any: class Resize(NestedObject): """Resizes a tensor to a target size - Example:: - >>> from doctr.transforms import Resize - >>> import tensorflow as tf - >>> transfo = Resize((32, 32)) - >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1)) + >>> from doctr.transforms import Resize + >>> import tensorflow as tf + >>> transfo = Resize((32, 32)) + >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1)) Args: output_size: expected output size @@ -138,11 +136,10 @@ def __call__( class Normalize(NestedObject): """Normalize a tensor to a Gaussian distribution for each channel - Example:: - >>> from doctr.transforms import Normalize - >>> import tensorflow as tf - >>> transfo = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) - >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1)) + >>> from doctr.transforms import Normalize + >>> import tensorflow as tf + >>> transfo = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1)) Args: mean: average value per channel @@ -164,11 +161,10 @@ def __call__(self, img: tf.Tensor) -> tf.Tensor: class 
 class LambdaTransformation(NestedObject):
     """Applies a user-defined function to a tensor

-    Example::
-        >>> from doctr.transforms import LambdaTransformation
-        >>> import tensorflow as tf
-        >>> transfo = LambdaTransformation(lambda x: x/ 255.)
-        >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
+    >>> from doctr.transforms import LambdaTransformation
+    >>> import tensorflow as tf
+    >>> transfo = LambdaTransformation(lambda x: x / 255.)
+    >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))

     Args:
         fn: the function to be applied to the input tensor
@@ -183,11 +179,10 @@ def __call__(self, img: tf.Tensor) -> tf.Tensor:
 class ToGray(NestedObject):
     """Convert a RGB tensor (batch of images or image) to a 3-channels grayscale tensor

-    Example::
-        >>> from doctr.transforms import Normalize
-        >>> import tensorflow as tf
-        >>> transfo = ToGray()
-        >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
+    >>> from doctr.transforms import ToGray
+    >>> import tensorflow as tf
+    >>> transfo = ToGray()
+    >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
     """
     def __init__(self, num_output_channels: int = 1):
         self.num_output_channels = num_output_channels
@@ -201,11 +196,10 @@ class RandomBrightness(NestedObject):
     """Randomly adjust brightness of a tensor (batch of images or image)
     by adding a delta to all pixels

-    Example:
-        >>> from doctr.transforms import Normalize
-        >>> import tensorflow as tf
-        >>> transfo = Brightness()
-        >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
+    >>> from doctr.transforms import RandomBrightness
+    >>> import tensorflow as tf
+    >>> transfo = RandomBrightness()
+    >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))

     Args:
         max_delta: offset to add to each pixel is randomly picked in [-max_delta, max_delta]
@@ -225,11 +219,10 @@ class RandomContrast(NestedObject):
     """Randomly adjust contrast of a tensor (batch of images or image)
     by adjusting each pixel: (img - mean) * contrast_factor + mean.

-    Example:
-        >>> from doctr.transforms import Normalize
-        >>> import tensorflow as tf
-        >>> transfo = Contrast()
-        >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
+    >>> from doctr.transforms import RandomContrast
+    >>> import tensorflow as tf
+    >>> transfo = RandomContrast()
+    >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))

     Args:
         delta: multiplicative factor is picked in [1-delta, 1+delta] (reduce contrast if factor<1)
@@ -248,11 +241,10 @@ class RandomSaturation(NestedObject):
     """Randomly adjust saturation of a tensor (batch of images or image)
     by converting to HSV and increasing saturation by a factor.

-    Example:
-        >>> from doctr.transforms import Normalize
-        >>> import tensorflow as tf
-        >>> transfo = Saturation()
-        >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
+    >>> from doctr.transforms import RandomSaturation
+    >>> import tensorflow as tf
+    >>> transfo = RandomSaturation()
+    >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))

     Args:
         delta: multiplicative factor is picked in [1-delta, 1+delta] (reduce saturation if factor<1)
@@ -270,11 +262,10 @@ def __call__(self, img: tf.Tensor) -> tf.Tensor:
 class RandomHue(NestedObject):
     """Randomly adjust hue of a tensor (batch of images or image) by converting to HSV and adding a delta

-    Example::
-        >>> from doctr.transforms import Normalize
-        >>> import tensorflow as tf
-        >>> transfo = Hue()
-        >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
+    >>> from doctr.transforms import RandomHue
+    >>> import tensorflow as tf
+    >>> transfo = RandomHue()
+    >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))

     Args:
         max_delta: offset to add to each pixel is randomly picked in [-max_delta, max_delta]
@@ -292,11 +283,10 @@ def __call__(self, img: tf.Tensor) -> tf.Tensor:
 class RandomGamma(NestedObject):
     """Randomly performs gamma correction for a tensor (batch of images or image)

-    Example:
-        >>> from doctr.transforms import Normalize
-        >>> import tensorflow as tf
-        >>> transfo = Gamma()
-        >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
+    >>> from doctr.transforms import RandomGamma
+    >>> import tensorflow as tf
+    >>> transfo = RandomGamma()
+    >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))

     Args:
         min_gamma: non-negative real number, lower bound for gamma param
@@ -329,11 +319,10 @@ def __call__(self, img: tf.Tensor) -> tf.Tensor:
 class RandomJpegQuality(NestedObject):
     """Randomly adjust jpeg quality of a 3 dimensional RGB image

-    Example::
-        >>> from doctr.transforms import Normalize
-        >>> import tensorflow as tf
-        >>> transfo = JpegQuality()
-        >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
+    >>> from doctr.transforms import RandomJpegQuality
+    >>> import tensorflow as tf
+    >>> transfo = RandomJpegQuality()
+    >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))

     Args:
         min_quality: int between [0, 100]
@@ -355,11 +344,10 @@ def __call__(self, img: tf.Tensor) -> tf.Tensor:
 class GaussianBlur(NestedObject):
     """Randomly applies Gaussian blur to a 3 dimensional RGB image

-    Example::
-        >>> from doctr.transforms import GaussianBlur
-        >>> import tensorflow as tf
-        >>> transfo = GaussianBlur(3, (.1, 5))
-        >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
+    >>> from doctr.transforms import GaussianBlur
+    >>> import tensorflow as tf
+    >>> transfo = GaussianBlur(3, (.1, 5))
+    >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))

     Args:
         kernel_shape: size of the blurring kernel
@@ -393,11 +381,10 @@ def __call__(self, img: tf.Tensor) -> tf.Tensor:
 class GaussianNoise(NestedObject):
     """Adds Gaussian Noise to the input tensor

-    Example::
-        >>> from doctr.transforms import GaussianNoise
-        >>> import tensorflow as tf
-        >>> transfo = GaussianNoise(0., 1.)
-        >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
+    >>> from doctr.transforms import GaussianNoise
+    >>> import tensorflow as tf
+    >>> transfo = GaussianNoise(0., 1.)
+    >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))

     Args:
         mean : mean of the gaussian distribution
@@ -424,20 +411,20 @@ def extra_repr(self) -> str:
 class RandomHorizontalFlip(NestedObject):
+    """Adds random horizontal flip to the input tensor/np.ndarray

-    Example::
-        >>> from doctr.transforms import RandomHorizontalFlip
-        >>> import tensorflow as tf
-        >>> transfo = RandomHorizontalFlip(p=0.5)
-        >>> image = tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1)
-        >>> target = {
-            "boxes": np.array([[0.1, 0.1, 0.4, 0.5] ], dtype= np.float32),
-            "labels": np.ones(1, dtype= np.int64)
-        }
-        >>> out = transfo(image, target)
-
-    Args:
+    >>> from doctr.transforms import RandomHorizontalFlip
+    >>> import tensorflow as tf
+    >>> transfo = RandomHorizontalFlip(p=0.5)
+    >>> image = tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1)
+    >>> target = {
+    >>>     "boxes": np.array([[0.1, 0.1, 0.4, 0.5]], dtype=np.float32),
+    >>>     "labels": np.ones(1, dtype=np.int64)
+    >>> }
+    >>> out = transfo(image, target)
+
+    Args:
         p : probability of Horizontal Flip
     """
     def __init__(self, p: float) -> None:
@@ -453,6 +440,7 @@ def __call__(
         Args:
             img: Image to be flipped.
            target: Dictionary with boxes (in relative coordinates of shape (N, 4)) and labels as keys
+
         Returns:
             Tuple of numpy nd-array or Tensor and target
         """
@@ -468,15 +456,14 @@ def __call__(
 class RandomShadow(NestedObject):
     """Adds random shade to the input image

-    Example::
-        >>> from doctr.transforms import RandomShadow
-        >>> import tensorflow as tf
-        >>> transfo = RandomShadow(0., 1.)
-        >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
+    >>> from doctr.transforms import RandomShadow
+    >>> import tensorflow as tf
+    >>> transfo = RandomShadow((0., 1.))
+    >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))

-    Args:
-        opacity_range : minimum and maximum opacity of the shade
-    """
+    Args:
+        opacity_range : minimum and maximum opacity of the shade
+    """
     def __init__(self, opacity_range: Tuple[float, float] = None) -> None:
         super().__init__()
         self.opacity_range = opacity_range if isinstance(opacity_range, tuple) else (.2, .8)
diff --git a/doctr/utils/data.py b/doctr/utils/data.py
index b3aff3398b..e399f18d12 100644
--- a/doctr/utils/data.py
+++ b/doctr/utils/data.py
@@ -52,9 +52,8 @@ def download_from_url(
 ) -> Path:
     """Download a file using its URL

-    Example::
-        >>> from doctr.models import download_from_url
-        >>> download_from_url("https://yoursource.com/yourcheckpoint-yourhash.zip")
+    >>> from doctr.utils.data import download_from_url
+    >>> download_from_url("https://yoursource.com/yourcheckpoint-yourhash.zip")

     Args:
         url: the URL of the file to download
diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py
index 1a7582ce4f..641ffcc684 100644
--- a/doctr/utils/geometry.py
+++ b/doctr/utils/geometry.py
@@ -135,10 +135,12 @@ def remap_boxes(
     """ Remaps a batch of rotated locpred (N, 4, 2) expressed for an origin_shape to a destination_shape.
     This does not impact the absolute shape of the boxes, but allows calculating the new relative
     RotatedBbox coordinates after resizing the image.
+
     Args:
         loc_preds: (N, 4, 2) array of RELATIVE loc_preds
         orig_shape: shape of the origin image
         dest_shape: shape of the destination image
+
     Returns:
         A batch of rotated loc_preds (N, 4, 2) expressed in the destination referential
     """
diff --git a/doctr/utils/metrics.py b/doctr/utils/metrics.py
index 4c01574a15..de5cf45aed 100644
--- a/doctr/utils/metrics.py
+++ b/doctr/utils/metrics.py
@@ -58,11 +58,10 @@ class TextMatch:
     where :math:`\mathcal{W}` is the set of all possible character sequences,
     :math:`N` is a strictly positive integer.

-    Example::
-        >>> from doctr.utils import TextMatch
-        >>> metric = TextMatch()
-        >>> metric.update(['Hello', 'world'], ['hello', 'world'])
-        >>> metric.summary()
+    >>> from doctr.utils import TextMatch
+    >>> metric = TextMatch()
+    >>> metric.update(['Hello', 'world'], ['hello', 'world'])
+    >>> metric.summary()
     """

     def __init__(self) -> None:
@@ -123,6 +122,7 @@ def box_iou(boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray:
     Args:
         boxes_1: bounding boxes of shape (N, 4) in format (xmin, ymin, xmax, ymax)
         boxes_2: bounding boxes of shape (M, 4) in format (xmin, ymin, xmax, ymax)
+
     Returns:
         the IoU matrix of shape (N, M)
     """
@@ -152,6 +152,7 @@ def box_ioa(boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray:
     Args:
         boxes_1: bounding boxes of shape (N, 4) in format (xmin, ymin, xmax, ymax)
         boxes_2: bounding boxes of shape (M, 4) in format (xmin, ymin, xmax, ymax)
+
     Returns:
         the IoA matrix of shape (N, M)
     """
@@ -358,12 +359,11 @@ class LocalizationConfusion:
     where :math:`\mathcal{B}` is the set of possible bounding boxes,
     :math:`N` (number of ground truths) and :math:`M` (number of predictions) are strictly positive integers.

-    Example::
-        >>> import numpy as np
-        >>> from doctr.utils import LocalizationConfusion
-        >>> metric = LocalizationConfusion(iou_thresh=0.5)
-        >>> metric.update(np.asarray([[0, 0, 100, 100]]), np.asarray([[0, 0, 70, 70], [110, 95, 200, 150]]))
-        >>> metric.summary()
+    >>> import numpy as np
+    >>> from doctr.utils import LocalizationConfusion
+    >>> metric = LocalizationConfusion(iou_thresh=0.5)
+    >>> metric.update(np.asarray([[0, 0, 100, 100]]), np.asarray([[0, 0, 70, 70], [110, 95, 200, 150]]))
+    >>> metric.summary()

     Args:
         iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
@@ -463,13 +463,12 @@ class OCRMetric:
     :math:`\mathcal{L}` is the set of possible character sequences,
     :math:`N` (number of ground truths) and :math:`M` (number of predictions) are strictly positive integers.

-    Example::
-        >>> import numpy as np
-        >>> from doctr.utils import OCRMetric
-        >>> metric = OCRMetric(iou_thresh=0.5)
-        >>> metric.update(np.asarray([[0, 0, 100, 100]]), np.asarray([[0, 0, 70, 70], [110, 95, 200, 150]]),
-                          ['hello'], ['hello', 'world'])
-        >>> metric.summary()
+    >>> import numpy as np
+    >>> from doctr.utils import OCRMetric
+    >>> metric = OCRMetric(iou_thresh=0.5)
+    >>> metric.update(np.asarray([[0, 0, 100, 100]]), np.asarray([[0, 0, 70, 70], [110, 95, 200, 150]]),
+    >>>               ['hello'], ['hello', 'world'])
+    >>> metric.summary()

     Args:
         iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
@@ -601,13 +600,12 @@ class DetectionMetric:
     :math:`\mathcal{C}` is the set of possible class indices,
     :math:`N` (number of ground truths) and :math:`M` (number of predictions) are strictly positive integers.

-    Example::
-        >>> import numpy as np
-        >>> from doctr.utils import DetectionMetric
-        >>> metric = DetectionMetric(iou_thresh=0.5)
-        >>> metric.update(np.asarray([[0, 0, 100, 100]]), np.asarray([[0, 0, 70, 70], [110, 95, 200, 150]]),
-                          np.zeros(1, dtype=np.int64), np.array([0, 1], dtype=np.int64))
-        >>> metric.summary()
+    >>> import numpy as np
+    >>> from doctr.utils import DetectionMetric
+    >>> metric = DetectionMetric(iou_thresh=0.5)
+    >>> metric.update(np.asarray([[0, 0, 100, 100]]), np.asarray([[0, 0, 70, 70], [110, 95, 200, 150]]),
+    >>>               np.zeros(1, dtype=np.int64), np.array([0, 1], dtype=np.int64))
+    >>> metric.summary()

     Args:
         iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
diff --git a/doctr/utils/multithreading.py b/doctr/utils/multithreading.py
index 51af0c75af..e2f6b67107 100644
--- a/doctr/utils/multithreading.py
+++ b/doctr/utils/multithreading.py
@@ -14,10 +14,9 @@ def multithread_exec(func: Callable[[Any], Any], seq: Iterable[Any], threads: Optional[int] = None) -> Iterable[Any]:
     """Execute a given function in parallel for each element of a given sequence

-    Example::
-        >>> from doctr.utils.multithreading import multithread_exec
-        >>> entries = [1, 4, 8]
-        >>> results = multithread_exec(lambda x: x ** 2, entries)
+    >>> from doctr.utils.multithreading import multithread_exec
+    >>> entries = [1, 4, 8]
+    >>> results = multithread_exec(lambda x: x ** 2, entries)

     Args:
         func: function to be executed on each element of the iterable
diff --git a/doctr/utils/visualization.py b/doctr/utils/visualization.py
index 85760c2b31..764cf6c610 100644
--- a/doctr/utils/visualization.py
+++ b/doctr/utils/visualization.py
@@ -151,16 +151,15 @@ def visualize_page(
 ) -> Figure:
     """Visualize a full page with predicted blocks, lines and words

-    Example::
-        >>> import numpy as np
-        >>> import matplotlib.pyplot as plt
-        >>> from doctr.utils.visualization import visualize_page
-        >>> from doctr.models import ocr_db_crnn
-        >>> model = ocr_db_crnn(pretrained=True)
-        >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8)
-        >>> out = model([[input_page]])
-        >>> visualize_page(out[0].pages[0].export(), input_page)
-        >>> plt.show()
+    >>> import numpy as np
+    >>> import matplotlib.pyplot as plt
+    >>> from doctr.utils.visualization import visualize_page
+    >>> from doctr.models import ocr_predictor
+    >>> model = ocr_predictor(pretrained=True)
+    >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8)
+    >>> out = model([input_page])
+    >>> visualize_page(out.pages[0].export(), input_page)
+    >>> plt.show()

     Args:
         page: the exported Page of a Document
diff --git a/setup.py b/setup.py
index 565f883daa..dbe42be4c5 100644
--- a/setup.py
+++ b/setup.py
@@ -73,13 +73,14 @@
     "mypy>=0.812",
     "pydocstyle>=6.1.1",
     # Docs
-    "sphinx<3.5.0",
-    "sphinx-rtd-theme==0.4.3",
+    "sphinx>=4.0.0",
     "sphinxemoji>=0.1.8",
     "sphinx-copybutton>=0.3.1",
     "docutils<0.18",
     "recommonmark>=0.7.1",
     "sphinx-markdown-tables>=0.0.15",
+    "sphinx-tabs>=3.3.0",
+    "furo>=2022.3.4",
 ]

 deps = {b: a for a, b in (re.findall(r"^(([^!=<>]+)(?:[!=<>].*)?$)", x)[0] for x in _deps)}
@@ -148,12 +149,13 @@ def deps_list(*pkgs):

     extras["docs_specific"] = deps_list(
         "sphinx",
-        "sphinx-rtd-theme",
         "sphinxemoji",
         "sphinx-copybutton",
         "docutils",
         "recommonmark",
         "sphinx-markdown-tables",
+        "sphinx-tabs",
+        "furo",
     )

     extras["docs"] = extras["all"] + extras["docs_specific"]
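
As a quick sanity check of the reworked doctest-style examples, the snippet below chains several of the transforms touched by this patch. It is a minimal sketch, not part of the patch itself: it assumes the TensorFlow backend of docTR is installed and relies only on the constructor signatures visible in the hunks above.

# Minimal sketch (assumes the TensorFlow backend of docTR is installed).
# Chains transforms whose docstrings were reworked above, using only the
# constructor signatures shown in this patch.
import tensorflow as tf

from doctr.transforms import Compose, GaussianNoise, Normalize, Resize

transfos = Compose([
    Resize((32, 32)),  # resize to the target spatial size
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    GaussianNoise(0., 1.),  # additive noise with mean 0. and std 1.
])
out = transfos(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
print(out.shape)  # (32, 32, 3)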