From 17bf8a000dc5a35aedf6cfbe974a78f21610eb6e Mon Sep 17 00:00:00 2001 From: haiyangdeperci <31899203+haiyangdeperci@users.noreply.github.com> Date: Fri, 16 Apr 2021 18:13:36 +0200 Subject: [PATCH 01/14] add supervisely integration --- hub/api/dataset.py | 30 ++++++++++ hub/api/integrations.py | 123 ++++++++++++++++++++++++++++++++++++++ requirements-optional.txt | 1 + 3 files changed, 154 insertions(+) diff --git a/hub/api/dataset.py b/hub/api/dataset.py index 506afe6c64..b0db5acfa9 100644 --- a/hub/api/dataset.py +++ b/hub/api/dataset.py @@ -793,6 +793,18 @@ def to_tensorflow(self, indexes=None, include_shapes=False, key_list=None): ds = _to_tensorflow(self, indexes, include_shapes, key_list) return ds + def to_supervisely(self, output): + """| Converts the dataset into a supervisely project + Parameters + ---------- + output: str + Project name and output directory. + """ + from .integrations import _to_supervisely + + project = _to_supervisely(self, output) + return project + def _get_dictionary(self, subpath, slice_=None): """Gets dictionary from dataset given incomplete subpath""" tensor_dict = {} @@ -1047,6 +1059,24 @@ def from_pytorch(dataset, scheduler: str = "single", workers: int = 1): ds = _from_pytorch(dataset, scheduler, workers) return ds + @staticmethod + def from_supervisely(project, scheduler: str = "single", workers: int = 1): + """| Converts a supervisely project into hub format + + Parameters + ---------- + dataset: + The path to the supervisely project that needs to be converted into hub format + scheduler: str + choice between "single", "threaded", "processed" + workers: int + how many threads or processes to use + """ + from .integrations import _from_supervisely + + ds = _from_supervisely(project, scheduler, workers) + return ds + @staticmethod def from_path(path, scheduler="single", workers=1): # infer schema & get data (label -> input mapping with file refs) diff --git a/hub/api/integrations.py b/hub/api/integrations.py index e89c61e5db..b047c27c62 100644 --- a/hub/api/integrations.py +++ b/hub/api/integrations.py @@ -5,6 +5,9 @@ """ import sys +import numpy as np +import json +from itertools import chain from collections import defaultdict from hub.exceptions import ModuleNotInstalledException, OutOfBoundsError from hub.schema.features import Primitive, Tensor, SchemaDict @@ -659,3 +662,123 @@ def __iter__(self): self._init_ds() for i in range(len(self)): yield self[i] + + +def _from_supervisely(project, scheduler: str = "single", workers: int = 1): + try: + import supervisely_lib as sly + from supervisely_lib.project import project as sly_image_project + from supervisely_lib.project import video_project as sly_video_project + from skvideo.io import FFmpegReader, vread + except ModuleNotFoundError: + raise ModuleNotInstalledException("supervisely") + + with open(project + "meta.json") as meta_file: + project_meta_dict = json.load(meta_file) + project_type = project_meta_dict['projectType'] + mode = sly.OpenMode.READ + def infer_project(project, project_type, read_mode): + def infer_shape_image(paths): + item_path, item_ann_path = paths + ann = sly.Annotation.load_json_file(item_ann_path, project.meta) + ann_dict = ann.to_json() + return list(ann_dict['size'].values()) + def infer_shape_video(paths): + item_path, item_ann_path = paths + vreader = FFmpegReader(item_path) + return vreader.getShape() + if project_type == 'images': + project = sly_image_project.Project(project, mode) + max_shape = (0, 0) + return project, Image, infer_shape_image, max_shape + elif project_type == 'videos': + project = sly_video_project.VideoProject(project, mode) + max_shape = (0, 0, 0, 0) + return project, Video, infer_shape_video, max_shape + # else: + # project = sly_pcd_project.PointcloudProject(project, mode) + # return project, None, None + # blob_type = PointCloud # once this schema is defined + project, main_blob, infer_shape, max_shape = infer_project(project, project_type, mode) + label_names = [] + datasets = project.datasets.items() + uniform = True + for ds in datasets: + for item in ds: + shape = infer_shape(ds.get_item_paths(item)) + max_shape = np.maximum(shape, max_shape) + if uniform and max_shape.any() and (shape != max_shape).any(): + uniform = False + label_names.append(ds.name) + items = chain(*datasets) + idatasets = iter(datasets) + ds, i = next(idatasets), 0 + key = 'shape' if uniform else 'max_shape' + if project_type == 'images': + read = sly.imaging.image.read + blob_shape = {key: (*max_shape.tolist(), 3)} + elif project_type == 'videos': + read = vread + blob_shape = {key: max_shape.tolist()} + if key == 'max_shape': + blob_shape['shape'] = (None, None, None, 3) + schema = {project_type: main_blob(**blob_shape), "dataset": ClassLabel(names=label_names)} + @hub.transform(schema=schema, scheduler=scheduler, workers=workers) + def transformation(item): + nonlocal i, ds + if i >= len(ds): + ds, i = next(idatasets), 0 + item_path, item_ann_path = ds.get_item_paths(item) + i += 1 + return {project_type: read(item_path), "dataset": schema["dataset"].str2int(ds.name)} + return transformation(items) + + +def _to_supervisely(dataset, output): + try: + import supervisely_lib as sly + from skvideo.io import vwrite + except ModuleNotFoundError: + raise ModuleNotInstalledException("supervisely") + + schema_dict = dataset.schema.dict_ + for key, schem in schema_dict.items(): + if isinstance(schem, Image): + project_type = "images" + extension = "jpeg" + break + elif isinstance(schem, Video): + project_type = "videos" + extension = "mp4" + break + else: + raise Exception + mode = sly.OpenMode.CREATE + if project_type == 'images': + _project = sly.Project + elif project_type == 'videos': + _project = sly.VideoProject + else: + raise Exception + pr = _project(output, mode) + # probably here we can create multiple datasets + out_ds = pr.create_dataset(output) + try: + fn_key = "filename" + dataset[fn_key] + except KeyError: + fn_key = None + for idx, view in enumerate(dataset): + obj = view[key].compute() + fn = view[fn_key].compute() if fn_key else str(idx) + fn = "{}.{}".format(fn, extension) + # strangely supervisely prevents from using this method on videos + try: + out_ds.add_item_np(fn, obj) + except RuntimeError: + # fix with in-memory file + path = "{}/{}".format(out_ds.item_dir, fn) + vwrite(path, obj) + out_ds._item_to_ann[fn] = fn + ".json" + out_ds.set_ann(fn, out_ds._get_empty_annotaion(path)) + return pr diff --git a/requirements-optional.txt b/requirements-optional.txt index 83e2a36c11..1c42c17529 100644 --- a/requirements-optional.txt +++ b/requirements-optional.txt @@ -6,3 +6,4 @@ transformers>=3.5.1 dask[complete]>=2.30 tensorflow_datasets ray==1.2.0 +supervisely==6.1.64 \ No newline at end of file From 1ca563e7b52db5c113d2e1c3cafbe06abc802ed9 Mon Sep 17 00:00:00 2001 From: haiyangdeperci <31899203+haiyangdeperci@users.noreply.github.com> Date: Fri, 16 Apr 2021 18:14:09 +0200 Subject: [PATCH 02/14] add docs for supervisely integration --- docs/source/integrations/supervisely.md | 30 +++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 docs/source/integrations/supervisely.md diff --git a/docs/source/integrations/supervisely.md b/docs/source/integrations/supervisely.md new file mode 100644 index 0000000000..1e54d9f188 --- /dev/null +++ b/docs/source/integrations/supervisely.md @@ -0,0 +1,30 @@ +# Supervisely + +## Dataset to Supervisely Project +Here is an example of a conversion of the dataset into Supervisely format. + +```python +from hub import Dataset, schema + +# Create dataset +ds = Dataset( + "./dataset", + shape=(64,), + schema={ + "image": schema.Image((512, 512, 3)), + }, +) + +# transform into Supervisely project +project = ds.to_supervisely("sample-project") +``` + +## Supervisely Project to Dataset +In this manner, Hub dataset can be created from a supervisely project: + +```python +import hub + +out_ds = hub.Dataset.from_supervisely("sample-project") +res_ds = out_ds.store("./dataset") +``` From 066cc494c1d9017dca8c678cd56d8d2561721660 Mon Sep 17 00:00:00 2001 From: haiyangdeperci <31899203+haiyangdeperci@users.noreply.github.com> Date: Fri, 16 Apr 2021 18:15:17 +0200 Subject: [PATCH 03/14] add tests for supervisely --- hub/api/tests/test_converters.py | 21 +++++++++++++++++++-- hub/utils.py | 8 ++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/hub/api/tests/test_converters.py b/hub/api/tests/test_converters.py index eabee2baf1..0d3cd76f92 100644 --- a/hub/api/tests/test_converters.py +++ b/hub/api/tests/test_converters.py @@ -7,9 +7,8 @@ import hub.api.tests.test_converters from hub.schema.features import Tensor import numpy as np -from hub.utils import tfds_loaded, tensorflow_loaded, pytorch_loaded +from hub.utils import tfds_loaded, tensorflow_loaded, pytorch_loaded, supervisely_loaded, Timer import pytest -from hub.utils import Timer @pytest.mark.skipif(not tfds_loaded(), reason="requires tfds to be loaded") @@ -464,6 +463,24 @@ def test_to_tensorflow_bug(): data = ds.to_tensorflow() +@pytest.mark.skipif(not supervisely_loaded(), reason="requires supervisely to be loaded") +def test_to_supervisely(): + ds = hub.Dataset("activeloop/mnist", mode="r") + data = ds.to_supervisely() + + +@pytest.mark.skipif(not supervisely_loaded(), reason="requires supervisely to be loaded") +def test_from_supervisely(): + import supervisely_lib as sly + + project_path = "data/test_from_supervisely/project1" + project = sly.Project(project_path, sly.OpenMode.CREATE) + project_ds = project.create_dataset("example") + img = np.array([[255, 255, 255]]) + project_ds.add_item_np("pixel.jpeg", img) + ds = hub.Dataset.from_supervisely(project_path) + + if __name__ == "__main__": with Timer("Test Converters"): with Timer("from MNIST"): diff --git a/hub/utils.py b/hub/utils.py index 5ca5385362..c43432dd9e 100644 --- a/hub/utils.py +++ b/hub/utils.py @@ -157,6 +157,14 @@ def pathos_loaded(): return True +def supervisely_loaded(): + try: + import supervisely_lib + except ImportError: + return False + return True + + def compute_lcm(a): """ Lowest Common Multiple of a list a From 48bf733a695c162b11193d2e0f04ee9d44a718da Mon Sep 17 00:00:00 2001 From: haiyangdeperci <31899203+haiyangdeperci@users.noreply.github.com> Date: Mon, 19 Apr 2021 14:25:38 +0200 Subject: [PATCH 04/14] improve tests --- hub/api/integrations.py | 30 +++++++++++++++++++----------- hub/api/tests/test_converters.py | 25 ++++++++++++++++++++----- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/hub/api/integrations.py b/hub/api/integrations.py index b047c27c62..7de5fb3eff 100644 --- a/hub/api/integrations.py +++ b/hub/api/integrations.py @@ -672,9 +672,13 @@ def _from_supervisely(project, scheduler: str = "single", workers: int = 1): from skvideo.io import FFmpegReader, vread except ModuleNotFoundError: raise ModuleNotInstalledException("supervisely") - - with open(project + "meta.json") as meta_file: - project_meta_dict = json.load(meta_file) + if isinstance(project, str): + with open(project + "meta.json") as meta_file: + project_meta_dict = json.load(meta_file) + instantiated = False + else: + project_meta_dict = project.meta.to_json() + instantiated = True project_type = project_meta_dict['projectType'] mode = sly.OpenMode.READ def infer_project(project, project_type, read_mode): @@ -688,17 +692,15 @@ def infer_shape_video(paths): vreader = FFmpegReader(item_path) return vreader.getShape() if project_type == 'images': - project = sly_image_project.Project(project, mode) + if not instantiated: + project = sly_image_project.Project(project, mode) max_shape = (0, 0) return project, Image, infer_shape_image, max_shape elif project_type == 'videos': - project = sly_video_project.VideoProject(project, mode) + if not instantiated: + project = sly_video_project.VideoProject(project, mode) max_shape = (0, 0, 0, 0) return project, Video, infer_shape_video, max_shape - # else: - # project = sly_pcd_project.PointcloudProject(project, mode) - # return project, None, None - # blob_type = PointCloud # once this schema is defined project, main_blob, infer_shape, max_shape = infer_project(project, project_type, mode) label_names = [] datasets = project.datasets.items() @@ -740,7 +742,6 @@ def _to_supervisely(dataset, output): from skvideo.io import vwrite except ModuleNotFoundError: raise ModuleNotInstalledException("supervisely") - schema_dict = dataset.schema.dict_ for key, schem in schema_dict.items(): if isinstance(schem, Image): @@ -761,6 +762,8 @@ def _to_supervisely(dataset, output): else: raise Exception pr = _project(output, mode) + meta = pr.meta + meta._project_type = project_type # probably here we can create multiple datasets out_ds = pr.create_dataset(output) try: @@ -768,9 +771,13 @@ def _to_supervisely(dataset, output): dataset[fn_key] except KeyError: fn_key = None + zeroes = len(str(len(dataset))) for idx, view in enumerate(dataset): obj = view[key].compute() - fn = view[fn_key].compute() if fn_key else str(idx) + if fn_key: + fn = view[fn_key].compute() + else: + fn = f"{idx:0{zeroes}}" fn = "{}.{}".format(fn, extension) # strangely supervisely prevents from using this method on videos try: @@ -781,4 +788,5 @@ def _to_supervisely(dataset, output): vwrite(path, obj) out_ds._item_to_ann[fn] = fn + ".json" out_ds.set_ann(fn, out_ds._get_empty_annotaion(path)) + pr.set_meta(meta) return pr diff --git a/hub/api/tests/test_converters.py b/hub/api/tests/test_converters.py index 0d3cd76f92..910d5cebf1 100644 --- a/hub/api/tests/test_converters.py +++ b/hub/api/tests/test_converters.py @@ -7,6 +7,8 @@ import hub.api.tests.test_converters from hub.schema.features import Tensor import numpy as np +import shutil +import os.path from hub.utils import tfds_loaded, tensorflow_loaded, pytorch_loaded, supervisely_loaded, Timer import pytest @@ -465,20 +467,33 @@ def test_to_tensorflow_bug(): @pytest.mark.skipif(not supervisely_loaded(), reason="requires supervisely to be loaded") def test_to_supervisely(): - ds = hub.Dataset("activeloop/mnist", mode="r") - data = ds.to_supervisely() + data_path = "./data/test_supervisely/to_from" + dataset_name = "rock_paper_scissors_test" + if os.path.exists(data_path): + shutil.rmtree(data_path) + original_dataset = hub.Dataset(f"activeloop/{dataset_name}", mode="r") + project = original_dataset.to_supervisely(os.path.join(data_path, dataset_name)) + trans = hub.Dataset.from_supervisely(project) + new_dataset = trans.store(os.path.join(data_path, "new_rpst")) @pytest.mark.skipif(not supervisely_loaded(), reason="requires supervisely to be loaded") def test_from_supervisely(): import supervisely_lib as sly - project_path = "data/test_from_supervisely/project1" + data_path = "./data/test_supervisely/from_to" + if os.path.exists(data_path): + shutil.rmtree(data_path) + project_name = "pixel_project" + project_path = os.path.join(data_path, project_name) project = sly.Project(project_path, sly.OpenMode.CREATE) - project_ds = project.create_dataset("example") + project.meta._project_type = "images" + project_ds = project.create_dataset(project_name) img = np.array([[255, 255, 255]]) project_ds.add_item_np("pixel.jpeg", img) - ds = hub.Dataset.from_supervisely(project_path) + trans = hub.Dataset.from_supervisely(project) + dataset = trans.store(os.path.join(data_path, "pixel_dataset")) + project_back = dataset.to_supervisely(os.path.join(data_path, "pixel_project_back")) if __name__ == "__main__": From 8122eea1a7e79ba2b77dfd98957b4ce3e5db76c0 Mon Sep 17 00:00:00 2001 From: haiyangdeperci <31899203+haiyangdeperci@users.noreply.github.com> Date: Mon, 19 Apr 2021 14:29:05 +0200 Subject: [PATCH 05/14] lint --- hub/api/integrations.py | 42 +++++++++++++++++++++----------- hub/api/tests/test_converters.py | 16 +++++++++--- 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/hub/api/integrations.py b/hub/api/integrations.py index 7de5fb3eff..7f10a3eb64 100644 --- a/hub/api/integrations.py +++ b/hub/api/integrations.py @@ -679,29 +679,35 @@ def _from_supervisely(project, scheduler: str = "single", workers: int = 1): else: project_meta_dict = project.meta.to_json() instantiated = True - project_type = project_meta_dict['projectType'] + project_type = project_meta_dict["projectType"] mode = sly.OpenMode.READ + def infer_project(project, project_type, read_mode): def infer_shape_image(paths): item_path, item_ann_path = paths ann = sly.Annotation.load_json_file(item_ann_path, project.meta) ann_dict = ann.to_json() - return list(ann_dict['size'].values()) + return list(ann_dict["size"].values()) + def infer_shape_video(paths): item_path, item_ann_path = paths vreader = FFmpegReader(item_path) return vreader.getShape() - if project_type == 'images': + + if project_type == "images": if not instantiated: project = sly_image_project.Project(project, mode) max_shape = (0, 0) return project, Image, infer_shape_image, max_shape - elif project_type == 'videos': + elif project_type == "videos": if not instantiated: project = sly_video_project.VideoProject(project, mode) max_shape = (0, 0, 0, 0) return project, Video, infer_shape_video, max_shape - project, main_blob, infer_shape, max_shape = infer_project(project, project_type, mode) + + project, main_blob, infer_shape, max_shape = infer_project( + project, project_type, mode + ) label_names = [] datasets = project.datasets.items() uniform = True @@ -715,16 +721,20 @@ def infer_shape_video(paths): items = chain(*datasets) idatasets = iter(datasets) ds, i = next(idatasets), 0 - key = 'shape' if uniform else 'max_shape' - if project_type == 'images': + key = "shape" if uniform else "max_shape" + if project_type == "images": read = sly.imaging.image.read blob_shape = {key: (*max_shape.tolist(), 3)} - elif project_type == 'videos': + elif project_type == "videos": read = vread blob_shape = {key: max_shape.tolist()} - if key == 'max_shape': - blob_shape['shape'] = (None, None, None, 3) - schema = {project_type: main_blob(**blob_shape), "dataset": ClassLabel(names=label_names)} + if key == "max_shape": + blob_shape["shape"] = (None, None, None, 3) + schema = { + project_type: main_blob(**blob_shape), + "dataset": ClassLabel(names=label_names), + } + @hub.transform(schema=schema, scheduler=scheduler, workers=workers) def transformation(item): nonlocal i, ds @@ -732,7 +742,11 @@ def transformation(item): ds, i = next(idatasets), 0 item_path, item_ann_path = ds.get_item_paths(item) i += 1 - return {project_type: read(item_path), "dataset": schema["dataset"].str2int(ds.name)} + return { + project_type: read(item_path), + "dataset": schema["dataset"].str2int(ds.name), + } + return transformation(items) @@ -755,9 +769,9 @@ def _to_supervisely(dataset, output): else: raise Exception mode = sly.OpenMode.CREATE - if project_type == 'images': + if project_type == "images": _project = sly.Project - elif project_type == 'videos': + elif project_type == "videos": _project = sly.VideoProject else: raise Exception diff --git a/hub/api/tests/test_converters.py b/hub/api/tests/test_converters.py index 910d5cebf1..047369754e 100644 --- a/hub/api/tests/test_converters.py +++ b/hub/api/tests/test_converters.py @@ -9,7 +9,13 @@ import numpy as np import shutil import os.path -from hub.utils import tfds_loaded, tensorflow_loaded, pytorch_loaded, supervisely_loaded, Timer +from hub.utils import ( + tfds_loaded, + tensorflow_loaded, + pytorch_loaded, + supervisely_loaded, + Timer, +) import pytest @@ -465,7 +471,9 @@ def test_to_tensorflow_bug(): data = ds.to_tensorflow() -@pytest.mark.skipif(not supervisely_loaded(), reason="requires supervisely to be loaded") +@pytest.mark.skipif( + not supervisely_loaded(), reason="requires supervisely to be loaded" +) def test_to_supervisely(): data_path = "./data/test_supervisely/to_from" dataset_name = "rock_paper_scissors_test" @@ -477,7 +485,9 @@ def test_to_supervisely(): new_dataset = trans.store(os.path.join(data_path, "new_rpst")) -@pytest.mark.skipif(not supervisely_loaded(), reason="requires supervisely to be loaded") +@pytest.mark.skipif( + not supervisely_loaded(), reason="requires supervisely to be loaded" +) def test_from_supervisely(): import supervisely_lib as sly From 2b13551dc799f4f8ac17896df59d2de0069cec49 Mon Sep 17 00:00:00 2001 From: haiyangdeperci <31899203+haiyangdeperci@users.noreply.github.com> Date: Wed, 21 Apr 2021 02:53:04 +0200 Subject: [PATCH 06/14] add video tests --- hub/api/tests/test_converters.py | 45 ++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/hub/api/tests/test_converters.py b/hub/api/tests/test_converters.py index 047369754e..d1bb8433f2 100644 --- a/hub/api/tests/test_converters.py +++ b/hub/api/tests/test_converters.py @@ -506,6 +506,51 @@ def test_from_supervisely(): project_back = dataset.to_supervisely(os.path.join(data_path, "pixel_project_back")) +@pytest.mark.skipif( + not supervisely_loaded(), reason="requires supervisely to be loaded" +) +def test_to_supervisely_video(): + data_path = "./data/test_supervisely/video_to" + if os.path.exists(data_path): + shutil.rmtree(data_path) + schema = { + "vid": hub.schema.Video(shape=(1, 1, 2, 3)), + "filename": hub.schema.Text(max_shape=5), + } + ds = hub.Dataset( + os.path.join(data_path, "hub_video_dataset"), schema=schema, shape=3 + ) + filenames = ["one", "two", "three"] + ds["filename"][:] = filenames + project = _to_supervisely(ds, os.path.join(data_path, "sly_video_dataset")) + + +@pytest.mark.skipif( + not supervisely_loaded(), reason="requires supervisely to be loaded" +) +def test_from_supervisely_video(): + import supervisely_lib as sly + from skvideo.io import vwrite + + data_path = "./data/test_supervisely/video_from" + if os.path.exists(data_path): + shutil.rmtree(data_path) + project_name = "minuscule_videos/" + project_path = os.path.join(data_path, project_name) + project = sly.VideoProject(project_path, sly.OpenMode.CREATE) + project.meta._project_type = "videos" + item_name = "item.mp4" + np.random.seed(0) + for name in ["foofoo", "bar"]: + ds = project.create_dataset(name) + item_path = os.path.join(ds.item_dir, item_name) + vwrite(item_path, (np.random.rand(len(name), 2, 2, 3) * 255).astype("uint8")) + ds._item_to_ann[item_name] = item_name + ".json" + ds.set_ann(item_name, ds._get_empty_annotaion(item_path)) + project.set_meta(project.meta) + trans = _from_supervisely(os.path.join(data_path, project_name)) + + if __name__ == "__main__": with Timer("Test Converters"): with Timer("from MNIST"): From e54c3f2ee1e9e379ce4196fec80a78d0f3994be7 Mon Sep 17 00:00:00 2001 From: haiyangdeperci <31899203+haiyangdeperci@users.noreply.github.com> Date: Wed, 21 Apr 2021 12:25:27 +0200 Subject: [PATCH 07/14] add ffmpeg to circleci for video tests --- .circleci/config.yml | 15 ++++++++++++++- hub/api/tests/test_converters.py | 4 ++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 8ea761d075..c2e4933b3a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -90,11 +90,17 @@ commands: get-python: steps: - run: - name: "Install Python" + name: "Install Python & ffmpeg" command: | brew update brew install python@3.7 brew link --overwrite python@3.7 + brew install ffmpeg + get-linux-ffmpeg: + steps: + - run: + name: "Install ffmpeg" + command: apt-get update && apt-get install ffmpeg info: steps: - run: @@ -254,6 +260,13 @@ jobs: condition: << parameters.mac-like >> steps: - get-python + - when: + condition: + and: + - not: << parameters.mac-like >> + - << parameters.unix-like >> + steps: + - get-linux-ffmpeg - info - google-creds: unix-like: << parameters.unix-like >> diff --git a/hub/api/tests/test_converters.py b/hub/api/tests/test_converters.py index d1bb8433f2..88f311d3e8 100644 --- a/hub/api/tests/test_converters.py +++ b/hub/api/tests/test_converters.py @@ -522,7 +522,7 @@ def test_to_supervisely_video(): ) filenames = ["one", "two", "three"] ds["filename"][:] = filenames - project = _to_supervisely(ds, os.path.join(data_path, "sly_video_dataset")) + project = ds.to_supervisely(os.path.join(data_path, "sly_video_dataset")) @pytest.mark.skipif( @@ -548,7 +548,7 @@ def test_from_supervisely_video(): ds._item_to_ann[item_name] = item_name + ".json" ds.set_ann(item_name, ds._get_empty_annotaion(item_path)) project.set_meta(project.meta) - trans = _from_supervisely(os.path.join(data_path, project_name)) + trans = hub.Dataset.from_supervisely(os.path.join(data_path, project_name)) if __name__ == "__main__": From 1432b8c8fa38c3e5b0633dc729cd41fde8a75451 Mon Sep 17 00:00:00 2001 From: haiyangdeperci <31899203+haiyangdeperci@users.noreply.github.com> Date: Wed, 21 Apr 2021 15:40:33 +0200 Subject: [PATCH 08/14] leave ffmpeg as optional --- .circleci/config.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c2e4933b3a..afa4dc5541 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -100,7 +100,7 @@ commands: steps: - run: name: "Install ffmpeg" - command: apt-get update && apt-get install ffmpeg + command: sudo apt-get update && sudo apt-get install ffmpeg info: steps: - run: @@ -265,6 +265,7 @@ jobs: and: - not: << parameters.mac-like >> - << parameters.unix-like >> + - << parameters.optional >> steps: - get-linux-ffmpeg - info From 90212efb6218ea53bcd51223d575ac905fc2f69d Mon Sep 17 00:00:00 2001 From: Shashank Agarwal Date: Fri, 30 Apr 2021 20:11:32 +0530 Subject: [PATCH 09/14] Update resource class --- .circleci/config.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index afa4dc5541..cc726dbcdb 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -81,6 +81,7 @@ executors: commands: aliases: + resource_class: medium+ steps: - run: name: "Setting aliases for Windows" @@ -88,6 +89,7 @@ commands: Add-Content -Path $profile -Value {Set-Alias -Name python3 -Value python} Add-Content -Path $profile -Value {Set-Alias -Name pip3 -Value pip} get-python: + resource_class: medium+ steps: - run: name: "Install Python & ffmpeg" @@ -97,11 +99,13 @@ commands: brew link --overwrite python@3.7 brew install ffmpeg get-linux-ffmpeg: + resource_class: medium+ steps: - run: name: "Install ffmpeg" command: sudo apt-get update && sudo apt-get install ffmpeg info: + resource_class: medium+ steps: - run: name: "Gather machine info" @@ -112,9 +116,11 @@ commands: unix-like: type: boolean default: true + resource_class: medium+ steps: - unless: condition: << parameters.unix-like >> + resource_class: medium+ steps: - run: name: "Prepare Google credentials - Windows" @@ -126,6 +132,7 @@ commands: [Text.Encoding]::ASCII.GetString([Convert]::FromBase64String($Env:GOOGLE_APPLICATION_CREDENTIALS)) | Out-File -FilePath $Env:CI_GCS_PATH -Encoding ASCII - when: condition: << parameters.unix-like >> + resource_class: medium+ steps: - run: name: "Prepare Google credentials - Unix" @@ -138,6 +145,7 @@ commands: description: "Include optional requirements" type: boolean default: false + resource_class: medium+ steps: - run: name: "Collecting requirements" @@ -149,6 +157,7 @@ commands: pip3 install -r requirements.txt pip3 install -e . style-check: + resource_class: medium+ steps: - run: name: "Checking code style" @@ -160,6 +169,7 @@ commands: unix-like: type: boolean default: true + resource_class: medium+ steps: - unless: condition: << parameters.unix-like >> @@ -174,6 +184,7 @@ commands: pytest --cov-report=xml --cov=./ - when: condition: << parameters.unix-like >> + resource_class: medium+ steps: - run: name: "Running tests - Unix" @@ -181,6 +192,7 @@ commands: export GOOGLE_APPLICATION_CREDENTIALS=$HOME/.secrets/gcs.json pytest --cov-report=xml --cov=./ codecov-upload: + resource_class: medium+ steps: - codecov/upload: file: coverage.xml @@ -189,6 +201,7 @@ commands: unix-like: type: boolean default: true + resource_class: medium+ steps: - unless: condition: << parameters.unix-like >> @@ -205,11 +218,13 @@ commands: - store_artifacts: path: test-reports slack-status: + resource_class: medium+ steps: - slack/status: fail_only: true webhook: $SLACK_WEBHOOK conda-install: + resource_class: medium+ steps: - run: name: "Install Miniconda" @@ -219,6 +234,7 @@ commands: bash Miniconda3-py38_4.9.2-Linux-x86_64.sh -b echo 'export PATH=$HOME/miniconda3/bin:$PATH' >> $BASH_ENV conda-setup: + resource_class: medium+ steps: - run: name: "Install required conda packages and set config" @@ -227,6 +243,7 @@ commands: conda config --add channels conda-forge conda config --set anaconda_upload yes conda-build: + resource_class: medium+ steps: - run: name: "Build Hub for conda" @@ -250,6 +267,7 @@ jobs: type: boolean default: false executor: << parameters.e >> + resource_class: medium+ steps: - checkout - unless: @@ -286,6 +304,7 @@ jobs: executor: linux environment: IMAGE_NAME: snarkai/hub + resource_class: medium+ steps: - setup_remote_docker - checkout @@ -323,6 +342,7 @@ jobs: - slack-status conda: executor: linux + resource_class: medium+ steps: - checkout - conda-install From fc005de792d6c799c6a7901d7b5ca8996e3471f5 Mon Sep 17 00:00:00 2001 From: Shashank Agarwal Date: Fri, 30 Apr 2021 20:20:29 +0530 Subject: [PATCH 10/14] Fix circleCi --- .circleci/config.yml | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index cc726dbcdb..fd10ee4bb1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -81,7 +81,6 @@ executors: commands: aliases: - resource_class: medium+ steps: - run: name: "Setting aliases for Windows" @@ -89,7 +88,6 @@ commands: Add-Content -Path $profile -Value {Set-Alias -Name python3 -Value python} Add-Content -Path $profile -Value {Set-Alias -Name pip3 -Value pip} get-python: - resource_class: medium+ steps: - run: name: "Install Python & ffmpeg" @@ -99,13 +97,11 @@ commands: brew link --overwrite python@3.7 brew install ffmpeg get-linux-ffmpeg: - resource_class: medium+ steps: - run: name: "Install ffmpeg" command: sudo apt-get update && sudo apt-get install ffmpeg info: - resource_class: medium+ steps: - run: name: "Gather machine info" @@ -116,11 +112,9 @@ commands: unix-like: type: boolean default: true - resource_class: medium+ steps: - unless: condition: << parameters.unix-like >> - resource_class: medium+ steps: - run: name: "Prepare Google credentials - Windows" @@ -132,7 +126,6 @@ commands: [Text.Encoding]::ASCII.GetString([Convert]::FromBase64String($Env:GOOGLE_APPLICATION_CREDENTIALS)) | Out-File -FilePath $Env:CI_GCS_PATH -Encoding ASCII - when: condition: << parameters.unix-like >> - resource_class: medium+ steps: - run: name: "Prepare Google credentials - Unix" @@ -145,7 +138,6 @@ commands: description: "Include optional requirements" type: boolean default: false - resource_class: medium+ steps: - run: name: "Collecting requirements" @@ -157,7 +149,6 @@ commands: pip3 install -r requirements.txt pip3 install -e . style-check: - resource_class: medium+ steps: - run: name: "Checking code style" @@ -169,7 +160,6 @@ commands: unix-like: type: boolean default: true - resource_class: medium+ steps: - unless: condition: << parameters.unix-like >> @@ -192,7 +182,6 @@ commands: export GOOGLE_APPLICATION_CREDENTIALS=$HOME/.secrets/gcs.json pytest --cov-report=xml --cov=./ codecov-upload: - resource_class: medium+ steps: - codecov/upload: file: coverage.xml @@ -201,7 +190,6 @@ commands: unix-like: type: boolean default: true - resource_class: medium+ steps: - unless: condition: << parameters.unix-like >> @@ -218,13 +206,11 @@ commands: - store_artifacts: path: test-reports slack-status: - resource_class: medium+ steps: - slack/status: fail_only: true webhook: $SLACK_WEBHOOK conda-install: - resource_class: medium+ steps: - run: name: "Install Miniconda" @@ -234,7 +220,6 @@ commands: bash Miniconda3-py38_4.9.2-Linux-x86_64.sh -b echo 'export PATH=$HOME/miniconda3/bin:$PATH' >> $BASH_ENV conda-setup: - resource_class: medium+ steps: - run: name: "Install required conda packages and set config" @@ -243,7 +228,6 @@ commands: conda config --add channels conda-forge conda config --set anaconda_upload yes conda-build: - resource_class: medium+ steps: - run: name: "Build Hub for conda" @@ -342,7 +326,6 @@ jobs: - slack-status conda: executor: linux - resource_class: medium+ steps: - checkout - conda-install From 580fe2ffbdda38ca43a0a95b7d84fd4deb7cfeb0 Mon Sep 17 00:00:00 2001 From: kristinagrig06 Date: Mon, 3 May 2021 00:14:20 +0400 Subject: [PATCH 11/14] Add mask and bounding box support --- hub/api/integrations.py | 120 +++++++++++++++++++++++++++++++--------- 1 file changed, 94 insertions(+), 26 deletions(-) diff --git a/hub/api/integrations.py b/hub/api/integrations.py index d88f6fa08f..a328136078 100644 --- a/hub/api/integrations.py +++ b/hub/api/integrations.py @@ -9,9 +9,11 @@ import json from itertools import chain from collections import defaultdict +import PIL.Image +import PIL.ImageDraw from hub.exceptions import ModuleNotInstalledException, OutOfBoundsError from hub.schema.features import Primitive, Tensor, SchemaDict -from hub.schema import Audio, BBox, ClassLabel, Image, Sequence, Text, Video +from hub.schema import Audio, BBox, ClassLabel, Image, Sequence, Text, Video, Mask from .dataset import Dataset import hub.store.pickle_s3_storage import hub.schema.serialize @@ -683,42 +685,88 @@ def _from_supervisely(project, scheduler: str = "single", workers: int = 1): project_type = project_meta_dict["projectType"] mode = sly.OpenMode.READ - def infer_project(project, project_type, read_mode): - def infer_shape_image(paths): - item_path, item_ann_path = paths - ann = sly.Annotation.load_json_file(item_ann_path, project.meta) - ann_dict = ann.to_json() - return list(ann_dict["size"].values()) + def infer_image(paths): + bboxes, masks = [], [] + classes_bb, classes_mask = [], [] + item_path, item_ann_path = paths + + ann = sly.Annotation.load_json_file(item_ann_path, project.meta) + ann_dict = ann.to_json() + sizes = (ann_dict["size"]["height"], ann_dict["size"]["width"]) + for obj in ann_dict["objects"]: + if obj["geometryType"] == "rectangle": + bboxes.append( + [item for sublist in obj["points"]["exterior"] for item in sublist] + ) + classes_bb.append(obj["classTitle"]) + elif obj["geometryType"] == "polygon": + img = PIL.Image.new("L", (sizes[1], sizes[0]), 0) + PIL.ImageDraw.Draw(img).polygon( + [tuple(obj) for obj in obj["points"]["exterior"]], + outline=1, + fill=1, + ) + masks.append(np.array(img)) + classes_mask.append(obj["classTitle"]) + return sizes, bboxes, masks, classes_bb, classes_mask - def infer_shape_video(paths): - item_path, item_ann_path = paths - vreader = FFmpegReader(item_path) - return vreader.getShape() + def infer_video(paths): + item_path, item_ann_path = paths + vreader = FFmpegReader(item_path) + return (vreader.getShape(),) + def infer_project(project, project_type, read_mode): if project_type == "images": if not instantiated: project = sly_image_project.Project(project, mode) max_shape = (0, 0) - return project, Image, infer_shape_image, max_shape + return ( + project, + Image, + infer_image, + max_shape, + ) elif project_type == "videos": if not instantiated: project = sly_video_project.VideoProject(project, mode) max_shape = (0, 0, 0, 0) - return project, Video, infer_shape_video, max_shape + return ( + project, + Video, + infer_video, + max_shape, + ) - project, main_blob, infer_shape, max_shape = infer_project( - project, project_type, mode - ) + project, main_blob, infer_ds, max_shape = infer_project(project, project_type, mode) + + image_paths = [] label_names = [] + max_num_bboxes = 0 + max_num_polys = 0 + masks = False datasets = project.datasets.items() uniform = True for ds in datasets: - for item in ds: - shape = infer_shape(ds.get_item_paths(item)) + for i, item in enumerate(ds): + path = ds.get_item_paths(item) + image_paths.append(path) + inf = infer_ds(path) + if len(inf) > 1: + if inf[3]: + label_names.extend(inf[3]) + if len(inf[3]) > max_num_bboxes: + max_num_bboxes = len(inf[3]) + if inf[4]: + label_names.extend(inf[4]) + if len(inf[3]) > max_num_polys: + max_num_polys = len(inf[4]) + if inf[2]: + masks = True + shape = inf[0] max_shape = np.maximum(shape, max_shape) if uniform and max_shape.any() and (shape != max_shape).any(): uniform = False - label_names.append(ds.name) + label_names = list(np.unique(label_names)) items = chain(*datasets) idatasets = iter(datasets) ds, i = next(idatasets), 0 @@ -731,24 +779,44 @@ def infer_shape_video(paths): blob_shape = {key: max_shape.tolist()} if key == "max_shape": blob_shape["shape"] = (None, None, None, 3) + schema = { project_type: main_blob(**blob_shape), - "dataset": ClassLabel(names=label_names), } + if max_num_bboxes: + schema["bbox"] = BBox(shape=(None, 4), max_shape=(max_num_bboxes, 4)) + if label_names: + schema["label"] = ClassLabel( + shape=(None,), + max_shape=(max(max_num_bboxes, max_num_polys),), + names=label_names, + ) + if masks: + schema["mask"] = Mask( + shape=(None, None, None), max_shape=(*max_shape.tolist(), 1) + ) @hub.transform(schema=schema, scheduler=scheduler, workers=workers) def transformation(item): nonlocal i, ds + sample = {} if i >= len(ds): ds, i = next(idatasets), 0 item_path, item_ann_path = ds.get_item_paths(item) i += 1 - return { - project_type: read(item_path), - "dataset": schema["dataset"].str2int(ds.name), - } - - return transformation(items) + _, bboxes, masks, classes_bbox, classes_mask = infer_ds( + (item_path, item_ann_path) + ) + sample[project_type] = read(item_path) + if bboxes: + sample["bbox"] = np.array(bboxes) + sample["label"] = [label_names.index(i) for i in classes_bbox] + if masks: + sample["mask"] = np.expand_dims(masks[0], -1) + sample["label"] = [label_names.index(i) for i in classes_mask] + return sample + + return transformation(list(items)) def _to_supervisely(dataset, output): From 8023547bff3052ed1e9eaa4a3ff2a538344107d7 Mon Sep 17 00:00:00 2001 From: kristinagrig06 Date: Mon, 3 May 2021 11:53:27 +0400 Subject: [PATCH 12/14] Remove test_dataset from commit --- hub/api/dataset_utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hub/api/dataset_utils.py b/hub/api/dataset_utils.py index cbca5ca99d..b6279f9c0f 100644 --- a/hub/api/dataset_utils.py +++ b/hub/api/dataset_utils.py @@ -299,6 +299,13 @@ def check_class_label(value: Union[np.ndarray, list], label: ClassLabel): assign_class_labels[i] = convert_str_arr_to_int(val, label) if any((isinstance(val, np.ndarray) for val in assign_class_labels)): assign_class_labels_flat = np.hstack(assign_class_labels) + elif any((isinstance(val, List) for val in assign_class_labels)): + assign_class_labels_flat = [ + item for sublist in assign_class_labels for item in sublist + ] + for i, val in enumerate(assign_class_labels): + if isinstance(val, List): + assign_class_labels[i] = np.array(val) else: assign_class_labels_flat = assign_class_labels if ( @@ -309,5 +316,7 @@ def check_class_label(value: Union[np.ndarray, list], label: ClassLabel): range(label.num_classes - 1), assign_class_labels_flat ) if len(assign_class_labels) == 1: + if isinstance(assign_class_labels, List): + return [np.array(assign_class_labels[0])] return assign_class_labels[0] return assign_class_labels From 32d41a3dac13be74b3ddc21bd1b0cb9be29d5635 Mon Sep 17 00:00:00 2001 From: kristinagrig06 Date: Mon, 3 May 2021 12:06:18 +0400 Subject: [PATCH 13/14] Revert "Remove test_dataset from commit" This reverts commit 8023547bff3052ed1e9eaa4a3ff2a538344107d7. --- hub/api/dataset_utils.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/hub/api/dataset_utils.py b/hub/api/dataset_utils.py index b6279f9c0f..cbca5ca99d 100644 --- a/hub/api/dataset_utils.py +++ b/hub/api/dataset_utils.py @@ -299,13 +299,6 @@ def check_class_label(value: Union[np.ndarray, list], label: ClassLabel): assign_class_labels[i] = convert_str_arr_to_int(val, label) if any((isinstance(val, np.ndarray) for val in assign_class_labels)): assign_class_labels_flat = np.hstack(assign_class_labels) - elif any((isinstance(val, List) for val in assign_class_labels)): - assign_class_labels_flat = [ - item for sublist in assign_class_labels for item in sublist - ] - for i, val in enumerate(assign_class_labels): - if isinstance(val, List): - assign_class_labels[i] = np.array(val) else: assign_class_labels_flat = assign_class_labels if ( @@ -316,7 +309,5 @@ def check_class_label(value: Union[np.ndarray, list], label: ClassLabel): range(label.num_classes - 1), assign_class_labels_flat ) if len(assign_class_labels) == 1: - if isinstance(assign_class_labels, List): - return [np.array(assign_class_labels[0])] return assign_class_labels[0] return assign_class_labels From 899547456ba0cd5b02d2a7c493b9a34d7701364f Mon Sep 17 00:00:00 2001 From: kristinagrig06 Date: Mon, 3 May 2021 12:07:14 +0400 Subject: [PATCH 14/14] Add tests --- hub/api/integrations.py | 2 +- hub/api/tests/test_converters.py | 40 +++++++++++++++++++++++++++++--- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/hub/api/integrations.py b/hub/api/integrations.py index a328136078..df29621d0c 100644 --- a/hub/api/integrations.py +++ b/hub/api/integrations.py @@ -758,7 +758,7 @@ def infer_project(project, project_type, read_mode): max_num_bboxes = len(inf[3]) if inf[4]: label_names.extend(inf[4]) - if len(inf[3]) > max_num_polys: + if len(inf[4]) > max_num_polys: max_num_polys = len(inf[4]) if inf[2]: masks = True diff --git a/hub/api/tests/test_converters.py b/hub/api/tests/test_converters.py index 88f311d3e8..e309981745 100644 --- a/hub/api/tests/test_converters.py +++ b/hub/api/tests/test_converters.py @@ -497,13 +497,47 @@ def test_from_supervisely(): project_name = "pixel_project" project_path = os.path.join(data_path, project_name) project = sly.Project(project_path, sly.OpenMode.CREATE) + init_meta = project.meta project.meta._project_type = "images" project_ds = project.create_dataset(project_name) - img = np.array([[255, 255, 255]]) + img = np.ones((30, 30, 3)) project_ds.add_item_np("pixel.jpeg", img) + item_path, item_ann_path = project_ds.get_item_paths("pixel.jpeg") + ann = sly.Annotation.load_json_file(item_ann_path, project.meta) + bbox_class = sly.ObjClass(name="_bbox", geometry_type=sly.Rectangle) + meta_with_bboxes = project.meta.add_obj_classes([bbox_class]) + bbox_label = sly.Label( + geometry=sly.Rectangle(0, 0, 10, 10), + obj_class=meta_with_bboxes.obj_classes.get("_bbox"), + ) + ann_with_bboxes = ann.add_labels([bbox_label]) + project_ds.set_ann("pixel.jpeg", ann_with_bboxes) + project.set_meta(meta_with_bboxes) + + trans = hub.Dataset.from_supervisely(project) + dataset = trans.store(os.path.join(data_path, "pixel_dataset_bbox")) + project_back = dataset.to_supervisely( + os.path.join(data_path, "pixel_project_bbox_back") + ) + project.set_meta(init_meta) + poly_class = sly.ObjClass(name="_poly", geometry_type=sly.Polygon) + meta_with_poly = project.meta.add_obj_classes([poly_class]) + points = [[0, 0], [0, 10], [10, 0], [10, 10]] + point_loc_points = [ + sly.geometry.point_location.PointLocation(*point) for point in points + ] + poly_label = sly.Label( + geometry=sly.Polygon(exterior=point_loc_points, interior=[]), + obj_class=meta_with_poly.obj_classes.get("_poly"), + ) + ann_with_polys = ann.add_labels([poly_label]) + project_ds.set_ann("pixel.jpeg", ann_with_polys) + project.set_meta(meta_with_poly) trans = hub.Dataset.from_supervisely(project) - dataset = trans.store(os.path.join(data_path, "pixel_dataset")) - project_back = dataset.to_supervisely(os.path.join(data_path, "pixel_project_back")) + dataset = trans.store(os.path.join(data_path, "pixel_dataset_poly")) + project_back = dataset.to_supervisely( + os.path.join(data_path, "pixel_project_poly_back") + ) @pytest.mark.skipif(