Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add supervisely integration #777

Merged
merged 26 commits into from
May 18, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
17bf8a0
add supervisely integration
haiyangdeperci Apr 16, 2021
1ca563e
add docs for supervisely integration
haiyangdeperci Apr 16, 2021
066cc49
add tests for supervisely
haiyangdeperci Apr 16, 2021
48bf733
improve tests
haiyangdeperci Apr 19, 2021
8122eea
lint
haiyangdeperci Apr 19, 2021
2b13551
add video tests
haiyangdeperci Apr 21, 2021
e54c3f2
add ffmpeg to circleci for video tests
haiyangdeperci Apr 21, 2021
1432b8c
leave ffmpeg as optional
haiyangdeperci Apr 21, 2021
9575acd
Merge branch 'master' of https://github.com/activeloopai/Hub into fea…
kristinagrig06 Apr 30, 2021
ac5e4ff
Fix case with multiple class labels
kristinagrig06 Apr 30, 2021
6bdbc88
Add test
kristinagrig06 Apr 30, 2021
90212ef
Update resource class
imshashank Apr 30, 2021
582fd8b
Merge branch 'feature/supervisely' of github.com:activeloopai/Hub int…
imshashank Apr 30, 2021
fc005de
Fix circleCi
imshashank Apr 30, 2021
702c3c6
Merge branch 'fixes/class_label_shape' into feature/supervisely
kristinagrig06 May 2, 2021
580fe2f
Add mask and bounding box support
kristinagrig06 May 2, 2021
7298495
Merge branch 'feature/supervisely' of https://github.com/activeloopai…
kristinagrig06 May 2, 2021
f862517
Add support for list of lists
kristinagrig06 May 2, 2021
30dbdde
Add tests
kristinagrig06 May 2, 2021
e4b0593
Add list test
kristinagrig06 May 3, 2021
8023547
Remove test_dataset from commit
kristinagrig06 May 3, 2021
32d41a3
Revert "Remove test_dataset from commit"
kristinagrig06 May 3, 2021
8995474
Add tests
kristinagrig06 May 3, 2021
3006546
Merge branch 'fixes/class_label_shape' into feature/supervisely
kristinagrig06 May 3, 2021
eff07d4
Merge remote-tracking branch 'origin' into feature/supervisely
kristinagrig06 May 11, 2021
691cfba
Merge branch 'master' of https://github.com/activeloopai/Hub into fea…
kristinagrig06 May 11, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
add supervisely integration
  • Loading branch information
haiyangdeperci committed Apr 16, 2021
commit 17bf8a000dc5a35aedf6cfbe974a78f21610eb6e
30 changes: 30 additions & 0 deletions hub/api/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -793,6 +793,18 @@ def to_tensorflow(self, indexes=None, include_shapes=False, key_list=None):
ds = _to_tensorflow(self, indexes, include_shapes, key_list)
return ds

def to_supervisely(self, output):
    """| Exports this dataset as a supervisely project

    Parameters
    ----------
    output: str
        Project name and output directory.

    Returns
    -------
    The project object returned by ``_to_supervisely``.
    """
    # Imported lazily so the optional supervisely dependency is only
    # required when this conversion is actually requested.
    from .integrations import _to_supervisely

    return _to_supervisely(self, output)

def _get_dictionary(self, subpath, slice_=None):
"""Gets dictionary from dataset given incomplete subpath"""
tensor_dict = {}
Expand Down Expand Up @@ -1047,6 +1059,24 @@ def from_pytorch(dataset, scheduler: str = "single", workers: int = 1):
ds = _from_pytorch(dataset, scheduler, workers)
return ds

@staticmethod
def from_supervisely(project, scheduler: str = "single", workers: int = 1):
    """| Converts a supervisely project into hub format

    Parameters
    ----------
    project: str
        The path to the supervisely project that needs to be converted into hub format
    scheduler: str
        choice between "single", "threaded", "processed"
    workers: int
        how many threads or processes to use

    Returns
    -------
    The result of ``_from_supervisely(project, scheduler, workers)``.
    """
    # Imported lazily so the optional supervisely dependency is only
    # required when this conversion is actually requested.
    from .integrations import _from_supervisely

    return _from_supervisely(project, scheduler, workers)

@staticmethod
def from_path(path, scheduler="single", workers=1):
# infer schema & get data (label -> input mapping with file refs)
Expand Down
123 changes: 123 additions & 0 deletions hub/api/integrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
"""

import sys
import numpy as np
import json
from itertools import chain
from collections import defaultdict
from hub.exceptions import ModuleNotInstalledException, OutOfBoundsError
from hub.schema.features import Primitive, Tensor, SchemaDict
Expand Down Expand Up @@ -659,3 +662,123 @@ def __iter__(self):
self._init_ds()
for i in range(len(self)):
yield self[i]


def _from_supervisely(project, scheduler: str = "single", workers: int = 1):
    """Build a hub transform that reads a supervisely project from disk.

    The project's ``meta.json`` is read to find its ``projectType``. Every
    item is then inspected once to decide whether all items share one shape
    (schema uses ``shape``) or not (schema uses ``max_shape``). Finally a
    ``hub.transform`` is created that yields, per item, the decoded blob and
    the index of the supervisely dataset it belongs to.

    Parameters
    ----------
    project: str
        Path to the supervisely project directory (must contain ``meta.json``).
    scheduler: str
        Forwarded to ``hub.transform``.
    workers: int
        Forwarded to ``hub.transform``.

    Returns
    -------
    The result of calling the ``hub.transform``-decorated function on the
    chained items of all datasets in the project.

    Raises
    ------
    ModuleNotInstalledException
        If ``supervisely_lib`` or ``skvideo`` cannot be imported.
    ValueError
        If ``projectType`` is neither ``"images"`` nor ``"videos"``.
    """
    import os

    try:
        import supervisely_lib as sly
        from supervisely_lib.project import project as sly_image_project
        from supervisely_lib.project import video_project as sly_video_project
        from skvideo.io import FFmpegReader, vread
    except ModuleNotFoundError as err:
        raise ModuleNotInstalledException("supervisely") from err

    # os.path.join works whether or not `project` ends with a path separator;
    # plain string concatenation silently required a trailing "/".
    with open(os.path.join(project, "meta.json")) as meta_file:
        project_meta_dict = json.load(meta_file)
    project_type = project_meta_dict["projectType"]
    mode = sly.OpenMode.READ

    def infer_project(project, project_type, read_mode):
        """Open the project; return (project, blob schema class, shape reader, zero shape)."""

        def infer_shape_image(paths):
            # Image size is taken from the annotation, so the image itself
            # does not have to be decoded. `project` here is the opened
            # project object (rebound below before this helper is called).
            item_path, item_ann_path = paths
            ann = sly.Annotation.load_json_file(item_ann_path, project.meta)
            ann_dict = ann.to_json()
            return list(ann_dict["size"].values())

        def infer_shape_video(paths):
            item_path, item_ann_path = paths
            vreader = FFmpegReader(item_path)
            return vreader.getShape()

        if project_type == "images":
            project = sly_image_project.Project(project, read_mode)
            return project, Image, infer_shape_image, (0, 0)
        if project_type == "videos":
            project = sly_video_project.VideoProject(project, read_mode)
            return project, Video, infer_shape_video, (0, 0, 0, 0)
        # Other project types (e.g. point clouds) have no hub schema yet.
        raise ValueError(
            "Unsupported supervisely project type: {!r}".format(project_type)
        )

    project, main_blob, infer_shape, max_shape = infer_project(
        project, project_type, mode
    )
    # Keep max_shape an ndarray even when the project has no items.
    max_shape = np.asarray(max_shape)
    label_names = []
    datasets = project.datasets.items()
    uniform = True
    first_shape = None
    for ds in datasets:
        for item in ds:
            shape = np.asarray(infer_shape(ds.get_item_paths(item)))
            # Uniformity is judged against the first shape seen. Comparing
            # against the running maximum would miss the case where a later
            # item is larger than every earlier one.
            if first_shape is None:
                first_shape = shape
            elif uniform and (shape != first_shape).any():
                uniform = False
            max_shape = np.maximum(shape, max_shape)
        label_names.append(ds.name)

    items = chain(*datasets)
    idatasets = iter(datasets)
    ds, i = next(idatasets), 0
    key = "shape" if uniform else "max_shape"
    if project_type == "images":
        read = sly.imaging.image.read
        # Annotation size has two entries; the channel axis is appended here.
        blob_shape = {key: (*max_shape.tolist(), 3)}
    elif project_type == "videos":
        read = vread
        blob_shape = {key: max_shape.tolist()}
        if key == "max_shape":
            blob_shape["shape"] = (None, None, None, 3)
    schema = {
        project_type: main_blob(**blob_shape),
        "dataset": ClassLabel(names=label_names),
    }

    @hub.transform(schema=schema, scheduler=scheduler, workers=workers)
    def transformation(item):
        # `items` is the concatenation of all datasets in order, so a running
        # counter tells which dataset the current item belongs to.
        # NOTE(review): this shared counter assumes items are processed
        # sequentially and in order - confirm behaviour with parallel schedulers.
        nonlocal i, ds
        # `while` (not `if`) so that empty datasets are skipped as well.
        while i >= len(ds):
            ds, i = next(idatasets), 0
        item_path, item_ann_path = ds.get_item_paths(item)
        i += 1
        return {
            project_type: read(item_path),
            "dataset": schema["dataset"].str2int(ds.name),
        }

    return transformation(items)


def _to_supervisely(dataset, output):
    """Export a hub dataset as a supervisely project.

    The first ``Image`` or ``Video`` entry found in the dataset schema decides
    the project type and is the only tensor that gets exported. Items are
    named after the dataset's ``"filename"`` key when it exists, otherwise
    after their index.

    Parameters
    ----------
    dataset:
        Hub dataset to export; must expose ``schema.dict_`` and be iterable.
    output: str
        Used both as the project directory and as the name of the single
        supervisely dataset created inside it.

    Returns
    -------
    The created supervisely project object.

    Raises
    ------
    ModuleNotInstalledException
        If ``supervisely_lib`` or ``skvideo`` cannot be imported.
    ValueError
        If the schema contains neither an ``Image`` nor a ``Video`` entry.
    """
    try:
        import supervisely_lib as sly
        from skvideo.io import vwrite
    except ModuleNotFoundError as err:
        raise ModuleNotInstalledException("supervisely") from err

    schema_dict = dataset.schema.dict_
    # Pick the first exportable blob and everything that depends on its type
    # in one place, so no second (unreachable) dispatch is needed.
    for key, schem in schema_dict.items():
        if isinstance(schem, Image):
            blob_key, extension, _project = key, "jpeg", sly.Project
            break
        if isinstance(schem, Video):
            blob_key, extension, _project = key, "mp4", sly.VideoProject
            break
    else:
        raise ValueError(
            "Dataset schema has no Image or Video entry to export to supervisely"
        )

    pr = _project(output, sly.OpenMode.CREATE)
    # probably here we can create multiple datasets
    out_ds = pr.create_dataset(output)

    # Probe once for an optional "filename" tensor to name the exported items.
    try:
        fn_key = "filename"
        dataset[fn_key]
    except KeyError:
        fn_key = None

    for idx, view in enumerate(dataset):
        obj = view[blob_key].compute()
        fn = view[fn_key].compute() if fn_key else str(idx)
        fn = "{}.{}".format(fn, extension)
        # strangely supervisely prevents from using this method on videos
        try:
            out_ds.add_item_np(fn, obj)
        except RuntimeError:
            # Fallback: write the video file directly and register it with an
            # empty annotation through the dataset's private bookkeeping.
            path = "{}/{}".format(out_ds.item_dir, fn)
            vwrite(path, obj)
            out_ds._item_to_ann[fn] = fn + ".json"
            out_ds.set_ann(fn, out_ds._get_empty_annotaion(path))
    return pr
1 change: 1 addition & 0 deletions requirements-optional.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ transformers>=3.5.1
dask[complete]>=2.30
tensorflow_datasets
ray==1.2.0
supervisely==6.1.64