Merge pull request activeloopai#777 from activeloopai/feature/supervisely

Add supervisely integration
kristinagrig06 authored May 18, 2021
2 parents 0f1c5f3 + 691cfba commit d3a7998
Showing 7 changed files with 424 additions and 4 deletions.
19 changes: 18 additions & 1 deletion .circleci/config.yml
@@ -90,11 +90,17 @@ commands:
get-python:
steps:
- run:
name: "Install Python"
name: "Install Python & ffmpeg"
command: |
brew update
brew install python@3.7
brew link --overwrite python@3.7
brew install ffmpeg
get-linux-ffmpeg:
steps:
- run:
name: "Install ffmpeg"
command: sudo apt-get update && sudo apt-get install ffmpeg
info:
steps:
- run:
@@ -168,6 +174,7 @@ commands:
pytest --cov-report=xml --cov=./
- when:
condition: << parameters.unix-like >>
resource_class: medium+
steps:
- run:
name: "Running tests - Unix"
@@ -244,6 +251,7 @@ jobs:
type: boolean
default: false
executor: << parameters.e >>
resource_class: medium+
steps:
- checkout
- unless:
@@ -254,6 +262,14 @@
condition: << parameters.mac-like >>
steps:
- get-python
- when:
condition:
and:
- not: << parameters.mac-like >>
- << parameters.unix-like >>
- << parameters.optional >>
steps:
- get-linux-ffmpeg
- info
- google-creds:
unix-like: << parameters.unix-like >>
@@ -272,6 +288,7 @@ jobs:
executor: linux
environment:
IMAGE_NAME: snarkai/hub
resource_class: medium+
steps:
- setup_remote_docker
- checkout
30 changes: 30 additions & 0 deletions docs/source/integrations/supervisely.md
@@ -0,0 +1,30 @@
# Supervisely

## Dataset to Supervisely Project
Here is an example of converting a Hub dataset into the Supervisely format.

```python
from hub import Dataset, schema

# Create dataset
ds = Dataset(
    "./dataset",
    shape=(64,),
    schema={
        "image": schema.Image((512, 512, 3)),
    },
)

# transform into Supervisely project
project = ds.to_supervisely("sample-project")
```
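
The returned `project` is a plain Supervisely project object, so it can be inspected with the same `supervisely_lib` accessors the converter itself uses. A small sketch continuing from the block above (the `sly_ds.name` attribute is an assumption about the `supervisely_lib` API, not part of this change):

```python
# Walk the datasets and items that were written into the project
for sly_ds in project.datasets.items():
    for item_name in sly_ds:
        print(sly_ds.name, item_name)

# The project meta records the project type and annotation classes
print(project.meta.to_json())
```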

## Supervisely Project to Dataset
Conversely, a Hub dataset can be created from a Supervisely project:

```python
import hub

out_ds = hub.Dataset.from_supervisely("sample-project")
res_ds = out_ds.store("./dataset")
```
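
`from_supervisely` also accepts `scheduler` and `workers` arguments (see `hub/api/dataset.py` below), and the stored result reads back like any other Hub dataset. A rough sketch continuing from the block above; note that the converter keys the main tensor by the Supervisely project type, so `"images"` rather than `"image"` is assumed here:

```python
# Parallel conversion; scheduler/workers mirror the from_pytorch converter
out_ds = hub.Dataset.from_supervisely(
    "sample-project", scheduler="threaded", workers=2
)
res_ds = out_ds.store("./dataset")

# Read one converted sample back as a numpy array. Besides the "images"
# tensor, "bbox", "label" and "mask" tensors are added when the project
# contains such annotations.
first_image = res_ds["images"][0].compute()
print(first_image.shape)
```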
30 changes: 30 additions & 0 deletions hub/api/dataset.py
@@ -813,6 +813,18 @@ def to_tensorflow(self, indexes=None, include_shapes=False, key_list=None):
        ds = _to_tensorflow(self, indexes, include_shapes, key_list)
        return ds

    def to_supervisely(self, output):
        """| Converts the dataset into a supervisely project
        Parameters
        ----------
        output: str
            Project name and output directory.
        """
        from .integrations import _to_supervisely

        project = _to_supervisely(self, output)
        return project

    def _get_dictionary(self, subpath, slice_=None):
        """Gets dictionary from dataset given incomplete subpath"""
        tensor_dict = {}
@@ -1067,6 +1079,24 @@ def from_pytorch(dataset, scheduler: str = "single", workers: int = 1):
        ds = _from_pytorch(dataset, scheduler, workers)
        return ds

    @staticmethod
    def from_supervisely(project, scheduler: str = "single", workers: int = 1):
        """| Converts a supervisely project into hub format
        Parameters
        ----------
        project: str
            The path to the supervisely project that needs to be converted into hub format
        scheduler: str
            choice between "single", "threaded", "processed"
        workers: int
            how many threads or processes to use
        """
        from .integrations import _from_supervisely

        ds = _from_supervisely(project, scheduler, workers)
        return ds

    @staticmethod
    def from_path(path, scheduler="single", workers=1):
        # infer schema & get data (label -> input mapping with file refs)
215 changes: 214 additions & 1 deletion hub/api/integrations.py
@@ -5,10 +5,15 @@
"""

import sys
import numpy as np
import json
from itertools import chain
from collections import defaultdict
import PIL.Image
import PIL.ImageDraw
from hub.exceptions import ModuleNotInstalledException, OutOfBoundsError
from hub.schema.features import Primitive, Tensor, SchemaDict
from hub.schema import Audio, BBox, ClassLabel, Image, Sequence, Text, Video
from hub.schema import Audio, BBox, ClassLabel, Image, Sequence, Text, Video, Mask
from .dataset import Dataset
import hub.store.pickle_s3_storage
import hub.schema.serialize
@@ -702,3 +707,211 @@ def __iter__(self):
        self._init_ds()
        for i in range(len(self)):
            yield self[i]


def _from_supervisely(project, scheduler: str = "single", workers: int = 1):
    try:
        import supervisely_lib as sly
        from supervisely_lib.project import project as sly_image_project
        from supervisely_lib.project import video_project as sly_video_project
        from skvideo.io import FFmpegReader, vread
    except ModuleNotFoundError:
        raise ModuleNotInstalledException("supervisely")
    if isinstance(project, str):
        with open(project + "meta.json") as meta_file:
            project_meta_dict = json.load(meta_file)
        instantiated = False
    else:
        project_meta_dict = project.meta.to_json()
        instantiated = True
    project_type = project_meta_dict["projectType"]
    mode = sly.OpenMode.READ

    def infer_image(paths):
        bboxes, masks = [], []
        classes_bb, classes_mask = [], []
        item_path, item_ann_path = paths

        ann = sly.Annotation.load_json_file(item_ann_path, project.meta)
        ann_dict = ann.to_json()
        sizes = (ann_dict["size"]["height"], ann_dict["size"]["width"])
        for obj in ann_dict["objects"]:
            if obj["geometryType"] == "rectangle":
                bboxes.append(
                    [item for sublist in obj["points"]["exterior"] for item in sublist]
                )
                classes_bb.append(obj["classTitle"])
            elif obj["geometryType"] == "polygon":
                img = PIL.Image.new("L", (sizes[1], sizes[0]), 0)
                PIL.ImageDraw.Draw(img).polygon(
                    [tuple(obj) for obj in obj["points"]["exterior"]],
                    outline=1,
                    fill=1,
                )
                masks.append(np.array(img))
                classes_mask.append(obj["classTitle"])
        return sizes, bboxes, masks, classes_bb, classes_mask

    def infer_video(paths):
        item_path, item_ann_path = paths
        vreader = FFmpegReader(item_path)
        return (vreader.getShape(),)

    def infer_project(project, project_type, read_mode):
        if project_type == "images":
            if not instantiated:
                project = sly_image_project.Project(project, mode)
            max_shape = (0, 0)
            return (
                project,
                Image,
                infer_image,
                max_shape,
            )
        elif project_type == "videos":
            if not instantiated:
                project = sly_video_project.VideoProject(project, mode)
            max_shape = (0, 0, 0, 0)
            return (
                project,
                Video,
                infer_video,
                max_shape,
            )

    project, main_blob, infer_ds, max_shape = infer_project(project, project_type, mode)

    image_paths = []
    label_names = []
    max_num_bboxes = 0
    max_num_polys = 0
    masks = False
    datasets = project.datasets.items()
    uniform = True
    for ds in datasets:
        for i, item in enumerate(ds):
            path = ds.get_item_paths(item)
            image_paths.append(path)
            inf = infer_ds(path)
            if len(inf) > 1:
                if inf[3]:
                    label_names.extend(inf[3])
                    if len(inf[3]) > max_num_bboxes:
                        max_num_bboxes = len(inf[3])
                if inf[4]:
                    label_names.extend(inf[4])
                    if len(inf[4]) > max_num_polys:
                        max_num_polys = len(inf[4])
                if inf[2]:
                    masks = True
            shape = inf[0]
            max_shape = np.maximum(shape, max_shape)
            if uniform and max_shape.any() and (shape != max_shape).any():
                uniform = False
    label_names = list(np.unique(label_names))
    items = chain(*datasets)
    idatasets = iter(datasets)
    ds, i = next(idatasets), 0
    key = "shape" if uniform else "max_shape"
    if project_type == "images":
        read = sly.imaging.image.read
        blob_shape = {key: (*max_shape.tolist(), 3)}
    elif project_type == "videos":
        read = vread
        blob_shape = {key: max_shape.tolist()}
        if key == "max_shape":
            blob_shape["shape"] = (None, None, None, 3)

    schema = {
        project_type: main_blob(**blob_shape),
    }
    if max_num_bboxes:
        schema["bbox"] = BBox(shape=(None, 4), max_shape=(max_num_bboxes, 4))
    if label_names:
        schema["label"] = ClassLabel(
            shape=(None,),
            max_shape=(max(max_num_bboxes, max_num_polys),),
            names=label_names,
        )
    if masks:
        schema["mask"] = Mask(
            shape=(None, None, None), max_shape=(*max_shape.tolist(), 1)
        )

    @hub.transform(schema=schema, scheduler=scheduler, workers=workers)
    def transformation(item):
        nonlocal i, ds
        sample = {}
        if i >= len(ds):
            ds, i = next(idatasets), 0
        item_path, item_ann_path = ds.get_item_paths(item)
        i += 1
        _, bboxes, masks, classes_bbox, classes_mask = infer_ds(
            (item_path, item_ann_path)
        )
        sample[project_type] = read(item_path)
        if bboxes:
            sample["bbox"] = np.array(bboxes)
            sample["label"] = [label_names.index(i) for i in classes_bbox]
        if masks:
            sample["mask"] = np.expand_dims(masks[0], -1)
            sample["label"] = [label_names.index(i) for i in classes_mask]
        return sample

    return transformation(list(items))


def _to_supervisely(dataset, output):
    try:
        import supervisely_lib as sly
        from skvideo.io import vwrite
    except ModuleNotFoundError:
        raise ModuleNotInstalledException("supervisely")
    schema_dict = dataset.schema.dict_
    for key, schem in schema_dict.items():
        if isinstance(schem, Image):
            project_type = "images"
            extension = "jpeg"
            break
        elif isinstance(schem, Video):
            project_type = "videos"
            extension = "mp4"
            break
    else:
        raise Exception
    mode = sly.OpenMode.CREATE
    if project_type == "images":
        _project = sly.Project
    elif project_type == "videos":
        _project = sly.VideoProject
    else:
        raise Exception
    pr = _project(output, mode)
    meta = pr.meta
    meta._project_type = project_type
    # probably here we can create multiple datasets
    out_ds = pr.create_dataset(output)
    try:
        fn_key = "filename"
        dataset[fn_key]
    except KeyError:
        fn_key = None
    zeroes = len(str(len(dataset)))
    for idx, view in enumerate(dataset):
        obj = view[key].compute()
        if fn_key:
            fn = view[fn_key].compute()
        else:
            fn = f"{idx:0{zeroes}}"
        fn = "{}.{}".format(fn, extension)
        # strangely supervisely prevents from using this method on videos
        try:
            out_ds.add_item_np(fn, obj)
        except RuntimeError:
            # fix with in-memory file
            path = "{}/{}".format(out_ds.item_dir, fn)
            vwrite(path, obj)
        out_ds._item_to_ann[fn] = fn + ".json"
        out_ds.set_ann(fn, out_ds._get_empty_annotaion(path))
    pr.set_meta(meta)
    return pr
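
One detail of `_from_supervisely` above worth noting: `project` may be either a path on disk (in which case the loader opens `project + "meta.json"` itself, so the path needs a trailing separator) or an already instantiated `supervisely_lib` project object. A sketch of both call styles, assuming an image project already exists under `sample-project/`:

```python
import hub
import supervisely_lib as sly
from supervisely_lib.project import project as sly_image_project

# 1) Pass a directory path (note the trailing slash, since the loader
#    concatenates it directly with "meta.json")
out_ds = hub.Dataset.from_supervisely("sample-project/")

# 2) Pass an already opened project object
project = sly_image_project.Project("sample-project/", sly.OpenMode.READ)
out_ds = hub.Dataset.from_supervisely(project)
```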