[DL-761] mesh htype support (activeloopai#1940)
* First commit

* Fixed mypy

* Fixed mypy

* Fixed mypy

* mypy changes

* Added abstractions

* Addressed comments
adolkhan authored Dec 9, 2022
1 parent a6ce856 commit e0328b9
Showing 32 changed files with 1,917 additions and 202 deletions.
4 changes: 1 addition & 3 deletions deeplake/api/dataset.py
@@ -1245,6 +1245,7 @@ def ingest_dataframe(
         return ds  # type: ignore

     @staticmethod
+    @deeplake_reporter.record_call
     def list(
         workspace: str = "",
         token: Optional[str] = None,
@@ -1260,9 +1261,6 @@ def list(
         Returns:
             List: List of dataset names.
         """
-        feature_report_path(
-            "", "list", parameters={"workspace": workspace}, token=token
-        )
         client = DeepLakeBackendClient(token=token)
         datasets = client.get_datasets(workspace=workspace)
         return datasets
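
Note: this hunk swaps the hand-rolled feature_report_path(...) call for the @deeplake_reporter.record_call decorator, so reported API entry points share one code path. A minimal sketch of how such a reporting decorator can be structured; the report_usage sink and list_datasets function below are hypothetical stand-ins, not Deep Lake's actual reporter:

import functools

def report_usage(event: str, parameters: dict) -> None:
    # Hypothetical telemetry sink; the real deeplake_reporter presumably
    # forwards call records to an analytics backend instead of printing.
    print(f"record_call: {event} {parameters}")

def record_call(func):
    # Report the call's qualified name and keyword arguments, then run it.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        report_usage(event=func.__qualname__, parameters=kwargs)
        return func(*args, **kwargs)
    return wrapper

@record_call
def list_datasets(workspace: str = "") -> list:
    return []  # placeholder body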
2 changes: 2 additions & 0 deletions deeplake/api/tests/test_api.py
@@ -1030,6 +1030,7 @@ def test_compressions_list():
         "mpo",
         "msp",
         "pcx",
+        "ply",
         "png",
         "ppm",
         "sgi",
@@ -1059,6 +1060,7 @@ def test_htypes_list():
         "json",
         "keypoints_coco",
         "list",
+        "mesh",
         "point",
         "point_cloud",
         "point_cloud.calibration_matrix",
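
Note: these two entries register "ply" among the supported compressions and "mesh" among the htypes. Assuming deeplake.compressions and deeplake.htypes expose the same sorted lists these tests assert against (an assumption; only the test fixtures are visible here), the quickest smoke check is:

import deeplake

assert "ply" in deeplake.compressions
assert "mesh" in deeplake.htypes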
33 changes: 33 additions & 0 deletions deeplake/api/tests/test_mesh.py
@@ -0,0 +1,33 @@
+import pytest
+
+import deeplake
+from deeplake.util.exceptions import DynamicTensorNumpyError
+
+
+def test_mesh(local_ds, mesh_paths):
+    for i, (encoding_type, path) in enumerate(mesh_paths.items()):
+        if encoding_type == "ascii2":
+            pass
+        tensor = local_ds.create_tensor(
+            f"mesh_{i}", htype="mesh", sample_compression="ply"
+        )
+        sample = deeplake.read(path)
+        tensor.append(deeplake.read(path))
+        tensor_numpy = tensor.numpy()
+        assert tensor_numpy.shape[-1] == 3
+
+        tensor_data = tensor.data()
+        assert isinstance(tensor_data, dict)
+
+        tensor.append(deeplake.read(path))
+        tensor.append(deeplake.read(path))
+        tensor.append(deeplake.read(path))
+        if encoding_type == "bin":
+            with pytest.raises(DynamicTensorNumpyError):
+                tensor.numpy()
+        tensor_list = tensor.numpy(aslist=True)
+
+        assert len(tensor_list) == 4
+
+        tensor_data = tensor.data()
+        assert len(tensor_data) == 4
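
Note: end to end, the new htype is used like any other sample-compressed tensor, and only ingestion via deeplake.read is supported (raw-array compression raises, see compress_array later in this diff). A hedged usage sketch, with an illustrative dataset path and a hypothetical .ply file:

import deeplake

ds = deeplake.dataset("./mesh_demo")  # illustrative local path
meshes = ds.create_tensor("meshes", htype="mesh", sample_compression="ply")
meshes.append(deeplake.read("bunny.ply"))  # hypothetical mesh file

vertices = meshes[0].numpy()  # per the test above, the last axis has size 3
assert vertices.shape[-1] == 3
data = meshes[0].data()       # dict form, mesh metadata included
assert isinstance(data, dict)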
8 changes: 7 additions & 1 deletion deeplake/api/tests/test_point_cloud.py
@@ -5,6 +5,7 @@
 from deeplake.core.compression import compress_multiple
 from deeplake.tests.common import get_dummy_data_path
 from deeplake.util.exceptions import CorruptedSampleError
+from deeplake.util.exceptions import DynamicTensorNumpyError

 import numpy as np

@@ -20,7 +21,7 @@ def test_point_cloud(local_ds, point_cloud_paths):
         if "point_cloud" in path:  # check shape only for internal test point_clouds
             assert sample.shape[0] == 20153

-        assert len(sample.meta) == 6
+        assert len(sample.meta) == 7
         assert len(sample.meta["dimension_names"]) == 18
         assert len(sample.meta["las_header"]) == 19

@@ -70,6 +71,11 @@ def test_point_cloud(local_ds, point_cloud_paths):
        20153,
        3,
    )
+
+    assert isinstance(
+        local_ds.point_cloud_without_sample_compression.numpy(aslist=True), list
+    )
+
    assert len(local_ds.point_cloud_without_sample_compression.numpy(aslist=True)) == 2
    assert len(local_ds.point_cloud_without_sample_compression.data(aslist=True)) == 2
    local_ds.create_tensor(
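
Note: the added asserts pin down list-mode reads. Once a tensor holds samples of different shapes, numpy() cannot stack them into a single ndarray and raises DynamicTensorNumpyError, while numpy(aslist=True) returns one array per sample. A small helper sketch of that contract:

import pytest
from deeplake.util.exceptions import DynamicTensorNumpyError

def check_dynamic_reads(tensor):
    # Mixed per-sample shapes: stacked read fails, list read succeeds.
    with pytest.raises(DynamicTensorNumpyError):
        tensor.numpy()
    samples = tensor.numpy(aslist=True)
    assert isinstance(samples, list)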
6 changes: 6 additions & 0 deletions deeplake/compression.py
@@ -93,6 +93,7 @@

 READONLY_COMPRESSIONS = ["mpo", "fli", "dcm", *AUDIO_COMPRESSIONS, *VIDEO_COMPRESSIONS]
 POINT_CLOUD_COMPRESSIONS = ["las"]
+MESH_COMPRESSIONS = ["ply"]


 # Just constants
@@ -101,6 +102,7 @@
 VIDEO_COMPRESSION = "video"
 AUDIO_COMPRESSION = "audio"
 POINT_CLOUD_COMPRESSION = "point_cloud"
+MESH_COMPRESSION = "mesh"


 COMPRESSION_TYPES = [
@@ -109,6 +111,7 @@
     AUDIO_COMPRESSION,
     VIDEO_COMPRESSION,
     POINT_CLOUD_COMPRESSION,
+    MESH_COMPRESSION,
 ]

 # Pillow plugins for some formats might not be installed:
@@ -129,6 +132,7 @@
     *AUDIO_COMPRESSIONS,
     *VIDEO_COMPRESSIONS,
     *POINT_CLOUD_COMPRESSIONS,
+    *MESH_COMPRESSIONS,
 ]
 SUPPORTED_COMPRESSIONS = list(sorted(set(SUPPORTED_COMPRESSIONS)))  # type: ignore
 SUPPORTED_COMPRESSIONS.append(None)  # type: ignore
@@ -151,6 +155,8 @@
     _compression_types[c] = AUDIO_COMPRESSION
 for c in POINT_CLOUD_COMPRESSIONS:
     _compression_types[c] = POINT_CLOUD_COMPRESSION
+for c in MESH_COMPRESSIONS:
+    _compression_types[c] = MESH_COMPRESSION


 def get_compression_type(c):
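
Note: with "ply" registered in _compression_types, the existing table lookup in get_compression_type resolves mesh files without further changes. A quick check of the mapping this registration loop produces:

from deeplake.compression import (
    get_compression_type,
    MESH_COMPRESSION,
    POINT_CLOUD_COMPRESSION,
)

assert get_compression_type("ply") == MESH_COMPRESSION         # "mesh"
assert get_compression_type("las") == POINT_CLOUD_COMPRESSION  # "point_cloud"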
116 changes: 33 additions & 83 deletions deeplake/core/compression.py
@@ -6,13 +6,16 @@
     UnsupportedCompressionError,
     CorruptedSampleError,
 )
-from deeplake.util.point_cloud import LAS_HEADER_FILED_NAME_TO_PARSER
+from deeplake.util.object_3d.read_3d_data import (
+    read_3d_data,
+)
 from deeplake.compression import (
     get_compression_type,
     BYTE_COMPRESSION,
     VIDEO_COMPRESSION,
     AUDIO_COMPRESSION,
     POINT_CLOUD_COMPRESSION,
+    MESH_COMPRESSION,
 )
 from typing import Union, Tuple, Sequence, List, Optional, BinaryIO
 import numpy as np
@@ -27,6 +30,7 @@
 import numcodecs.lz4  # type: ignore
 from numpy.core.fromnumeric import compress  # type: ignore
 import math
+from pathlib import Path

 try:
     import av  # type: ignore
@@ -219,6 +223,10 @@ def compress_array(array: np.ndarray, compression: Optional[str]) -> bytes:
         raise NotImplementedError(
             "In order to store point cloud data, you should use `deeplake.read(path_to_file)`. Compressing raw data is not yet supported."
         )
+    elif compr_type == MESH_COMPRESSION:
+        raise NotImplementedError(
+            "In order to store mesh data, you should use `deeplake.read(path_to_file)`. Compressing raw data is not yet supported."
+        )
     if compression == "apng":
         return _compress_apng(array)
     try:
@@ -287,8 +295,8 @@ def decompress_array(
         return _decompress_audio(buffer)
     elif compr_type == VIDEO_COMPRESSION:
         return _decompress_video(buffer, start_idx, end_idx, step, reverse)  # type: ignore
-    elif compr_type == POINT_CLOUD_COMPRESSION:
-        return _decompress_full_point_cloud(buffer)
+    elif compr_type in [POINT_CLOUD_COMPRESSION, MESH_COMPRESSION]:
+        return _decompress_3d_data(buffer)

     if compression == "apng":
         return _decompress_apng(buffer)  # type: ignore
@@ -366,6 +374,8 @@ def compress_multiple(
         raise NotImplementedError(
             "compress_multiple does not support point cloud samples."
         )
+    elif compr_type == MESH_COMPRESSION:
+        raise NotImplementedError("compress_multiple does not support mesh samples.")
     elif compression == "apng":
         raise NotImplementedError("compress_multiple does not support apng samples.")
     canvas = np.zeros(_get_bounding_shape([arr.shape for arr in arrays]), dtype=dtype)
@@ -438,8 +448,8 @@ def verify_compressed_file(
             return _read_video_shape(file), "|u1"  # type: ignore
         elif compression == "dcm":
             return _read_dicom_shape_and_dtype(file)
-        elif compression == "las":
-            return _read_point_cloud_shape_and_dtype(file)
+        elif compression in ("las", "ply"):
+            return _read_3d_data_shape_and_dtype(file)
         else:
             return _fast_decompress(file)
     except Exception as e:
@@ -463,6 +473,7 @@ def get_compression(header=None, path=None):
         ".avi",
         ".dcm",
         ".las",
+        ".ply",
     ]
     path = str(path).lower()
     for fmt in file_formats:
@@ -648,9 +659,9 @@ def read_meta_from_compressed_file(
             shape, typestr = _read_video_shape(file), "|u1"  # type: ignore
         except Exception as e:
             raise CorruptedSampleError(compression)
-    elif compression == "las":
+    elif compression in ("las", "ply"):
         try:
-            shape, typestr = _read_point_cloud_shape_and_dtype(file)
+            shape, typestr = _read_3d_data_shape_and_dtype(file)
         except Exception as e:
             raise CorruptedSampleError(compression) from e
     else:
@@ -1090,86 +1101,25 @@ def _decompress_audio(
     return audio


-def _open_lidar_file(file):
-    try:
-        import laspy as lp  # type: ignore
-    except:
-        raise ModuleNotFoundError("laspy not found. Install using `pip install laspy`")
-    return lp.read(file)
-
-
-def _load_lidar_point_cloud_data(file):
-    point_cloud = _open_lidar_file(file)
-    dimension_names = list(point_cloud.point_format.dimension_names)
-    return point_cloud, dimension_names
+def _open_3d_data(file):
+    if isinstance(file, str):
+        point_cloud = read_3d_data(file)
+        return point_cloud
+
+    point_cloud = read_3d_data(BytesIO(file))
+    return point_cloud


-def _open_point_cloud_data(file: Union[bytes, memoryview, str]):
-    if isinstance(file, str):
-        point_cloud, dimension_names = _load_lidar_point_cloud_data(file)
-        return point_cloud, dimension_names
-
-    point_cloud, dimension_names = _load_lidar_point_cloud_data(BytesIO(file))
-    return point_cloud, dimension_names
-
-
-def _read_point_cloud_meta(file):
-    point_cloud, dimension_names = _open_point_cloud_data(file)
-    meta_data = {
-        "dimension_names": dimension_names,
-    }
-    if type(point_cloud) != np.ndarray:
-        meta_data.update(
-            {
-                "las_header": {
-                    "DEFAULT_VERSION": LAS_HEADER_FILED_NAME_TO_PARSER[
-                        "DEFAULT_VERSION"
-                    ](point_cloud),
-                    "file_source_id": point_cloud.header.file_source_id,
-                    "system_identifier": point_cloud.header.system_identifier,
-                    "generating_software": point_cloud.header.generating_software,
-                    "creation_date": LAS_HEADER_FILED_NAME_TO_PARSER["creation_date"](
-                        point_cloud
-                    ),
-                    "point_count": point_cloud.header.point_count,
-                    "scales": point_cloud.header.scales.tolist(),
-                    "offsets": point_cloud.header.offsets.tolist(),
-                    "number_of_points_by_return": point_cloud.header.number_of_points_by_return.tolist(),
-                    "start_of_waveform_data_packet_record": point_cloud.header.start_of_waveform_data_packet_record,
-                    "start_of_first_evlr": point_cloud.header.start_of_first_evlr,
-                    "number_of_evlrs": point_cloud.header.number_of_evlrs,
-                    "version": LAS_HEADER_FILED_NAME_TO_PARSER["version"](point_cloud),
-                    "maxs": point_cloud.header.maxs.tolist(),
-                    "mins": point_cloud.header.mins.tolist(),
-                    "major_version": point_cloud.header.major_version,
-                    "minor_version": point_cloud.header.minor_version,
-                    "global_encoding": LAS_HEADER_FILED_NAME_TO_PARSER[
-                        "global_encoding"
-                    ](point_cloud),
-                    "uuid": str(point_cloud.header.uuid),
-                },
-                "vlrs": point_cloud.vlrs,
-            }
-        )
-    return meta_data
+def _decompress_3d_data(file: Union[bytes, memoryview, str]):
+    point_cloud = _open_3d_data(file)
+    return point_cloud.decompressed_3d_data


-def _read_point_cloud_shape_and_dtype(file):
-    point_cloud = _decompress_full_point_cloud(file)
-    shape = point_cloud.shape
-    return shape, point_cloud.dtype
+def _read_3d_data_shape_and_dtype(file: Union[bytes, BinaryIO]):
+    point_cloud = _open_3d_data(file)
+    return point_cloud.shape, point_cloud.dtype


-def _decompress_full_point_cloud(file: Union[bytes, memoryview, str]):
-    decompressed_point_cloud, _ = _open_point_cloud_data(file)
-    meta = _read_point_cloud_meta(file)
-
-    decompressed_point_cloud = np.concatenate(
-        [
-            np.expand_dims(decompressed_point_cloud[dim_name], -1)
-            for dim_name in meta["dimension_names"]
-        ],
-        axis=1,
-    )
-    decompressed_point_cloud = decompressed_point_cloud.astype(np.float32)
-    return decompressed_point_cloud
+def _read_3d_data_meta(file: Union[bytes, memoryview, str]):
+    point_cloud = _open_3d_data(file)
+    return point_cloud.meta_data
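
Note: the LAS-specific helpers (_open_lidar_file, _read_point_cloud_meta, _decompress_full_point_cloud) collapse into one format-agnostic entry point, read_3d_data, and the call sites above touch only four attributes of whatever it returns. A sketch of that implied interface; the real classes live under deeplake/util/object_3d/ and are not shown in this excerpt:

from dataclasses import dataclass, field
import numpy as np

@dataclass
class Object3DData:
    # Inferred from the call sites in compression.py; the actual
    # implementation has per-format parsers (las, ply) behind it.
    decompressed_3d_data: np.ndarray  # (num_points, num_dims) array
    meta_data: dict = field(default_factory=dict)

    @property
    def shape(self):
        return self.decompressed_3d_data.shape

    @property
    def dtype(self):
        return self.decompressed_3d_data.dtype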
1 change: 1 addition & 0 deletions deeplake/core/dataset/dataset.py
@@ -662,6 +662,7 @@ def create_tensor(
             "video",
             "dicom",
             "point_cloud",
+            "mesh",
         ):
             self._create_sample_info_tensor(name)
         if create_shape_tensor and htype not in ("text", "json"):
2 changes: 1 addition & 1 deletion deeplake/core/meta/tensor_meta.py
@@ -269,7 +269,7 @@ def _validate_htype_overwrites(htype: str, htype_overwrite: dict):
         raise TensorMetaMissingRequiredValue(
             actual_htype, ["chunk_compression", "sample_compression"]  # type: ignore
         )
-    if htype in ("audio", "video", "point_cloud"):
+    if htype in ("audio", "video", "point_cloud", "mesh"):
         if cc not in (UNSPECIFIED, None):
             raise UnsupportedCompressionError("Chunk compression", htype=htype)
         elif sc == UNSPECIFIED:
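
Note: adding "mesh" to this tuple gives it the same compression contract as audio, video, and point_cloud: sample_compression is required, and chunk_compression is rejected. A sketch of both modes (local path illustrative):

import deeplake
from deeplake.util.exceptions import UnsupportedCompressionError

ds = deeplake.dataset("./meta_demo")  # illustrative local path
ds.create_tensor("good_mesh", htype="mesh", sample_compression="ply")  # ok

try:
    ds.create_tensor("bad_mesh", htype="mesh", chunk_compression="ply")
except UnsupportedCompressionError:
    pass  # chunk compression is not supported for the mesh htype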
9 changes: 5 additions & 4 deletions deeplake/core/sample.py
@@ -8,14 +8,15 @@
     _open_video,
     _read_metadata_from_vstream,
     _read_audio_meta,
-    _read_point_cloud_meta,
+    _read_3d_data_meta,
 )
 from deeplake.compression import (
     get_compression_type,
     AUDIO_COMPRESSION,
     IMAGE_COMPRESSION,
     VIDEO_COMPRESSION,
     POINT_CLOUD_COMPRESSION,
+    MESH_COMPRESSION,
 )
 from deeplake.util.exceptions import UnableToReadFromUrlError
 from deeplake.util.exif import getexif
@@ -235,9 +236,9 @@ def _get_audio_meta(self) -> dict:

     def _get_point_cloud_meta(self) -> dict:
         if self.path and get_path_type(self.path) == "local":
-            info = _read_point_cloud_meta(self.path)
+            info = _read_3d_data_meta(self.path)
         else:
-            info = _read_point_cloud_meta(self.buffer)
+            info = _read_3d_data_meta(self.buffer)
         return info

     @property
@@ -495,7 +496,7 @@ def meta(self) -> dict:
             meta.update(self._get_video_meta())
         elif compression_type == AUDIO_COMPRESSION:
             meta.update(self._get_audio_meta())
-        elif compression_type == POINT_CLOUD_COMPRESSION:
+        elif compression_type in [POINT_CLOUD_COMPRESSION, MESH_COMPRESSION]:
             meta.update(self._get_point_cloud_meta())
         meta["shape"] = self.shape
         meta["format"] = self.compression
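
Note: Sample.meta dispatches on the compression family, so a .ply file now takes the same branch as .las and reports the 3D metadata gathered by _read_3d_data_meta, plus the shape and format keys set at the end of meta. A hedged usage sketch:

import deeplake

sample = deeplake.read("bunny.ply")  # hypothetical mesh file
info = sample.meta
print(info["shape"], info["format"])  # e.g. (num_points, 3) and "ply"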