Skip to content

Commit

Permalink
Increased test coverage by 4%. Many tests implemented, legacy code ex…
Browse files Browse the repository at this point in the history
…cluded from coverage.
  • Loading branch information
edogrigqv2 committed Nov 16, 2020
1 parent 1fdaa54 commit 6ce13d8
Show file tree
Hide file tree
Showing 9 changed files with 228 additions and 30 deletions.
3 changes: 1 addition & 2 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
[run]
omit = */tests/*

omit = */tests/*, hub/collections/*, hub/codec/*
31 changes: 29 additions & 2 deletions hub/api/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,35 @@

def test_dataset2():
    """Write one field of a freshly created dataset and check the other field."""
    schema = {"first": "float", "second": "float"}
    # Post-commit location: each test writes to its own directory.
    ds = Dataset(
        schema=schema,
        shape=(2,),
        url="./data/test/test_dataset2",
        mode="w",
    )

    ds["first"][0] = 2.3
    # Writing to "first" must not leak into "second".
    assert ds["second"][0].numpy() != 2.3


def test_dataset_append_and_read():
    """Create a dataset in append mode, commit it, then reopen it in read mode."""
    url = "./data/test/test_dataset_append_and_read"
    schema = {"first": "float", "second": "float"}

    ds = Dataset(schema=schema, shape=(2,), url=url, mode="a")
    ds["first"][0] = 2.3
    assert ds["second"][0].numpy() != 2.3
    ds.commit()

    # Reopen the same location without schema/shape, read-only this time.
    ds = Dataset(url=url, mode="r")
    ds.delete()
    ds.commit()

# TODO: Add a case where a non-existent dataset is opened in read mode


def test_dataset(url="./data/test/dataset", token=None):
ds = Dataset(url, token=token, shape=(10000,), mode="w", schema=my_schema)

Expand Down Expand Up @@ -194,8 +217,12 @@ def test_dataset_s3():
@pytest.mark.skipif(not azure_creds_exist(), reason="requires azure credentials")
def test_dataset_azure():
    """Run the generic dataset test against an Azure blob storage URL."""
    import os

    # The account key is read from the environment; the test is skipped
    # entirely when azure_creds_exist() reports no credentials.
    token = {"account_key": os.getenv("ACCOUNT_KEY")}
    test_dataset(
        "https://activeloop.blob.core.windows.net/activeloop-hub/test_dataset_azure",
        token=token,
    )


if __name__ == "__main__":
Expand Down
41 changes: 30 additions & 11 deletions hub/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,73 +151,92 @@ def __init__(self, response):
message = f"No permision to store the dataset at {response}"
super(PermissionException, self).__init__(message=message)


class ShapeArgumentNotFoundException(HubException):
    """Error stating that 'shape' must be provided for Dataset creation."""

    def __init__(self):
        # No placeholders in the text, so a plain string is used.
        message = "Parameter 'shape' should be provided for Dataset creation."
        # Zero-argument super() dispatches to HubException.__init__ rather
        # than skipping past it in the MRO.
        super().__init__(message=message)


class SchemaArgumentNotFoundException(HubException):
    """Error stating that 'schema' must be provided for Dataset creation."""

    def __init__(self):
        # No placeholders in the text, so a plain string is used.
        message = "Parameter 'schema' should be provided for Dataset creation."
        # Zero-argument super() dispatches to HubException.__init__ rather
        # than skipping past it in the MRO.
        super().__init__(message=message)


class ValueShapeError(HubException):
    """Error reporting a mismatch between an expected and a received array shape.

    Args:
        correct_shape: The shape that was expected for 'value'.
        wrong_shape: The shape that was actually received.
    """

    def __init__(self, correct_shape, wrong_shape):
        message = (
            f"parameter 'value': expected array with shape {correct_shape}, "
            f"got {wrong_shape}"
        )
        # Zero-argument super() dispatches to HubException.__init__ rather
        # than skipping past it in the MRO.
        super().__init__(message=message)


class NoneValueException(HubException):
    """Error stating that a required parameter was not provided.

    Args:
        param: Name of the missing parameter, interpolated into the message.
    """

    def __init__(self, param):
        message = f"Parameter '{param}' should be provided"
        # Zero-argument super() dispatches to HubException.__init__ rather
        # than skipping past it in the MRO.
        super().__init__(message=message)


class ShapeLengthException(HubException):
    """Error stating that 'shape' must be a tuple of length 1."""

    def __init__(self):
        # No placeholders in the text, so a plain string is used.
        message = "Parameter 'shape' should be a tuple of length 1"
        # Zero-argument super() dispatches to HubException.__init__ rather
        # than skipping past it in the MRO.
        super().__init__(message=message)


class ModuleNotInstalledException(HubException):
    """Error stating that a module needed for a Dataset conversion is missing.

    Args:
        module_name: Name of the required module; it is also used as the name
            of the target format in the message.
    """

    def __init__(self, module_name):
        message = (
            f"Module '{module_name}' should be installed to convert the Dataset "
            f"to the {module_name} format"
        )
        # Zero-argument super() dispatches to HubException.__init__ rather
        # than skipping past it in the MRO.
        super().__init__(message=message)


class WrongUsernameException(HubException):
    """Error stating that the username given in the url was not found.

    Args:
        username: The username that could not be found.
    """

    def __init__(self, username):
        message = (
            f"The username {username} was not found. Make sure that the username provided in the url "
            "matches the one used during login."
        )
        # Zero-argument super() dispatches to HubException.__init__ rather
        # than skipping past it in the MRO.
        super().__init__(message=message)


class NotHubDatasetToOverwriteException(HubException):
    """Error stating that a non-empty, non-Hub directory cannot be overwritten."""

    def __init__(self):
        message = (
            "Unable to overwrite the dataset. "
            "The provided directory is not empty and doesn't contain information about any Hub Dataset "
        )
        # Zero-argument super() dispatches to HubException.__init__ rather
        # than skipping past it in the MRO.
        super().__init__(message=message)


class NotHubDatasetToAppendException(HubException):
    """Error stating that a non-empty, non-Hub directory cannot be appended to."""

    def __init__(self):
        message = (
            "Unable to append to the dataset. "
            "The provided directory is not empty and doesn't contain information about any Hub Dataset "
        )
        # Zero-argument super() dispatches to HubException.__init__ rather
        # than skipping past it in the MRO.
        super().__init__(message=message)

class DynamicTensorNotFoundException(HubException):
    """Error stating that a dynamic tensor could not be found."""

    def __init__(self):
        # No placeholders in the text, so a plain string is used.
        message = "Unable to find dynamic tensor"
        # Zero-argument super() dispatches to HubException.__init__ rather
        # than skipping past it in the MRO.
        super().__init__(message=message)

class DynamicTensorShapeException(HubException):
    """Error describing an inconsistency between 'shape' and 'max_shape'.

    Args:
        exc_type: Selects the message. "none", "length" and "not_equal" each
            have a dedicated message; any other value yields a generic one.
    """

    def __init__(self, exc_type):
        if exc_type == "none":
            message = "Parameter 'max_shape' shouldn't contain any 'None' value"
        elif exc_type == "length":
            message = "Lengths of 'shape' and 'max_shape' should be equal"
        elif exc_type == "not_equal":
            message = "All not-None values from 'shape' should be equal to the corresponding values in 'max_shape'"
        else:
            message = "Wrong 'shape' or 'max_shape' values"
        # Zero-argument super() dispatches to HubException.__init__ rather
        # than skipping past it in the MRO.
        super().__init__(message=message)


class NotZarrFolderException(Exception):
    """Marker exception type with no behaviour beyond the built-in Exception."""


class StorageTensorNotFoundException(Exception):
    """Marker exception type with no behaviour beyond the built-in Exception."""
3 changes: 3 additions & 0 deletions hub/features/tests/class_label_names.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
alpha
beta
gamma
26 changes: 26 additions & 0 deletions hub/features/tests/test_features.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from hub.features.class_label import ClassLabel, _load_names_from_file

names_file = "./hub/features/tests/class_label_names.txt"


def test_load_names_from_file():
    """The fixture file's three lines are returned as a list of names."""
    expected = ["alpha", "beta", "gamma"]
    assert _load_names_from_file(names_file) == expected


def test_class_label():
    """Exercise ClassLabel built from a class count, a name list and a names file."""
    bel1 = ClassLabel(num_classes=4)
    bel2 = ClassLabel(names=["alpha", "beta", "gamma"])
    # Construction from a file is only checked for not raising.
    ClassLabel(names_file=names_file)

    assert bel1.names == ["0", "1", "2", "3"]
    assert bel2.names == ["alpha", "beta", "gamma"]

    assert bel1.str2int("1") == 1
    assert bel2.str2int("gamma") == 2

    assert bel1.int2str(2) is None  # FIXME This is a bug, should raise an error
    assert bel2.int2str(0) == "alpha"

    assert bel1.num_classes == 4
    assert bel2.num_classes == 3

    # Only checked for not raising; the returned value is not inspected.
    bel1.get_attr_dict()
27 changes: 16 additions & 11 deletions hub/store/dynamic_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@

from hub.store.nested_store import NestedStore

from hub.exceptions import (DynamicTensorNotFoundException,
ValueShapeError,
DynamicTensorShapeException
)
from hub.exceptions import (
DynamicTensorNotFoundException,
ValueShapeError,
DynamicTensorShapeException,
)
from hub.api.dataset_utils import slice_extract_info


Expand Down Expand Up @@ -159,14 +160,14 @@ def __init__(
self.max_shape = self._storage_tensor.shape
self.dtype = self._storage_tensor.dtype
if len(self.shape) != len(self.max_shape):
raise DynamicTensorShapeException('length')
raise DynamicTensorShapeException("length")
for item in self.max_shape:
if item is None:
raise DynamicTensorShapeException('none')
raise DynamicTensorShapeException("none")
for item in zip(self.shape, self.max_shape):
if item[0] is not None:
if item[0] != item[1]:
raise DynamicTensorShapeException('not_equal')
raise DynamicTensorShapeException("not_equal")

def __getitem__(self, slice_):
"""Gets a slice or slices from tensor"""
Expand Down Expand Up @@ -198,15 +199,15 @@ def __setitem__(self, slice_, value):

def check_value_shape(self, value, slice_):
"""Checks if value can be set to the slice"""
if None not in self.shape and self.dtype != 'O':
if None not in self.shape and self.dtype != "O":
if not all([isinstance(sh, int) for sh in slice_]):
expected_value_shape = tuple(
[
len(range(*slice_shape.indices(self.shape[i])))
for i, slice_shape in enumerate(slice_)
if not isinstance(slice_shape, int)
]
)
)
if expected_value_shape[0] == 1 and len(expected_value_shape) > 1:
expected_value_shape = expected_value_shape[1:]

Expand All @@ -222,8 +223,11 @@ def check_value_shape(self, value, slice_):
else:
expected_value_shape = (1,)
if isinstance(value, list):
value = np.array(value)
if isinstance(value, np.ndarray) and value.shape != expected_value_shape:
value = np.array(value)
if (
isinstance(value, np.ndarray)
and value.shape != expected_value_shape
):
raise ValueShapeError(expected_value_shape, value.shape)
return value

Expand Down Expand Up @@ -295,6 +299,7 @@ def _get_slice(self, slice_, real_shapes):
)
return tuple(slice_)

# FIXME I don't see this classmethod being used anywhere
@classmethod
def _get_slice_upper_boundary(cls, slice_):
if isinstance(slice_, slice):
Expand Down
37 changes: 33 additions & 4 deletions hub/store/tests/test_dynamic_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,44 @@

import numpy as np
import fsspec
from zarr.creation import create

from hub.store.dynamic_tensor import DynamicTensor
from hub.store.store import StorageMapWrapperWithCommit


def create_store(path: str, overwrite: bool = True):
    """Build a commit-capable storage wrapper over a local directory.

    Args:
        path: Local directory backing the store.
        overwrite: When true (the default), an existing directory at ``path``
            is removed first; when false, existing contents are kept.

    Returns:
        A StorageMapWrapperWithCommit around the fsspec mapper for ``path``.
    """
    fs: fsspec.AbstractFileSystem = fsspec.filesystem("file")
    if fs.exists(path) and overwrite:
        fs.rm(path, recursive=True)

    # With overwrite=False the directory may already be there, so only
    # create it when missing.
    dynpath = posixpath.join(path, "--dynamic--")
    if not fs.exists(dynpath):
        fs.makedirs(dynpath)

    mapper = fs.get_mapper(path)
    mapper["--dynamic--/hello.txt"] = bytes("Hello World", "utf-8")
    return StorageMapWrapperWithCommit(mapper)


def test_read_and_append_modes():
    """Write through an append-mode tensor, then read it back in read mode."""
    path = "./data/test/test_read_and_append_modes"

    t = DynamicTensor(
        create_store(path),
        mode="a",
        shape=(5, 100, 100),
        max_shape=(5, 100, 100),
        dtype="int32",
    )
    t[0, 80:, 80:] = np.ones((20, 20), dtype="int32")
    assert t[0, -5, 90:].tolist() == [1] * 10
    t.commit()

    # overwrite=False keeps the data committed above so it can be reopened.
    t = DynamicTensor(
        create_store(path, overwrite=False),
        mode="r",
    )
    # FIXME: the result of this comparison is discarded, so nothing is
    # checked here. It was presumably meant to be an assert; confirm what
    # get_shape(0) actually returns before turning it into one.
    t.get_shape(0) == (100, 100)
    assert t[0, -5, 90:].tolist() == [1] * 10
    t.commit()


def test_dynamic_tensor():
t = DynamicTensor(
create_store("./data/test/test_dynamic_tensor"),
Expand All @@ -42,6 +65,11 @@ def test_dynamic_tensor_2():
assert t[0, 5, :].tolist() == [1] * 10
t[0, 6] = 2 * np.ones((20,), dtype="int32")
assert t[0, 5, :].tolist() == [1] * 10 + [0] * 10
assert t.get_shape([0]) == (
10,
20,
) # FIXME This is a bug accessing [0], should be just 0
assert t.get_shape([slice(0, 1)]) == (10, 20) # FIXME This is also a bug


def test_dynamic_tensor_3():
Expand Down Expand Up @@ -69,6 +97,7 @@ def test_dynamic_tensor_shapes():
t[0] = np.ones((5, 10), dtype="int32")
t[0, 6] = 2 * np.ones((20,), dtype="int32")
assert t[0, -1].tolist() == [2] * 20
t.commit()


def test_dynamic_tensor_4():
Expand Down Expand Up @@ -97,6 +126,6 @@ def test_chunk_iterator():


if __name__ == "__main__":
    # Manual entry point: runs only the read/append test added in this commit.
    test_read_and_append_modes()
    # test_chunk_iterator()
    # test_dynamic_tensor_shapes()
23 changes: 23 additions & 0 deletions hub/store/tests/test_nested_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from hub.store.nested_store import NestedStore

import zarr


def test_nested_store():
    """Exercise item access, deletion, length, iteration and commit on NestedStore."""
    store = NestedStore(zarr.MemoryStore(), "hello")

    # Set, read back and delete a single item.
    store["item"] = bytes("Hello World", "utf-8")
    assert store["item"] == bytes("Hello World", "utf-8")
    del store["item"]
    assert store.get("item") is None

    # Length and iteration over two items.
    store["item1"] = bytes("Hello World 1", "utf-8")
    store["item2"] = bytes("Hello World 2", "utf-8")
    assert len(store) == 2
    assert tuple(store) == ("item1", "item2")

    try:
        store.commit()
    except AttributeError as ex:
        assert "'MemoryStore' object has no attribute 'commit'" in str(ex)
    else:
        # Without this branch the test would pass silently if commit()
        # stopped raising, leaving the expectation above unchecked.
        raise AssertionError(
            "commit() was expected to raise AttributeError for a MemoryStore"
        )


if __name__ == "__main__":
    # Manual entry point for running this test file directly.
    test_nested_store()
Loading

0 comments on commit 6ce13d8

Please sign in to comment.