Skip to content

Commit

Permalink
Merge pull request #2350 from activeloopai/indra-virtual-tensors
Browse files (browse the repository at this point in the history)
Expose indra virtual tensors in deeplake
  • Loading branch information
levongh authored May 16, 2023
2 parents 9aca528 + fbe8209 commit 29e49b3
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 8 deletions.
13 changes: 8 additions & 5 deletions deeplake/core/dataset/deeplake_query_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,13 +167,16 @@ def __getitem__(
def __getattr__(self, key):
try:
return self.__getitem__(key)
except TensorDoesNotExistError as ke:
except:
try:
return getattr(self.deeplake_ds, key)
except AttributeError:
raise AttributeError(
f"'{self.__class__}' object has no attribute '{key}'"
) from ke
except:
try:
return getattr(self.indra_ds, key)
except:
raise AttributeError(
f"'{self.__class__}' object has no attribute '{key}'"
)

def __len__(self):
return len(self.indra_ds)
Expand Down
32 changes: 32 additions & 0 deletions deeplake/core/tests/test_deeplake_indra_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,3 +276,35 @@ def test_random_split(hub_cloud_ds_generator):
l = val.dataloader().pytorch().shuffle()
for b in l:
pass


@requires_libdeeplake
def test_virtual_tensors(hub_cloud_ds_generator):
    """Check that computed TQL columns are reachable as tensors on a query view.

    Builds a 100-row dataset with a variable-length ``label`` tensor and a
    3-element ``embeddings`` tensor, then runs two queries whose SELECT list
    defines aliased expressions (``num_labels`` and ``score``) and reads those
    aliases back through attribute access on the returned view.

    Args:
        hub_cloud_ds_generator: pytest fixture; called with no arguments to
            obtain a fresh dataset.
    """
    n_rows = 100

    deeplake_ds = hub_cloud_ds_generator()
    with deeplake_ds:
        deeplake_ds.create_tensor("label", htype="generic", dtype=np.int32)
        deeplake_ds.create_tensor("embeddings", htype="generic", dtype=np.float32)
        for i in range(n_rows):
            # Row i carries (i % 5) labels, so label lengths cycle 0, 1, 2, 3, 4.
            count = i % 5
            deeplake_ds.label.append([int(i % 100)] * count)
            deeplake_ds.embeddings.append(
                [1.0 / float(i + 1), 0.0, -1.0 / float(i + 1)]
            )

    deeplake_indra_ds = deeplake_ds.query("SELECT shape(label)[0] as num_labels")
    assert len(deeplake_indra_ds) == n_rows
    for row in range(5):
        assert deeplake_indra_ds.num_labels[row].numpy() == [row]
    # 20 full cycles of 0 + 1 + 2 + 3 + 4.
    assert np.sum(deeplake_indra_ds.num_labels.numpy()) == 200

    deeplake_indra_ds = deeplake_ds.query(
        "SELECT l2_norm(embeddings - ARRAY[0, 0, 0]) as score order by l2_norm(embeddings - ARRAY[0, 0, 0]) asc"
    )
    assert len(deeplake_indra_ds) == n_rows
    # The previous loop was ``range(100, 1)``, which is empty, so none of the
    # score checks ever executed. Iterate over every result position instead.
    # Source row i has norm sqrt(2) / (i + 1); ascending order therefore puts
    # the last-appended row first, i.e. position p holds source row 99 - p.
    # NOTE(review): assumes l2_norm is the Euclidean norm, as the original
    # expected value sqrt(2 / (i + 1) / (i + 1)) implies -- confirm.
    for position in range(n_rows):
        source_row = n_rows - 1 - position
        expected = np.sqrt(2.0) / (source_row + 1)
        # Embeddings are stored as float32, so compare with a tolerance
        # rather than exact equality.
        assert np.allclose(
            deeplake_indra_ds.score[position].numpy(), expected, rtol=1e-5
        )
6 changes: 4 additions & 2 deletions deeplake/enterprise/libdeeplake_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,12 @@ def query(dataset, query_string: str):
else:
ds = dataset_to_libdeeplake(dataset)
dsv = ds.query(query_string)
try:
from deeplake.enterprise.convert_to_libdeeplake import INDRA_API

if not isinstance(dataset, DeepLakeQueryDataset) and INDRA_API.tql.parse(query_string).is_filter: # type: ignore
indexes = dsv.indexes
return dataset[indexes]
except RuntimeError:
else:
view = DeepLakeQueryDataset(deeplake_ds=dataset, indra_ds=dsv)
view._tql_query = query_string
if hasattr(dataset, "is_actually_cloud"):
Expand Down
1 change: 1 addition & 0 deletions deeplake/requirements/tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ rich
wandb

pandas; python_version >= '3.11' and sys_platform == 'win32'
datasets; python_version >= '3.11' and sys_platform == 'win32'
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def libdeeplake_availabe():
extras_require["all"] = [req_map[r] for r in all_extras]

if libdeeplake_availabe():
libdeeplake = "libdeeplake==0.0.52"
libdeeplake = "libdeeplake==0.0.53"
extras_require["enterprise"] = [libdeeplake, "pyjwt"]
extras_require["all"].append(libdeeplake)

Expand Down

0 comments on commit 29e49b3

Please sign in to comment.