Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DL-702] API reference updates #1883

Merged
merged 15 commits into from
Sep 19, 2022
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .readthedocs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
version: 2

build:
  os: ubuntu-20.04
  tools:
    python: "3.8"

sphinx:
  configuration: docs/source/conf.py

python:
  install:
    - requirements: hub/requirements/docs.txt
    - method: pip
      path: .
1 change: 1 addition & 0 deletions docs/source/Tensors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ Retrieving samples
Tensor.text
Tensor.dict
Tensor.list
Tensor._linked_sample

Tensor Properties
~~~~~~~~~~~~~~~~~
Expand Down
5 changes: 5 additions & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
"sphinx.ext.autosummary",
"sphinx.ext.napoleon",
"sphinx.ext.autosectionlabel",
"sphinx_copybutton", # copy button
]
autosummary_generate = False

Expand Down Expand Up @@ -70,3 +71,7 @@

# Favicon
html_favicon = "_static/img/tensie.svg"

# copy button args
# The prompt text is a regular expression matching the ">>> " and "... "
# prompts so that sphinx-copybutton strips them from copied snippets.
# A raw string is used because "\." is an invalid escape sequence in a
# normal string literal (DeprecationWarning / SyntaxWarning on newer Pythons).
copybutton_prompt_text = r">>> |\.\.\. "
copybutton_prompt_is_regexp = True
1 change: 1 addition & 0 deletions docs/source/hub.core.tensor.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ Tensor
~~~~~~
.. autoclass:: Tensor()
:members:
:private-members:
:special-members: __setitem__, __len__
13 changes: 13 additions & 0 deletions hub/api/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@


class Info(HubMemoryObject):
"""Contains optional key-value pairs that can be stored for datasets/tensors."""

def __init__(self):
self._info = {}
self._dataset = None
Expand Down Expand Up @@ -35,6 +37,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):

@property
def nbytes(self):
"""Returns size of info stored in bytes."""
return len(self.tobytes())

def __len__(self):
Expand Down Expand Up @@ -96,41 +99,51 @@ def __setattr__(self, key: str, value):
self[key] = value

def get(self, key, default=None):
"""Get value for key from info."""
return self._info.get(key, default)

def setdefault(self, key, default=None):
"""Set default value for a key in info."""
with self:
ret = self._info.setdefault(key, default)
return ret

def clear(self):
"""Clear info."""
with self:
self._info.clear()

def pop(self, key, default=None):
"""Pop item from info by key."""
with self:
popped = self._info.pop(key, default)
return popped

def popitem(self):
"""Pop item from info."""
with self:
popped = self._info.popitem()
return popped

def update(self, *args, **kwargs):
"""Update info."""
with self:
self._info.update(*args, **kwargs)

def keys(self):
"""Return all keys in info."""
return self._info.keys()

def values(self):
"""Return all values in info."""
return self._info.values()

def items(self):
"""Return all items in info."""
return self._info.items()

def replace_with(self, d):
"""Replace info with another dictionary."""
with self:
self._info.clear()
self._info.update(d)
Expand Down
7 changes: 7 additions & 0 deletions hub/api/link.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,12 @@ def link(

Returns:
LinkedSample: LinkedSample object that stores path and creds.

Examples:
>>> ds = hub.dataset("test/test_ds")
>>> ds.create_tensor("images", htype="link[image]")
>>> ds.images.append(hub.link("https://picsum.photos/200/300"))

See more examples :ref:`here <linked_sample_examples>`.
"""
return LinkedSample(path, creds_key)
2 changes: 1 addition & 1 deletion hub/core/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1267,7 +1267,7 @@ def diff(
>>> {
... "image": {"data_added": [3, 6], "data_updated": {0, 2}, "created": False, "info_updated": False, "data_transformed_in_place": False},
... "label": {"data_added": [0, 3], "data_updated": {}, "created": True, "info_updated": False, "data_transformed_in_place": False},
... "other/stuff" : {data_added: [3, 3], data_updated: {1, 2}, created: True, "info_updated": False, "data_transformed_in_place": False},
... "other/stuff" : {"data_added": [3, 3], "data_updated": {1, 2}, "created": True, "info_updated": False, "data_transformed_in_place": False},
... }


Expand Down
2 changes: 2 additions & 0 deletions hub/core/linked_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ def convert_creds_key(creds_key: Optional[str], path: str):


class LinkedSample:
"""Represents a sample that is initialized using external links. See :func:`hub.link`."""

def __init__(self, path: str, creds_key: Optional[str] = None):
self.path = path
self.creds_key = convert_creds_key(creds_key, path)
10 changes: 10 additions & 0 deletions hub/core/meta/dataset_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@


class DatasetMeta(Meta):
"""Stores dataset metadata."""

def __init__(self):
super().__init__()
self.tensors = []
Expand All @@ -15,6 +17,7 @@ def __init__(self):

@property
def visible_tensors(self):
"""Returns list of tensors that are not hidden."""
return list(
filter(
lambda t: self.tensor_names[t] not in self.hidden_tensors,
Expand All @@ -24,6 +27,7 @@ def visible_tensors(self):

@property
def nbytes(self):
"""Returns size of the metadata stored in bytes."""
# TODO: can optimize this
return len(self.tobytes())

Expand All @@ -37,6 +41,7 @@ def __getstate__(self) -> Dict[str, Any]:
return d

def add_tensor(self, name, key, hidden=False):
"""Reflect addition of tensor in dataset's meta."""
if key not in self.tensors:
self.tensor_names[name] = key
self.tensors.append(key)
Expand All @@ -51,16 +56,19 @@ def _hide_tensor(self, name):
self.is_dirty = True

def add_group(self, name):
"""Reflect addition of tensor group in dataset's meta."""
if name not in self.groups:
self.groups.append(name)
self.is_dirty = True

def delete_tensor(self, name):
"""Reflect tensor deletion in dataset's meta."""
key = self.tensor_names.pop(name)
self.tensors.remove(key)
self.is_dirty = True

def delete_group(self, name):
"""Reflect removal of a tensor group in dataset's meta."""
self.groups = list(filter(lambda g: not g.startswith(name), self.groups))
self.tensors = list(filter(lambda t: not t.startswith(name), self.tensors))
self.hidden_tensors = list(
Expand All @@ -73,11 +81,13 @@ def delete_group(self, name):
self.is_dirty = True

def rename_tensor(self, name, new_name):
"""Reflect a tensor rename in dataset's meta."""
key = self.tensor_names.pop(name)
self.tensor_names[new_name] = key
self.is_dirty = True

def rename_group(self, name, new_name):
"""Reflect renaming a tensor group in dataset's meta."""
self.groups.remove(name)
self.groups = list(
map(
Expand Down
7 changes: 4 additions & 3 deletions hub/core/meta/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@


class Meta(HubMemoryObject):
"""Contains **required** key/values that datasets/tensors use to function.
See the ``Info`` class for optional key/values for datasets/tensors.
"""

def __init__(self):
"""Contains **required** key/values that datasets/tensors use to function.
See the `Info` class for optional key/values for datasets/tensors.
"""
super().__init__()
self.version = hub.__version__

Expand Down
14 changes: 10 additions & 4 deletions hub/core/meta/tensor_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def __init__(
def add_link(
self, name, append_f: str, update_f: Optional[str], flatten_sequence: bool
):
"""Link this tensor with another."""
link = {
"append": append_f,
"flatten_sequence": flatten_sequence,
Expand All @@ -81,11 +82,12 @@ def add_link(
self.is_dirty = True

def set_hidden(self, val: bool):
"""Set visibility of tensor."""
self.hidden = val
self.is_dirty = True

def set_dtype(self, dtype: np.dtype):
"""Should only be called once."""
"""Set dtype of tensor. Should only be called once."""

if self.dtype is not None:
raise ValueError(
Expand All @@ -100,7 +102,7 @@ def set_dtype_str(self, dtype_name: str):
self.is_dirty = True

def set_htype(self, htype: str, **kwargs):
"""Should only be called once."""
"""Set htype of tensor. Should only be called once."""

if getattr(self, "htype", None) is not None:
raise ValueError(
Expand Down Expand Up @@ -132,6 +134,7 @@ def set_htype(self, htype: str, **kwargs):
_validate_links(self.links)

def update_shape_interval(self, shape: Sequence[int]):
"""Update shape interval of tensor."""
initial_min_shape = None if self.min_shape is None else self.min_shape.copy()
initial_max_shape = None if self.max_shape is None else self.max_shape.copy()

Expand All @@ -152,11 +155,13 @@ def update_shape_interval(self, shape: Sequence[int]):
self.is_dirty = True

def update_length(self, length: int):
"""Update length of tensor."""
self.length += length
if length != 0:
self.is_dirty = True

def pop(self, index):
"""Reflect popping a sample in tensor's meta."""
self.length -= 1
if self.length == 0:
self.min_shape = []
Expand All @@ -183,6 +188,7 @@ def __setstate__(self, state: Dict[str, Any]):

@property
def nbytes(self):
"""Returns size of the metadata stored in bytes."""
# TODO: optimize this
return len(self.tobytes())

Expand Down Expand Up @@ -242,7 +248,7 @@ def _required_meta_from_htype(htype: str) -> dict:


def _validate_htype_overwrites(htype: str, htype_overwrite: dict):
"""Raises errors if `htype_overwrite` has invalid keys or was missing required values."""
"""Raises errors if ``htype_overwrite`` has invalid keys or was missing required values."""

defaults = HTYPE_CONFIGURATIONS[htype]

Expand Down Expand Up @@ -281,7 +287,7 @@ def _validate_htype_overwrites(htype: str, htype_overwrite: dict):


def _replace_unspecified_values(htype: str, htype_overwrite: dict):
"""Replaces `UNSPECIFIED` values in `htype_overwrite` with the `htype`'s defaults."""
"""Replaces ``UNSPECIFIED`` values in ``htype_overwrite`` with the ``htype``'s defaults."""

defaults = HTYPE_CONFIGURATIONS[htype]

Expand Down
7 changes: 7 additions & 0 deletions hub/core/tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1036,6 +1036,13 @@ def sample_info(self) -> Union[Dict, List[Dict]]:
return self._sample_info(self.index)

def _linked_sample(self):
"""Returns the linked sample at the given index. This is only applicable for tensors of ``link[]`` htype
and can only be used for exactly one sample.

>>> ds.abc[0]._linked_sample().path
'https://picsum.photos/200/300'

"""
if not self.is_link:
raise ValueError("Not supported as the tensor is not a link.")
if self.index.values[0].subscriptable() or len(self.index.values) > 1:
Expand Down
2 changes: 2 additions & 0 deletions hub/htype.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@
- If ``create_shape_tensor=True`` was specified during ``create_tensor`` of the tensor to which this is being added, the shape of the sample is read.
- If ``create_sample_info_tensor=True`` was specified during ``create_tensor`` of the tensor to which this is being added, the sample info is read.

.. _linked_sample_examples:

Examples
--------

Expand Down
1 change: 1 addition & 0 deletions hub/requirements/docs.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
sphinx-copybutton