Skip to content

Commit

Permalink
fix
Browse files (browse the repository at this point in the history)
  • Loading branch information
FayazRahman committed May 1, 2023
1 parent 100f2e3 commit e234455
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 13 deletions.
4 changes: 4 additions & 0 deletions deeplake/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,3 +183,7 @@
TRANSFORM_RECHUNK_AVG_SIZE_BOUND = 0.1

TIME_INTERVAL_FOR_CUDA_MEMORY_CLEANING = 10 * 60

# Transform cache sizes
DEFAULT_TRANSFORM_SAMPLE_CACHE_SIZE = 16
TRANSFORM_CHUNK_CACHE_SIZE = 64 * MB
6 changes: 5 additions & 1 deletion deeplake/core/chunk_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -2721,6 +2721,10 @@ def get_avg_chunk_size(self):
dtype = self.tensor_meta.dtype
if dtype in ("Any", "List", None):
return None
nbytes = np.prod([num_samples] + max_shape) * np.dtype(dtype).itemsize
shape = [num_samples] + max_shape
nbytes = 1
for dim in shape: # not using np.prod to avoid overflow
nbytes *= dim
nbytes = nbytes * np.dtype(dtype).itemsize
avg_chunk_size = nbytes / num_chunks
return avg_chunk_size
5 changes: 3 additions & 2 deletions deeplake/core/transform/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from deeplake.hooks import dataset_written, dataset_read
from deeplake.util.version_control import auto_checkout
from deeplake.util.class_label import sync_labels
from deeplake.constants import DEFAULT_TRANSFORM_SAMPLE_CACHE_SIZE

import posixpath

Expand All @@ -57,7 +58,7 @@ def eval(
check_lengths: bool = True,
pad_data_in: bool = False,
read_only_ok: bool = False,
cache_size: int = 16,
cache_size: int = DEFAULT_TRANSFORM_SAMPLE_CACHE_SIZE,
checkpoint_interval: int = 0,
ignore_errors: bool = False,
**kwargs,
Expand Down Expand Up @@ -135,7 +136,7 @@ def eval(
check_lengths: bool = True,
pad_data_in: bool = False,
read_only_ok: bool = False,
cache_size: int = 16,
cache_size: int = DEFAULT_TRANSFORM_SAMPLE_CACHE_SIZE,
checkpoint_interval: int = 0,
ignore_errors: bool = False,
**kwargs,
Expand Down
20 changes: 10 additions & 10 deletions deeplake/util/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
MB,
TRANSFORM_PROGRESSBAR_UPDATE_INTERVAL,
TRANSFORM_RECHUNK_AVG_SIZE_BOUND,
TRANSFORM_CHUNK_CACHE_SIZE,
)
from deeplake.util.dataset import try_flushing
from deeplake.util.remove_cache import (
Expand Down Expand Up @@ -339,19 +340,18 @@ def create_worker_chunk_engines(
"""
all_chunk_engines: Dict[str, ChunkEngine] = {}
num_tries = 1000
storage_cache = LRUCache(MemoryProvider(), output_storage, TRANSFORM_CHUNK_CACHE_SIZE)
storage_cache.autoflush = False
# TODO: replace this with simply a MemoryProvider once we get rid of cachable
memory_cache = LRUCache(
MemoryProvider(),
MemoryProvider(),
64 * MB,
)
memory_cache.autoflush = False
for tensor in tensors:
for i in range(num_tries):
try:
# TODO: replace this with simply a MemoryProvider once we get rid of cachable
memory_cache = LRUCache(
MemoryProvider(),
MemoryProvider(),
64 * MB,
)
memory_cache.autoflush = False
storage_cache = LRUCache(MemoryProvider(), output_storage, 64 * MB)
storage_cache.autoflush = False

# this chunk engine is used to retrieve actual tensor meta and chunk_size
storage_chunk_engine = ChunkEngine(tensor, storage_cache, version_state)
existing_meta = storage_chunk_engine.tensor_meta
Expand Down

0 comments on commit e234455

Please sign in to comment.