From e2344550d4f544eb1084513c466afc598e47033d Mon Sep 17 00:00:00 2001
From: FayazRahman <fayazrahman4u@gmail.com>
Date: Tue, 2 May 2023 02:29:35 +0530
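Subject: [PATCH] Avoid np.prod overflow in get_avg_chunk_size; add transform cache size constants and share worker caches across tensors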

---
 deeplake/constants.py                |  4 ++++
 deeplake/core/chunk_engine.py        |  6 +++++-
 deeplake/core/transform/transform.py |  5 +++--
 deeplake/util/transform.py           | 20 ++++++++++----------
 4 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/deeplake/constants.py b/deeplake/constants.py
index e1609bbd91..e87a8d0ec0 100644
--- a/deeplake/constants.py
+++ b/deeplake/constants.py
@@ -183,3 +183,7 @@
 TRANSFORM_RECHUNK_AVG_SIZE_BOUND = 0.1
 
 TIME_INTERVAL_FOR_CUDA_MEMORY_CLEANING = 10 * 60
+
+# Transform cache sizes
+DEFAULT_TRANSFORM_SAMPLE_CACHE_SIZE = 16
+TRANSFORM_CHUNK_CACHE_SIZE = 64 * MB
diff --git a/deeplake/core/chunk_engine.py b/deeplake/core/chunk_engine.py
index b8ca69c07c..835afff572 100644
--- a/deeplake/core/chunk_engine.py
+++ b/deeplake/core/chunk_engine.py
@@ -2721,6 +2721,10 @@ def get_avg_chunk_size(self):
         dtype = self.tensor_meta.dtype
         if dtype in ("Any", "List", None):
             return None
-        nbytes = np.prod([num_samples] + max_shape) * np.dtype(dtype).itemsize
+        shape = [num_samples] + max_shape
+        nbytes = 1
+        for dim in shape:   # not using np.prod to avoid overflow
+            nbytes *= dim
+        nbytes = nbytes * np.dtype(dtype).itemsize
         avg_chunk_size = nbytes / num_chunks
         return avg_chunk_size
diff --git a/deeplake/core/transform/transform.py b/deeplake/core/transform/transform.py
index 358459edb5..d9b0399511 100644
--- a/deeplake/core/transform/transform.py
+++ b/deeplake/core/transform/transform.py
@@ -34,6 +34,7 @@
 from deeplake.hooks import dataset_written, dataset_read
 from deeplake.util.version_control import auto_checkout
 from deeplake.util.class_label import sync_labels
+from deeplake.constants import DEFAULT_TRANSFORM_SAMPLE_CACHE_SIZE
 
 import posixpath
 
@@ -57,7 +58,7 @@ def eval(
         check_lengths: bool = True,
         pad_data_in: bool = False,
         read_only_ok: bool = False,
-        cache_size: int = 16,
+        cache_size: int = DEFAULT_TRANSFORM_SAMPLE_CACHE_SIZE,
         checkpoint_interval: int = 0,
         ignore_errors: bool = False,
         **kwargs,
@@ -135,7 +136,7 @@ def eval(
         check_lengths: bool = True,
         pad_data_in: bool = False,
         read_only_ok: bool = False,
-        cache_size: int = 16,
+        cache_size: int = DEFAULT_TRANSFORM_SAMPLE_CACHE_SIZE,
         checkpoint_interval: int = 0,
         ignore_errors: bool = False,
         **kwargs,
diff --git a/deeplake/util/transform.py b/deeplake/util/transform.py
index db4142d846..f69dac1225 100644
--- a/deeplake/util/transform.py
+++ b/deeplake/util/transform.py
@@ -16,6 +16,7 @@
     MB,
     TRANSFORM_PROGRESSBAR_UPDATE_INTERVAL,
     TRANSFORM_RECHUNK_AVG_SIZE_BOUND,
+    TRANSFORM_CHUNK_CACHE_SIZE,
 )
 from deeplake.util.dataset import try_flushing
 from deeplake.util.remove_cache import (
@@ -339,19 +340,18 @@ def create_worker_chunk_engines(
     """
     all_chunk_engines: Dict[str, ChunkEngine] = {}
     num_tries = 1000
+    storage_cache = LRUCache(MemoryProvider(), output_storage, TRANSFORM_CHUNK_CACHE_SIZE)
+    storage_cache.autoflush = False
+    # TODO: replace this with simply a MemoryProvider once we get rid of cachable
+    memory_cache = LRUCache(
+        MemoryProvider(),
+        MemoryProvider(),
+        64 * MB,
+    )
+    memory_cache.autoflush = False
     for tensor in tensors:
         for i in range(num_tries):
             try:
-                # TODO: replace this with simply a MemoryProvider once we get rid of cachable
-                memory_cache = LRUCache(
-                    MemoryProvider(),
-                    MemoryProvider(),
-                    64 * MB,
-                )
-                memory_cache.autoflush = False
-                storage_cache = LRUCache(MemoryProvider(), output_storage, 64 * MB)
-                storage_cache.autoflush = False
-
                 # this chunk engine is used to retrieve actual tensor meta and chunk_size
                 storage_chunk_engine = ChunkEngine(tensor, storage_cache, version_state)
                 existing_meta = storage_chunk_engine.tensor_meta