Refactored normalization by weights in merging

the-lay · the-lay · Dec 14, 2021 · Sep 27, 2021 · Sep 29, 2021 · Nov 8, 2021
commit aa8642067b44d3053874d5f7437db63be2e52a52
diff --git a/tests/test_merger.py b/tests/test_merger.py
@@ -7,7 +7,7 @@
 
 class TestMergingCommon(unittest.TestCase):
     def setUp(self) -> None:
-        self.data = np.arange(0, 100)
+        self.data = np.arange(0, 100, dtype=np.float64)
 
     def test_init(self):
         tiler = Tiler(data_shape=self.data.shape, tile_shape=(10,))
@@ -240,7 +240,7 @@ def test_overlap_tile_window(self):
 
     def test_merge(self):
 
-        # Test padding
+        # Test unpadding
         tiler = Tiler(data_shape=self.data.shape, tile_shape=(12,))
         merger = Merger(tiler)
         for t_id, t in tiler(self.data):
@@ -251,6 +251,18 @@ def test_merge(self):
             merger.merge(unpad=False), np.hstack((self.data, [0, 0, 0, 0, 0, 0, 0, 0]))
         )
 
+        # Mismatched dtype should raise an exception
+        with self.assertRaises(ValueError):
+            merger.add(0, np.zeros((12, ), dtype=np.int64))
+
+        # Test without normalization by weights
+        window = np.ones((12, )) * 2
+        merger = Merger(tiler, window=window)
+        for t_id, t in tiler(self.data):
+            merger.add(t_id, t)
+        np.testing.assert_equal(merger.merge(normalize_by_weights=False), self.data * 2)
+        np.testing.assert_equal(merger.merge(normalize_by_weights=True), self.data)
+
         # Test argmax
         merger = Merger(tiler, logits=3)
         for t_id, t in tiler(self.data):
@@ -275,3 +287,12 @@ def test_merge(self):
                 )
             ),
         )
+
+        # Test explicit unpadding
+        tiler = Tiler(data_shape=self.data.shape, tile_shape=(12,))
+        merger = Merger(tiler)
+        padded_data = tiler.apply_padding(self.data)
+        np.testing.assert_equal(tiler._padding, [(4, 4)])
+        for t_id, t in tiler(padded_data):
+            merger.add(t_id, t)
+        np.testing.assert_equal(merger.merge(unpad=True), self.data)
diff --git a/tiler/merger.py b/tiler/merger.py
@@ -60,7 +60,6 @@ def __init__(
         logits: int = 0,
         save_visits: bool = True,
         dtype: np.dtype = np.float64,
-        atol: float = 1e-10,
     ):
         """Merger precomputes everything for merging together tiles created by given Tiler.
 
@@ -84,7 +83,6 @@ def __init__(
         """
 
         self.tiler = tiler
-        self.atol = atol
 
         # Logits support
         if not isinstance(logits, int) or logits < 0:
@@ -115,6 +113,7 @@ def _generate_window(self, window: str, shape: Union[Tuple, List]) -> np.ndarray
 
         Args:
             window (str): Specifies window function. Must be one of `Merger.SUPPORTED_WINDOWS`.
+
             shape (tuple or list): Shape of the requested window.
 
         Returns:
@@ -216,6 +215,7 @@ def add(self, tile_id: int, data: np.ndarray) -> None:
 
         Args:
             tile_id (int): Specifies which tile it is.
+
             data (np.ndarray): Specifies tile data.
 
         Returns:
@@ -278,7 +278,9 @@ def add_batch(self, batch_id: int, batch_size: int, data: np.ndarray) -> None:
 
         Args:
             batch_id (int): Specifies batch number, must be >= 0.
+
             batch_size (int): Specifies batch size, must be >= 0.
+
             data (np.ndarray): Tile data array, must have shape `[batch, *tile_shape]
 
         Returns:
@@ -302,101 +304,41 @@ def add_batch(self, batch_id: int, batch_size: int, data: np.ndarray) -> None:
         ):
             self.add(tile_i, data[data_i])
 
-    def norm_by_weights(
-        self,
-        data: np.ndarray,
-        weights: np.ndarray,
-        atol: float = 1e-10,
-        in_place: bool = True,
-    ) -> np.ndarray:
-        """Normalised applied weights such that sum guarantees approx. 1.
-
-        Parameters
-        ----------
-        data : np.ndarray
-            padded image data
-        weights : np.ndarray
-            weights that were generated by merging-process
-        atol : float, optional
-            absolute tolarenced weights size assuming weights in [0,1], by default 1e-10
-
-        Returns
-        -------
-        data_re: np.ndarray
-            reweighted data
-        """
-        # do operation in-place?
-        if not in_place:
-            weights = np.array(weights)
-
-        # avoid division by values close to 0
-        weights[weights < atol] = 1
-
-        # reweight
-        data_re = data / weights
-
-        return data_re
-
-    def do_unpad(self, data: np.ndarray, pads: np.ndarray) -> np.ndarray:
-        """Simple unpadding using the data-set and applied pads.
-
-        Parameters
-        ----------
-        data : [type]
-            [description]
-        pads : [type]
-            [description]
-
-        Returns
-        -------
-        [type]
-            [description]
-        """
-        slices = [slice(pad[0], data.shape[m] - pad[1]) for m, pad in enumerate(pads)]
-        return data[slices]
-
-    def merge(
-        self,
-        unpad: bool = True,
-        argmax: bool = False,
-        data_orig_shape: np.ndarray = None,
-    ) -> np.ndarray:
-        """Returns final merged data array obtained from added tiles.
+    def merge(self,
+              unpad: bool = True,
+              argmax: bool = False,
+              normalize_by_weights: bool = True,
+              ) -> np.ndarray:
+        """Returns merged data array obtained from added tiles.
 
         Args:
             unpad (bool): If unpad is True, removes padded array elements. Default is True.
+
             argmax (bool): If argmax is True, the first dimension will be argmaxed. Default is False.
 
+            normalize_by_weights (bool): If normalize_by_weights is True, the accumulated data will be divided by the accumulated weights. Default is True.
+
         Returns:
             np.ndarray: Final merged data array obtained from added tiles.
         """
+
         data = self.data
 
-        # normalize weights to have final sum_up to 1
-        data = self.norm_by_weights(data, self.weights_sum, self.atol)
+        if normalize_by_weights:
+            data = np.nan_to_num(data / self.weights_sum)
 
         if unpad:
-            if not hasattr(self.tiler, "pads"):
-                if data_orig_shape is None:
-                    if self.data_orig_shape is None:
-                        raise ValueError(
-                            "data_orig_shape needs to be given if data_shape was aautomatically calculated."
-                        )
-                else:
-                    self.data_orig_shape = data_orig_shape
-                self.pads = self.tiler.calculate_padding(
-                    data_shape_nonpad=data_orig_shape,
-                    tile_shape=self.tiler.tile_shape,
-                    overlap=np.array(self.tiler.overlap),
-                )
-            data = self.do_unpad(data, self.tiler.pads)
-        #    sl = [slice(None, self.tiler.data_shape[i]) for i in range(len(self.tiler.data_shape))]
-        #
-        #    if self.logits:
-        #        sl = [slice(None, None, None)] + sl
-        #
-        #    data = data[tuple(sl)]
+            sl = [slice(pad_from, shape - pad_to)
+                  for shape, (pad_from, pad_to)
+                  in zip(self.tiler.data_shape, self.tiler._padding)]
+
+            # if merger has logits dimension, add another slicing in front
+            if self.logits:
+                sl = [slice(None, None, None)] + sl
+
+            data = data[tuple(sl)]
 
         if argmax:
             data = np.argmax(data, 0)
+
         return data