add auto-tiling and fix weighting after merging

the-lay · the-lay · Dec 14, 2021 · Sep 27, 2021 · Sep 29, 2021 · Nov 8, 2021
commit 5351226187c52c78cf0653621270845f60ee7a30
diff --git a/tiler/merger.py b/tiler/merger.py
@@ -47,7 +47,8 @@ class Merger:
     def __init__(self,
                  tiler: Tiler,
                  window: Union[None, str, np.ndarray] = None,
-                 logits: int = 0):
+                 logits: int = 0,
+                 atol: float = 1e-10):
         """Merger precomputes everything for merging together tiles created by given Tiler.
 
         TODO:
@@ -64,6 +65,7 @@ def __init__(self,
         """
 
         self.tiler = tiler
+        self.atol = atol
 
         # Logits support
         if not isinstance(logits, int) or logits < 0:
@@ -152,7 +154,8 @@ def set_window(self, window: Union[None, str, np.ndarray] = None) -> None:
                 raise ValueError(f'Window function must have the same shape as tile shape.')
             self.window = window
         else:
-            raise ValueError(f'Unsupported type for window function ({type(window)}), expected str or np.ndarray.')
+            raise ValueError(
+                f'Unsupported type for window function ({type(window)}), expected str or np.ndarray.')
 
     def reset(self) -> None:
         """Reset data and normalization buffers.
@@ -192,7 +195,8 @@ def add(self, tile_id: int, data: np.ndarray) -> None:
                              f'There are {len(self.tiler)} tiles, starting from index 0.')
 
         data_shape = np.array(data.shape)
-        expected_tile_shape = ((self.logits, ) + tuple(self.tiler.tile_shape)) if self.logits > 0 else tuple(self.tiler.tile_shape)
+        expected_tile_shape = ((self.logits, ) + tuple(self.tiler.tile_shape)
+                               ) if self.logits > 0 else tuple(self.tiler.tile_shape)
 
         if self.tiler.mode != 'irregular':
             if not np.all(np.equal(data_shape, expected_tile_shape)):
@@ -212,7 +216,8 @@ def add(self, tile_id: int, data: np.ndarray) -> None:
 
         # TODO check for self.data and data dtypes mismatch?
         if self.logits > 0:
-            self.data[tuple([slice(None, None, None)] + sl)] += (data * self.window[tuple(win_sl[1:])])
+            self.data[tuple([slice(None, None, None)] + sl)
+                      ] += (data * self.window[tuple(win_sl[1:])])
             self.weights_sum[tuple(sl)] += self.window[tuple(win_sl[1:])]
         else:
             self.data[tuple(sl)] += (data * self.window[tuple(win_sl)])
@@ -236,14 +241,62 @@ def add_batch(self, batch_id: int, batch_size: int, data: np.ndarray) -> None:
         n_batches = (div + 1) if mod > 0 else div
 
         if batch_id < 0 or batch_id >= n_batches:
-            raise IndexError(f'Out of bounds. There are {n_batches} batches of {batch_size}, starting from index 0.')
+            raise IndexError(
+                f'Out of bounds. There are {n_batches} batches of {batch_size}, starting from index 0.')
 
         # add each tile in a batch with computed tile_id
         for data_i, tile_i in enumerate(range(batch_id * batch_size,
-                                        min((batch_id + 1) * batch_size, len(self.tiler)))):
+                                              min((batch_id + 1) * batch_size, len(self.tiler)))):
             self.add(tile_i, data[data_i])
 
-    def merge(self, unpad: bool = True, argmax: bool = False) -> np.ndarray:
+    def norm_by_weights(self, data: np.ndarray, weights: np.ndarray, atol: float = 1e-10, in_place: bool = True) -> np.ndarray:
+        """Divide accumulated data by accumulated weights, guarding against near-zero weights.
+
+        Parameters
+        ----------
+        data : np.ndarray
+            Accumulated (weighted) data; it is not modified.
+        weights : np.ndarray
+            Accumulated weights; must be broadcastable against ``data``.
+        atol : float, optional
+            Weights strictly below this threshold are replaced by 1 before dividing,
+            by default 1e-10.
+        in_place : bool, optional
+            If True (default), the replacement of small weights is written into the
+            ``weights`` array passed by the caller. If False, a copy is patched
+            instead and ``weights`` is left untouched.
+
+        Returns
+        -------
+        np.ndarray
+            ``data`` divided element-wise by the patched weights (a new array).
+        """
+        # pick the array that receives the patch: the caller's buffer or a private copy
+        divisor = weights if in_place else np.array(weights)
+
+        # a weight below atol would blow up the quotient, so divide by 1 there instead
+        divisor[divisor < atol] = 1
+
+        return data / divisor
+
+    def do_unpad(self, data: np.ndarray, pads: np.ndarray) -> np.ndarray:
+        """Remove previously applied padding from the borders of ``data``.
+
+        Parameters
+        ----------
+        data : np.ndarray
+            Padded array.
+        pads : np.ndarray
+            Sequence of ``(before, after)`` pairs. The m-th pair is applied to axis m
+            of ``data``; axes beyond ``len(pads)`` are returned in full.
+
+        Returns
+        -------
+        np.ndarray
+            Slice of ``data`` with ``before`` elements dropped at the start and
+            ``after`` elements dropped at the end of each padded axis.
+        """
+        slices = [slice(pad[0], data.shape[m] - pad[1]) for m, pad in enumerate(pads)]
+        # NumPy requires a tuple for multi-axis slicing; a plain list of slices is
+        # rejected by current NumPy releases
+        return data[tuple(slices)]
+
+    def merge(self, unpad: bool = True, argmax: bool = False, data_orig_shape: np.ndarray = None) -> np.ndarray:
         """Returns final merged data array obtained from added tiles.
 
         Args:
@@ -255,15 +308,25 @@ def merge(self, unpad: bool = True, argmax: bool = False) -> np.ndarray:
         """
         data = self.data
 
-        if unpad:
-            sl = [slice(None, self.tiler.data_shape[i]) for i in range(len(self.tiler.data_shape))]
-
-            if self.logits:
-                sl = [slice(None, None, None)] + sl
+        # normalize weights to have final sum_up to 1
+        data = self.norm_by_weights(data, self.weights_sum, self.atol)
 
-            data = data[tuple(sl)]
+        if unpad:
+            if not hasattr(self.tiler, 'pads'):
+                if self.data_orig_shape is None:
+                    raise ValueError(
+                        'data_orig_shape needs to be given if data_shape was aautomatically calculated.')
+                self.data_orig_shape = data_orig_shape
+                self.pads = self.tiler.calculate_padding(
+                    self.data_orig_shape, self.overlap, self.tile_shape)
+            data = self.do_unpad(data, self.tiler.pads)
+        #    sl = [slice(None, self.tiler.data_shape[i]) for i in range(len(self.tiler.data_shape))]
+        #
+        #    if self.logits:
+        #        sl = [slice(None, None, None)] + sl
+        #
+        #    data = data[tuple(sl)]
 
         if argmax:
             data = np.argmax(data, 0)
-
         return data
diff --git a/tiler/tiler.py b/tiler/tiler.py
@@ -34,7 +34,8 @@ def __init__(self,
                  overlap: Union[int, float, Tuple, List] = 0,
                  channel_dimension: Optional[int] = None,
                  mode: str = 'constant',
-                 constant_value: float = 0.0):
+                 constant_value: float = 0.0,
+                 get_padding: bool = False):
         """Tiler class precomputes everything for tiling with specified parameters, without actually slicing data.
         You can access tiles individually with `Tiler.get_tile()` or with an iterator, both individually and in batches,
         with `Tiler.iterate()` (or the alias `Tiler.__call__()`).
@@ -78,10 +79,19 @@ def __init__(self,
         if self.tile_shape.size != self.data_shape.size:
             raise ValueError('Tile and data shapes must have the same length.')
 
+        self.overlap = overlap
+
+        # need to caclulate get correct padding?
+        if get_padding:
+            self.pads = self.calculate_padding(
+                self.data_shape, self.tile_shape, np.asarray(self.overlap))
+            self.data_shape = self.fix_data_shape(self.data_shape, self.pads)
+
         # Tiling mode
         self.mode = mode
         if self.mode not in self.TILING_MODES:
-            raise ValueError(f'{self.mode} is an unsupported tiling mode, please check the documentation.')
+            raise ValueError(
+                f'{self.mode} is an unsupported tiling mode, please check the documentation.')
 
         # Constant value used for constant tiling mode
         self.constant_value = constant_value
@@ -97,7 +107,6 @@ def __init__(self,
                 self.channel_dimension = self._n_dim + self.channel_dimension
 
         # Overlap and step
-        self.overlap = overlap
         if isinstance(self.overlap, float):
             if self.overlap < 0 or self.overlap > 1.0:
                 raise ValueError('Float overlap must be in range of 0.0 (0%) to 1.0 (100%).')
@@ -107,9 +116,11 @@ def __init__(self,
                 self._tile_overlap[self.channel_dimension] = 0
 
         elif isinstance(self.overlap, int):
-            tile_shape_without_channel = self.tile_shape[np.arange(self._n_dim) != self.channel_dimension]
+            tile_shape_without_channel = self.tile_shape[np.arange(
+                self._n_dim) != self.channel_dimension]
             if self.overlap < 0 or np.any(self.overlap >= tile_shape_without_channel):
-                raise ValueError(f'Integer overlap must be in range of 0 to {np.max(tile_shape_without_channel)}')
+                raise ValueError(
+                    f'Integer overlap must be in range of 0 to {np.max(tile_shape_without_channel)}')
 
             self._tile_overlap: np.ndarray = np.array([self.overlap for _ in self.tile_shape])
             if self.channel_dimension is not None:
@@ -124,10 +135,12 @@ def __init__(self,
         else:
             raise ValueError('Unsupported overlap mode (not float, int, list or tuple).')
 
-        self._tile_step: np.ndarray = (self.tile_shape - self._tile_overlap).astype(int)  # tile step
+        self._tile_step: np.ndarray = (
+            self.tile_shape - self._tile_overlap).astype(int)  # tile step
 
         # Calculate mosaic (collection of tiles) shape
-        div, mod = np.divmod([self.data_shape[d] - self._tile_overlap[d] for d in range(self._n_dim)], self._tile_step)
+        div, mod = np.divmod([self.data_shape[d] - self._tile_overlap[d]
+                              for d in range(self._n_dim)], self._tile_step)
         if self.mode == 'drop':
             self._indexing_shape = div
         else:
@@ -149,7 +162,8 @@ def __init__(self,
             self._tile_step[self.channel_dimension] = 0
 
         # Tile indexing
-        self._tile_index = np.vstack(np.meshgrid(*[np.arange(0, x) for x in self._indexing_shape], indexing='ij'))
+        self._tile_index = np.vstack(np.meshgrid(
+            *[np.arange(0, x) for x in self._indexing_shape], indexing='ij'))
         self._tile_index = self._tile_index.reshape(self._n_dim, -1).T
         self.n_tiles = len(self._tile_index)
 
@@ -303,7 +317,8 @@ def get_tile(self,
         # get tile data
         tile_corner = self._tile_index[tile_id] * self._tile_step
         # take the lesser of the tile shape and the distance to the edge
-        sampling = [slice(tile_corner[d], np.min([self.data_shape[d], tile_corner[d] + self.tile_shape[d]])) for d in range(self._n_dim)]
+        sampling = [slice(tile_corner[d], np.min(
+            [self.data_shape[d], tile_corner[d] + self.tile_shape[d]])) for d in range(self._n_dim)]
 
         if callable(data):
             sampling = [x.stop - x.start for x in sampling]
@@ -320,7 +335,8 @@ def get_tile(self,
                 tile_data = np.pad(tile_data, list((0, diff) for diff in shape_diff), mode=self.mode,
                                    constant_values=self.constant_value)
             elif self.mode == 'reflect' or self.mode == 'edge' or self.mode == 'wrap':
-                tile_data = np.pad(tile_data, list((0, diff) for diff in shape_diff), mode=self.mode)
+                tile_data = np.pad(tile_data, list((0, diff)
+                                                   for diff in shape_diff), mode=self.mode)
 
         return tile_data
 
@@ -346,7 +362,7 @@ def get_tile_bbox_position(self, tile_id: int, with_channel_dim: bool = False) -
         finish_corner = starting_corner + self.tile_shape
         if self.channel_dimension is not None and not with_channel_dim:
             dim_indices = list(range(self.channel_dimension)) + \
-                          list(range(self.channel_dimension + 1, len(self._tile_step)))
+                list(range(self.channel_dimension + 1, len(self._tile_step)))
             starting_corner = starting_corner[dim_indices]
             finish_corner = finish_corner[dim_indices]
         return starting_corner, finish_corner
@@ -384,3 +400,97 @@ def get_mosaic_shape(self, with_channel_dim: bool = False) -> np.ndarray:
         if self.channel_dimension is not None and not with_channel_dim:
             return self._indexing_shape[~(np.arange(self._n_dim) == self.channel_dimension)]
         return self._indexing_shape
+
+    def calculate_padding(self,
+                          data_shape_nonpad: np.ndarray,
+                          tile_shape: np.ndarray,
+                          overlap: np.ndarray,
+                          pprint: Optional[bool] = False) -> np.ndarray:
+        """Calculate the per-axis padding that makes the tiles cover the data exactly.
+
+        The padded extent along each axis is ``tile_shape + k * (tile_shape - overlap)``
+        for the smallest integer ``k >= 0`` that reaches ``data_shape_nonpad``.
+
+        Parameters
+        ----------
+        data_shape_nonpad : np.ndarray
+            Shape of the unpadded data (array-like is accepted).
+        tile_shape : np.ndarray
+            Shape of a single tile (array-like is accepted).
+        overlap : np.ndarray
+            Overlap between neighbouring tiles in pixels. Negative values are treated
+            as 0 and values are wrapped modulo ``tile_shape``. The argument itself is
+            never modified.
+        pprint : Optional[bool], optional
+            If True, print the inputs and the resulting pads, by default False
+
+        Returns
+        -------
+        pads: np.ndarray
+            Integer array with one ``(before, after)`` row per axis; when the total
+            padding is odd the extra element goes to ``after``.
+
+        ToDo
+        ----
+        1) implement for non-even tileshapes.
+        2) add for percentage overlapping.
+        """
+        data_shape_nonpad = np.asarray(data_shape_nonpad)
+        tile_shape = np.asarray(tile_shape)
+
+        # overlap assumed in pixels for now; cannot be bigger than tile_shape nor smaller than 0.
+        # np.maximum builds a new array, so the caller's overlap stays untouched.
+        overlap = np.mod(np.maximum(np.asarray(overlap), 0), tile_shape)
+
+        # number of additional tile steps needed after the first tile; clamped at 0 so
+        # that data smaller than one tile is still padded up to a full tile
+        step_size = tile_shape - overlap
+        n_steps = np.maximum(np.ceil((data_shape_nonpad - tile_shape) / step_size), 0)
+
+        # assuming even tileshapes
+        last_pos = tile_shape + n_steps * step_size
+        pad_add = last_pos - data_shape_nonpad
+
+        # calculate pads and (if uneven padding necessary) pad more to the right
+        pads = np.transpose([pad_add // 2, pad_add // 2 + np.mod(pad_add, 2)]).astype('int')
+
+        # pretty print-out results if wanted
+        if pprint:
+            print(
+                f"Input: data_shape_nonpad={data_shape_nonpad},\t tile_shape={tile_shape},\t overlap=\t{overlap}\npads=\t{list(pads)}.")
+
+        return pads
+
+    def pad_outer(self,
+                  data: Union[np.ndarray, Callable[..., np.ndarray]],
+                  pads: Union[np.ndarray, Tuple, List]) -> np.ndarray:
+        """Pad ``data`` on its borders using NumPy's ``reflect`` mode.
+
+        Parameters
+        ----------
+        data : Union[np.ndarray, Callable[..., np.ndarray]]
+            Data handed to ``np.pad`` unchanged.
+        pads : Union[np.ndarray, Tuple, List]
+            Pad widths handed to ``np.pad`` unchanged, e.g. one ``(before, after)``
+            pair per axis.
+
+        Returns
+        -------
+        np.ndarray
+            The padded array returned by ``np.pad``.
+        """
+        padded = np.pad(data, pad_width=pads, mode='reflect')
+        return padded
+
+    def fix_data_shape(self,
+                       data_shape: np.ndarray,
+                       pads: Union[np.ndarray, Tuple, List]):
+        """Return the data shape after the given pads have been applied.
+
+        Parameters
+        ----------
+        data_shape : np.ndarray
+            Shape before padding; a copy is made, the argument is not modified.
+        pads : Union[np.ndarray, Tuple, List]
+            Sequence of pads where entry m holds the amounts added before
+            (index 0) and after (index 1) axis m.
+
+        Returns
+        -------
+        np.ndarray
+            Copy of ``data_shape`` with both pad amounts added to each listed axis.
+        """
+        padded_shape = np.array(data_shape)
+        axis = 0
+        for pad in pads:
+            # grow this axis by the padding on both of its sides
+            padded_shape[axis] += (pad[0] + pad[1])
+            axis += 1
+
+        return padded_shape