Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add auto-tiling and fix weighting after merging #7

Merged
merged 23 commits into from
Dec 14, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
5351226
add auto-tiling and fix weighting after merging
renerichter Sep 27, 2021
fdc38c8
add minimal overlap
renerichter Sep 29, 2021
6db742e
Merge remote-tracking branch 'origin/master' into renerichter-rl
the-lay Nov 8, 2021
c10c04c
Code linting
the-lay Nov 9, 2021
a14beea
Added ndarray types; Throw an exception in case of non-matching data.…
the-lay Nov 18, 2021
ab58cd3
Removing flattop window since it has negative values
the-lay Nov 18, 2021
46e948b
Code linting
the-lay Nov 19, 2021
a6e56e4
Added dtype keyword to Merger that specifies dtype of data buffer
the-lay Dec 10, 2021
4474666
Added apply_padding method; overlap now can be given as a numpy array
the-lay Dec 10, 2021
3640e2d
Small documentation fixes
the-lay Dec 10, 2021
ae79a80
Saving Merger data_visits is now optional
the-lay Dec 10, 2021
ebac20a
Fixed data visits check
the-lay Dec 10, 2021
51cc98c
Added an uncovered edge case test
the-lay Dec 11, 2021
5eee24b
Added test for Merger with disabled save_visits
the-lay Dec 11, 2021
aa86420
Refactored normalization by weights in merging
the-lay Dec 11, 2021
69ab5c9
Fixed explicit padding for odd data shapes
the-lay Dec 11, 2021
d0a559e
Hiding division by zero warning when normalizing by weight
the-lay Dec 11, 2021
f0b1f9b
Code linting
the-lay Dec 11, 2021
bd1cd9e
Updated documentation
the-lay Dec 11, 2021
f032b09
Fixing trying to submit coveralls on pull requests
the-lay Dec 11, 2021
054b5d7
Teaser image generated script now actually tiles and merges the image :)
the-lay Dec 11, 2021
a02c0c6
Merger buffer dtypes are now hardcoded, optional casting to specified…
the-lay Dec 11, 2021
c230be4
Refactored extra padding system and updated examples
the-lay Dec 12, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Added apply_padding method; overlap now can be given as a numpy array
  • Loading branch information
the-lay committed Dec 10, 2021
commit 447466652258098558dd13a1ce000533707377a9
29 changes: 26 additions & 3 deletions tests/test_tiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,7 @@ def test_drop_mode(self):
Tiler(data_shape=(2, 100), tile_shape=(1, 64), overlap=32, mode="drop")

# Drop mode with tile shape bigger than data shape
with warnings.catch_warnings():
warnings.simplefilter("ignore")
with self.assertWarns(Warning):
tiler = Tiler(data_shape=(1, 63), tile_shape=(1, 64), mode="drop")
self.assertEqual(tiler.n_tiles, 0)

Expand Down Expand Up @@ -400,7 +399,7 @@ def test_overlap(self):
np.testing.assert_equal(expected_split, calculated_split)

# Case 3
# Overlap is provided as tuple or list
# Overlap is provided as tuple, list or np.ndarray
# Let's try a slightly more complicated test case with a channel dimension
tile_size = 10
data = np.vstack((self.data, self.data * 2, self.data * 3))
Expand Down Expand Up @@ -543,3 +542,27 @@ def test_tile_bbox_position(self):
np.testing.assert_equal(
([0, 90], [3, 100]), tiler2.get_tile_bbox_position(tile_id, True)
)

def test_apply_padding(self):
    # Exercises Tiler.apply_padding: per-tile padding vs. explicit padding of the whole
    # data, rejection of data with a stale shape, and the warning on repeated padding.
    tiler = Tiler(data_shape=self.data.shape, tile_shape=(3,), mode="reflect")

    # Before apply_padding the tiler pads each tile on its own: the first tile is
    # untouched and the last tile is expected to come back as 99, 99, 99.
    first_tile = tiler.get_tile(self.data, 0)
    np.testing.assert_equal(first_tile, [0, 1, 2])
    last_tile = tiler.get_tile(self.data, len(tiler) - 1)
    np.testing.assert_equal(last_tile, [99, 99, 99])

    # After apply_padding the reflection is taken from the data itself,
    # so the border tiles contain mirrored neighbours.
    padded = tiler.apply_padding(self.data, mode="reflect")
    first_tile = tiler.get_tile(padded, 0)
    np.testing.assert_equal(first_tile, [1, 0, 1])
    last_tile = tiler.get_tile(padded, len(tiler) - 1)
    np.testing.assert_equal(last_tile, [98, 99, 98])

    # Data that still has the old (pre-padding) shape must be rejected.
    with self.assertRaises(ValueError):
        tiler.apply_padding(self.data)

    # Applying padding a second time must emit a warning.
    with self.assertWarns(Warning):
        tiler.apply_padding(padded, mode="reflect")
240 changes: 87 additions & 153 deletions tiler/tiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,10 @@ def __init__(
self,
data_shape: Union[Tuple, List, np.ndarray],
tile_shape: Union[Tuple, List, np.ndarray],
overlap: Union[int, float, Tuple, List] = 0,
overlap: Union[int, float, Tuple, List, np.ndarray] = 0,
channel_dimension: Optional[int] = None,
mode: str = "constant",
constant_value: float = 0.0,
get_padding: bool = False,
):
"""Tiler class precomputes everything for tiling with specified parameters, without actually slicing data.
You can access tiles individually with `Tiler.get_tile()` or with an iterator, both individually and in batches,
Expand All @@ -54,43 +53,50 @@ def __init__(
tile_shape (tuple, list or np.ndarray): Shape of a tile, e.g. (256, 256, 3), [64, 64, 64] or np.ndarray([3, 128, 128]).
Tile must have the same number of dimensions as data.

overlap (int, float, tuple or list): Specifies overlap between tiles.
overlap (int, float, tuple, list or np.ndarray): Specifies overlap between tiles.
If integer, the same overlap of overlap pixels applied in each dimension, except channel_dimension.
If float, percentage of a tile_shape to overlap (from 0.0 to 1.0), except channel_dimension.
If tuple or list, explicit size of the overlap (must be smaller than tile_shape).
If tuple, list or np.ndarray, explicit size of the overlap (must be smaller than tile_shape in each dimension).
Default is `0`.

channel_dimension (int, optional): Specifies which axis is the channel dimension that will not be tiled.
Usually it is the last or the first dimension of the array.
Negative indexing (`-len(data_shape)` to `-1` inclusive) is translated into corresponding indices.
Negative indexing (`-len(data_shape)` to `-1` inclusive) is allowed.
Default is `None`, no channel dimension in the data.

mode (str): Defines how the data will be tiled.
Must be one of the supported `Tiler.TILING_MODES`.
Must be one of the supported `Tiler.TILING_MODES`. Defaults to `"constant"`.

constant_value (float): Specifies the value of padding when `mode='constant'`.
Default is `0.0`.
"""

self._recalculate(data_shape, tile_shape,
overlap, channel_dimension,
mode, constant_value)

def _recalculate(self,
data_shape: Union[Tuple, List, np.ndarray],
tile_shape: Union[Tuple, List, np.ndarray],
overlap: Union[int, float, Tuple, List, np.ndarray] = 0,
channel_dimension: Optional[int] = None,
mode: str = "constant",
constant_value: float = 0.0,
) -> None:

# Data and tile shapes
self.data_shape = np.asarray(data_shape).astype(int)
self.tile_shape = np.asarray(tile_shape).astype(int)
self._n_dim: int = len(self.data_shape)
self._padding = tuple((0, 0) for _ in range(self._n_dim))
if (self.tile_shape <= 0).any() or (self.data_shape <= 0).any():
raise ValueError(
"Tile and data shapes must be tuple or lists of positive numbers."
)
if self.tile_shape.size != self.data_shape.size:
raise ValueError("Tile and data shapes must have the same length.")

self.overlap = overlap

# Do we need to calculate the correct padding?
if get_padding:
self.pads = self.calculate_padding(
self.data_shape, self.tile_shape, np.asarray(self.overlap)
)
self.data_shape = self.fix_data_shape(self.data_shape, self.pads)
raise ValueError("Tile and data shapes must have the same length. "
"Hint: if you require tiles with less dimensions than data, put 1 in sliced dimensions, "
"e.g. to get 1d 64px lines of 2d 64x64px image would mean tile_shape of (64, 1).")

# Tiling mode
self.mode = mode
Expand All @@ -117,6 +123,7 @@ def __init__(
self.channel_dimension = self._n_dim + self.channel_dimension

# Overlap and step
self.overlap = overlap
if isinstance(self.overlap, float):
if self.overlap < 0 or self.overlap > 1.0:
raise ValueError(
Expand Down Expand Up @@ -144,7 +151,7 @@ def __init__(
if self.channel_dimension is not None:
self._tile_overlap[self.channel_dimension] = 0

elif isinstance(self.overlap, list) or isinstance(self.overlap, tuple):
elif isinstance(self.overlap, list) or isinstance(self.overlap, tuple) or isinstance(self.overlap, np.ndarray):
if np.any(np.array(self.overlap) < 0) or np.any(
self.overlap >= self.tile_shape
):
Expand All @@ -154,7 +161,7 @@ def __init__(

else:
raise ValueError(
"Unsupported overlap mode (not float, int, list or tuple)."
"Unsupported overlap mode (not float, int, list, tuple or np.ndarray)."
)

self._tile_step: np.ndarray = (self.tile_shape - self._tile_overlap).astype(
Expand Down Expand Up @@ -198,7 +205,7 @@ def __init__(
if self.n_tiles == 0:
warnings.warn(
f"Tiler (mode={mode}, overlap={overlap}) will split data_shape {data_shape} "
f"into zero tiles (tile_shape={tile_shape})"
f"into zero tiles (tile_shape={tile_shape})."
)

def __len__(self) -> int:
Expand All @@ -223,6 +230,17 @@ def __repr__(self) -> str:
f"\n\tChannel dimension: {self.channel_dimension}"
)

def __call__(
    self,
    data: Union[np.ndarray, Callable[..., np.ndarray]],
    progress_bar: bool = False,
    batch_size: int = 0,
    drop_last: bool = False,
    copy_data: bool = True,
) -> Generator[Tuple[int, np.ndarray], None, None]:
    """Makes the instance callable; equivalent to calling `Tiler.iterate()`.

    All arguments are forwarded to `iterate()` positionally and unchanged,
    and the object returned by `iterate()` is handed back as-is.
    """
    tile_generator = self.iterate(data, progress_bar, batch_size, drop_last, copy_data)
    return tile_generator

def iterate(
self,
data: Union[np.ndarray, Callable[..., np.ndarray]],
Expand Down Expand Up @@ -307,17 +325,6 @@ def iterate(
)
yield tile_i // batch_size, tiles

def __call__(
    self,
    data: Union[np.ndarray, Callable[..., np.ndarray]],
    progress_bar: bool = False,
    batch_size: int = 0,
    drop_last: bool = False,
    copy_data: bool = True,
) -> Generator[Tuple[int, np.ndarray], None, None]:
    """Makes the instance callable; equivalent to calling `Tiler.iterate()`.

    All arguments are forwarded to `iterate()` positionally and unchanged,
    and the object returned by `iterate()` is handed back as-is.
    """
    tile_generator = self.iterate(data, progress_bar, batch_size, drop_last, copy_data)
    return tile_generator

def get_tile(
self,
data: Union[np.ndarray, Callable[..., np.ndarray]],
Expand Down Expand Up @@ -482,133 +489,60 @@ def get_mosaic_shape(self, with_channel_dim: bool = False) -> np.ndarray:
]
return self._indexing_shape

def calculate_padding(
self,
data_shape_nonpad: np.ndarray,
tile_shape: np.ndarray,
overlap: np.ndarray,
pprint: Optional[bool] = False,
) -> np.ndarray:
"""Calculates the Padding from a given input.


Parameters
----------
data_shape_nonpad : Union[Tuple, List]
[description]
tile_shape : Union[Tuple, List]
[description]
overlap : Union[int, float, Tuple, List], optional
[description], by default 0
pprint : Optional[bool], optional
[description], by default False

Returns
-------
pads: np.ndarray
List of padding to be applied to the different dimensions

ToDo
----
1) Update description.
2) implement for non-even tileshapes.
3) add for percentage overlapping.
"""
# overlap assumed in pixels for now; cannot be bigger than tile_shape nor smaller than 0
overlap[overlap < 0] = 0
overlap = np.mod(overlap, tile_shape)

# get padding -> note: at most 1 more tile should be necessary, as negative overlap is not allowed
step_size = tile_shape - overlap
dis = (data_shape_nonpad - tile_shape) / step_size

# assuming even tileshapes
last_pos = tile_shape + np.ceil(dis) * step_size
pad_add = last_pos - data_shape_nonpad

# calculate pads and (if uneven padding necessary) pad more to the right
pads = np.transpose([pad_add // 2, pad_add // 2 + np.mod(pad_add, 2)]).astype(
"int"
)

# pretty print-out results if wanted
if pprint:
print(
f"Input: data_shape_nonpad={data_shape_nonpad},\t tile_shape={tile_shape},\t overlap=\t{overlap}\npads=\t{list(pads)}."
)
def apply_padding(self,
data: np.ndarray,
mode: str = "reflect",
) -> np.ndarray:
"""Applies difference between required data shape and original data shape as padding around data.
Automatically adjusts Tiler parameters and returns padded data.
Moreover, Merger (with `unpad=True`) will automatically remove padding created with this method.
Consider using this instead of relying on tile automatic padding (see example below).

Example:
```python
>>> data = np.arange(0, 10) # [0 1 2 3 4 5 6 7 8 9]
>>> tiler = Tiler(data_shape=(10, ), tile_shape=(3, ), mode="reflect")
>>> # without apply_padding, padding in tiles is generated only looking at that tile
>>> tiler.get_tile(data, 0) # [ 0 1 2]
>>> tiler.get_tile(data, 3) # [ 9 9 9]
>>> # with apply_padding, the data is padded correctly
>>> data = tiler.apply_padding(data) # [1 0 1 2 3 4 5 6 7 8 9 8]
>>> tiler.data_shape # (12, )
>>> tiler.get_tile(data, 0) # [ 1 0 1]
>>> tiler.get_tile(data, 3) # [ 8 9 8]
```

return pads
Args:
data (np.ndarray):
The data which will be padded.
mode (str):
Numpy padding mode. Defaults to "reflect".
[Read more on numpy docs](https://numpy.org/doc/stable/reference/generated/numpy.pad.html).

def pad_outer(
self,
data: Union[np.ndarray, Callable[..., np.ndarray]],
pads: Union[np.ndarray, Tuple, List],
) -> np.ndarray:
"""Simple padding wrapper to be part of the routine.

Parameters
----------
data : Union[np.ndarray, Callable[..., np.ndarray]]
[description]
pads : Union[np.ndarray, Tuple, List]
[description]

Returns
-------
[type]
[description]
Returns:
np.ndarray: Padded data.
"""
return np.pad(data, pads, mode="reflect")
# Warn user if padding was already applied before
if np.any(self._padding):
warnings.warn(
f"Padding was already applied before! Overwriting old parameters."
)

def fix_data_shape(
self, data_shape: np.ndarray, pads: Union[np.ndarray, Tuple, List]
):
"""Calculate correct padded data-shape.

Parameters
----------
data_shape : np.ndarray
[description]
pads : Union[np.ndarray, Tuple, List]
[description]

Returns
-------
[type]
[description]
"""
data_shape_new = np.array(data_shape)
for m, pad in enumerate(pads):
data_shape_new[m] += pad[0] + pad[1]
# If user provided data that is different from initialized data shape, raise an error
if np.not_equal(self.data_shape, data.shape).any():
raise ValueError(f"Provided data has a different shape (data.shape={data.shape}) than what Tiler "
f"was initialized with (self.data_shape={self.data_shape}).")

return data_shape_new
# Split the shape difference into leading and trailing padding for each axis.
# An odd difference cannot be split evenly, so the extra element goes to the end.
# The remainder must come from the shape difference itself (not from the tile overlap),
# so that pre_pad + post_pad == self._shape_diff and the padded data has exactly the
# shape the Tiler is recalculated for.
pre_pad = self._shape_diff // 2
post_pad = self._shape_diff - pre_pad

def calculate_minimal_overlap(
self,
data_shape: np.ndarray,
tile_shape: np.ndarray,
pprint: Optional[bool] = False,
) -> tuple:

# get padding
rmod = np.mod(data_shape, tile_shape)
pad_add = np.mod(tile_shape - rmod, tile_shape)
data_shape_new = data_shape + pad_add
pads = np.transpose(
[np.ceil(pad_add / 2), np.ceil(pad_add / 2) + np.mod(pad_add, 2)]
).astype("int")

# get minimal overlap
divs = (data_shape_new // tile_shape) - 1
divs[divs < 1] = 1
overlap = np.floor(pad_add / divs).astype("int")
overlap_perc = overlap / tile_shape

# pretty print
if pprint:
print(
f"Input: data_shape=\t{data_shape},\t tile_shape={tile_shape}\noverlap=\t{overlap}\noverlap_perc=\t{overlap_perc}\npads=\t\t{list(pads)}\n~~~~~~~~~~~~~~~~~~~~"
)
# Recompute the tiling for the padded data shape. The remaining settings are passed
# through explicitly; otherwise _recalculate() would fall back to its defaults
# (channel_dimension=None, mode="constant", constant_value=0.0) and silently change
# how this Tiler behaves after padding.
# NOTE(review): the assignment of self.constant_value is not visible in this diff,
# hence the getattr fallback to the documented default - confirm and simplify.
self._recalculate(
    self._new_shape,
    self.tile_shape,
    self.overlap,
    channel_dimension=self.channel_dimension,
    mode=self.mode,
    constant_value=getattr(self, "constant_value", 0.0),
)
# _recalculate() resets self._padding to zeros, so the applied padding is recorded afterwards.
self._padding = list(zip(pre_pad, post_pad))

# done?
return overlap, overlap_perc, pads
# Return padded input data
return np.pad(data, self._padding, mode)