diff --git a/config/loveda/dcswin.py b/config/loveda/dcswin.py
index 064dd96..d7ed781 100644
--- a/config/loveda/dcswin.py
+++ b/config/loveda/dcswin.py
@@ -2,8 +2,8 @@
 from geoseg.losses import *
 from geoseg.datasets.loveda_dataset import *
 from geoseg.models.DCSwin import dcswin_small
-from catalyst.contrib.nn import Lookahead
-from catalyst import utils
+from tools.utils import Lookahead
+from tools.utils import process_model_params
 
 # training hparam
 max_epoch = 30
@@ -63,7 +63,7 @@
 
 # define the optimizer
 layerwise_params = {"backbone.*": dict(lr=backbone_lr, weight_decay=backbone_weight_decay)} # 0.1xlr for backbone
-net_params = utils.process_model_params(net, layerwise_params=layerwise_params)
+net_params = process_model_params(net, layerwise_params=layerwise_params)
 base_optimizer = torch.optim.AdamW(net_params, lr=lr, weight_decay=weight_decay)
 optimizer = Lookahead(base_optimizer)
 lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=max_epoch, eta_min=1e-6)
diff --git a/config/loveda/unetformer.py b/config/loveda/unetformer.py
index 2133d04..4513f16 100644
--- a/config/loveda/unetformer.py
+++ b/config/loveda/unetformer.py
@@ -2,8 +2,8 @@
 from geoseg.losses import *
 from geoseg.datasets.loveda_dataset import *
 from geoseg.models.UNetFormer import UNetFormer
-from catalyst.contrib.nn import Lookahead
-from catalyst import utils
+from tools.utils import Lookahead
+from tools.utils import process_model_params
 
 # training hparam
 max_epoch = 30
@@ -79,7 +79,7 @@ def train_aug(img, mask):
 
 # define the optimizer
 layerwise_params = {"backbone.*": dict(lr=backbone_lr, weight_decay=backbone_weight_decay)}
-net_params = utils.process_model_params(net, layerwise_params=layerwise_params)
+net_params = process_model_params(net, layerwise_params=layerwise_params)
 base_optimizer = torch.optim.AdamW(net_params, lr=lr, weight_decay=weight_decay)
 optimizer = Lookahead(base_optimizer)
 lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=max_epoch, eta_min=1e-6)
diff --git a/config/potsdam/dcswin.py b/config/potsdam/dcswin.py
index af8a0da..7b3c5a5 100644
--- a/config/potsdam/dcswin.py
+++ b/config/potsdam/dcswin.py
@@ -2,8 +2,8 @@
 from geoseg.losses import *
 from geoseg.datasets.potsdam_dataset import *
 from geoseg.models.DCSwin import dcswin_small
-from catalyst.contrib.nn import Lookahead
-from catalyst import utils
+from tools.utils import Lookahead
+from tools.utils import process_model_params
 from timm.scheduler.poly_lr import PolyLRScheduler
 
 # training hparam
@@ -96,7 +96,7 @@ def val_aug(img, mask):
 
 # define the optimizer
 layerwise_params = {"backbone.*": dict(lr=backbone_lr, weight_decay=backbone_weight_decay)}
-net_params = utils.process_model_params(net, layerwise_params=layerwise_params)
+net_params = process_model_params(net, layerwise_params=layerwise_params)
 base_optimizer = torch.optim.AdamW(net_params, lr=lr, weight_decay=weight_decay)
 optimizer = Lookahead(base_optimizer)
 lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)
diff --git a/config/potsdam/ftunetformer.py b/config/potsdam/ftunetformer.py
index 9e668ab..5a5cbd8 100644
--- a/config/potsdam/ftunetformer.py
+++ b/config/potsdam/ftunetformer.py
@@ -2,8 +2,8 @@
 from geoseg.losses import *
 from geoseg.datasets.potsdam_dataset import *
 from geoseg.models.FTUNetFormer import ft_unetformer
-from catalyst.contrib.nn import Lookahead
-from catalyst import utils
+from tools.utils import Lookahead
+from tools.utils import process_model_params
 
 # training hparam
 max_epoch = 45
@@ -64,7 +64,7 @@
 
 # define the optimizer
 layerwise_params = {"backbone.*": dict(lr=backbone_lr, weight_decay=backbone_weight_decay)}
-net_params = utils.process_model_params(net, layerwise_params=layerwise_params)
+net_params = process_model_params(net, layerwise_params=layerwise_params)
 base_optimizer = torch.optim.AdamW(net_params, lr=lr, weight_decay=weight_decay)
 optimizer = Lookahead(base_optimizer)
 lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=15, T_mult=2)
\ No newline at end of file
diff --git a/config/potsdam/unetformer.py b/config/potsdam/unetformer.py
index 4490d36..90031b3 100644
--- a/config/potsdam/unetformer.py
+++ b/config/potsdam/unetformer.py
@@ -2,8 +2,8 @@
 from geoseg.losses import *
 from geoseg.datasets.potsdam_dataset import *
 from geoseg.models.UNetFormer import UNetFormer
-from catalyst.contrib.nn import Lookahead
-from catalyst import utils
+from tools.utils import Lookahead
+from tools.utils import process_model_params
 
 # training hparam
 max_epoch = 45
@@ -17,8 +17,6 @@
 num_classes = len(CLASSES)
 classes = CLASSES
 
-test_time_aug = 'd4'
-output_mask_dir, output_mask_rgb_dir = None, None
 weights_name = "unetformer-r18-768crop-ms-e45"
 weights_path = "model_weights/potsdam/{}".format(weights_name)
 test_weights_name = "unetformer-r18-768crop-ms-e45"
@@ -64,7 +62,7 @@
 
 # define the optimizer
 layerwise_params = {"backbone.*": dict(lr=backbone_lr, weight_decay=backbone_weight_decay)}
-net_params = utils.process_model_params(net, layerwise_params=layerwise_params)
+net_params = process_model_params(net, layerwise_params=layerwise_params)
 base_optimizer = torch.optim.AdamW(net_params, lr=lr, weight_decay=weight_decay)
 optimizer = Lookahead(base_optimizer)
 lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=15, T_mult=2)
diff --git a/config/uavid/unetformer.py b/config/uavid/unetformer.py
index f957b5f..44ad9cc 100644
--- a/config/uavid/unetformer.py
+++ b/config/uavid/unetformer.py
@@ -6,8 +6,8 @@
 from geoseg.losses import *
 from geoseg.datasets.uavid_dataset import *
 from geoseg.models.UNetFormer import UNetFormer
-from catalyst.contrib.nn import Lookahead
-from catalyst import utils
+from tools.utils import Lookahead
+from tools.utils import process_model_params
 
 # training hparam
 max_epoch = 40
@@ -66,7 +66,7 @@
 
 # define the optimizer
 layerwise_params = {"backbone.*": dict(lr=backbone_lr, weight_decay=backbone_weight_decay)}
-net_params = utils.process_model_params(net, layerwise_params=layerwise_params)
+net_params = process_model_params(net, layerwise_params=layerwise_params)
 base_optimizer = torch.optim.AdamW(net_params, lr=lr, weight_decay=weight_decay)
 optimizer = Lookahead(base_optimizer)
 lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=max_epoch)
diff --git a/config/vaihingen/dcswin.py b/config/vaihingen/dcswin.py
index 0df1c6c..36e500a 100644
--- a/config/vaihingen/dcswin.py
+++ b/config/vaihingen/dcswin.py
@@ -2,8 +2,8 @@
 from geoseg.losses import *
 from geoseg.datasets.vaihingen_dataset import *
 from geoseg.models.DCSwin import dcswin_small
-from catalyst.contrib.nn import Lookahead
-from catalyst import utils
+from tools.utils import Lookahead
+from tools.utils import process_model_params
 
 # training hparam
 max_epoch = 70
@@ -96,7 +96,7 @@ def val_aug(img, mask):
 
 # define the optimizer
 layerwise_params = {"backbone.*": dict(lr=backbone_lr, weight_decay=backbone_weight_decay)}
-net_params = utils.process_model_params(net, layerwise_params=layerwise_params)
+net_params = process_model_params(net, layerwise_params=layerwise_params)
 base_optimizer = torch.optim.AdamW(net_params, lr=lr, weight_decay=weight_decay)
 optimizer = Lookahead(base_optimizer)
 lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)
diff --git a/config/vaihingen/ftunetformer.py b/config/vaihingen/ftunetformer.py
index 574db17..a86e54b 100644
--- a/config/vaihingen/ftunetformer.py
+++ b/config/vaihingen/ftunetformer.py
@@ -2,8 +2,8 @@
 from geoseg.losses import *
 from geoseg.datasets.vaihingen_dataset import *
 from geoseg.models.FTUNetFormer import ft_unetformer
-from catalyst.contrib.nn import Lookahead
-from catalyst import utils
+from tools.utils import Lookahead
+from tools.utils import process_model_params
 
 # training hparam
 max_epoch = 45
@@ -64,7 +64,7 @@
 
 # define the optimizer
 layerwise_params = {"backbone.*": dict(lr=backbone_lr, weight_decay=backbone_weight_decay)}
-net_params = utils.process_model_params(net, layerwise_params=layerwise_params)
+net_params = process_model_params(net, layerwise_params=layerwise_params)
 base_optimizer = torch.optim.AdamW(net_params, lr=lr, weight_decay=weight_decay)
 optimizer = Lookahead(base_optimizer)
 lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=15, T_mult=2)
diff --git a/config/vaihingen/unetformer.py b/config/vaihingen/unetformer.py
index 3d451a1..e79c3e5 100644
--- a/config/vaihingen/unetformer.py
+++ b/config/vaihingen/unetformer.py
@@ -2,8 +2,8 @@
 from geoseg.losses import *
 from geoseg.datasets.vaihingen_dataset import *
 from geoseg.models.UNetFormer import UNetFormer
-from catalyst.contrib.nn import Lookahead
-from catalyst import utils
+from tools.utils import Lookahead
+from tools.utils import process_model_params
 
 # training hparam
 max_epoch = 105
@@ -62,7 +62,7 @@
 
 # define the optimizer
 layerwise_params = {"backbone.*": dict(lr=backbone_lr, weight_decay=backbone_weight_decay)}
-net_params = utils.process_model_params(net, layerwise_params=layerwise_params)
+net_params = process_model_params(net, layerwise_params=layerwise_params)
 base_optimizer = torch.optim.AdamW(net_params, lr=lr, weight_decay=weight_decay)
 optimizer = Lookahead(base_optimizer)
 lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=15, T_mult=2)
diff --git a/requirements.txt b/requirements.txt
index 16e46fd..e723e65 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,4 @@
 timm
-catalyst==20.09
 lightning
 albumentations
 ttach
@@ -9,4 +8,6 @@ opencv-python
 scipy
 matplotlib
 einops
-addict
\ No newline at end of file
+addict
+ftfy
+regex
\ No newline at end of file
diff --git a/tools/utils.py b/tools/utils.py
new file mode 100644
index 0000000..8b209b9
--- /dev/null
+++ b/tools/utils.py
@@ -0,0 +1,196 @@
+from typing import Any, Callable, Dict, List, Optional, Union
+import collections
+from collections import defaultdict
+import copy
+import os
+import re
+import numpy as np
+import torch
+from torch import nn, Tensor
+from torch.optim import Optimizer
+
+def merge_dicts(*dicts: dict) -> dict:
+    """Recursive dict merge.
+    Instead of updating only top-level keys,
+    ``merge_dicts`` recurses down into dicts nested
+    to an arbitrary depth, updating keys.
+
+    Args:
+        *dicts: several dictionaries to merge
+
+    Returns:
+        dict: deep-merged dictionary
+    """
+    assert len(dicts) > 1
+
+    dict_ = copy.deepcopy(dicts[0])
+
+    for merge_dict in dicts[1:]:
+        merge_dict = merge_dict or {}
+        for k in merge_dict:
+            if (
+                k in dict_
+                and isinstance(dict_[k], dict)
+                and isinstance(merge_dict[k], collections.abc.Mapping)
+            ):
+                dict_[k] = merge_dicts(dict_[k], merge_dict[k])
+            else:
+                dict_[k] = merge_dict[k]
+
+    return dict_
+
+
+def process_model_params(
+    model: nn.Module,
+    layerwise_params: Dict[str, dict] = None,
+    no_bias_weight_decay: bool = True,
+    lr_scaling: float = 1.0,
+) -> List[Union[torch.nn.Parameter, dict]]:
+    """Builds per-parameter option groups for ``torch.optim.Optimizer``.
+
+    Args:
+        model (torch.nn.Module): Model to process
+        layerwise_params (Dict): Order-sensitive dict whose keys are
+            regex patterns and whose values are layer-wise options
+            applied to parameters matching the pattern
+        no_bias_weight_decay (bool): If True, sets ``weight_decay`` to 0
+            for all ``bias`` parameters in the model
+        lr_scaling (float): layer-wise learning rate scaling factor;
+            1.0 leaves learning rates unchanged
+
+    Returns:
+        list: parameter groups for an optimizer
+
+    Example::
+
+        # 0.1x lr and a custom weight decay for all backbone parameters
+        layerwise_params = {"backbone.*": dict(lr=1e-5, weight_decay=1e-4)}
+        net_params = process_model_params(net, layerwise_params=layerwise_params)
+        optimizer = torch.optim.AdamW(net_params, lr=1e-4)
+
+    """
+    params = list(model.named_parameters())
+    layerwise_params = layerwise_params or collections.OrderedDict()
+
+    model_params = []
+    for name, parameters in params:
+        options = {}
+        for pattern, pattern_options in layerwise_params.items():
+            if re.match(pattern, name) is not None:
+                # all new LR rules write on top of the old ones
+                options = merge_dicts(options, pattern_options)
+
+        # no bias decay from https://arxiv.org/abs/1812.01187
+        if no_bias_weight_decay and name.endswith("bias"):
+            options["weight_decay"] = 0.0
+
+        # lr linear scaling from https://arxiv.org/pdf/1706.02677.pdf
+        if "lr" in options:
+            options["lr"] *= lr_scaling
+
+        model_params.append({"params": parameters, **options})
+
+    return model_params
+
+
+class Lookahead(Optimizer):
+    """Implements Lookahead algorithm.
+
+    It has been proposed in `Lookahead Optimizer: k steps forward,
+    1 step back`_.
+
+    Adapted from:
+    https://github.com/alphadl/lookahead.pytorch (MIT License)
+
+    .. _`Lookahead Optimizer\: k steps forward, 1 step back`:
+        https://arxiv.org/abs/1907.08610
+    """
+
+    def __init__(self, optimizer: Optimizer, k: int = 5, alpha: float = 0.5):
+        """Wraps ``optimizer``, syncing its slow weights every ``k`` steps with factor ``alpha``."""
+        self.optimizer = optimizer
+        self.k = k
+        self.alpha = alpha
+        self.param_groups = self.optimizer.param_groups
+        self.defaults = self.optimizer.defaults
+        self.state = defaultdict(dict)
+        self.fast_state = self.optimizer.state
+        for group in self.param_groups:
+            group["counter"] = 0
+
+    def update(self, group):
+        """Interpolates the slow weights of ``group`` towards its fast weights and copies them back."""
+        for fast in group["params"]:
+            param_state = self.state[fast]
+            if "slow_param" not in param_state:
+                param_state["slow_param"] = torch.zeros_like(fast.data)
+                param_state["slow_param"].copy_(fast.data)
+            slow = param_state["slow_param"]
+            slow += (fast.data - slow) * self.alpha
+            fast.data.copy_(slow)
+
+    def update_lookahead(self):
+        """Applies the slow-weight update to every parameter group."""
+        for group in self.param_groups:
+            self.update(group)
+
+    def step(self, closure: Optional[Callable] = None):
+        """Performs a single optimization step.
+
+        Args:
+            closure (callable, optional): A closure that reevaluates
+                the model and returns the loss.
+
+        Returns:
+            the loss returned by the wrapped optimizer's ``step``
+        """
+        loss = self.optimizer.step(closure)
+        for group in self.param_groups:
+            if group["counter"] == 0:
+                self.update(group)
+            group["counter"] += 1
+            if group["counter"] >= self.k:
+                group["counter"] = 0
+        return loss
+
+    def state_dict(self):
+        """Returns a state dict holding both fast (inner) and slow (Lookahead) state."""
+        fast_state_dict = self.optimizer.state_dict()
+        slow_state = {
+            (id(k) if isinstance(k, torch.Tensor) else k): v
+            for k, v in self.state.items()
+        }
+        fast_state = fast_state_dict["state"]
+        param_groups = fast_state_dict["param_groups"]
+        return {
+            "fast_state": fast_state,
+            "slow_state": slow_state,
+            "param_groups": param_groups,
+        }
+
+    def load_state_dict(self, state_dict):
+        """Restores both the slow Lookahead state and the inner optimizer state."""
+        slow_state_dict = {
+            "state": state_dict["slow_state"],
+            "param_groups": state_dict["param_groups"],
+        }
+        fast_state_dict = {
+            "state": state_dict["fast_state"],
+            "param_groups": state_dict["param_groups"],
+        }
+        super(Lookahead, self).load_state_dict(slow_state_dict)
+        self.optimizer.load_state_dict(fast_state_dict)
+        self.fast_state = self.optimizer.state
+
+    def add_param_group(self, param_group):
+        """Adds a parameter group to the inner optimizer and resets its Lookahead counter."""
+        param_group["counter"] = 0
+        self.optimizer.add_param_group(param_group)
+
+    @classmethod
+    def get_from_params(
+        cls, params: Dict, base_optimizer_params: Dict = None, **kwargs,
+    ) -> "Lookahead":
+        """Builds a ``Lookahead`` from Catalyst registry params (still requires ``catalyst``)."""
+        from catalyst.registry import OPTIMIZERS
+
+        base_optimizer = OPTIMIZERS.get_from_params(
+            params=params, **base_optimizer_params
+        )
+        optimizer = cls(optimizer=base_optimizer, **kwargs)
+        return optimizer
\ No newline at end of file
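
With Catalyst dropped from requirements.txt, `Lookahead` and `process_model_params` now come from the vendored `tools/utils.py`, so a quick standalone check that they still build the expected parameter groups and step correctly can be useful. The sketch below is only an illustration and not part of the patch: `ToyNet` and the literal learning-rate values are invented for the example, but the call pattern mirrors the updated configs.

```python
import torch
from torch import nn

from tools.utils import Lookahead, process_model_params


class ToyNet(nn.Module):
    """Tiny stand-in model with a 'backbone' so the regex has something to match."""

    def __init__(self):
        super().__init__()
        self.backbone = nn.Linear(8, 8)
        self.head = nn.Linear(8, 2)

    def forward(self, x):
        return self.head(self.backbone(x))


net = ToyNet()

# Same pattern the configs use: a reduced lr / custom weight decay for backbone parameters.
layerwise_params = {"backbone.*": dict(lr=1e-5, weight_decay=1e-4)}
net_params = process_model_params(net, layerwise_params=layerwise_params)

# process_model_params emits one group per named parameter, in order.
for group, (name, _) in zip(net_params, net.named_parameters()):
    if name.startswith("backbone."):
        assert group["lr"] == 1e-5           # layer-wise lr applied
    if name.endswith("bias"):
        assert group["weight_decay"] == 0.0  # no-bias-decay rule applied

base_optimizer = torch.optim.AdamW(net_params, lr=6e-4, weight_decay=2.5e-4)
optimizer = Lookahead(base_optimizer)

# One forward/backward/step round trip through the wrapper.
loss = net(torch.randn(4, 8)).sum()
loss.backward()
optimizer.step()
```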