Refactored docstring to google style
number1roy authored and kurisusnowdeng committed Mar 29, 2022
1 parent 53b1b6e commit ec5086c
Showing 94 changed files with 3,484 additions and 3,077 deletions.
36 changes: 21 additions & 15 deletions colossalai/amp/__init__.py
@@ -12,21 +12,27 @@


def convert_to_amp(model: nn.Module, optimizer: Optimizer, criterion: _Loss, mode: AMP_TYPE, amp_config: Config = None):
"""A helper function to wrap training components with Torch AMP modules
:param model: your model object
:type model: :class:`torch.nn.Module`
:param optimizer: your optimizer object
:type optimizer: :class:`torch.optim.Optimizer`
:param criterion: your loss function object
:type criterion: :class:`torch.nn.modules.loss._Loss`
:param mode: amp mode
:type mode: :class:`colossalai.amp.AMP_TYPE`
:param amp_config: configuration for different amp modes
:type amp_config: :class:`colossalai.context.Config` or dict
:return: (model, optimizer, criterion)
:rtype: Tuple
"""A helper function to wrap training components with Torch AMP modules.
Args:
model (:class:`torch.nn.Module`): your model object.
optimizer (:class:`torch.optim.Optimizer`): your optimizer object.
criterion (:class:`torch.nn.modules.loss._Loss`): your loss function object.
mode (:class:`colossalai.amp.AMP_TYPE`): amp mode.
amp_config (:class:`colossalai.context.Config` or dict): configuration for different amp modes.
Returns:
A tuple (model, optimizer, criterion).
Note:
``amp_config`` may vary depending on which mode you choose. You should check the corresponding amp mode
for more details about ``amp_config``.
For ``apex_amp``, please check
`apex_amp config <https://nvidia.github.io/apex/amp.html?highlight=apex%20amp>`_.
For ``naive_amp``, please check
`naive_amp config <https://github.com/hpcaitech/ColossalAI/blob/main/colossalai/amp/naive_amp/_fp16_optimizer.py#L42>`_.
For ``torch_amp``, please check
`torch_amp config <https://github.com/pytorch/pytorch/blob/master/torch/cuda/amp/grad_scaler.py#L97>`_.
"""
assert isinstance(mode, AMP_TYPE), \
f'expected the argument mode be AMP_TYPE, but got {type(mode)}'
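For readers skimming the new docstring, a minimal usage sketch (not part of the commit itself): it assumes ``AMP_TYPE`` is importable from ``colossalai.amp`` alongside ``convert_to_amp`` and leaves ``amp_config`` at the mode's defaults.

import torch
import torch.nn as nn

from colossalai.amp import AMP_TYPE, convert_to_amp

# ordinary PyTorch training components
model = nn.Linear(32, 8)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

# wrap all three for mixed precision; amp_config is optional and its
# accepted keys depend on the chosen mode (see the Note above)
model, optimizer, criterion = convert_to_amp(model, optimizer, criterion, mode=AMP_TYPE.TORCH)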
34 changes: 25 additions & 9 deletions colossalai/amp/apex_amp/__init__.py
@@ -4,17 +4,33 @@


def convert_to_apex_amp(model: nn.Module, optimizer: Optimizer, amp_config):
"""A helper function to wrap training components with Apex AMP modules
r"""A helper function to wrap training components with Apex AMP modules
:param model: your model object
:type model: :class:`torch.nn.Module`
:param optimizer: your optimizer object
:type optimizer: :class:`torch.optim.Optimizer`
:param amp_config: configuration for nvidia apex
:type amp_config: :class:`colossalai.context.Config` or dict
Args:
model (:class:`torch.nn.Module`): your model object.
optimizer (:class:`torch.optim.Optimizer`): your optimizer object.
amp_config (:class:`colossalai.context.Config` or dict): configuration for initializing apex_amp.
:return: (model, optimizer)
:rtype: Tuple
The ``amp_config`` should include parameters below:
::
enabled (bool, optional, default=True)
opt_level (str, optional, default="O1")
cast_model_type (``torch.dtype``, optional, default=None)
patch_torch_functions (bool, optional, default=None)
keep_batchnorm_fp32 (bool or str, optional, default=None)
master_weights (bool, optional, default=None)
loss_scale (float or str, optional, default=None)
cast_model_outputs (torch.dtype, optional, default=None)
num_losses (int, optional, default=1)
verbosity (int, default=1)
min_loss_scale (float, default=None)
max_loss_scale (float, default=2.**24)
Returns:
Tuple: A tuple (model, optimizer).
For more details about ``amp_config``, refer to `apex_amp config <https://nvidia.github.io/apex/amp.html?highlight=apex%20amp>`_.
"""
import apex.amp as apex_amp
model, optimizer = apex_amp.initialize(model, optimizer, **amp_config)
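A hedged sketch of passing the options above to ``convert_to_apex_amp``; it assumes NVIDIA Apex is installed, a CUDA device is available, and that a plain dict is accepted for ``amp_config`` (it is unpacked into ``apex.amp.initialize``). The option values are illustrative, not recommended defaults.

import torch
import torch.nn as nn

from colossalai.amp.apex_amp import convert_to_apex_amp

model = nn.Linear(32, 8).cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# only a subset of the options listed above needs to be given;
# anything omitted keeps apex's own default
amp_config = dict(opt_level='O1', loss_scale='dynamic', verbosity=1)
model, optimizer = convert_to_apex_amp(model, optimizer, amp_config)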
11 changes: 5 additions & 6 deletions colossalai/amp/apex_amp/apex_amp.py
@@ -21,19 +21,18 @@ class ApexAMPOptimizer(ColossalaiOptimizer):
def backward(self, loss: Tensor):
"""Backward pass to get all gradients
:param loss: Loss computed by a loss function
:type loss: torch.Tensor
Args:
loss (torch.Tensor): Loss computed by a loss function
"""
with apex_amp.scale_loss(loss, self.optim) as scaled_loss:
scaled_loss.backward()

def clip_grad_norm(self, model: nn.Module, max_norm: float):
"""Clip gradients' norm
:param model: Your model object
:type model: torch.nn.Module
:param max_norm: The max norm value for gradient clipping
:type max_norm: float
Args:
model (torch.nn.Module): Your model object
max_norm (float): The max norm value for gradient clipping
"""
if max_norm > 0:
clip_grad_norm_fp32(apex_amp.master_params(self.optim), max_norm)
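Continuing the previous apex sketch, a brief illustration of how ``backward`` and ``clip_grad_norm`` above are meant to be called in one training step; the tensors are placeholders, and the wrapped optimizer is assumed to be the ``ApexAMPOptimizer`` returned by the apex conversion helper.

data = torch.randn(4, 32).cuda()
target = torch.randn(4, 8).cuda()

optimizer.zero_grad()
loss = torch.nn.functional.mse_loss(model(data), target)
optimizer.backward(loss)                        # scales the loss via apex before backward
optimizer.clip_grad_norm(model, max_norm=1.0)   # clips fp32 master params; skipped if max_norm == 0
optimizer.step()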
30 changes: 20 additions & 10 deletions colossalai/amp/naive_amp/__init__.py
@@ -4,20 +4,30 @@
from colossalai.utils import is_no_pp_or_last_stage
from .naive_amp import NaiveAMPOptimizer, NaiveAMPModel
from .grad_scaler import DynamicGradScaler, ConstantGradScaler
from ._fp16_optimizer import FP16Optimizer


def convert_to_naive_amp(model: nn.Module, optimizer: Optimizer, amp_config):
"""A helper function to wrap training components with naive AMP modules
"""A helper function to wrap training components with naive AMP modules. In this mode,
we forcibly cast the model weights and inputs to FP16, and cast the model outputs to FP32 to calculate loss,
which is equivalent to Apex O3.
:param model: your model object
:type model: :class:`torch.nn.Module`
:param optimizer: your optimizer object
:type optimizer: :class:`torch.optim.Optimizer`
:param amp_config: configuration for naive mode amp
:type amp_config: :class:`colossalai.context.Config` or dict
Args:
model (:class:`torch.nn.Module`): your model object
optimizer (:class:`torch.optim.Optimizer`): your optimizer object
amp_config (:class:`colossalai.context.Config` or dict): configuration for naive mode amp.
:return: (model, optimizer)
:rtype: Tuple
The ``amp_config`` should contain parameters below:
::
verbose (bool, optional): if set to `True`, will print debug info (Default: False).
clip_grad_norm (float, optional): clip gradients with this global L2 norm (Default: 0).
Note that clipping is ignored if clip_grad_norm == 0.
dynamic_grad_scale (bool): whether to use dynamic grad scaler.
Returns:
Tuple: A tuple (model, optimizer).
"""
if isinstance(model, nn.ModuleList):
# interleaved pipeline
@@ -46,4 +56,4 @@ def convert_to_naive_amp(model: nn.Module, optimizer: Optimizer, amp_config):
return model, optimizer


__all__ = ['convert_to_naive_amp', 'NaiveAMPOptimizer']
__all__ = ['convert_to_naive_amp', 'NaiveAMPOptimizer', 'FP16Optimizer']
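A hedged usage sketch for naive AMP built from the parameter list above; it assumes a plain dict is accepted in place of :class:`colossalai.context.Config`, a CUDA device is available, and the colossalai parallel context has already been initialized (e.g. via ``colossalai.launch``). Omitted keys keep their defaults.

import torch
import torch.nn as nn

from colossalai.amp.naive_amp import convert_to_naive_amp

model = nn.Linear(32, 8).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

# the keys mirror the docstring above: verbose, clip_grad_norm, dynamic_grad_scale
amp_config = dict(clip_grad_norm=1.0, dynamic_grad_scale=True, verbose=False)
model, optimizer = convert_to_naive_amp(model, optimizer, amp_config)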
27 changes: 8 additions & 19 deletions colossalai/amp/naive_amp/_fp16_optimizer.py
@@ -41,25 +41,14 @@ def _multi_tensor_copy_this_to_that(this, that, overflow_buf=None):

class FP16Optimizer(Optimizer):
"""Float16 optimizer for fp16 and bf16 data types.
:param optimizer: base optimizer such as Adam or SGD
:type optimizer: torch.optim.Optimizer
:param clip_grad: clip gradients with this global L2 norm. Note that clipping is ignored if clip_grad == 0
:type clip_grad: float
:param log_num_zeros_in_grad: return number of zeros in the gradients.
:type log_num_zeros_in_grad: bool
:param initial_scale: initial scale of gradient scaler
:type initial_scale: int
:param growth_factor: the growth rate of loss scale
:type growth_factor: int
:param backoff_factor: the decrease rate of loss scale
:type backoff_factor: float
:param hysteresis: delay shift in dynamic loss scaling
:type hysteresis: int
:param max_scale: maximum loss scale allowed
:type max_scale: int
:param verbose: if set to `True`, will print debug info
:type verbose: bool
Args:
optimizer (torch.optim.Optimizer): base optimizer such as Adam or SGD
grad_scaler (BaseGradScaler): gradient scaler, chosen from
``constant_grad_scaler`` or ``dynamic_grad_scaler``.
clip_grad_norm (float, optional): clip gradients with this global L2 norm. Default 0.
Note that clipping is ignored if clip_grad_norm == 0.
verbose (bool, optional): if set to `True`, will print debug info. Default False.
"""

def __init__(self,
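A constructor sketch built only from the Args list above, since the full ``__init__`` signature is collapsed in this diff; the keyword names, the ``DynamicGradScaler`` import path, and its ``initial_scale`` argument are assumptions to be checked against the file.

import torch
import torch.nn as nn

from colossalai.amp.naive_amp import FP16Optimizer
from colossalai.amp.naive_amp.grad_scaler import DynamicGradScaler

model = nn.Linear(32, 8).cuda().half()
base_optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# grad_scaler is chosen from the constant / dynamic scalers exported by this package
grad_scaler = DynamicGradScaler(initial_scale=2**16)
fp16_optimizer = FP16Optimizer(base_optimizer, grad_scaler=grad_scaler, clip_grad_norm=1.0, verbose=False)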
27 changes: 21 additions & 6 deletions colossalai/amp/naive_amp/naive_amp.py
@@ -18,11 +18,15 @@
class NaiveAMPOptimizer(ColossalaiOptimizer):
"""A wrapper class for optimizer to cast all parameters to fp16
:param optim: A normal optimizer like Adam or SGD
:param args: Args used to initialize FP16 optimizer
:param kwargs: Kwargs used to initialize FP16 optimizer
:type optim: torch.optim.Optimizer
Args:
optim (torch.optim.Optimizer): A normal optimizer like Adam or SGD.
grad_scaler (BaseGradScaler): gradient scaler, chosen from
``constant_grad_scaler`` or ``dynamic_grad_scaler``.
clip_grad_norm (float, optional): clip gradients with this global L2 norm. Default 0.
verbose (bool, optional): if set to `True`, will print debug info. Default False.
Note:
clipping is ignored if ``clip_grad_norm`` equals 0.
"""

def __init__(self, optim: Optimizer, *args, **kwargs):
@@ -40,8 +44,19 @@ def clip_grad_norm(self, model: nn.Module, max_norm: float):


class NaiveAMPModel(nn.Module):
"""A wrapper class for model to cast the model into fp16 and
r"""A wrapper class for model to cast the model into fp16 and
automatically cast the input and output
Args:
model (torch.nn.Module): torch.nn.Module to be wrapped.
output_to_fp32 (bool, optional): Whether to cast the output of this module to fp32. (Default: True)
parallel_mode (:class:`colossalai.context.ParallelMode`): Parallel group mode used in this module.
(Default: ``ParallelMode.DATA``)
sync_buffer (bool, optional): whether to synchronize buffers. (Default: True)
Note:
The ``parallel_mode`` should be one of the modes defined in ``ParallelMode``. More details about ``ParallelMode`` can be found
in `parallel_mode <https://github.com/hpcaitech/ColossalAI/blob/main/colossalai/context/parallel_mode.py>`_.
"""

def __init__(self,
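A hedged sketch of wrapping a module directly with ``NaiveAMPModel``, using only the arguments documented above; in practice ``convert_to_naive_amp`` performs this wrapping, and a properly initialized parallel context is assumed whenever ``sync_buffer`` is left enabled.

import torch.nn as nn

from colossalai.amp.naive_amp import NaiveAMPModel

# cast parameters and inputs to fp16, return fp32 outputs for the loss;
# buffer synchronization is skipped to keep the sketch single-process
wrapped = NaiveAMPModel(nn.Linear(32, 8),
                        output_to_fp32=True,
                        sync_buffer=False)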
31 changes: 19 additions & 12 deletions colossalai/amp/torch_amp/__init__.py
@@ -10,18 +10,25 @@ def convert_to_torch_amp(model: nn.Module,
optimizer: Optimizer,
criterion: Optional[_Loss] = None,
amp_config: Optional[Config] = None):
"""A helper function to wrap training components with Torch AMP modules
:param model: your model object
:type model: :class:`torch.nn.Module`
:param optimizer: your optimizer object
:type optimizer: :class:`torch.optim.Optimizer`
:param criterion: your loss function object
:type criterion: :class:`torch.nn.modules.loss._Loss`, optional
:param amp_config: configuration for different amp modes
:type amp_config: :class:`colossalai.context.Config` or dict, optional
:return: (model, optimizer, criterion)
:rtype: Tuple
"""A helper function to wrap training components with Pytorch AMP modules
Args:
model (:class:`torch.nn.Module`): your model object.
optimizer (:class:`torch.optim.Optimizer`): your optimizer object
criterion (:class:`torch.nn.modules.loss._Loss`, optional): your loss function object
amp_config (:class:`colossalai.context.Config` or dict, optional): configuration for Pytorch AMP.
The ``amp_config`` should include parameters below:
::
init_scale (float, optional, default=2.**16)
growth_factor (float, optional, default=2.0)
backoff_factor (float, optional, default=0.5)
growth_interval (int, optional, default=2000)
enabled (bool, optional, default=True)
Returns:
A tuple (model, optimizer, criterion)
"""
model = TorchAMPModel(model)
if amp_config is None:
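A usage sketch mirroring the option list above; the keys map onto ``torch.cuda.amp.GradScaler`` arguments, and a plain dict is assumed to be accepted in place of :class:`colossalai.context.Config`.

import torch
import torch.nn as nn

from colossalai.amp.torch_amp import convert_to_torch_amp

model = nn.Linear(32, 8).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

# forwarded to the underlying gradient scaler; omitted keys keep PyTorch defaults
amp_config = dict(init_scale=2.**16, growth_interval=2000)
model, optimizer, criterion = convert_to_torch_amp(model, optimizer, criterion, amp_config=amp_config)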
35 changes: 20 additions & 15 deletions colossalai/amp/torch_amp/torch_amp.py
@@ -14,13 +14,19 @@


class TorchAMPOptimizer(ColossalaiOptimizer):
"""A wrapper class which integrate pytorch amp with an optimizer
:param optim: A normal optimizer like Adam or SGD
:param args: Args used to initialize gradient scaler
:param kwargs: Kwargs used to initialize gradient scaler
:type optim: torch.optim.Optimizer
"""A wrapper class which integrate Pytorch AMP with an optimizer
Args:
optim (torch.optim.Optimizer): A normal optimizer like Adam or SGD.
init_scale (float, optional, default=2.**16): Initial scale factor.
growth_factor (float, optional, default=2.0): Factor by which the scale is multiplied during
:meth:`update` if no inf/NaN gradients occur for ``growth_interval`` consecutive iterations.
backoff_factor (float, optional, default=0.5): Factor by which the scale is multiplied during
:meth:`update` if inf/NaN gradients occur in an iteration.
growth_interval (int, optional, default=2000): Number of consecutive iterations without inf/NaN gradients
that must occur for the scale to be multiplied by ``growth_factor``.
enabled (bool, optional, default=True): If ``False``, disables gradient scaling. :meth:`step` simply
invokes the underlying ``optimizer.step()``, and other methods become no-ops.
"""

def __init__(self, optim: Optimizer, *args, **kwargs):
Expand All @@ -30,8 +36,8 @@ def __init__(self, optim: Optimizer, *args, **kwargs):
def backward(self, loss: Tensor):
"""Backward with torch amp gradient scaler
:param loss: Loss computed by a loss function
:type loss: torch.Tensor
Args:
loss (torch.Tensor): Loss computed by a loss function
"""
self.scaler.scale(loss).backward()

@@ -44,10 +50,9 @@ def step(self):
def clip_grad_norm(self, model: nn.Module, max_norm: float):
"""Apply gradient clipping to the model parameters
:param model: Your model object
:type model: torch.nn.Module
:param max_norm: Max norm value for gradient clipping
:type max_norm: float
Args:
model (torch.nn.Module): Your model object
max_norm (float): Max norm value for gradient clipping
"""
if max_norm > 0.0:
self.scaler.unscale_(self.optim)
@@ -71,8 +76,8 @@ def forward(self, *args, **kwargs):
class TorchAMPLoss(nn.Module):
"""A wrapper class for a criterion object which computes the loss in mixed-precision context
:param loss: A loss function object
:type loss: torch.nn.modules.loss._Loss
Args:
loss (torch.nn.modules.loss._Loss): A loss function object
"""

def __init__(self, loss: _Loss):
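To close the section, a hedged end-to-end sketch of one training step built directly from the wrappers in this file (``TorchAMPModel``, ``TorchAMPOptimizer``, ``TorchAMPLoss``); normally ``convert_to_torch_amp`` assembles these for you, ``init_scale`` is one of the GradScaler options listed above, and ``zero_grad`` is assumed to be forwarded to the inner optimizer by ``ColossalaiOptimizer``.

import torch
import torch.nn as nn

from colossalai.amp.torch_amp.torch_amp import TorchAMPLoss, TorchAMPModel, TorchAMPOptimizer

model = TorchAMPModel(nn.Linear(32, 8).cuda())
optimizer = TorchAMPOptimizer(torch.optim.SGD(model.parameters(), lr=1e-3), init_scale=2.**16)
criterion = TorchAMPLoss(nn.MSELoss())

data = torch.randn(4, 32).cuda()
target = torch.randn(4, 8).cuda()

optimizer.zero_grad()
loss = criterion(model(data), target)           # forward and loss run under autocast
optimizer.backward(loss)                        # scaled backward via the gradient scaler
optimizer.clip_grad_norm(model, max_norm=1.0)   # unscales first; skipped when max_norm == 0
optimizer.step()                                # scaler-aware step and scale update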