Compute gradient norm without using clip_grad_norm_
kduxin committed Oct 17, 2022
1 parent 3d7c59d commit 4d82553
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions scripts/train.py
@@ -17,7 +17,6 @@
 from torch.optim.lr_scheduler import OneCycleLR
 from torch.cuda.amp import autocast
 from torch.cuda.amp.grad_scaler import GradScaler
-from torch.nn.utils import clip_grad_norm_
 
 from corpusit import Vocab, SkipGramDataset
 from firelang import FIREWord
@@ -164,7 +163,6 @@ def train(args):
 
     best_iter, best_simscore, best_loss = -1, 0, float("Inf")
     best_savepath = f"{args.savedir}/best"
-    grad_norm = 0.0
     for i in range(1, args.n_iters + 1):
 
         with Timer(elapsed, "prepare", sync_cuda=True):
@@ -200,7 +198,11 @@
             else:
                 steploss.backward()
             # do not clip, only for acquiring the gradient norm.
-            grad_norm = clip_grad_norm_(model.parameters(), sys.float_info.max)
+            grad_norm = (
+                torch.cat([p.grad.data.reshape(-1) for p in model.parameters()])
+                .norm()
+                .item()
+            )
 
             if args.profile:
                 logger.debug("----- backward -----")
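The replacement computes the same quantity that the removed clip_grad_norm_ call reported: with max_norm=sys.float_info.max the call rescales nothing and only returns the total L2 norm of all parameter gradients, which equals the norm of the concatenated gradient vector. A minimal sketch of that equivalence, using a hypothetical toy model that is not part of this repository:

# Sketch only: compare the old and the new way of obtaining the gradient norm.
# The Linear model below is an illustration, not code from scripts/train.py.
import sys
import torch
from torch.nn.utils import clip_grad_norm_

model = torch.nn.Linear(4, 3)
model(torch.randn(8, 4)).sum().backward()

# Old approach: with an effectively infinite max_norm, clip_grad_norm_ clips
# nothing and just returns the total L2 norm of all gradients.
norm_via_clip = float(clip_grad_norm_(model.parameters(), sys.float_info.max))

# New approach (as in this commit): flatten and concatenate every gradient,
# then take the norm of the resulting vector.
norm_via_cat = (
    torch.cat([p.grad.data.reshape(-1) for p in model.parameters()]).norm().item()
)

print(norm_via_clip, norm_via_cat)  # should agree up to floating-point rounding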
