Remove calls to profile model_forward (Lightning-AI#12032)
Co-authored-by: ananthsub <ananth.subramaniam@gmail.com>
daniellepintz and ananthsub authored Feb 21, 2022
1 parent e50653d commit 60fe152
Showing 4 changed files with 38 additions and 42 deletions.
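The change itself is mechanical: the forward pass already executes inside the `training_step` hook, which the profiler times on its own, so the nested `model_forward` region timed the same work twice. After this commit, forward time is simply reported under the profiler's `training_step` entry. As a minimal sketch of how to see that report with the standard `Trainer` profiler API (`TinyModel` and `RandomDataset` are names invented here for illustration):

    import torch
    from torch.utils.data import DataLoader, Dataset

    from pytorch_lightning import LightningModule, Trainer


    class RandomDataset(Dataset):
        """64 random feature vectors of size 32 -- enough to drive one short epoch."""

        def __init__(self):
            self.data = torch.randn(64, 32)

        def __getitem__(self, index):
            return self.data[index]

        def __len__(self):
            return len(self.data)


    class TinyModel(LightningModule):
        def __init__(self):
            super().__init__()
            self.layer = torch.nn.Linear(32, 2)

        def training_step(self, batch, batch_idx):
            # the forward pass runs here, so its time is folded into the
            # profiler's `training_step` entry after this commit
            return self.layer(batch).sum()

        def configure_optimizers(self):
            return torch.optim.SGD(self.parameters(), lr=0.1)


    # profiler="simple" selects the SimpleProfiler; its report prints when fit() returns
    trainer = Trainer(profiler="simple", max_epochs=1)
    trainer.fit(TinyModel(), DataLoader(RandomDataset(), batch_size=8))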
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -567,6 +567,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Removed `log_text` and `log_image` from the `LightningLoggerBase` API ([#11857](https://github.com/PyTorchLightning/pytorch-lightning/pull/11857))
 
 
+- Removed calls to `profile("model_forward")` in favor of profiling `training_step` ([#12032](https://github.com/PyTorchLightning/pytorch-lightning/pull/12032))
+
 ### Fixed
 
 - Fixed an issue where `HorovodStrategy.teardown()` did not complete gracefully if an exception was thrown during callback setup [#11752](https://github.com/PyTorchLightning/pytorch-lightning/pull/11752)
2 changes: 0 additions & 2 deletions docs/source/advanced/profiler.rst
@@ -19,7 +19,6 @@ PyTorch Lightning supports profiling standard actions in the training loop out o
 - on_train_epoch_start
 - on_train_epoch_end
 - on_train_batch_start
-- model_forward
 - model_backward
 - on_after_backward
 - optimizer_step
@@ -66,7 +65,6 @@ The profiler's results will be printed at the completion of a training ``trainer
 |  run_training_epoch                           |  6.1558     |  6.1558     |
 |  run_training_batch                           |  0.0022506  |  0.015754   |
 |  [LightningModule]BoringModel.optimizer_step  |  0.0017477  |  0.012234   |
-|  model_forward                                |  0.00055868 |  0.0039108  |
 |  [LightningModule]BoringModel.val_dataloader  |  0.00024388 |  0.00024388 |
 |  on_train_batch_start                         |  0.00014637 |  0.0010246  |
 |  [LightningModule]BoringModel.teardown        |  2.15e-06   |  2.15e-06   |
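Nothing here removes the public profiler API: the same `profile(...)` context manager the deleted internal calls used is still reachable from user code. If a separate `model_forward`-style row in the report is still wanted, a module can open its own region. A sketch, assuming `TinyModel` from the example above is in scope; `my_model_forward` is an arbitrary label of our choosing:

    class ProfiledModel(TinyModel):
        def training_step(self, batch, batch_idx):
            # the user-chosen label appears as its own row in the profiler report
            with self.trainer.profiler.profile("my_model_forward"):
                out = self.layer(batch)
            return out.sum()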
36 changes: 17 additions & 19 deletions pytorch_lightning/loops/optimization/manual_loop.py
@@ -107,30 +107,28 @@ def advance(self, batch: Any, batch_idx: int) -> None:  # type: ignore[override]
         assert self.trainer is not None
         lightning_module = self.trainer.lightning_module
 
-        with self.trainer.profiler.profile("model_forward"):
-
-            step_kwargs = _build_training_step_kwargs(
-                lightning_module, self.trainer.optimizers, batch, batch_idx, opt_idx=None, hiddens=self._hiddens
-            )
+        step_kwargs = _build_training_step_kwargs(
+            lightning_module, self.trainer.optimizers, batch, batch_idx, opt_idx=None, hiddens=self._hiddens
+        )
 
-            # manually capture logged metrics
-            training_step_output = self.trainer._call_strategy_hook("training_step", *step_kwargs.values())
-            self.trainer.strategy.post_training_step()
+        # manually capture logged metrics
+        training_step_output = self.trainer._call_strategy_hook("training_step", *step_kwargs.values())
+        self.trainer.strategy.post_training_step()
 
-            del step_kwargs
+        del step_kwargs
 
-            model_output = self.trainer._call_lightning_module_hook("training_step_end", training_step_output)
-            strategy_output = self.trainer._call_strategy_hook("training_step_end", training_step_output)
-            training_step_output = strategy_output if model_output is None else model_output
-            self._hiddens = _extract_hiddens(training_step_output, lightning_module.truncated_bptt_steps)
+        model_output = self.trainer._call_lightning_module_hook("training_step_end", training_step_output)
+        strategy_output = self.trainer._call_strategy_hook("training_step_end", training_step_output)
+        training_step_output = strategy_output if model_output is None else model_output
+        self._hiddens = _extract_hiddens(training_step_output, lightning_module.truncated_bptt_steps)
 
-            result = self.output_result_cls.from_training_step_output(training_step_output)
+        result = self.output_result_cls.from_training_step_output(training_step_output)
 
-            if self.trainer.move_metrics_to_cpu:
-                # hiddens and the training step output are not moved as they are not considered "metrics"
-                # the user might need them on the correct device for an operation in `training_epoch_end`
-                assert self.trainer._results is not None
-                self.trainer._results.cpu()
+        if self.trainer.move_metrics_to_cpu:
+            # hiddens and the training step output are not moved as they are not considered "metrics"
+            # the user might need them on the correct device for an operation in `training_epoch_end`
+            assert self.trainer._results is not None
+            self.trainer._results.cpu()
 
         self._done = True
         self._output = result.asdict()
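manual_loop.py is the path taken when a module sets `automatic_optimization = False`. Under manual optimization the forward, backward, and optimizer calls all happen inside the user's `training_step`, so profiling that hook covers everything the removed region did and more. A sketch of such a module, reusing the imports from the first example (standard `optimizers()` / `manual_backward` API):

    class ManualModel(LightningModule):
        def __init__(self):
            super().__init__()
            self.automatic_optimization = False  # training now flows through manual_loop.py
            self.layer = torch.nn.Linear(32, 2)

        def training_step(self, batch, batch_idx):
            opt = self.optimizers()
            opt.zero_grad()
            loss = self.layer(batch).sum()
            self.manual_backward(loss)  # forward and backward both sit inside the profiled hook
            opt.step()

        def configure_optimizers(self):
            return torch.optim.SGD(self.parameters(), lr=0.1)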
40 changes: 19 additions & 21 deletions pytorch_lightning/loops/optimization/optimizer_loop.py
@@ -414,32 +414,30 @@ def _training_step(self, split_batch: Any, batch_idx: int, opt_idx: int) -> Clos
         # give the PL module a result for logging
         lightning_module = self.trainer.lightning_module
 
-        with self.trainer.profiler.profile("model_forward"):
-
-            step_kwargs = _build_training_step_kwargs(
-                lightning_module, self.trainer.optimizers, split_batch, batch_idx, opt_idx, self._hiddens
-            )
+        step_kwargs = _build_training_step_kwargs(
+            lightning_module, self.trainer.optimizers, split_batch, batch_idx, opt_idx, self._hiddens
+        )
 
-            # manually capture logged metrics
-            training_step_output = self.trainer._call_strategy_hook("training_step", *step_kwargs.values())
-            self.trainer.strategy.post_training_step()
+        # manually capture logged metrics
+        training_step_output = self.trainer._call_strategy_hook("training_step", *step_kwargs.values())
+        self.trainer.strategy.post_training_step()
 
-            model_output = self.trainer._call_lightning_module_hook("training_step_end", training_step_output)
-            strategy_output = self.trainer._call_strategy_hook("training_step_end", training_step_output)
-            training_step_output = strategy_output if model_output is None else model_output
+        model_output = self.trainer._call_lightning_module_hook("training_step_end", training_step_output)
+        strategy_output = self.trainer._call_strategy_hook("training_step_end", training_step_output)
+        training_step_output = strategy_output if model_output is None else model_output
 
-            self._hiddens = _extract_hiddens(training_step_output, lightning_module.truncated_bptt_steps)
+        self._hiddens = _extract_hiddens(training_step_output, lightning_module.truncated_bptt_steps)
 
-            result = self.output_result_cls.from_training_step_output(
-                training_step_output, self.trainer.accumulate_grad_batches
-            )
+        result = self.output_result_cls.from_training_step_output(
+            training_step_output, self.trainer.accumulate_grad_batches
+        )
 
-            if self.trainer._terminate_on_nan:
-                check_finite_loss(result.closure_loss)
+        if self.trainer._terminate_on_nan:
+            check_finite_loss(result.closure_loss)
 
-            if self.trainer.move_metrics_to_cpu:
-                # hiddens and the training step output are not moved as they are not considered "metrics"
-                assert self.trainer._results is not None
-                self.trainer._results.cpu()
+        if self.trainer.move_metrics_to_cpu:
+            # hiddens and the training step output are not moved as they are not considered "metrics"
+            assert self.trainer._results is not None
+            self.trainer._results.cpu()
 
         return result
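optimizer_loop.py mirrors the same change for automatic optimization. One way to confirm that `model_forward` no longer appears among the profiled actions is a profiler that logs every action name it is handed. A sketch, assuming the `BaseProfiler` class exported by `pytorch_lightning.profiler` at this release, whose `start`/`stop` hooks the trainer invokes around each action:

    from pytorch_lightning.profiler import BaseProfiler


    class PrintProfiler(BaseProfiler):
        """No-op profiler that prints each action the trainer asks it to time."""

        def start(self, action_name):
            print(f"start: {action_name}")

        def stop(self, action_name):
            print(f"stop:  {action_name}")


    # after this commit, the printed actions include `training_step`
    # but no standalone `model_forward`
    trainer = Trainer(profiler=PrintProfiler(), max_epochs=1)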