
Commit

fix typo colossalai/auto_parallel autochunk fx/passes etc. (hpcaitech…
digger-yu authored May 24, 2023
1 parent 725365f commit 7f8203a
Showing 19 changed files with 31 additions and 31 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/README.md
@@ -14,7 +14,7 @@
- [Compatibility Test on Dispatch](#compatibility-test-on-dispatch)
- [Release](#release)
- [User Friendliness](#user-friendliness)
- [Commmunity](#commmunity)
- [Community](#community)
- [Configuration](#configuration)
- [Progress Log](#progress-log)

@@ -97,7 +97,7 @@ This workflow is triggered by manually dispatching the workflow. It has the foll
| `Synchronize submodule` | `submodule.yml` | This workflow will check if any git submodule is updated. If so, it will create a PR to update the submodule pointers. |
| `Close inactive issues` | `close_inactive.yml` | This workflow will close issues which are stale for 14 days. |

### Commmunity
### Community

| Workflow Name | File name | Description |
| -------------------------------------------- | -------------------------------- | -------------------------------------------------------------------------------- |
2 changes: 1 addition & 1 deletion colossalai/auto_parallel/passes/meta_info_prop.py
@@ -148,7 +148,7 @@ def node_handler(self, node: Node) -> None:
graph_info.fwd_tmp = buffer_tensors
graph_info.fwd_out = output_tensors

# fetch other memory informations
# fetch other memory information
memory_cost = meta_info.memory_cost
graph_info.fwd_mem_tmp = memory_cost.fwd.temp
graph_info.fwd_mem_out = memory_cost.fwd.activation
@@ -44,7 +44,7 @@ def update_compute_cost(self, strategy: ShardingStrategy):
'''
Compute the computation cost per device with this specific strategy.
Note: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
Note: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
'''
# TODO: a constant coefficient need to be added.
# 1D: (L) * N * Cin
@@ -38,9 +38,9 @@ def update_compute_cost(self, strategy: ShardingStrategy):
'''
Compute the computation cost per device with this specific strategy.
Note: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
Note: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
'''
# TODO: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
# TODO: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
# 1D: (L) * N * Cout * Cin * kernel
# 2D: (H * W) * N * Cout * Cin * kernel
# 3D: (H * W * D) * N * Cout * Cin * kernel
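Aside (not part of this commit): the comment block above gives the compute-size formula for convolutions. A minimal Python sketch of that arithmetic, with a hypothetical helper name and made-up example shapes:

```python
from math import prod

def conv_compute_size(spatial_out, batch, c_in, c_out, kernel):
    # (spatial elements) * N * Cout * Cin * kernel, per the comment above.
    # Not yet divided by TFLOPS, so this is a computation size, not a time.
    return prod(spatial_out) * batch * c_out * c_in * prod(kernel)

# Example: a 2D conv with a 32x32 sharded output, N=8, Cin=64, Cout=128, 3x3 kernel.
print(conv_compute_size((32, 32), batch=8, c_in=64, c_out=128, kernel=(3, 3)))
```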
@@ -34,9 +34,9 @@ def update_compute_cost(self, strategy: ShardingStrategy):
'''
Compute the computation cost per device with this specific strategy.
Note: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
Note: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
'''
# TODO: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
# TODO: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
# TODO: a constant coefficient need to be added.

sharded_input_shape = strategy.sharding_specs[self.op_data['input']].get_sharded_shape_per_device()
@@ -17,7 +17,7 @@ class NormalPoolStrategyGenerator(StrategyGenerator):
"""
NormalPoolStrategyGenerator is a generic class to generate strategies for pool operation like MaxPoolxd.
The reason we call this normal pool is AvgPoolxd and MaxPoolxd are taking the kernel size element from image,
and reduce them depening on the operation type.
and reduce them depending on the operation type.
"""

def validate(self) -> bool:
@@ -35,9 +35,9 @@ def update_compute_cost(self, strategy: ShardingStrategy) -> TrainCycleItem:
'''
Compute the computation cost per device with this specific strategy.
Note: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
Note: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
'''
# TODO: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
# TODO: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
# 1D: (Lout) * N * C * kernel
# 2D: (H * W) * N * Cout * Cin * kernel
# 3D: (H * W * D) * N * Cout * Cin * kernel
8 changes: 4 additions & 4 deletions colossalai/autochunk/trace_flow.py
@@ -366,8 +366,8 @@ def flow_search(self, start_idx, start_dim, end_idx, end_dim):
# find non chunk inputs
chunk_info = self._get_non_chunk_inputs(chunk_info, start_idx, end_idx)

# reassgin reshape size, some size may have changed due to chunk
chunk_info = self._reassgin_reshape_size(chunk_info)
# reassign reshape size, some size may have changed due to chunk
chunk_info = self._reassign_reshape_size(chunk_info)

return chunk_info

@@ -428,10 +428,10 @@ def _update_chunk_info(self, chunk_info: Dict, new_all_node_info: Dict, output:
chunk_info["outputs_dim"].append(output_dim)
return True

def _reassgin_reshape_size(self, chunk_info):
def _reassign_reshape_size(self, chunk_info):
"""
Some shape args in reshape may have changed due to chunk
reassgin those changed shape
reassign those changed shape
"""
chunk_region = chunk_info["region"]
reshape_size = {}
4 changes: 2 additions & 2 deletions colossalai/autochunk/trace_indice.py
@@ -397,7 +397,7 @@ def _assign_conv2d_indice(self, node: Node, node_idx: int) -> None:
input_node = node.args[0]
assert len(get_node_shape(input_node)) == 4

# assgin index
# assign index
self._assign_indice_as_input(node, node_idx, input_node)
self._del_dim(node_idx, 1)
self._add_dim(node_idx, 1)
@@ -415,7 +415,7 @@ def _assign_interpolate_indice(self, node: Node, node_idx: int) -> None:
assert node.kwargs['size'] is None
assert len(get_node_shape(node)) == 4

# assgin index
# assign index
self._assign_indice_as_input(node, node_idx)
self._mark_computation(node, node_idx, [-1, -2])

2 changes: 1 addition & 1 deletion colossalai/booster/plugin/gemini_plugin.py
@@ -179,7 +179,7 @@ class GeminiPlugin(DPPluginBase):
Users can provide this argument to speed up searching.
If users do not know this argument before training, it is ok. We will use a default value 1024.
min_chunk_size_mb (float, optional): the minimum chunk size in MegaByte.
If the aggregate size of parameters is still samller than the minimum chunk size,
If the aggregate size of parameters is still smaller than the minimum chunk size,
all parameters will be compacted into one small chunk.
memstats (MemStats, optional) the memory statistics collector by a runtime memory tracer.
gpu_margin_mem_ratio (float, optional): The ratio of GPU remaining memory (after the first forward-backward)
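Aside (not part of this commit): the docstring above describes the min_chunk_size_mb fallback. A toy check of that rule, with made-up values and an assumed default:

```python
def all_params_fit_one_chunk(total_param_mb: float, min_chunk_size_mb: float = 32.0) -> bool:
    # Rule from the docstring above: if the aggregate parameter size is still smaller
    # than the minimum chunk size, everything is compacted into one small chunk.
    # The default of 32.0 MB here is assumed for illustration only.
    return total_param_mb < min_chunk_size_mb

print(all_params_fit_one_chunk(total_param_mb=12.0))  # True -> a single small chunk
```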
2 changes: 1 addition & 1 deletion colossalai/cluster/dist_coordinator.py
@@ -181,7 +181,7 @@ def on_master_only(self, process_group: ProcessGroup = None):
"""
is_master = self.is_master(process_group)

# define an inner functiuon
# define an inner function
def decorator(func):

@functools.wraps(func)
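Aside (not part of this commit): the hunk above builds a decorator inside on_master_only. A standalone sketch of that inner-function pattern, not ColossalAI's actual implementation:

```python
import functools

def run_on_master_only(is_master: bool):
    # Build a decorator that invokes the wrapped function only on the master process,
    # mirroring the inner-function pattern shown in the hunk above.
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            if is_master:
                return func(*args, **kwargs)
            return None  # non-master ranks skip the call
        return wrapper
    return decorator

@run_on_master_only(is_master=True)
def log(msg):
    print(msg)

log("printed only by the master process")
```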
2 changes: 1 addition & 1 deletion colossalai/device/alpha_beta_profiler.py
@@ -381,7 +381,7 @@ def _extract_alpha_beta(pg, pg_handler):
first_latency, first_bandwidth = _extract_alpha_beta(first_axis, first_axis_process_group)
second_latency, second_bandwidth = _extract_alpha_beta(second_axis, second_axis_process_group)
mesh_alpha = [first_latency, second_latency]
# The beta values have been enlarged by 1e10 times temporarilly because the computation cost
# The beta values have been enlarged by 1e10 times temporarily because the computation cost
# is still estimated in the unit of TFLOPs instead of time. We will remove this factor in future.
mesh_beta = [1e10 / first_bandwidth, 1e10 / second_bandwidth]

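Aside (not part of this commit): a quick numeric illustration of the temporary 1e10 scaling mentioned above; the bandwidth value is assumed.

```python
measured_bandwidth = 1.0e11           # hypothetical bandwidth from the profiling step
mesh_beta_entry = 1e10 / measured_bandwidth
print(mesh_beta_entry)                # 0.1 -> the temporarily enlarged beta stored in mesh_beta
```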
4 changes: 2 additions & 2 deletions colossalai/engine/schedule/_pipeline_schedule.py
@@ -152,9 +152,9 @@ def _get_data_slice(self, data, offset):
raise TypeError(f"Expected data to be of type torch.Tensor, list, tuple, or dict, but got {type(data)}")

def load_micro_batch(self):
mciro_batch_data = self._get_data_slice(self.batch_data, self.microbatch_offset)
micro_batch_data = self._get_data_slice(self.batch_data, self.microbatch_offset)
self.microbatch_offset += self.microbatch_size
return self._move_to_device(mciro_batch_data)
return self._move_to_device(micro_batch_data)

def pre_processing(self, engine):
from colossalai.zero.legacy import ShardedModelV2
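Aside (not part of this commit): load_micro_batch above slices the batch at the running offset. A rough sketch of the plain-tensor case, with toy data and a hypothetical helper name:

```python
import torch

def get_micro_batch(batch: torch.Tensor, offset: int, micro_batch_size: int) -> torch.Tensor:
    # Take the rows for the current micro-batch, starting at the running offset,
    # as _get_data_slice does when the batch data is a single tensor.
    return batch[offset:offset + micro_batch_size]

batch = torch.arange(8).reshape(8, 1)                        # a toy batch of 8 samples
print(get_micro_batch(batch, offset=2, micro_batch_size=2))  # rows 2 and 3
```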
2 changes: 1 addition & 1 deletion colossalai/engine/schedule/_pipeline_schedule_v2.py
@@ -84,7 +84,7 @@ def forward_backward_step(self,
'The argument \'return_loss\' has to be True when \'forward_only\' is False, but got False.'
self.load_batch(data_iter)

# num_warmup_microbatches is the step when not all the processers are working
# num_warmup_microbatches is the step when not all the processes are working
num_warmup_microbatches = \
(gpc.get_world_size(ParallelMode.PIPELINE)
- gpc.get_local_rank(ParallelMode.PIPELINE) - 1)
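Aside (not part of this commit): the expression above determines how many warm-up microbatches each pipeline stage runs. A worked example:

```python
def num_warmup_microbatches(pipeline_world_size: int, pipeline_local_rank: int) -> int:
    # Formula from the hunk above: earlier stages need more warm-up microbatches
    # before every stage has work in flight.
    return pipeline_world_size - pipeline_local_rank - 1

# With 4 pipeline stages, stage 0 warms up 3 microbatches and the last stage 0.
print([num_warmup_microbatches(4, rank) for rank in range(4)])  # [3, 2, 1, 0]
```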
2 changes: 1 addition & 1 deletion colossalai/fx/codegen/activation_checkpoint_codegen.py
@@ -523,7 +523,7 @@ def emit_code_with_activation_checkpoint(body, ckpt_func, nodes, emit_node_func,
# append code text to body
for idx, node in enumerate(node_list):
# if this is the first node of the ckpt region
# append the ckpt function defition
# append the ckpt function definition
if idx in start_idx:
label = start_idx.index(idx)
ckpt_fn_def = _gen_ckpt_fn_def(label, input_vars[label])
2 changes: 1 addition & 1 deletion colossalai/fx/passes/adding_split_node_pass.py
@@ -206,7 +206,7 @@ def avgcompute_split_pass(gm: torch.fx.GraphModule, pp_size: int):

def avgnode_split_pass(gm: torch.fx.GraphModule, pp_size: int):
"""
In avgnode_split_pass, simpliy split graph by node number.
In avgnode_split_pass, simply split graph by node number.
"""
mod_graph = gm.graph
avg_num_node = len(mod_graph.nodes) // pp_size
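Aside (not part of this commit): avgnode_split_pass splits the graph evenly by node count. A toy sketch of that idea on a plain list, not the actual FX pass:

```python
def split_evenly_by_count(nodes, pp_size):
    # Give each of the first pp_size - 1 partitions len(nodes) // pp_size nodes;
    # the last partition takes whatever remains.
    avg = len(nodes) // pp_size
    parts = [nodes[i * avg:(i + 1) * avg] for i in range(pp_size - 1)]
    parts.append(nodes[(pp_size - 1) * avg:])
    return parts

print(split_evenly_by_count(list(range(10)), pp_size=3))  # [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]
```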
@@ -16,7 +16,7 @@ def apply(*args, **kwargs):
return shape_consistency_manager.apply(*args, **kwargs)


def solution_annotatation_pass(gm: torch.fx.GraphModule, solution: List[int], device_mesh):
def solution_annotation_pass(gm: torch.fx.GraphModule, solution: List[int], device_mesh):
mod_graph = gm.graph
nodes = tuple(mod_graph.nodes)

2 changes: 1 addition & 1 deletion colossalai/fx/passes/meta_info_prop.py
@@ -31,7 +31,7 @@ class TensorMetadata(NamedTuple):
numel: int
is_tensor: bool
# TODO: we can add a list of sharding spec here, and record the sharding
# behaviour by appending sharding spec into list.
# behavior by appending sharding spec into list.


def _extract_tensor_metadata(result: torch.Tensor) -> TensorMetadata:
4 changes: 2 additions & 2 deletions colossalai/fx/passes/passes_for_gpt2_test.py
@@ -230,7 +230,7 @@ def record_cross_partition_use(def_node: torch.fx.node.Node,
use_partition.partitions_dependent_on.setdefault(def_partition_name)

node_process_list = list(m.graph.nodes)
# split nodes into parititons
# split nodes into partitions
while node_process_list:
node = node_process_list.pop(0)
orig_nodes[node.name] = node
@@ -277,7 +277,7 @@ def record_cross_partition_use(def_node: torch.fx.node.Node,
if len(sorted_partitions) != len(partitions):
raise RuntimeError("cycle exists between partitions!")

# add placeholders to parititons
# add placeholders to partitions
for partition_name in sorted_partitions:
partition = partitions[partition_name]
for input in partition.inputs:
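Aside (not part of this commit): the check above treats an incomplete topological ordering of partitions as a dependency cycle. The same check sketched with the standard library's graphlib rather than the pass's own sorting code; the partition names are made up:

```python
from graphlib import CycleError, TopologicalSorter

partition_deps = {"p0": set(), "p1": {"p0"}, "p2": {"p1"}}   # name -> partitions it depends on

try:
    sorted_partitions = list(TopologicalSorter(partition_deps).static_order())
except CycleError:
    raise RuntimeError("cycle exists between partitions!")

print(sorted_partitions)  # ['p0', 'p1', 'p2']
```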
4 changes: 2 additions & 2 deletions colossalai/fx/passes/split_module.py
@@ -29,8 +29,8 @@ def __repr__(self) -> str:
f" nodes: {self.node_names},\n" \
f" inputs: {self.inputs},\n" \
f" outputs: {self.outputs},\n" \
f" partitions depenent on: {self.partitions_dependent_on},\n" \
f" parition dependents: {self.partition_dependents}"
f" partitions dependent on: {self.partitions_dependent_on},\n" \
f" partition dependents: {self.partition_dependents}"


# Creates subgraphs out of main graph
