Skip to content

Commit

Permalink
add configs for larger models (#297)
Browse files Browse the repository at this point in the history
Summary: Pull Request resolved: #297

Test Plan: Imported from OSS

Reviewed By: edward-io

Differential Revision: D39159703

Pulled By: ankitade

fbshipit-source-id: 397c7917f7311ad8f0705f8087cfed25d082d7fd
  • Loading branch information
ankitade authored and facebook-github-bot committed Aug 31, 2022
1 parent cc9f99d commit 9d228b5
Show file tree
Hide file tree
Showing 4 changed files with 241 additions and 1 deletion.
80 changes: 80 additions & 0 deletions examples/flava/native/configs/1.8b.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# FLAVA pretraining config — ~1.8B-parameter model variant (FSDP strategy).
# NOTE(review): indentation reconstructed from the Hydra `_target_` dataclass
# structure (TrainingDatasetsInfo / TrainingSingleDatasetInfo / HFDatasetInfo);
# confirm against the sibling pretrain configs in this directory.
training:
  strategy: fsdp  # can be changed to ddp or fsdp
  seed: 1337

  batch_size: 8
  num_workers: 4
  prefetch_factor: 3

  optimizer:
    # NOTE(review): bare "1e-3" is a string under strict YAML 1.1 resolution;
    # OmegaConf coerces numeric-looking strings — confirm the loader in use.
    learning_rate: 1e-3
    adam_eps: 1e-8
    adam_weight_decay: 0.1
    adam_betas: [0.9, 0.999]

  warmup_steps: 10000
  max_steps: 100000

  validation_steps: 5000
  log_interval: 10

  enable_tf32: true
  enable_amp: true
  half_precision_format: "bfloat16"  # or float16
  enable_half_reduce_in_fsdp: true  # handles the reduction across devices in half precision

  activation_checkpointing: false
  activation_checkpointing_reentrant: false  # false for non-reentrant

datasets:
  _target_: flava.definitions.TrainingDatasetsInfo
  selected:
    - image
    - vl
    - text
  image:
    _target_: flava.definitions.TrainingSingleDatasetInfo
    train:
      - _target_: flava.definitions.HFDatasetInfo
        key: imagenet-1k
        subset: default
  text:
    _target_: flava.definitions.TrainingSingleDatasetInfo
    train:
      - _target_: flava.definitions.HFDatasetInfo
        key: wikitext
        subset: wikitext-103-raw-v1
    datamodule_extra_kwargs:
      text_columns: ["text"]
  vl:
    _target_: flava.definitions.TrainingSingleDatasetInfo
    train:
      - _target_: flava.definitions.HFDatasetInfo
        key: red_caps
        subset: backpacking
        rename_columns:
          - ["caption", "text"]
    val:
      - _target_: flava.definitions.HFDatasetInfo
        key: red_caps
        subset: backpacking
        rename_columns:
          - ["caption", "text"]
        # red_caps has no validation split; reuse train for validation
        split_key_mapping:
          validation: train

model:
  image_num_hidden_layers: 32
  image_hidden_size: 1280
  image_intermediate_size: 5120
  image_num_attention_heads: 16

  text_num_hidden_layers: 32
  text_hidden_size: 1280
  text_intermediate_size: 5120
  text_num_attention_heads: 16

  multimodal_num_hidden_layers: 16
  multimodal_hidden_size: 1280
  multimodal_intermediate_size: 5120
  multimodal_num_attention_heads: 16
80 changes: 80 additions & 0 deletions examples/flava/native/configs/2.7b.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# FLAVA pretraining config — ~2.7B-parameter model variant (FSDP strategy,
# activation checkpointing enabled to fit the larger model in memory).
# NOTE(review): indentation reconstructed from the Hydra `_target_` dataclass
# structure (TrainingDatasetsInfo / TrainingSingleDatasetInfo / HFDatasetInfo);
# confirm against the sibling pretrain configs in this directory.
training:
  strategy: fsdp  # can be changed to ddp or fsdp
  seed: 1337

  batch_size: 8
  num_workers: 4
  prefetch_factor: 3

  optimizer:
    # NOTE(review): bare "1e-3" is a string under strict YAML 1.1 resolution;
    # OmegaConf coerces numeric-looking strings — confirm the loader in use.
    learning_rate: 1e-3
    adam_eps: 1e-8
    adam_weight_decay: 0.1
    adam_betas: [0.9, 0.999]

  warmup_steps: 10000
  max_steps: 100000

  validation_steps: 5000
  log_interval: 10

  enable_tf32: true
  enable_amp: true
  half_precision_format: "bfloat16"  # or float16
  enable_half_reduce_in_fsdp: true  # handles the reduction across devices in half precision

  activation_checkpointing: true
  activation_checkpointing_reentrant: false  # false for non-reentrant

datasets:
  _target_: flava.definitions.TrainingDatasetsInfo
  selected:
    - image
    - vl
    - text
  image:
    _target_: flava.definitions.TrainingSingleDatasetInfo
    train:
      - _target_: flava.definitions.HFDatasetInfo
        key: imagenet-1k
        subset: default
  text:
    _target_: flava.definitions.TrainingSingleDatasetInfo
    train:
      - _target_: flava.definitions.HFDatasetInfo
        key: wikitext
        subset: wikitext-103-raw-v1
    datamodule_extra_kwargs:
      text_columns: ["text"]
  vl:
    _target_: flava.definitions.TrainingSingleDatasetInfo
    train:
      - _target_: flava.definitions.HFDatasetInfo
        key: red_caps
        subset: backpacking
        rename_columns:
          - ["caption", "text"]
    val:
      - _target_: flava.definitions.HFDatasetInfo
        key: red_caps
        subset: backpacking
        rename_columns:
          - ["caption", "text"]
        # red_caps has no validation split; reuse train for validation
        split_key_mapping:
          validation: train

model:
  image_num_hidden_layers: 40
  image_hidden_size: 1408
  image_intermediate_size: 6144
  image_num_attention_heads: 16

  text_num_hidden_layers: 40
  text_hidden_size: 1408
  text_intermediate_size: 6144
  text_num_attention_heads: 16

  multimodal_num_hidden_layers: 20
  multimodal_hidden_size: 1408
  multimodal_intermediate_size: 6144
  multimodal_num_attention_heads: 16
80 changes: 80 additions & 0 deletions examples/flava/native/configs/900m.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# FLAVA pretraining config — ~900M-parameter model variant (DDP strategy).
# NOTE(review): indentation reconstructed from the Hydra `_target_` dataclass
# structure (TrainingDatasetsInfo / TrainingSingleDatasetInfo / HFDatasetInfo);
# confirm against the sibling pretrain configs in this directory.
training:
  strategy: ddp  # can be changed to ddp or fsdp
  seed: 1337

  batch_size: 8
  num_workers: 4
  prefetch_factor: 3

  optimizer:
    # NOTE(review): bare "1e-3" is a string under strict YAML 1.1 resolution;
    # OmegaConf coerces numeric-looking strings — confirm the loader in use.
    learning_rate: 1e-3
    adam_eps: 1e-8
    adam_weight_decay: 0.1
    adam_betas: [0.9, 0.999]

  warmup_steps: 10000
  max_steps: 100000

  validation_steps: 5000
  log_interval: 10

  enable_tf32: true
  enable_amp: true
  half_precision_format: "bfloat16"  # or float16
  enable_half_reduce_in_fsdp: true  # handles the reduction across devices in half precision

  activation_checkpointing: false
  activation_checkpointing_reentrant: false  # false for non-reentrant

datasets:
  _target_: flava.definitions.TrainingDatasetsInfo
  selected:
    - image
    - vl
    - text
  image:
    _target_: flava.definitions.TrainingSingleDatasetInfo
    train:
      - _target_: flava.definitions.HFDatasetInfo
        key: imagenet-1k
        subset: default
  text:
    _target_: flava.definitions.TrainingSingleDatasetInfo
    train:
      - _target_: flava.definitions.HFDatasetInfo
        key: wikitext
        subset: wikitext-103-raw-v1
    datamodule_extra_kwargs:
      text_columns: ["text"]
  vl:
    _target_: flava.definitions.TrainingSingleDatasetInfo
    train:
      - _target_: flava.definitions.HFDatasetInfo
        key: red_caps
        subset: backpacking
        rename_columns:
          - ["caption", "text"]
    val:
      - _target_: flava.definitions.HFDatasetInfo
        key: red_caps
        subset: backpacking
        rename_columns:
          - ["caption", "text"]
        # red_caps has no validation split; reuse train for validation
        split_key_mapping:
          validation: train

model:
  image_num_hidden_layers: 24
  image_hidden_size: 1024
  image_intermediate_size: 4096
  image_num_attention_heads: 16

  text_num_hidden_layers: 24
  text_hidden_size: 1024
  text_intermediate_size: 4096
  text_num_attention_heads: 16

  multimodal_num_hidden_layers: 12
  multimodal_hidden_size: 1024
  multimodal_intermediate_size: 4096
  multimodal_num_attention_heads: 16
2 changes: 1 addition & 1 deletion examples/flava/native/configs/pretrain_debug.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ training:
warmup_steps: 10000
max_steps: 100000

validation_steps: 500
validation_steps: 5000
log_interval: 10

enable_tf32: True
Expand Down

0 comments on commit 9d228b5

Please sign in to comment.