Batch merge #411 (Draft)

jacob-morrison wants to merge 45 commits into main from batch-merge

Changes from 1 commit

Commits (45)
1b214d1  stash initial changes for now  (jacob-morrison, Aug 16, 2024)
6178897  stash  (jacob-morrison, Aug 16, 2024)
cc5670f  .  (jacob-morrison, Aug 17, 2024)
d3ccf4a  .  (jacob-morrison, Aug 17, 2024)
f9e0319  .  (jacob-morrison, Aug 17, 2024)
4c161e1  .  (jacob-morrison, Aug 17, 2024)
f282a3c  .  (jacob-morrison, Aug 18, 2024)
488b71b  .  (jacob-morrison, Aug 18, 2024)
623e68d  .  (jacob-morrison, Aug 18, 2024)
758b2a9  .  (jacob-morrison, Aug 18, 2024)
8b68012  fix  (jacob-morrison, Aug 18, 2024)
09d6834  .  (jacob-morrison, Aug 18, 2024)
baaa375  .  (jacob-morrison, Aug 18, 2024)
e0b9a84  .  (jacob-morrison, Aug 18, 2024)
88b1656  .  (jacob-morrison, Aug 18, 2024)
32739a4  .  (jacob-morrison, Aug 18, 2024)
f516abc  .  (jacob-morrison, Aug 19, 2024)
f97c4d2  .  (jacob-morrison, Aug 19, 2024)
ef11e70  .  (jacob-morrison, Aug 19, 2024)
8250980  .  (jacob-morrison, Aug 19, 2024)
b704fde  .  (jacob-morrison, Aug 19, 2024)
8404b6c  .  (jacob-morrison, Aug 21, 2024)
691143b  .  (jacob-morrison, Aug 21, 2024)
1045794  Merge branch 'main' into batch-merge  (jacob-morrison, Sep 9, 2024)
bbe7648  add  (jacob-morrison, Sep 10, 2024)
45cabbb  .  (jacob-morrison, Sep 20, 2024)
ace26b0  .  (jacob-morrison, Sep 20, 2024)
7e7e1c1  test  (jacob-morrison, Sep 20, 2024)
29a7a95  ,  (jacob-morrison, Sep 20, 2024)
9b29228  fix  (jacob-morrison, Sep 20, 2024)
d07a819  test  (jacob-morrison, Sep 20, 2024)
1812c40  Merge branch 'main' into batch-merge  (jacob-morrison, Oct 28, 2024)
47fb938  push new commits  (jacob-morrison, Oct 29, 2024)
c33ab5a  Merge branch 'main' into batch-merge  (jacob-morrison, Oct 29, 2024)
9a13d8b  changes to support weka (rough draft for now)  (jacob-morrison, Oct 29, 2024)
5aa6267  changes  (jacob-morrison, Oct 30, 2024)
f4bbe02  update merge configs  (jacob-morrison, Oct 30, 2024)
6377335  committing changes  (jacob-morrison, Nov 1, 2024)
c5b9c0f  update  (jacob-morrison, Nov 3, 2024)
67d05a4  update  (jacob-morrison, Nov 5, 2024)
205c2f6  final configs  (jacob-morrison, Nov 5, 2024)
bc2aec8  update  (jacob-morrison, Nov 13, 2024)
f002136  Merge branch 'main' into batch-merge  (jacob-morrison, Nov 13, 2024)
a0fc16f  update my branch with garbo  (jacob-morrison, Nov 17, 2024)
9c0e769  dumping changes, not necessary for release  (jacob-morrison, Nov 21, 2024)
Viewing changes from commit c5b9c0fcf08fb461309e0f3414c000c1de7b1649 ("update"), committed by jacob-morrison on Nov 3, 2024.
configs/beaker_configs/default_finetune.yaml (3 additions, 0 deletions)

@@ -52,6 +52,9 @@ tasks:
 - mountPath: /oe-adapt-default
   source:
     weka: oe-adapt-default
+- mountPath: /oe-training-default
+  source:
+    weka: oe-training-default
 result:
   path: /output
 resources:
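The new mount exposes the oe-training-default weka bucket inside finetune jobs, which the OLMo SFT config later in this diff relies on for its model_name_or_path and tokenizer_name. A minimal runtime sanity check along these lines (a hypothetical sketch, not part of this PR) would confirm both buckets resolved:

import os

# Mount points taken from the diff above; this check script is hypothetical.
for path in ("/oe-adapt-default", "/oe-training-default"):
    assert os.path.isdir(path), f"expected weka mount at {path}"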
configs/beaker_configs/default_finetune_multinode.yaml (3 additions, 0 deletions)

@@ -66,6 +66,9 @@ tasks:
 - mountPath: /oe-adapt-default
   source:
     weka: oe-adapt-default
+- mountPath: /model
+  source:
+    beaker: jacobm/llama-3.1-8b
 result:
   path: /output
 resources:
configs/train_configs/dpo/my-test-dpo.yaml (4 additions, 2 deletions)

@@ -1,5 +1,5 @@
-model_name_or_path: /oe-adapt-default/jacobm/tulu-3-dev/checkpoints/base_models/L3.1-8B-v3.9-nc-1
-tokenizer_name: /oe-adapt-default/jacobm/tulu-3-dev/checkpoints/base_models/L3.1-8B-v3.9-nc-1
+model_name_or_path: /model
+tokenizer_name: /model
 model_revision: main
 use_flash_attn: true
 gradient_checkpointing: true
@@ -8,6 +8,8 @@ gradient_checkpointing: true
 dataset_mixer:
   ai2-adapt-dev/tulu3.4-sft-replica-50k-gpt4-prefs-on-policy: 1.0
   ai2-adapt-dev/personahub_if_pref_data_manualseed_v2_19890: 1.0
+  ai2-adapt-dev/helpsteer2-uf-pipeline-regen: 1.0
+  allenai/ultrafeedback_binarized_cleaned_train: 1.0
 use_slow_tokenizer: true
 max_seq_length: 2048
 preprocessing_num_workers: 16
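The switch from the long weka checkpoint path to /model works because the multinode Beaker config above now mounts the jacobm/llama-3.1-8b Beaker dataset at /model. For the two new preference datasets, here is a rough sketch of what a dataset_mixer entry means, assuming a weight of 1.0 keeps a dataset's full train split (open-instruct's actual mixing logic may differ):

from datasets import load_dataset, concatenate_datasets

# Dataset names taken from the diff above; the "1.0 keeps everything"
# semantics are an assumption.
mixer = {
    "ai2-adapt-dev/helpsteer2-uf-pipeline-regen": 1.0,
    "allenai/ultrafeedback_binarized_cleaned_train": 1.0,
}

parts = []
for name, frac in mixer.items():
    ds = load_dataset(name, split="train")
    keep = int(len(ds) * frac)  # 1.0 keeps every example
    parts.append(ds.shuffle(seed=42).select(range(keep)))

mixed = concatenate_datasets(parts)
print(len(mixed))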
configs/train_configs/sft/peteish_1124_preview_mix_v3.9.yaml (new file, 59 additions)

@@ -0,0 +1,59 @@
model_name_or_path: /oe-training-default/ai2-llm/checkpoints/OLMo-medium/peteish7-anneal-from-928646-50B-nowup-moremath-dclm07-fw2-se-flan/step11931-hf
model_revision: main
use_flash_attn: true
tokenizer_name: /oe-training-default/ai2-llm/checkpoints/OLMo-medium/peteish7-anneal-from-928646-50B-nowup-moremath-dclm07-fw2-se-flan/step11931-hf
use_slow_tokenizer: true
dataset_mixer:
  # Static v3.9 nc mix file
  # WIP

  # Static v3.9 huggingface dataset
  allenai/tulu-v.3.9-mix-preview-noncommercial: 1.0

  # # General datasets:
  # ai2-adapt-dev/oasst1_converted: 1.0 # 7132 # all
  # ai2-adapt-dev/flan_v2_converted: 1.0 # 89982 # all
  # ai2-adapt-dev/tulu_hard_coded_repeated_10: 1.0 # 240 # all
  # ai2-adapt-dev/no_robots_converted: 1.0 # 9500 # all
  # ai2-adapt-dev/tulu_v3.9_wildchat_100k: 1.0

  # # Math datasets:
  # ai2-adapt-dev/personahub_math_v5_regen_149960: 1.0 # 149960 # all
  # ai2-adapt-dev/tulu_v3.9_personahub_math_interm_algebra_20k: 1.0 # 49980 # all
  # ai2-adapt-dev/tulu_v3.9_open_math_2_gsm8k_50k: 1.0
  # ai2-adapt-dev/numinamath_tir_math_decontaminated: 1.0
  # ai2-adapt-dev/tulu_v3.9_personahub_math_interm_algebra_20k: 1.0

  # # Coding datasets:
  # ai2-adapt-dev/personahub_code_v2_34999: 1.0 # 34999 # all
  # ai2-adapt-dev/evol_codealpaca_heval_decontaminated: 1.0 # 107276 # all

  # # IF datasets:
  # ai2-adapt-dev/personahub_ifdata_manual_seed_v3_29980: 1.0 # 29980 # all

  # # Safety datasets:
  # ai2-adapt-dev/coconot_converted: 1.0 # 10983 # all
  # ai2-adapt-dev/tulu_v3.9_wildjailbreak_decontaminated_50k: 1.0
  # ai2-adapt-dev/tulu_v3.9_synthetic_finalresp_wildguardmixtrain_decontaminated_50k: 1.0

  # # Specialty datasets:
  # ai2-adapt-dev/tulu_v3.9_sciriff_10k: 1.0
  # ai2-adapt-dev/tulu_v3.9_table_gpt_5k: 1.0
  # ai2-adapt-dev/tulu_v3.9_aya_100k: 1.0

max_seq_length: 4096 # need to increase to 8k
preprocessing_num_workers: 128
per_device_train_batch_size: 1 # note, this is set up for 8 GPUs
gradient_accumulation_steps: 16 # effective batch size 128 with 4 nodes
learning_rate: 2.0e-06
lr_scheduler_type: linear
warmup_ratio: 0.03
weight_decay: 0.0
num_train_epochs: 2
output_dir: /output/
with_tracking: true
report_to:
  - wandb
logging_steps: 1
checkpointing_steps: epoch
dataset_mix_dir: /output/
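The batch-size comments in this new config can be checked with the usual formula: effective batch = per-device batch x GPUs per node x nodes x accumulation steps. On a single 8-GPU node the values above give 128, but with the 4 nodes the comment mentions they give 512, so either the comment is stale or the accumulation steps get rescaled at launch. A quick check:

# Values from the config above; the formula is the standard one.
per_device = 1
gpus_per_node = 8
accum = 16
for nodes in (1, 4):
    print(nodes, per_device * gpus_per_node * nodes * accum)  # 1 -> 128, 4 -> 512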
(file name not shown in this view)

@@ -19,7 +19,7 @@ dataset_mixer:

 # # Math datasets:
 # ai2-adapt-dev/personahub_math_v5_regen_149960: 1.0 # 149960 # all
-# ai2-adapt-dev/tulu_v3.9_personahub_math_interm_algebra_20k: 1.0 # 49980 # all
+# allenai/tulu-3-sft-personas-math-grade: 1.0 # 49980 # all
 # ai2-adapt-dev/tulu_v3.9_open_math_2_gsm8k_50k: 1.0
 # ai2-adapt-dev/numinamath_tir_math_decontaminated: 1.0
 # ai2-adapt-dev/tulu_v3.9_personahub_math_interm_algebra_20k: 1.0
@@ -44,7 +44,7 @@ dataset_mixer:
 max_seq_length: 4096
 preprocessing_num_workers: 128
 per_device_train_batch_size: 1 # note, this is set up for 8 GPUs
-gradient_accumulation_steps: 2 # effective batch size 128 with 8 nodes
+gradient_accumulation_steps: 1 # effective batch size 128 with 8 nodes
 learning_rate: 2.0e-06
 lr_scheduler_type: linear
 warmup_ratio: 0.03
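Dropping gradient_accumulation_steps from 2 to 1 halves the effective batch unless the topology changed: at the stated 8 nodes with 8 GPUs each, 2 steps give 128 while 1 step gives 64, so the retained "128 with 8 nodes" comment only still holds if the run actually moved to 16 nodes. Using the same formula as above:

for nodes, accum in ((8, 2), (8, 1), (16, 1)):
    print(nodes, accum, 1 * 8 * nodes * accum)  # 128, 64, 128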
(file name not shown in this view; new file, 59 additions)

@@ -0,0 +1,59 @@
model_name_or_path: /model
model_revision: main
use_flash_attn: true
tokenizer_name: /model
use_slow_tokenizer: true
dataset_mixer:
  # Static v3.9 nc mix file
  # WIP

  # Static v3.9 huggingface dataset
  # allenai/tulu-v.3.9-mix-preview-noncommercial: 1.0

  # General datasets:
  ai2-adapt-dev/oasst1_converted: 1.0 # 7132 # all
  ai2-adapt-dev/flan_v2_converted: 1.0 # 89982 # all
  ai2-adapt-dev/tulu_hard_coded_repeated_10: 1.0 # 240 # all
  ai2-adapt-dev/no_robots_converted: 1.0 # 9500 # all
  ai2-adapt-dev/tulu_v3.9_wildchat_100k: 1.0

  # Math datasets:
  ai2-adapt-dev/personahub_math_v5_regen_149960: 1.0 # 149960 # all
  allenai/tulu-3-sft-personas-math-grade: 1.0 # 49980 # all
  ai2-adapt-dev/tulu_v3.9_open_math_2_gsm8k_50k: 1.0
  ai2-adapt-dev/numinamath_tir_math_decontaminated: 1.0
  ai2-adapt-dev/tulu_v3.9_personahub_math_interm_algebra_20k: 1.0

  # Coding datasets:
  ai2-adapt-dev/personahub_code_v2_34999: 1.0 # 34999 # all
  ai2-adapt-dev/evol_codealpaca_heval_decontaminated: 1.0 # 107276 # all

  # IF datasets:
  ai2-adapt-dev/personahub_ifdata_manual_seed_v3_29980: 1.0 # 29980 # all

  # Safety datasets:
  ai2-adapt-dev/coconot_converted: 1.0 # 10983 # all
  ai2-adapt-dev/tulu_v3.9_wildjailbreak_decontaminated_50k: 1.0
  ai2-adapt-dev/tulu_v3.9_synthetic_finalresp_wildguardmixtrain_decontaminated_50k: 1.0

  # Specialty datasets:
  ai2-adapt-dev/tulu_v3.9_sciriff_10k: 1.0
  ai2-adapt-dev/tulu_v3.9_table_gpt_5k: 1.0
  ai2-adapt-dev/tulu_v3.9_aya_100k: 1.0

max_seq_length: 4096 # need to increase to 8k
preprocessing_num_workers: 128
per_device_train_batch_size: 1 # note, this is set up for 8 GPUs
gradient_accumulation_steps: 2 # effective batch size 128 with 4 nodes
learning_rate: 5.0e-06
lr_scheduler_type: linear
warmup_ratio: 0.03
weight_decay: 0.0
num_train_epochs: 2
output_dir: /output/
with_tracking: true
report_to:
  - wandb
logging_steps: 1
checkpointing_steps: epoch
dataset_mix_dir: /output/
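A quick way to eyeball a config like this before submitting is to load it and inspect the parsed keys; the path below is hypothetical, since this file's name is not shown in this view:

import yaml

# Hypothetical path; substitute the real config file name.
with open("configs/train_configs/sft/my_mix.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["learning_rate"])       # 5e-06
print(len(cfg["dataset_mixer"]))  # 19 active datasets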
(file name not shown in this view)

@@ -19,7 +19,7 @@ dataset_mixer:

 # # Math datasets:
 # ai2-adapt-dev/personahub_math_v5_regen_149960: 1.0 # 149960 # all
-# ai2-adapt-dev/tulu_v3.9_personahub_math_interm_algebra_20k: 1.0 # 49980 # all
+# allenai/tulu-3-sft-personas-math-grade: 1.0 # 49980 # all
 # ai2-adapt-dev/tulu_v3.9_open_math_2_gsm8k_50k: 1.0
 # ai2-adapt-dev/numinamath_tir_math_decontaminated: 1.0
 # ai2-adapt-dev/tulu_v3.9_personahub_math_interm_algebra_20k: 1.0
scripts/submit_finetune_job.py (1 addition, 1 deletion)

@@ -166,7 +166,7 @@ def parse_args(args):
 d['tasks'][0]['arguments'][0] = new_arguments

 # name and description
-exp_name = f"open_instruct_finetune_{model_name}_{now}"
+exp_name = f"open_instruct_finetune_{model_name}_{now}"[:128]
 d['description'] = exp_name
 d['tasks'][0]['name'] = exp_name
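The [:128] slice caps the experiment name length, presumably because Beaker rejects longer names; since an f-string yields a plain str, slicing past the end is safe and simply returns the whole name when it is already short enough:

# Hypothetical example values for the two interpolated variables.
model_name = "peteish7-anneal-from-928646-50B-nowup-moremath-dclm07-fw2-se-flan"
now = "2024-11-03-12-00-00"
exp_name = f"open_instruct_finetune_{model_name}_{now}"[:128]
print(len(exp_name))  # always <= 128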