From 93b343e0b11da7e9af11d51817642f0f50448cd4 Mon Sep 17 00:00:00 2001
From: S22 <864453277@qq.com>
Date: Tue, 3 Sep 2024 19:13:20 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=F0=9F=8E=B8=20version=200.4.0=20(#141)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. Modify data processing: standardize data formats, add metadata,
   refactor the dataset classes, and remove data normalization operations.
2. Add a Scaler class: manage data normalization and denormalization in
   an extensible way.
3. Delete the loss folder and merge its contents into metrics.
4. Modify the Runner: streamline the training and inference interfaces,
   add post-training inference, and support saving inference results and
   evaluation metrics.
5. Add an examples folder: document all configuration options with
   explanations, and provide typical configuration files and an example
   model architecture (MLP).
6. Update the training and inference scripts.
7. Add detailed tutorials.
8. Refine code details.
9. Update requirements.txt.
10. Update README.md.
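
For orientation, here is a minimal sketch of the scaler interface added in
item 2. The constructor arguments mirror CFG.SCALER.PARAM in the updated
configs below; the method names (fit/transform/inverse_transform) and the
fitting logic are illustrative assumptions, not the exact code in
basicts/scaler/base_scaler.py and basicts/scaler/z_score_scaler.py:

```python
import numpy as np

class BaseScaler:
    """Extensible base: subclasses own normalization and denormalization."""
    def __init__(self, dataset_name: str, train_ratio: float,
                 norm_each_channel: bool, rescale: bool):
        self.dataset_name = dataset_name              # dataset the statistics belong to
        self.train_ratio = train_ratio                # fit statistics on the training split only
        self.norm_each_channel = norm_each_channel    # per-channel vs. global statistics
        self.rescale = rescale                        # whether to denormalize model outputs

    def transform(self, data: np.ndarray) -> np.ndarray:
        raise NotImplementedError

    def inverse_transform(self, data: np.ndarray) -> np.ndarray:
        raise NotImplementedError

class ZScoreScaler(BaseScaler):
    """Z-score normalization, as selected via CFG.SCALER.TYPE in the configs."""
    def fit(self, train_data: np.ndarray) -> None:
        # Per-channel statistics if requested, otherwise a single global pair.
        axis = 0 if self.norm_each_channel else None
        self.mean = train_data.mean(axis=axis)
        self.std = train_data.std(axis=axis)

    def transform(self, data: np.ndarray) -> np.ndarray:
        return (data - self.mean) / self.std

    def inverse_transform(self, data: np.ndarray) -> np.ndarray:
        return data * self.std + self.mean
```

In the new configs, the scaler class is chosen via CFG.SCALER.TYPE and its
constructor arguments via CFG.SCALER.PARAM, as the AGCRN configs in this
patch illustrate.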
---
.gitignore | 4 +-
.pylintrc | 6 +-
README.md | 149 +---
baselines/AGCRN/METR-LA.py | 158 ++--
baselines/AGCRN/PEMS-BAY.py | 158 ++--
baselines/AGCRN/PEMS03.py | 158 ++--
baselines/AGCRN/PEMS04.py | 158 ++--
baselines/AGCRN/PEMS07.py | 158 ++--
baselines/AGCRN/PEMS08.py | 160 ++--
baselines/AGCRN/run.sh | 7 -
baselines/Autoformer/ETTh1.py | 158 ++--
baselines/Autoformer/ETTh2.py | 158 ++--
baselines/Autoformer/ETTm1.py | 160 ++--
baselines/Autoformer/ETTm2.py | 160 ++--
baselines/Autoformer/Electricity.py | 169 +++--
baselines/Autoformer/ExchangeRate.py | 164 +++--
baselines/Autoformer/PEMS04.py | 131 ----
baselines/Autoformer/PEMS04_LTSF.py | 155 ++++
baselines/Autoformer/PEMS08.py | 131 ----
baselines/Autoformer/PEMS08_LTSF.py | 155 ++++
baselines/Autoformer/Weather.py | 162 ++--
baselines/Autoformer/run.sh | 10 -
baselines/BGSLF/BGSLF_METR-LA.py | 143 ----
baselines/BGSLF/arch/__init__.py | 1 -
baselines/BGSLF/arch/cell.py | 208 ------
baselines/BGSLF/arch/model.py | 258 -------
baselines/Crossformer/ETTh1.py | 155 ++--
baselines/Crossformer/ETTh2.py | 117 ---
baselines/Crossformer/ETTm1.py | 163 +++--
baselines/Crossformer/ETTm2.py | 120 ---
baselines/Crossformer/Electricity.py | 156 ++--
baselines/Crossformer/ExchangeRate.py | 117 ---
baselines/Crossformer/PEMS04.py | 119 ---
baselines/Crossformer/PEMS04_LTSF.py | 145 ++++
baselines/Crossformer/PEMS08.py | 119 ---
baselines/Crossformer/PEMS08_LTSF.py | 145 ++++
baselines/Crossformer/Weather.py | 157 ++--
baselines/Crossformer/arch/attn.py | 6 +-
baselines/Crossformer/arch/cross_decoder.py | 8 +-
baselines/Crossformer/arch/cross_embed.py | 2 +-
baselines/Crossformer/arch/cross_encoder.py | 12 +-
.../Crossformer/arch/crossformer_arch.py | 4 +-
baselines/Crossformer/run.sh | 10 -
baselines/D2STGNN/METR-LA.py | 169 +++--
baselines/D2STGNN/PEMS-BAY.py | 169 +++--
baselines/D2STGNN/PEMS03.py | 171 +++--
baselines/D2STGNN/PEMS04.py | 171 +++--
baselines/D2STGNN/PEMS07.py | 171 +++--
baselines/D2STGNN/PEMS08.py | 171 +++--
baselines/D2STGNN/arch/d2stgnn_arch.py | 2 +-
.../arch/dynamic_graph_conv/utils/mask.py | 2 +-
baselines/D2STGNN/run.sh | 7 -
baselines/DCRNN/METR-LA.py | 173 +++--
baselines/DCRNN/PEMS-BAY.py | 175 +++--
baselines/DCRNN/PEMS03.py | 169 +++--
baselines/DCRNN/PEMS04.py | 175 +++--
baselines/DCRNN/PEMS07.py | 171 +++--
baselines/DCRNN/PEMS08.py | 171 +++--
baselines/DCRNN/run.sh | 7 -
baselines/DGCRN/METR-LA.py | 169 +++--
baselines/DGCRN/PEMS-BAY.py | 170 +++--
baselines/DGCRN/PEMS03.py | 169 +++--
baselines/DGCRN/PEMS04.py | 169 +++--
baselines/DGCRN/PEMS07.py | 169 +++--
baselines/DGCRN/PEMS08.py | 169 +++--
baselines/DGCRN/run.sh | 7 -
baselines/DGCRN/runner/dgcrn_runner.py | 2 +-
baselines/DLinear/ETTh1.py | 151 ++--
baselines/DLinear/ETTh2.py | 151 ++--
baselines/DLinear/ETTm1.py | 151 ++--
baselines/DLinear/ETTm2.py | 151 ++--
baselines/DLinear/Electricity.py | 151 ++--
baselines/DLinear/ExchangeRate.py | 151 ++--
baselines/DLinear/METR-LA.py | 109 ---
baselines/DLinear/PEMS-BAY.py | 109 ---
baselines/DLinear/PEMS04.py | 109 ---
baselines/DLinear/PEMS04_LTSF.py | 138 ++++
baselines/DLinear/PEMS08.py | 109 ---
baselines/DLinear/PEMS08_LTSF.py | 138 ++++
baselines/DLinear/Weather.py | 151 ++--
baselines/DLinear/run.sh | 10 -
baselines/DSFormer/ETTh1.py | 158 ++--
baselines/DSFormer/ETTh2.py | 158 ++--
baselines/DSFormer/ETTm1.py | 161 ++--
baselines/DSFormer/ETTm2.py | 160 ++--
baselines/DSFormer/Electricity.py | 158 ++--
baselines/DSFormer/ExchangeRate.py | 159 ++--
baselines/DSFormer/Illness.py | 163 +++--
baselines/DSFormer/METR-LA.py | 118 ---
baselines/DSFormer/PEMS04.py | 118 ---
baselines/DSFormer/PEMS04_LTSF.py | 144 ++++
baselines/DSFormer/PEMS08.py | 117 ---
baselines/DSFormer/PEMS08_LTSF.py | 144 ++++
baselines/DSFormer/Traffic.py | 166 +++--
baselines/DSFormer/Weather.py | 165 +++--
baselines/DSFormer/run.sh | 12 -
baselines/DeepAR/ETTh1.py | 159 ++--
baselines/DeepAR/ETTm1.py | 159 ++--
baselines/DeepAR/Electricity.py | 159 ++--
baselines/DeepAR/ExchangeRate.py | 160 ++--
baselines/DeepAR/METR-LA.py | 104 ---
baselines/DeepAR/PEMS-BAY.py | 104 ---
baselines/DeepAR/PEMS03.py | 104 ---
baselines/DeepAR/PEMS04.py | 163 +++--
baselines/DeepAR/PEMS04_LTSF.py | 169 +++--
baselines/DeepAR/PEMS07.py | 104 ---
baselines/DeepAR/PEMS08.py | 165 +++--
baselines/DeepAR/PEMS08_LTSF.py | 167 +++--
baselines/DeepAR/Weather.py | 159 ++--
baselines/DeepAR/run.sh | 15 -
baselines/DeepAR/runner/deepar_runner.py | 104 ++-
baselines/DeepAR_M4/M4.py | 108 ---
baselines/DeepAR_M4/arch/__init__.py | 1 -
baselines/DeepAR_M4/arch/deepar.py | 120 ---
baselines/DeepAR_M4/arch/distributions.py | 22 -
baselines/DeepAR_M4/loss/__init__.py | 1 -
baselines/DeepAR_M4/loss/gaussian.py | 36 -
baselines/DeepAR_M4/runner/__init__.py | 1 -
baselines/DeepAR_M4/runner/deepar_runner.py | 141 ----
baselines/FEDformer/ETTh1.py | 160 ++--
baselines/FEDformer/ETTh2.py | 161 ++--
baselines/FEDformer/ETTm1.py | 162 ++--
baselines/FEDformer/ETTm2.py | 164 +++--
baselines/FEDformer/Electricity.py | 160 ++--
baselines/FEDformer/ExchangeRate.py | 162 ++--
baselines/FEDformer/PEMS04.py | 134 ----
baselines/FEDformer/PEMS04_LTSF.py | 156 ++++
baselines/FEDformer/PEMS08.py | 134 ----
baselines/FEDformer/PEMS08_LTSF.py | 156 ++++
baselines/FEDformer/Weather.py | 160 ++--
baselines/FEDformer/arch/fedformer_arch.py | 2 +-
.../FEDformer/arch/fourier_correlation.py | 4 +-
baselines/FEDformer/arch/utils.py | 62 +-
baselines/FEDformer/run.sh | 10 -
baselines/GMSDR/METR-LA.py | 130 ----
baselines/GMSDR/PEMS-BAY.py | 130 ----
baselines/GMSDR/PEMS03.py | 130 ----
baselines/GMSDR/PEMS04.py | 130 ----
baselines/GMSDR/PEMS07.py | 130 ----
baselines/GMSDR/PEMS08.py | 130 ----
baselines/GMSDR/arch/__init__.py | 3 -
baselines/GMSDR/arch/gmsdr_arch.py | 164 -----
baselines/GMSDR/arch/gmsdr_cell.py | 184 -----
baselines/GMSDR/run.sh | 7 -
baselines/GTS/METR-LA.py | 182 +++--
baselines/GTS/PEMS-BAY.py | 182 +++--
baselines/GTS/PEMS03.py | 182 +++--
baselines/GTS/PEMS04.py | 182 +++--
baselines/GTS/PEMS07.py | 182 +++--
baselines/GTS/PEMS08.py | 182 +++--
baselines/GTS/arch/gts_arch.py | 4 +-
baselines/GTS/run.sh | 7 -
baselines/GWNet/METR-LA.py | 163 +++--
baselines/GWNet/PEMS-BAY.py | 163 +++--
baselines/GWNet/PEMS03.py | 165 +++--
baselines/GWNet/PEMS04.py | 165 +++--
baselines/GWNet/PEMS07.py | 165 +++--
baselines/GWNet/PEMS08.py | 165 +++--
baselines/GWNet/run.sh | 7 -
baselines/HI/HI_METR-LA_in96_out96.py | 106 ---
baselines/HI/METR-LA.py | 138 ++++
baselines/HI/arch/hi_arch.py | 2 +-
baselines/Informer/ETTh1.py | 160 ++--
baselines/Informer/ETTh2.py | 162 ++--
baselines/Informer/ETTm1.py | 162 ++--
baselines/Informer/ETTm2.py | 163 +++--
baselines/Informer/Electricity.py | 160 ++--
baselines/Informer/ExchangeRate.py | 160 ++--
baselines/Informer/METR-LA.py | 136 ----
baselines/Informer/PEMS-BAY.py | 136 ----
baselines/Informer/PEMS04.py | 136 ----
baselines/Informer/PEMS04_LTSF.py | 160 ++++
baselines/Informer/PEMS08.py | 136 ----
baselines/Informer/PEMS08_LTSF.py | 160 ++++
baselines/Informer/Weather.py | 162 ++--
baselines/Informer/arch/informer_arch.py | 8 +-
baselines/Informer/arch/masking.py | 2 +-
baselines/Informer/run.sh | 10 -
baselines/LSTM/CA.py | 117 ---
baselines/LSTM/GBA.py | 117 ---
baselines/LSTM/GLA.py | 117 ---
baselines/LSTM/SD.py | 117 ---
baselines/LSTM/arch/__init__.py | 1 -
baselines/LSTM/arch/lstm_arch.py | 49 --
baselines/LightGBM/README.md | 5 -
baselines/LightGBM/Weather.py | 21 -
baselines/LightGBM/evaluate.py | 80 --
baselines/LightGBM/evaluate_ar.py | 96 ---
baselines/LightGBM/evaluate_m4_ar.py | 90 ---
baselines/LightGBM/run.sh | 16 -
baselines/Linear/ETTh1.py | 108 ---
baselines/Linear/ETTh2.py | 108 ---
baselines/Linear/ETTm1.py | 108 ---
baselines/Linear/ETTm2.py | 108 ---
baselines/Linear/Electricity.py | 108 ---
baselines/Linear/ExchangeRate.py | 108 ---
baselines/Linear/METR-LA.py | 109 ---
baselines/Linear/PEMS-BAY.py | 109 ---
baselines/Linear/PEMS04.py | 109 ---
baselines/Linear/PEMS08.py | 109 ---
baselines/Linear/Weather.py | 108 ---
baselines/Linear/arch/__init__.py | 1 -
baselines/Linear/arch/linear.py | 30 -
baselines/Linear/run.sh | 10 -
baselines/MLP/M4.py | 105 ---
baselines/MLP/MLP_METR-LA.py | 113 ---
baselines/MLP/mlp_arch.py | 25 -
baselines/MTGNN/METR-LA.py | 173 +++--
baselines/MTGNN/PEMS-BAY.py | 173 +++--
baselines/MTGNN/PEMS03.py | 173 +++--
baselines/MTGNN/PEMS04.py | 173 +++--
baselines/MTGNN/PEMS07.py | 173 +++--
baselines/MTGNN/PEMS08.py | 173 +++--
baselines/MTGNN/run.sh | 7 -
baselines/MTGNN/runner/mtgnn_runner.py | 40 +-
baselines/MegaCRN/METR-LA.py | 148 ++++
baselines/MegaCRN/MegaCRN_METR-LA.py | 114 ---
baselines/MegaCRN/arch/megacrn_arch.py | 28 +-
baselines/MegaCRN/loss/loss.py | 2 +-
baselines/NBeats/ETTh1.py | 162 ++--
baselines/NBeats/ETTm1.py | 162 ++--
baselines/NBeats/Electricity.py | 161 ++--
baselines/NBeats/ExchangeRate.py | 162 ++--
baselines/NBeats/METR-LA.py | 116 ---
baselines/NBeats/PEMS-BAY.py | 116 ---
baselines/NBeats/PEMS03.py | 116 ---
baselines/NBeats/PEMS04.py | 116 ---
baselines/NBeats/PEMS04_LTSF.py | 166 +++--
baselines/NBeats/PEMS07.py | 116 ---
baselines/NBeats/PEMS08.py | 116 ---
baselines/NBeats/PEMS08_LTSF.py | 166 +++--
baselines/NBeats/Weather.py | 162 ++--
baselines/NBeats/arch/nbeats.py | 6 +-
baselines/NBeats/run.sh | 15 -
baselines/NBeats_M4/M4.py | 120 ---
baselines/NBeats_M4/arch/__init__.py | 1 -
baselines/NBeats_M4/arch/nbeats.py | 197 -----
baselines/NHiTS/ETTm2.py | 165 +++--
baselines/NHiTS/arch/nhits.py | 2 +-
baselines/NLinear/ETTh1.py | 150 ++--
baselines/NLinear/ETTh2.py | 150 ++--
baselines/NLinear/ETTm1.py | 150 ++--
baselines/NLinear/ETTm2.py | 150 ++--
baselines/NLinear/Electricity.py | 150 ++--
baselines/NLinear/ExchangeRate.py | 150 ++--
baselines/NLinear/METR-LA.py | 109 ---
baselines/NLinear/PEMS-BAY.py | 109 ---
baselines/NLinear/PEMS04.py | 109 ---
baselines/NLinear/PEMS04_LTSF.py | 137 ++++
baselines/NLinear/PEMS08.py | 109 ---
baselines/NLinear/PEMS08_LTSF.py | 137 ++++
baselines/NLinear/Weather.py | 150 ++--
baselines/NLinear/run.sh | 10 -
baselines/PatchTST/ETTh1.py | 162 ++--
baselines/PatchTST/ETTh2.py | 162 ++--
baselines/PatchTST/ETTm1.py | 162 ++--
baselines/PatchTST/ETTm2.py | 174 +++--
baselines/PatchTST/Electricity.py | 164 +++--
baselines/PatchTST/ExchangeRate.py | 162 ++--
baselines/PatchTST/M4.py | 119 ---
baselines/PatchTST/PEMS04.py | 130 ----
baselines/PatchTST/PEMS04_LTSF.py | 154 ++++
baselines/PatchTST/PEMS08.py | 130 ----
baselines/PatchTST/PEMS08_LTSF.py | 154 ++++
baselines/PatchTST/Weather.py | 162 ++--
baselines/PatchTST/arch/patchtst_arch.py | 22 +-
baselines/PatchTST/arch/patchtst_backbone.py | 48 +-
baselines/PatchTST/arch/patchtst_layers.py | 12 +-
baselines/PatchTST/run.sh | 10 -
baselines/Pyraformer/ETTh1.py | 167 +++--
baselines/Pyraformer/ETTh2.py | 165 +++--
baselines/Pyraformer/ETTm1.py | 169 +++--
baselines/Pyraformer/ETTm2.py | 169 +++--
baselines/Pyraformer/Electricity.py | 168 +++--
baselines/Pyraformer/ExchangeRate.py | 166 +++--
baselines/Pyraformer/PEMS04.py | 126 ----
baselines/Pyraformer/PEMS04_LTSF.py | 157 ++++
baselines/Pyraformer/PEMS08.py | 126 ----
baselines/Pyraformer/PEMS08_LTSF.py | 157 ++++
baselines/Pyraformer/Weather.py | 166 +++--
baselines/Pyraformer/run.sh | 10 -
baselines/STAEformer/METR-LA.py | 168 +++--
baselines/STAEformer/PEMS-BAY.py | 168 +++--
baselines/STAEformer/PEMS03.py | 172 +++--
baselines/STAEformer/PEMS04.py | 172 +++--
baselines/STAEformer/PEMS07.py | 174 +++--
baselines/STAEformer/PEMS08.py | 172 +++--
baselines/STAEformer/run.sh | 7 -
baselines/STEP/README.md | 3 +-
baselines/STEP/STEP_METR-LA.py | 178 +++--
baselines/STEP/STEP_PEMS-BAY.py | 160 ----
baselines/STEP/STEP_PEMS03.py | 155 ----
baselines/STEP/STEP_PEMS04.py | 155 ----
baselines/STEP/STEP_PEMS07.py | 155 ----
baselines/STEP/STEP_PEMS08.py | 155 ----
baselines/STEP/TSFormer_METR-LA.py | 152 ++++
.../STEP/{step_arch => arch}/__init__.py | 0
.../discrete_graph_learning.py | 4 +-
.../graphwavenet/__init__.py | 0
.../{step_arch => arch}/graphwavenet/model.py | 0
.../STEP/{step_arch => arch}/similarity.py | 0
baselines/STEP/{step_arch => arch}/step.py | 26 +-
.../{step_arch => arch}/tsformer/__init__.py | 0
.../STEP/{step_arch => arch}/tsformer/mask.py | 0
.../{step_arch => arch}/tsformer/patch.py | 0
.../tsformer/positional_encoding.py | 0
.../tsformer/transformer_layers.py | 0
.../{step_arch => arch}/tsformer/tsformer.py | 2 +-
.../STEP/{step_loss => loss}/__init__.py | 0
.../STEP/{step_loss => loss}/step_loss.py | 2 +-
baselines/STEP/runner/__init__.py | 3 +
.../tsformer_runner.py} | 48 +-
baselines/STEP/step_data/__init__.py | 4 -
.../STEP/step_data/forecasting_dataset.py | 80 --
.../STEP/step_data/pretraining_dataset.py | 1 -
baselines/STEP/step_runner/__init__.py | 3 -
baselines/STGCN/METR-LA.py | 165 +++--
baselines/STGCN/PEMS-BAY.py | 165 +++--
baselines/STGCN/PEMS03.py | 167 +++--
baselines/STGCN/PEMS04.py | 167 +++--
baselines/STGCN/PEMS07.py | 167 +++--
baselines/STGCN/PEMS08.py | 169 +++--
baselines/STGCN/run.sh | 7 -
baselines/STGODE/METR-LA.py | 166 +++--
baselines/STGODE/PEMS-BAY.py | 166 +++--
baselines/STGODE/PEMS04.py | 166 +++--
baselines/STGODE/PEMS08.py | 166 +++--
baselines/STGODE/generate_matrices.py | 14 +-
baselines/STID/CA.py | 165 +++--
baselines/STID/ETTh1.py | 164 +++--
baselines/STID/ETTh2.py | 170 +++--
baselines/STID/ETTm1.py | 166 +++--
baselines/STID/ETTm2.py | 166 +++--
baselines/STID/Electricity.py | 167 +++--
baselines/STID/ExchangeRate.py | 162 ++--
baselines/STID/GBA.py | 162 ++--
baselines/STID/GLA.py | 162 ++--
baselines/STID/Illness.py | 161 ++--
baselines/STID/METR-LA.py | 160 ++--
baselines/STID/PEMS-BAY.py | 160 ++--
baselines/STID/PEMS03.py | 160 ++--
baselines/STID/PEMS04.py | 160 ++--
baselines/STID/PEMS07.py | 160 ++--
baselines/STID/PEMS08.py | 160 ++--
baselines/STID/SD.py | 164 +++--
baselines/STID/Traffic.py | 162 ++--
baselines/STID/Weather.py | 160 ++--
baselines/STID/run.sh | 8 -
baselines/STID_M4/M4.py | 115 ---
baselines/STID_M4/arch/__init__.py | 3 -
baselines/STID_M4/arch/mlp.py | 29 -
baselines/STID_M4/arch/stid_arch.py | 108 ---
baselines/STNorm/METR-LA.py | 156 ++--
baselines/STNorm/PEMS-BAY.py | 156 ++--
baselines/STNorm/PEMS03.py | 156 ++--
baselines/STNorm/PEMS04.py | 156 ++--
baselines/STNorm/PEMS07.py | 156 ++--
baselines/STNorm/PEMS08.py | 156 ++--
baselines/STNorm/run.sh | 7 -
baselines/STWave/METR-LA.py | 157 ++--
baselines/STWave/PEMS-BAY.py | 157 ++--
baselines/STWave/PEMS03.py | 157 ++--
baselines/STWave/PEMS04.py | 157 ++--
baselines/STWave/PEMS07.py | 157 ++--
baselines/STWave/PEMS08.py | 159 ++--
baselines/STWave/arch/stwave_arch.py | 26 +-
baselines/STWave/loss.py | 2 +-
baselines/STWave/run.sh | 7 -
baselines/StemGNN/METR-LA.py | 163 +++--
baselines/StemGNN/PEMS-BAY.py | 165 +++--
baselines/StemGNN/PEMS03.py | 167 +++--
baselines/StemGNN/PEMS04.py | 169 +++--
baselines/StemGNN/PEMS07.py | 167 +++--
baselines/StemGNN/PEMS08.py | 167 +++--
baselines/StemGNN/run.sh | 7 -
baselines/TimesNet/ETTh1.py | 158 ++--
baselines/TimesNet/ETTh2.py | 158 ++--
baselines/TimesNet/ETTm1.py | 160 ++--
baselines/TimesNet/ETTm2.py | 160 ++--
baselines/TimesNet/Electricity.py | 168 +++--
baselines/TimesNet/ExchangeRate.py | 162 ++--
baselines/TimesNet/Weather.py | 162 ++--
baselines/TimesNet/run.sh | 10 -
baselines/Triformer/ETTh1.py | 168 +++--
baselines/Triformer/ETTh2.py | 168 +++--
baselines/Triformer/ETTm1.py | 168 +++--
baselines/Triformer/ETTm2.py | 168 +++--
baselines/Triformer/Electricity.py | 170 +++--
baselines/Triformer/ExchangeRate.py | 168 +++--
baselines/Triformer/PEMS04.py | 109 ---
baselines/Triformer/PEMS04_LTSF.py | 132 ++++
baselines/Triformer/PEMS08.py | 108 ---
baselines/Triformer/PEMS08_LTSF.py | 132 ++++
baselines/Triformer/Weather.py | 168 +++--
baselines/Triformer/run.sh | 10 -
baselines/WaveNet/ETTh1.py | 107 ---
baselines/WaveNet/ETTh2.py | 107 ---
baselines/WaveNet/METR-LA.py | 142 ++++
basicts/__init__.py | 6 +-
basicts/archs/example_arch.py | 25 -
basicts/data/__init__.py | 14 +-
basicts/data/base_dataset.py | 100 +++
basicts/data/dataset_zoo/m4_dataset.py | 84 ---
.../data/dataset_zoo/simple_tsf_dataset.py | 73 --
basicts/data/registry.py | 3 -
basicts/data/simple_tsf_dataset.py | 124 ++++
basicts/data/transform.py | 127 ----
basicts/launcher.py | 132 +++-
basicts/losses/__init__.py | 3 -
basicts/losses/losses.py | 118 ---
basicts/metrics/__init__.py | 22 +-
basicts/metrics/mae.py | 38 +
basicts/metrics/mape.py | 53 ++
basicts/metrics/mse.py | 38 +
basicts/metrics/rmse.py | 25 +
basicts/metrics/wape.py | 27 +-
basicts/runners/__init__.py | 6 +-
basicts/runners/base_m4_runner.py | 335 ---------
basicts/runners/base_runner.py | 181 +++--
basicts/runners/base_tsf_runner.py | 692 +++++++++++-------
basicts/runners/runner_zoo/m4_tsf_runner.py | 79 --
.../runners/runner_zoo/simple_tsf_runner.py | 97 ++-
basicts/scaler/__init__.py | 9 +
basicts/scaler/base_scaler.py | 47 ++
basicts/scaler/min_max_scaler.py | 94 +++
basicts/scaler/z_score_scaler.py | 102 +++
basicts/utils/__init__.py | 17 +-
basicts/utils/adjacent_matrix_norm.py | 110 +--
basicts/utils/logging.py | 15 -
basicts/utils/m4.py | 221 ------
basicts/utils/misc.py | 82 +--
basicts/utils/serialization.py | 144 ++--
datasets/README.md | 42 +-
examples/arch.py | 52 ++
examples/complete_config.py | 213 ++++++
examples/regular_config.py | 116 +++
experiments/evaluate.py | 23 +
experiments/inference.py | 44 --
experiments/run_m4.py | 54 --
experiments/train.py | 12 +-
requirements.txt | 18 +-
.../generate_training_data.py | 231 +++---
.../CA/generate_training_data.py | 275 +++----
.../ETTh1/generate_training_data.py | 226 +++---
.../ETTh2/generate_training_data.py | 187 +++--
.../ETTm1/generate_training_data.py | 225 +++---
.../ETTm2/generate_training_data.py | 187 +++--
.../Electricity/generate_training_data.py | 227 +++---
.../ExchangeRate/generate_training_data.py | 225 +++---
.../GBA/generate_training_data.py | 207 ++++--
.../GLA/generate_training_data.py | 207 ++++--
.../Gaussian/generate_training_data.py | 182 ++---
.../Gaussian/simulate_data.py | 20 +-
.../Illness/generate_training_data.py | 228 +++---
.../M4/generate_training_data.py | 107 ---
.../METR-LA/generate_training_data.py | 259 +++----
.../PEMS-BAY/generate_training_data.py | 259 +++----
.../PEMS03/generate_training_data.py | 255 +++----
.../PEMS04/generate_training_data.py | 255 +++----
.../PEMS07/generate_training_data.py | 255 +++----
.../PEMS08/generate_training_data.py | 255 +++----
.../Pulse/generate_training_data.py | 183 ++---
.../data_preparation/Pulse/simulate_data.py | 30 +-
.../SD/generate_training_data.py | 206 ++++--
.../Traffic/generate_training_data.py | 234 +++---
.../Weather/generate_training_data.py | 225 +++---
scripts/data_preparation/run.sh | 43 +-
tutorial/config_design.md | 40 +
tutorial/dataset_design.md | 69 ++
tutorial/figures/DatasetDesign.jpeg | Bin 0 -> 144113 bytes
tutorial/figures/DesignConvention.jpeg | Bin 0 -> 274687 bytes
tutorial/getting_started.md | 177 +++++
tutorial/metrics_design.md | 66 ++
tutorial/model_design.md | 41 ++
tutorial/overall_design.md | 33 +
tutorial/runner_design.md | 111 +++
tutorial/scaler_design.md | 44 ++
477 files changed, 26539 insertions(+), 29642 deletions(-)
delete mode 100644 baselines/AGCRN/run.sh
delete mode 100644 baselines/Autoformer/PEMS04.py
create mode 100644 baselines/Autoformer/PEMS04_LTSF.py
delete mode 100644 baselines/Autoformer/PEMS08.py
create mode 100644 baselines/Autoformer/PEMS08_LTSF.py
delete mode 100644 baselines/Autoformer/run.sh
delete mode 100644 baselines/BGSLF/BGSLF_METR-LA.py
delete mode 100644 baselines/BGSLF/arch/__init__.py
delete mode 100644 baselines/BGSLF/arch/cell.py
delete mode 100644 baselines/BGSLF/arch/model.py
delete mode 100644 baselines/Crossformer/ETTh2.py
delete mode 100644 baselines/Crossformer/ETTm2.py
delete mode 100644 baselines/Crossformer/ExchangeRate.py
delete mode 100644 baselines/Crossformer/PEMS04.py
create mode 100644 baselines/Crossformer/PEMS04_LTSF.py
delete mode 100644 baselines/Crossformer/PEMS08.py
create mode 100644 baselines/Crossformer/PEMS08_LTSF.py
delete mode 100644 baselines/Crossformer/run.sh
delete mode 100644 baselines/D2STGNN/run.sh
delete mode 100644 baselines/DCRNN/run.sh
delete mode 100644 baselines/DGCRN/run.sh
delete mode 100644 baselines/DLinear/METR-LA.py
delete mode 100644 baselines/DLinear/PEMS-BAY.py
delete mode 100644 baselines/DLinear/PEMS04.py
create mode 100644 baselines/DLinear/PEMS04_LTSF.py
delete mode 100644 baselines/DLinear/PEMS08.py
create mode 100644 baselines/DLinear/PEMS08_LTSF.py
delete mode 100644 baselines/DLinear/run.sh
delete mode 100644 baselines/DSFormer/METR-LA.py
delete mode 100644 baselines/DSFormer/PEMS04.py
create mode 100644 baselines/DSFormer/PEMS04_LTSF.py
delete mode 100644 baselines/DSFormer/PEMS08.py
create mode 100644 baselines/DSFormer/PEMS08_LTSF.py
delete mode 100644 baselines/DSFormer/run.sh
delete mode 100644 baselines/DeepAR/METR-LA.py
delete mode 100644 baselines/DeepAR/PEMS-BAY.py
delete mode 100644 baselines/DeepAR/PEMS03.py
delete mode 100644 baselines/DeepAR/PEMS07.py
delete mode 100644 baselines/DeepAR/run.sh
delete mode 100644 baselines/DeepAR_M4/M4.py
delete mode 100644 baselines/DeepAR_M4/arch/__init__.py
delete mode 100644 baselines/DeepAR_M4/arch/deepar.py
delete mode 100644 baselines/DeepAR_M4/arch/distributions.py
delete mode 100644 baselines/DeepAR_M4/loss/__init__.py
delete mode 100644 baselines/DeepAR_M4/loss/gaussian.py
delete mode 100644 baselines/DeepAR_M4/runner/__init__.py
delete mode 100644 baselines/DeepAR_M4/runner/deepar_runner.py
delete mode 100644 baselines/FEDformer/PEMS04.py
create mode 100644 baselines/FEDformer/PEMS04_LTSF.py
delete mode 100644 baselines/FEDformer/PEMS08.py
create mode 100644 baselines/FEDformer/PEMS08_LTSF.py
delete mode 100644 baselines/FEDformer/run.sh
delete mode 100644 baselines/GMSDR/METR-LA.py
delete mode 100644 baselines/GMSDR/PEMS-BAY.py
delete mode 100644 baselines/GMSDR/PEMS03.py
delete mode 100644 baselines/GMSDR/PEMS04.py
delete mode 100644 baselines/GMSDR/PEMS07.py
delete mode 100644 baselines/GMSDR/PEMS08.py
delete mode 100644 baselines/GMSDR/arch/__init__.py
delete mode 100644 baselines/GMSDR/arch/gmsdr_arch.py
delete mode 100644 baselines/GMSDR/arch/gmsdr_cell.py
delete mode 100644 baselines/GMSDR/run.sh
delete mode 100644 baselines/GTS/run.sh
delete mode 100644 baselines/GWNet/run.sh
delete mode 100644 baselines/HI/HI_METR-LA_in96_out96.py
create mode 100644 baselines/HI/METR-LA.py
delete mode 100644 baselines/Informer/METR-LA.py
delete mode 100644 baselines/Informer/PEMS-BAY.py
delete mode 100644 baselines/Informer/PEMS04.py
create mode 100644 baselines/Informer/PEMS04_LTSF.py
delete mode 100644 baselines/Informer/PEMS08.py
create mode 100644 baselines/Informer/PEMS08_LTSF.py
delete mode 100644 baselines/Informer/run.sh
delete mode 100644 baselines/LSTM/CA.py
delete mode 100644 baselines/LSTM/GBA.py
delete mode 100644 baselines/LSTM/GLA.py
delete mode 100644 baselines/LSTM/SD.py
delete mode 100644 baselines/LSTM/arch/__init__.py
delete mode 100644 baselines/LSTM/arch/lstm_arch.py
delete mode 100644 baselines/LightGBM/README.md
delete mode 100644 baselines/LightGBM/Weather.py
delete mode 100644 baselines/LightGBM/evaluate.py
delete mode 100644 baselines/LightGBM/evaluate_ar.py
delete mode 100644 baselines/LightGBM/evaluate_m4_ar.py
delete mode 100644 baselines/LightGBM/run.sh
delete mode 100644 baselines/Linear/ETTh1.py
delete mode 100644 baselines/Linear/ETTh2.py
delete mode 100644 baselines/Linear/ETTm1.py
delete mode 100644 baselines/Linear/ETTm2.py
delete mode 100644 baselines/Linear/Electricity.py
delete mode 100644 baselines/Linear/ExchangeRate.py
delete mode 100644 baselines/Linear/METR-LA.py
delete mode 100644 baselines/Linear/PEMS-BAY.py
delete mode 100644 baselines/Linear/PEMS04.py
delete mode 100644 baselines/Linear/PEMS08.py
delete mode 100644 baselines/Linear/Weather.py
delete mode 100644 baselines/Linear/arch/__init__.py
delete mode 100644 baselines/Linear/arch/linear.py
delete mode 100644 baselines/Linear/run.sh
delete mode 100644 baselines/MLP/M4.py
delete mode 100644 baselines/MLP/MLP_METR-LA.py
delete mode 100644 baselines/MLP/mlp_arch.py
delete mode 100644 baselines/MTGNN/run.sh
create mode 100644 baselines/MegaCRN/METR-LA.py
delete mode 100644 baselines/MegaCRN/MegaCRN_METR-LA.py
delete mode 100644 baselines/NBeats/METR-LA.py
delete mode 100644 baselines/NBeats/PEMS-BAY.py
delete mode 100644 baselines/NBeats/PEMS03.py
delete mode 100644 baselines/NBeats/PEMS04.py
delete mode 100644 baselines/NBeats/PEMS07.py
delete mode 100644 baselines/NBeats/PEMS08.py
delete mode 100644 baselines/NBeats/run.sh
delete mode 100644 baselines/NBeats_M4/M4.py
delete mode 100644 baselines/NBeats_M4/arch/__init__.py
delete mode 100644 baselines/NBeats_M4/arch/nbeats.py
delete mode 100644 baselines/NLinear/METR-LA.py
delete mode 100644 baselines/NLinear/PEMS-BAY.py
delete mode 100644 baselines/NLinear/PEMS04.py
create mode 100644 baselines/NLinear/PEMS04_LTSF.py
delete mode 100644 baselines/NLinear/PEMS08.py
create mode 100644 baselines/NLinear/PEMS08_LTSF.py
delete mode 100644 baselines/NLinear/run.sh
delete mode 100644 baselines/PatchTST/M4.py
delete mode 100644 baselines/PatchTST/PEMS04.py
create mode 100644 baselines/PatchTST/PEMS04_LTSF.py
delete mode 100644 baselines/PatchTST/PEMS08.py
create mode 100644 baselines/PatchTST/PEMS08_LTSF.py
delete mode 100644 baselines/PatchTST/run.sh
delete mode 100644 baselines/Pyraformer/PEMS04.py
create mode 100644 baselines/Pyraformer/PEMS04_LTSF.py
delete mode 100644 baselines/Pyraformer/PEMS08.py
create mode 100644 baselines/Pyraformer/PEMS08_LTSF.py
delete mode 100644 baselines/Pyraformer/run.sh
delete mode 100644 baselines/STAEformer/run.sh
delete mode 100644 baselines/STEP/STEP_PEMS-BAY.py
delete mode 100644 baselines/STEP/STEP_PEMS03.py
delete mode 100644 baselines/STEP/STEP_PEMS04.py
delete mode 100644 baselines/STEP/STEP_PEMS07.py
delete mode 100644 baselines/STEP/STEP_PEMS08.py
create mode 100644 baselines/STEP/TSFormer_METR-LA.py
rename baselines/STEP/{step_arch => arch}/__init__.py (100%)
rename baselines/STEP/{step_arch => arch}/discrete_graph_learning.py (97%)
rename baselines/STEP/{step_arch => arch}/graphwavenet/__init__.py (100%)
rename baselines/STEP/{step_arch => arch}/graphwavenet/model.py (100%)
rename baselines/STEP/{step_arch => arch}/similarity.py (100%)
rename baselines/STEP/{step_arch => arch}/step.py (68%)
rename baselines/STEP/{step_arch => arch}/tsformer/__init__.py (100%)
rename baselines/STEP/{step_arch => arch}/tsformer/mask.py (100%)
rename baselines/STEP/{step_arch => arch}/tsformer/patch.py (100%)
rename baselines/STEP/{step_arch => arch}/tsformer/positional_encoding.py (100%)
rename baselines/STEP/{step_arch => arch}/tsformer/transformer_layers.py (100%)
rename baselines/STEP/{step_arch => arch}/tsformer/tsformer.py (98%)
rename baselines/STEP/{step_loss => loss}/__init__.py (100%)
rename baselines/STEP/{step_loss => loss}/step_loss.py (93%)
create mode 100644 baselines/STEP/runner/__init__.py
rename baselines/STEP/{step_runner/step_runner.py => runner/tsformer_runner.py} (53%)
delete mode 100644 baselines/STEP/step_data/__init__.py
delete mode 100644 baselines/STEP/step_data/forecasting_dataset.py
delete mode 100644 baselines/STEP/step_data/pretraining_dataset.py
delete mode 100644 baselines/STEP/step_runner/__init__.py
delete mode 100644 baselines/STGCN/run.sh
delete mode 100644 baselines/STID/run.sh
delete mode 100644 baselines/STID_M4/M4.py
delete mode 100644 baselines/STID_M4/arch/__init__.py
delete mode 100644 baselines/STID_M4/arch/mlp.py
delete mode 100644 baselines/STID_M4/arch/stid_arch.py
delete mode 100644 baselines/STNorm/run.sh
delete mode 100644 baselines/STWave/run.sh
delete mode 100644 baselines/StemGNN/run.sh
delete mode 100644 baselines/TimesNet/run.sh
delete mode 100644 baselines/Triformer/PEMS04.py
create mode 100644 baselines/Triformer/PEMS04_LTSF.py
delete mode 100644 baselines/Triformer/PEMS08.py
create mode 100644 baselines/Triformer/PEMS08_LTSF.py
delete mode 100644 baselines/Triformer/run.sh
delete mode 100644 baselines/WaveNet/ETTh1.py
delete mode 100644 baselines/WaveNet/ETTh2.py
create mode 100644 baselines/WaveNet/METR-LA.py
delete mode 100644 basicts/archs/example_arch.py
create mode 100644 basicts/data/base_dataset.py
delete mode 100644 basicts/data/dataset_zoo/m4_dataset.py
delete mode 100644 basicts/data/dataset_zoo/simple_tsf_dataset.py
delete mode 100644 basicts/data/registry.py
create mode 100644 basicts/data/simple_tsf_dataset.py
delete mode 100644 basicts/data/transform.py
delete mode 100644 basicts/losses/__init__.py
delete mode 100644 basicts/losses/losses.py
create mode 100644 basicts/metrics/mae.py
create mode 100644 basicts/metrics/mape.py
create mode 100644 basicts/metrics/mse.py
create mode 100644 basicts/metrics/rmse.py
delete mode 100644 basicts/runners/base_m4_runner.py
delete mode 100644 basicts/runners/runner_zoo/m4_tsf_runner.py
create mode 100644 basicts/scaler/__init__.py
create mode 100644 basicts/scaler/base_scaler.py
create mode 100644 basicts/scaler/min_max_scaler.py
create mode 100644 basicts/scaler/z_score_scaler.py
delete mode 100644 basicts/utils/logging.py
delete mode 100644 basicts/utils/m4.py
create mode 100644 examples/arch.py
create mode 100644 examples/complete_config.py
create mode 100644 examples/regular_config.py
create mode 100644 experiments/evaluate.py
delete mode 100644 experiments/inference.py
delete mode 100644 experiments/run_m4.py
delete mode 100644 scripts/data_preparation/M4/generate_training_data.py
create mode 100644 tutorial/config_design.md
create mode 100644 tutorial/dataset_design.md
create mode 100644 tutorial/figures/DatasetDesign.jpeg
create mode 100644 tutorial/figures/DesignConvention.jpeg
create mode 100644 tutorial/getting_started.md
create mode 100644 tutorial/metrics_design.md
create mode 100644 tutorial/model_design.md
create mode 100644 tutorial/overall_design.md
create mode 100644 tutorial/runner_design.md
create mode 100644 tutorial/scaler_design.md
diff --git a/.gitignore b/.gitignore
index ede67f56..96cc9655 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,7 @@
__pycache__/
.vscode/
checkpoints/
-datasets/raw_data
+datasets/
todo.md
gpu_task.py
cmd.sh
@@ -14,11 +14,11 @@ cmd.sh
*.pkl
*.h5
*.pt
-core*
*.p
*.pickle
*.pyc
*.txt
+*.core
*.py[cod]
*$py.class
diff --git a/.pylintrc b/.pylintrc
index ca0cd856..093ba6ab 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -430,6 +430,6 @@ valid-metaclass-classmethod-first-arg=mcs
# Exceptions that will emit a warning when being caught. Defaults to
# "Exception"
-overgeneral-exceptions=StandardError,
- Exception,
- BaseException
\ No newline at end of file
+overgeneral-exceptions=builtins.StandardError,
+ builtins.Exception,
+ builtins.BaseException
diff --git a/README.md b/README.md
index 1baf1a0c..6a7563d5 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
-
A Standard and Fair Time Series Forecasting Benchmark and Toolkit.
+
A Fair and Scalable Time Series Forecasting Benchmark and Toolkit.
---
@@ -15,7 +15,23 @@
-$\text{BasicTS}^{+}$ (**Basic** **T**ime **S**eries **P**lus) is an enhanced benchmark and toolbox designed for time series forecasting. $\text{BasicTS}^{+}$ evolved from its predecessor, [BasicTS](https://github.com/zezhishao/BasicTS/blob/v1/README.md), and now has robust support for spatial-temporal forecasting and long time-series forecasting as well as more general tasks, such as M4 competition. For brevity and consistency, we will interchangeably refer to this project as $\text{BasicTS}^{+}$ and $\text{BasicTS}$.
+
+
+🎉 [**Getting Started**](./tutorial/getting_started.md) **|**
+💡 [**Overall Design**](./tutorial/overall_design.md)
+
+📦 [**Dataset**](./tutorial/dataset_design.md) **|**
+🛠️ [**Scaler**](./tutorial/scaler_design.md) **|**
+🧠 [**Model**](./tutorial/model_design.md) **|**
+📉 [**Metrics**](./tutorial/metrics_design.md) **|**
+🏃‍♂️ [**Runner**](./tutorial/runner_design.md) **|**
+📜 [**Config**](./tutorial/config_design.md) **|**
+📜 [**Baselines**](./baselines/)
+
+
+
+
+$\text{BasicTS}^{+}$ (**Basic** **T**ime **S**eries **P**lus) is an enhanced benchmark and toolbox designed for time series forecasting. $\text{BasicTS}^{+}$ evolved from its predecessor, [BasicTS](https://github.com/zezhishao/BasicTS/blob/v1/README.md), and now has robust support for spatial-temporal forecasting and long time-series forecasting as well as more general tasks. For brevity and consistency, we will interchangeably refer to this project as $\text{BasicTS}^{+}$ and $\text{BasicTS}$.
On the one hand, BasicTS utilizes a ***unified and standard pipeline*** to give a ***fair and exhaustive*** reproduction and comparison of popular deep learning-based models.
@@ -23,16 +39,16 @@ On the other hand, BasicTS provides users with ***easy-to-use and extensible int
We are collecting **TODOs** and **HOWTOs**, if you need more features (*e.g.* more datasets or baselines) or have any questions, please feel free to create an issue or leave a comment [here](https://github.com/zezhishao/BasicTS/issues/95).
-If you find this repository useful for your work, please cite it as [such](./citation.bib):
-
-```LaTeX
-@article{shao2023exploring,
- title={Exploring Progress in Multivariate Time Series Forecasting: Comprehensive Benchmarking and Heterogeneity Analysis},
- author={Shao, Zezhi and Wang, Fei and Xu, Yongjun and Wei, Wei and Yu, Chengqing and Zhang, Zhao and Yao, Di and Jin, Guangyin and Cao, Xin and Cong, Gao and others},
- journal={arXiv preprint arXiv:2310.06119},
- year={2023}
-}
-```
+> [!IMPORTANT]
+> If you find this repository useful for your work, please cite it as [such](./citation.bib):
+> ```LaTeX
+> @article{shao2023exploring,
+> title={Exploring Progress in Multivariate Time Series Forecasting: Comprehensive Benchmarking and Heterogeneity Analysis},
+> author={Shao, Zezhi and Wang, Fei and Xu, Yongjun and Wei, Wei and Yu, Chengqing and Zhang, Zhao and Yao, Di and Jin, Guangyin and Cao, Xin and Cong, Gao and others},
+> journal={arXiv preprint arXiv:2310.06119},
+> year={2023}
+> }
+> ```
## ✨ Highlighted Features
@@ -81,108 +97,14 @@ BasicTS implements a wealth of models, including classic models, spatial-tempora
- DCRNN, Graph WaveNet, MTGNN, STID, D2STGNN, STEP, DGCRN, DGCRN, STNorm, AGCRN, GTS, StemGNN, MegaCRN, STGCN, STWave, STAEformer, GMSDR, ...
- Informer, Autoformer, FEDformer, Pyraformer, DLinear, NLinear, Triformer, Crossformer, ...
-## 💿 Dependencies
-
-
- Preliminaries
-
-
-### OS
-
-We recommend using BasicTS on Linux systems (*e.g.* Ubuntu and CentOS).
-Other systems (*e.g.*, Windows and macOS) have not been tested.
-
-### Python
-
-Python >= 3.6 (recommended >= 3.9).
-
-[Miniconda](https://docs.conda.io/en/latest/miniconda.html) or [Anaconda](https://www.anaconda.com/) are recommended to create a virtual python environment.
-
-### Other Dependencies
-
-
-BasicTS is built based on PyTorch and [EasyTorch](https://github.com/cnstark/easytorch).
-You can install PyTorch following the instruction in [PyTorch](https://pytorch.org/get-started/locally/). For example:
-
-```bash
-pip install torch==1.10.0+cu111 torchvision==0.11.0+cu111 torchaudio==0.10.0 -f https://download.pytorch.org/whl/torch_stable.html
-```
-
-**After ensuring** that PyTorch is installed correctly, you can install other dependencies via:
-
-```bash
-pip install -r requirements.txt
-```
-
-### Warning
-
-BasicTS is built on PyTorch 1.9.1 or 1.10.0, while other versions have not been tested.
-
-
-## 🎯 Getting Started of Developing with BasicTS
-
-### Preparing Data
-
-- **Clone BasicTS**
+## 🚀 Installation and Quick Start
- ```bash
- cd /path/to/your/project
- git clone https://github.com/zezhishao/BasicTS.git
- ```
+For detailed instructions, please refer to the [Getting Started](./tutorial/getting_started.md) tutorial.
-- **Download Raw Data**
-
- You can download all the raw datasets at [Google Drive](https://drive.google.com/drive/folders/14EJVODCU48fGK0FkyeVom_9lETh80Yjp) or [Baidu Yun](https://pan.baidu.com/s/10gOPtlC9M4BEjx89VD1Vbw)(password: 6v0a), and unzip them to `datasets/raw_data/`.
-
-- **Pre-process Data**
-
- ```bash
- cd /path/to/your/project
- python scripts/data_preparation/${DATASET_NAME}/generate_training_data.py
- ```
-
- Replace `${DATASET_NAME}` with one of `METR-LA`, `PEMS-BAY`, `PEMS03`, `PEMS04`, `PEMS07`, `PEMS08`, or any other supported dataset. The processed data will be placed in `datasets/${DATASET_NAME}`.
-
-
-### 3 Steps to Evaluate Your Model
-
-- **Define Your Model Architecture**
-
- The `forward` function needs to follow the conventions of BasicTS. You can find an example of the Multi-Layer Perceptron (`MLP`) model in [baselines/MLP/mlp_arch.py](baselines/MLP/mlp_arch.py)
-
-- **Define Your Runner for Your Model** (Optional)
-
- BasicTS provides a unified and standard pipeline in `basicts.runner.BaseTimeSeriesForecastingRunner`.
- Nevertheless, you still need to define the specific forward process (the `forward` function in the **runner**).
- Fortunately, BasicTS also provides such an implementation in `basicts.runner.SimpleTimeSeriesForecastingRunner`, which can cover most of the situations.
- The runner for the `MLP` model can also use this built-in runner.
- You can also find more runners in `basicts.runners.runner_zoo` to learn more about the runner design.
-
-- **Configure your Configuration File**
-
- You can configure all the details of the pipeline and hyperparameters in a configuration file, *i.e.*, **everything is based on config**.
- The configuration file is a `.py` file, in which you can import your model and runner and set all the options. BasicTS uses `EasyDict` to serve as a parameter container, which is extensible and flexible to use.
- An example of the configuration file for the `MLP` model on the `METR-LA` dataset can be found in [baselines/MLP/MLP_METR-LA.py](baselines/MLP/MLP_METR-LA.py)
-
-### Run It!
-
-- **Reproducing Built-in Models**
-
- BasicTS provides a wealth of built-in models. You can reproduce these models by running the following command:
-
- ```bash
- python experiments/train.py -c baselines/${MODEL_NAME}/${DATASET_NAME}.py --gpus '0'
- ```
-
- Replace `${DATASET_NAME}` and `${MODEL_NAME}` with any supported models and datasets. For example, you can run Graph WaveNet on METR-LA dataset by:
-
- ```bash
- python experiments/train.py -c baselines/GWNet/METR-LA.py --gpus '0'
- ```
-
-- **Customized Your Own Model**
+## 📉 Main Results
- [Example: Multi-Layer Perceptron (MLP)](baselines/MLP)
+See the paper *[Exploring Progress in Multivariate Time Series Forecasting:
+Comprehensive Benchmarking and Heterogeneity Analysis](https://arxiv.org/pdf/2310.06119.pdf).*
## Contributors ✨
@@ -212,11 +134,6 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d
This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome!
-## 📉 Main Results
-
-See the paper *[Exploring Progress in Multivariate Time Series Forecasting:
-Comprehensive Benchmarking and Heterogeneity Analysis](https://arxiv.org/pdf/2310.06119.pdf).*
-
## 🔗 Acknowledgement
BasicTS is developed based on [EasyTorch](https://github.com/cnstark/easytorch), an easy-to-use and powerful open-source neural network training framework.
diff --git a/baselines/AGCRN/METR-LA.py b/baselines/AGCRN/METR-LA.py
index 571f87a5..c0f14c3c 100644
--- a/baselines/AGCRN/METR-LA.py
+++ b/baselines/AGCRN/METR-LA.py
@@ -1,39 +1,29 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import AGCRN
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "AGCRN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "AGCRN"
-CFG.MODEL.ARCH = AGCRN
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'METR-LA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = AGCRN
+MODEL_PARAM = {
"num_nodes" : 207,
"input_dim" : 2,
"rnn_units" : 64,
@@ -44,63 +34,95 @@
"embed_dim" : 10,
"cheb_k" : 2
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.003,
-}
+############################## Metrics Configuration ##############################
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
)
-# train data
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {'lr': 0.003}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/AGCRN/PEMS-BAY.py b/baselines/AGCRN/PEMS-BAY.py
index eb6a05a6..8225872b 100644
--- a/baselines/AGCRN/PEMS-BAY.py
+++ b/baselines/AGCRN/PEMS-BAY.py
@@ -1,39 +1,29 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import AGCRN
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "AGCRN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "AGCRN"
-CFG.MODEL.ARCH = AGCRN
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS-BAY' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = AGCRN
+MODEL_PARAM = {
"num_nodes" : 325,
"input_dim" : 2,
"rnn_units" : 64,
@@ -44,63 +34,95 @@
"embed_dim" : 10,
"cheb_k" : 2
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.003,
-}
+############################## Metrics Configuration ##############################
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
)
-# train data
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {'lr': 0.003}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/AGCRN/PEMS03.py b/baselines/AGCRN/PEMS03.py
index 5e045633..a48c8444 100644
--- a/baselines/AGCRN/PEMS03.py
+++ b/baselines/AGCRN/PEMS03.py
@@ -1,39 +1,29 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import AGCRN
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "AGCRN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS03"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "AGCRN"
-CFG.MODEL.ARCH = AGCRN
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS03' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = AGCRN
+MODEL_PARAM = {
"num_nodes" : 358,
"input_dim" : 1,
"rnn_units" : 64,
@@ -44,63 +34,95 @@
"embed_dim" : 10,
"cheb_k" : 2
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.003,
-}
+############################## Metrics Configuration ##############################
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
)
-# train data
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {'lr': 0.003}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
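
The section above is the first appearance of CFG.SCALER, which replaces the normalization that the old dataset classes baked in. A minimal sketch of what a z-score scaler configured this way presumably does (illustrative only; the real basicts.scaler.ZScoreScaler loads the raw data itself, and SketchZScoreScaler below is an invented name for this note):

    import numpy as np

    class SketchZScoreScaler:
        """Illustrative stand-in for basicts.scaler.ZScoreScaler."""

        def __init__(self, train_ratio, norm_each_channel=True, rescale=True):
            self.train_ratio = train_ratio  # statistics are fitted on the train split only
            self.norm_each_channel = norm_each_channel
            self.rescale = rescale          # if False, outputs stay in normalized space
            self.mean = None
            self.std = None

        def fit(self, data):
            # data: (num_steps, num_nodes, num_features)
            train_len = int(len(data) * self.train_ratio)
            axis = 0 if self.norm_each_channel else None  # per-channel vs. global stats
            self.mean = data[:train_len].mean(axis=axis)
            self.std = data[:train_len].std(axis=axis)

        def transform(self, data):
            return (data - self.mean) / self.std

        def inverse_transform(self, data):
            return data * self.std + self.mean if self.rescale else data

    scaler = SketchZScoreScaler(train_ratio=0.6, norm_each_channel=True)
    scaler.fit(np.random.rand(1000, 358, 1))  # shaped like PEMS03: 358 nodes, 1 feature
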
diff --git a/baselines/AGCRN/PEMS04.py b/baselines/AGCRN/PEMS04.py
index 28a1d337..de4dc827 100644
--- a/baselines/AGCRN/PEMS04.py
+++ b/baselines/AGCRN/PEMS04.py
@@ -1,39 +1,29 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import AGCRN
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "AGCRN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "AGCRN"
-CFG.MODEL.ARCH = AGCRN
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = AGCRN
+MODEL_PARAM = {
"num_nodes" : 307,
"input_dim" : 1,
"rnn_units" : 64,
@@ -44,63 +34,95 @@
"embed_dim" : 10,
"cheb_k" : 2
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.003,
-}
+############################## Metrics Configuration ##############################
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
)
-# train data
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {'lr': 0.003}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
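
A note on CFG.METRICS.NULL_VAL: it is passed to every function in CFG.METRICS.FUNCS so that missing readings do not pollute the scores. A hedged sketch of how a masked metric such as masked_mae presumably treats it (the actual implementation lives in basicts.metrics and may differ):

    import torch

    def sketch_masked_mae(prediction, target, null_val=0.0):
        # Entries whose ground truth equals null_val (e.g. missing sensor readings)
        # are masked out; a NaN null value would need torch.isnan(target) instead.
        mask = (~torch.isclose(target, torch.full_like(target, null_val))).float()
        mask = mask / torch.clamp(mask.mean(), min=1e-8)  # re-weight so the loss scale stays comparable
        return (torch.abs(prediction - target) * mask).mean()
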
diff --git a/baselines/AGCRN/PEMS07.py b/baselines/AGCRN/PEMS07.py
index 632a9924..e0e53acc 100644
--- a/baselines/AGCRN/PEMS07.py
+++ b/baselines/AGCRN/PEMS07.py
@@ -1,39 +1,29 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import AGCRN
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "AGCRN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS07"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "AGCRN"
-CFG.MODEL.ARCH = AGCRN
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS07' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = AGCRN
+MODEL_PARAM = {
"num_nodes" : 883,
"input_dim" : 1,
"rnn_units" : 64,
@@ -44,63 +34,95 @@
"embed_dim" : 10,
"cheb_k" : 2
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.003,
-}
+############################## Metrics Configuration ##############################
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
)
-# train data
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {'lr': 0.003}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
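
The reworked CKPT_SAVE_DIR encodes model, dataset, epoch count and sequence lengths in one path. Assuming get_regular_settings('PEMS07') keeps the 12-in/12-out protocol of the old config (an assumption, since those settings are resolved at import time):

    import os

    # Resolves to 'checkpoints/AGCRN/PEMS07_100_12_12' under the assumption above.
    print(os.path.join('checkpoints', 'AGCRN', '_'.join(['PEMS07', '100', '12', '12'])))
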
diff --git a/baselines/AGCRN/PEMS08.py b/baselines/AGCRN/PEMS08.py
index afb12562..d463f776 100644
--- a/baselines/AGCRN/PEMS08.py
+++ b/baselines/AGCRN/PEMS08.py
@@ -1,39 +1,29 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import AGCRN
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "AGCRN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "AGCRN"
-CFG.MODEL.ARCH = AGCRN
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = AGCRN
+MODEL_PARAM = {
"num_nodes" : 170,
"input_dim" : 1,
"rnn_units" : 64,
@@ -41,66 +31,98 @@
"horizon" : 12,
"num_layers": 2,
"default_graph": True,
- "embed_dim" : 2,
+ "embed_dim" : 10,
"cheb_k" : 2
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.003,
-}
+############################## Metrics Configuration ##############################
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
)
-# train data
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {'lr': 0.003}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/AGCRN/run.sh b/baselines/AGCRN/run.sh
deleted file mode 100644
index 2927c384..00000000
--- a/baselines/AGCRN/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/AGCRN/METR-LA.py --gpus '0'
-python experiments/train.py -c baselines/AGCRN/PEMS-BAY.py --gpus '0'
-python experiments/train.py -c baselines/AGCRN/PEMS03.py --gpus '0'
-python experiments/train.py -c baselines/AGCRN/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/AGCRN/PEMS07.py --gpus '0'
-python experiments/train.py -c baselines/AGCRN/PEMS08.py --gpus '0'
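
The per-model run.sh helpers are dropped in this release. For anyone who misses the batch behaviour, a loop equivalent to the deleted script (assuming experiments/train.py still accepts the -c and --gpus flags the script shows) could look like:

    import subprocess

    # Mirrors the deleted baselines/AGCRN/run.sh, one training run per config.
    for name in ['METR-LA', 'PEMS-BAY', 'PEMS03', 'PEMS04', 'PEMS07', 'PEMS08']:
        subprocess.run(
            ['python', 'experiments/train.py', '-c', f'baselines/AGCRN/{name}.py', '--gpus', '0'],
            check=True,  # abort on the first failing run
        )
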
diff --git a/baselines/Autoformer/ETTh1.py b/baselines/Autoformer/ETTh1.py
index c4002f1c..a34ea2dc 100644
--- a/baselines/Autoformer/ETTh1.py
+++ b/baselines/Autoformer/ETTh1.py
@@ -1,47 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Autoformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Autoformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336 # the best in {96, 192, 336, 720}
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Autoformer"
-CFG.MODEL.ARCH = Autoformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Autoformer
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN,
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+ "seq_len": INPUT_LEN,
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"factor": 3, # attn factor
"d_model": 512,
"moving_avg": 25, # window size of moving average. This is a CRUCIAL hyper-parameter.
@@ -59,74 +48,107 @@
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
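
'label_len' above is the Informer-style start token length for the decoder. A sketch of how models in this family typically assemble the decoder input from it (illustrative; build_decoder_input is not a function in baselines/Autoformer/arch):

    import torch

    def build_decoder_input(history, label_len, pred_len):
        # history: (batch, seq_len, num_series). The decoder is fed the last
        # `label_len` observed steps plus zero placeholders for the horizon.
        label_len = int(label_len)  # guard in case a float slips in from the config
        start_token = history[:, -label_len:, :]
        placeholder = torch.zeros(history.size(0), pred_len, history.size(2),
                                  dtype=history.dtype, device=history.device)
        return torch.cat([start_token, placeholder], dim=1)
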
diff --git a/baselines/Autoformer/ETTh2.py b/baselines/Autoformer/ETTh2.py
index b7842e3c..82fec232 100644
--- a/baselines/Autoformer/ETTh2.py
+++ b/baselines/Autoformer/ETTh2.py
@@ -1,47 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Autoformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Autoformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336 # the best in {96, 192, 336, 720}
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Autoformer"
-CFG.MODEL.ARCH = Autoformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Autoformer
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN,
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+ "seq_len": INPUT_LEN,
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"factor": 3, # attn factor
"d_model": 512,
"moving_avg": 25, # window size of moving average. This is a CRUCIAL hyper-parameter.
@@ -59,74 +48,107 @@
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
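
These Autoformer configs share the Adam plus MultiStepLR recipe seen above. A self-contained sketch of the resulting schedule, using only standard PyTorch API:

    import torch

    # lr 2e-4 halves at epochs 1, 25 and 50: 1e-4, then 5e-5, then 2.5e-5.
    params = [torch.nn.Parameter(torch.zeros(1))]
    optimizer = torch.optim.Adam(params, lr=2e-4, weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[1, 25, 50], gamma=0.5)
    for epoch in range(60):
        optimizer.step()       # the weight update would happen here in a real loop
        scheduler.step()
        print(epoch + 1, scheduler.get_last_lr())
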
diff --git a/baselines/Autoformer/ETTm1.py b/baselines/Autoformer/ETTm1.py
index 251c008b..726ee87b 100644
--- a/baselines/Autoformer/ETTm1.py
+++ b/baselines/Autoformer/ETTm1.py
@@ -1,47 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Autoformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Autoformer model configuration "
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 192 # the best in {96, 192, 336, 720}
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Autoformer"
-CFG.MODEL.ARCH = Autoformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Autoformer
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN,
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+ "seq_len": INPUT_LEN,
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"factor": 3, # attn factor
"d_model": 512,
"moving_avg": 25, # window size of moving average. This is a CRUCIAL hyper-parameter.
@@ -54,79 +43,112 @@
"embed": "timeF", # [timeF, fixed, learned]
"activation": "gelu",
"num_time_features": 4, # number of used time features
- "time_of_day_size": 24 * 4,
+ "time_of_day_size": 24*4,
"day_of_week_size": 7,
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/Autoformer/ETTm2.py b/baselines/Autoformer/ETTm2.py
index cd562d50..04505b92 100644
--- a/baselines/Autoformer/ETTm2.py
+++ b/baselines/Autoformer/ETTm2.py
@@ -1,47 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Autoformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Autoformer model configuration "
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 96 # the best in {96, 192, 336, 720}
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Autoformer"
-CFG.MODEL.ARCH = Autoformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Autoformer
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN,
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+ "seq_len": INPUT_LEN,
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"factor": 3, # attn factor
"d_model": 512,
"moving_avg": 25, # window size of moving average. This is a CRUCIAL hyper-parameter.
@@ -54,79 +43,112 @@
"embed": "timeF", # [timeF, fixed, learned]
"activation": "gelu",
"num_time_features": 4, # number of used time features
- "time_of_day_size": 24 * 4,
+ "time_of_day_size": 24*4,
"day_of_week_size": 7,
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
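
The ETTm configs set time_of_day_size to 24*4 because the series are sampled every 15 minutes, giving 96 slots per day. A quick illustration of the index such an embedding table is presumably addressed with:

    # 15-minute data: slots 0..95 within the day.
    def time_of_day_slot(hour, minute):
        return hour * 4 + minute // 15

    assert time_of_day_slot(0, 0) == 0 and time_of_day_slot(23, 45) == 95
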
diff --git a/baselines/Autoformer/Electricity.py b/baselines/Autoformer/Electricity.py
index 4b104df7..2068f3d4 100644
--- a/baselines/Autoformer/Electricity.py
+++ b/baselines/Autoformer/Electricity.py
@@ -1,47 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Autoformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Autoformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Electricity"
-CFG.DATASET_TYPE = "Electricity Consumption"
-CFG.DATASET_INPUT_LEN = 96 # the best in {96, 192, 336, 720}
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Autoformer"
-CFG.MODEL.ARCH = Autoformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Electricity' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Autoformer
NUM_NODES = 321
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN,
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+ "seq_len": INPUT_LEN,
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"factor": 3, # attn factor
"d_model": 512,
"moving_avg": 25, # window size of moving average. This is a CRUCIAL hyper-parameter.
@@ -59,64 +48,106 @@
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
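+# With the values above this resolves to
+# 'checkpoints/Autoformer/Electricity_100_<INPUT_LEN>_<OUTPUT_LEN>'.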
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0001
+ "lr": 0.0001,
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25, 50],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
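+# These kwargs are presumably forwarded to
+# torch.nn.utils.clip_grad_norm_(model.parameters(), **CFG.TRAIN.CLIP_GRAD_PARAM).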
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 10
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
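+# HORIZONS reports metrics at individual prediction steps, in addition to the
+# average over the whole OUTPUT_LEN window.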
diff --git a/baselines/Autoformer/ExchangeRate.py b/baselines/Autoformer/ExchangeRate.py
index b9e93c66..d298705a 100644
--- a/baselines/Autoformer/ExchangeRate.py
+++ b/baselines/Autoformer/ExchangeRate.py
@@ -1,47 +1,37 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Autoformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Autoformer model configuration "
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ExchangeRate"
-CFG.DATASET_TYPE = "Exchange Rate"
-CFG.DATASET_INPUT_LEN = 96 # the best in {96, 192, 336, 720}
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Autoformer"
-CFG.MODEL.ARCH = Autoformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ExchangeRate' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+INPUT_LEN = 96 # better results than regular_settings['INPUT_LEN'] (336)
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Autoformer
NUM_NODES = 8
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN,
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+ "seq_len": INPUT_LEN,
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"factor": 3, # attn factor
"d_model": 512,
"moving_avg": 25, # window size of moving average. This is a CRUCIAL hyper-parameter.
@@ -59,71 +49,107 @@
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'Autoformer configuration for ExchangeRate'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mse
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25, 50],
"gamma": 0.5
}
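+# Worked example: with lr=0.0002 and gamma=0.5 the learning rate becomes
+# 1e-4 after epoch 1, 5e-5 after epoch 25, and 2.5e-5 after epoch 50.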
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/Autoformer/PEMS04.py b/baselines/Autoformer/PEMS04.py
deleted file mode 100644
index a6b91f81..00000000
--- a/baselines/Autoformer/PEMS04.py
+++ /dev/null
@@ -1,131 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-
-from .arch import Autoformer
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Autoformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 720
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Autoformer"
-CFG.MODEL.ARCH = Autoformer
-NUM_NODES = 307
-CFG.MODEL.PARAM = EasyDict(
- {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
- "moving_avg": 65, # window size of moving average. This is a CRUCIAL hyper-parameter.
- "output_attention": False,
- "enc_in": NUM_NODES, # num nodes
- "dec_in": NUM_NODES,
- "c_out": NUM_NODES,
- "d_model": 512,
- "embed": "timeF", # [timeF, fixed, learned]
- "dropout": 0.05,
- "factor": 6, # attn factor
- "n_heads": 8,
- "d_ff": 2048,
- "activation": "gelu",
- "e_layers": 2, # num of encoder layers
- "d_layers": 1, # num of decoder layers
- "num_time_features": 2, # number of used time features
- "time_of_day_size": 288,
- "day_of_week_size": 7,
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0005,
- "weight_decay": 0.0005,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25, 50],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- 'max_norm': 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Autoformer/PEMS04_LTSF.py b/baselines/Autoformer/PEMS04_LTSF.py
new file mode 100644
index 00000000..9efffbc7
--- /dev/null
+++ b/baselines/Autoformer/PEMS04_LTSF.py
@@ -0,0 +1,155 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import Autoformer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 720 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Autoformer
+NUM_NODES = 307
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length
+ "moving_avg": 65, # window size of moving average. This is a CRUCIAL hyper-parameter.
+ "output_attention": False,
+ "enc_in": NUM_NODES, # num nodes
+ "dec_in": NUM_NODES,
+ "c_out": NUM_NODES,
+ "d_model": 512,
+ "embed": "timeF", # [timeF, fixed, learned]
+ "dropout": 0.05,
+ "factor": 6, # attn factor
+ "n_heads": 8,
+ "d_ff": 2048,
+ "activation": "gelu",
+ "e_layers": 2, # num of encoder layers
+ "d_layers": 1, # num of decoder layers
+ "num_time_features": 2, # number of used time features
+ "time_of_day_size": 288,
+ "day_of_week_size": 7,
+}
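+# time_of_day_size = 288 matches PEMS04's 5-minute sampling (24 * 60 / 5 = 288);
+# day_of_week_size = 7 gives one slot per day of the week.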
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'Autoformer configuration for PEMS04 (LTSF setting)'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.0005,
+ "weight_decay": 0.0005,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25, 50],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/Autoformer/PEMS08.py b/baselines/Autoformer/PEMS08.py
deleted file mode 100644
index 098621a7..00000000
--- a/baselines/Autoformer/PEMS08.py
+++ /dev/null
@@ -1,131 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-
-from .arch import Autoformer
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Autoformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Autoformer"
-CFG.MODEL.ARCH = Autoformer
-NUM_NODES = 170
-CFG.MODEL.PARAM = EasyDict(
- {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
- "moving_avg": 65, # window size of moving average. This is a CRUCIAL hyper-parameter.
- "output_attention": False,
- "enc_in": NUM_NODES, # num nodes
- "dec_in": NUM_NODES,
- "c_out": NUM_NODES,
- "d_model": 512,
- "embed": "timeF", # [timeF, fixed, learned]
- "dropout": 0.05,
- "factor": 6, # attn factor
- "n_heads": 8,
- "d_ff": 2048,
- "activation": "gelu",
- "e_layers": 2, # num of encoder layers
- "d_layers": 1, # num of decoder layers
- "num_time_features": 2, # number of used time features
- "time_of_day_size": 288,
- "day_of_week_size": 7,
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0005,
- "weight_decay": 0.0005,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25, 50],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- 'max_norm': 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Autoformer/PEMS08_LTSF.py b/baselines/Autoformer/PEMS08_LTSF.py
new file mode 100644
index 00000000..1dfe44b0
--- /dev/null
+++ b/baselines/Autoformer/PEMS08_LTSF.py
@@ -0,0 +1,155 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import Autoformer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Autoformer
+NUM_NODES = 170
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length
+ "moving_avg": 65, # window size of moving average. This is a CRUCIAL hyper-parameter.
+ "output_attention": False,
+ "enc_in": NUM_NODES, # num nodes
+ "dec_in": NUM_NODES,
+ "c_out": NUM_NODES,
+ "d_model": 512,
+ "embed": "timeF", # [timeF, fixed, learned]
+ "dropout": 0.05,
+ "factor": 6, # attn factor
+ "n_heads": 8,
+ "d_ff": 2048,
+ "activation": "gelu",
+ "e_layers": 2, # num of encoder layers
+ "d_layers": 1, # num of decoder layers
+ "num_time_features": 2, # number of used time features
+ "time_of_day_size": 288,
+ "day_of_week_size": 7,
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'Autoformer configuration for PEMS08 (LTSF setting)'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.0005,
+ "weight_decay": 0.0005,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25, 50],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/Autoformer/Weather.py b/baselines/Autoformer/Weather.py
index 7c7f00ff..ea10cf90 100644
--- a/baselines/Autoformer/Weather.py
+++ b/baselines/Autoformer/Weather.py
@@ -1,47 +1,37 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Autoformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Autoformer model configuration "
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Weather"
-CFG.DATASET_TYPE = "Weather Data"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Autoformer"
-CFG.MODEL.ARCH = Autoformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Weather' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+INPUT_LEN = 96 # better results than regular_settings['INPUT_LEN'] (336)
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Autoformer
NUM_NODES = 21
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN,
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+ "seq_len": INPUT_LEN,
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"factor": 3, # attn factor
"d_model": 512,
"moving_avg": 25, # window size of moving average. This is a CRUCIAL hyper-parameter.
@@ -59,71 +49,107 @@
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'Autoformer configuration for Weather'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.00001,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/Autoformer/run.sh b/baselines/Autoformer/run.sh
deleted file mode 100644
index e1aa2578..00000000
--- a/baselines/Autoformer/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-# #!/bin/bash
-python experiments/train.py -c baselines/Autoformer/ETTh1.py --gpus '1'
-python experiments/train.py -c baselines/Autoformer/ETTh2.py --gpus '1'
-python experiments/train.py -c baselines/Autoformer/ETTm1.py --gpus '1'
-python experiments/train.py -c baselines/Autoformer/ETTm2.py --gpus '1'
-python experiments/train.py -c baselines/Autoformer/Electricity.py --gpus '1'
-python experiments/train.py -c baselines/Autoformer/ExchangeRate.py --gpus '1'
-python experiments/train.py -c baselines/Autoformer/Weather.py --gpus '1'
-python experiments/train.py -c baselines/Autoformer/PEMS04.py --gpus '1'
-python experiments/train.py -c baselines/Autoformer/PEMS08.py --gpus '1'
diff --git a/baselines/BGSLF/BGSLF_METR-LA.py b/baselines/BGSLF/BGSLF_METR-LA.py
deleted file mode 100644
index b6db0f3d..00000000
--- a/baselines/BGSLF/BGSLF_METR-LA.py
+++ /dev/null
@@ -1,143 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.utils.serialization import load_pkl
-from basicts.losses import masked_mae
-
-from .arch import BGSLF
-
-CFG = EasyDict()
-
-# GTS does not allow to load parameters since it creates parameters in the first iteration
-resume = False
-if not resume:
- import random
- _ = random.randint(-1e6, 1e6)
-
-# ================= general ================= #
-CFG.DESCRIPTION = "BGSLF model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG._ = _
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "BGSLF"
-CFG.MODEL.ARCH = BGSLF
-node_feats_full = load_pkl("datasets/{0}/data_in_{1}_out_{2}_rescale_{3}.pkl".format(CFG.DATASET_NAME, CFG.DATASET_INPUT_LEN, CFG.DATASET_OUTPUT_LEN, CFG.get("RESCALE", True)))["processed_data"][..., 0]
-train_index_list = load_pkl("datasets/{0}/index_in_{1}_out_{2}_rescale_{3}.pkl".format(CFG.DATASET_NAME, CFG.DATASET_INPUT_LEN, CFG.DATASET_OUTPUT_LEN, CFG.get("RESCALE", True)))["train"]
-node_feats = node_feats_full[:train_index_list[-1][-1], ...]
-CFG.MODEL.PARAM = {
- "node_feas": torch.Tensor(node_feats),
- "temperature": 0.5,
- "args": EasyDict({
- "device": torch.device("cuda:0"),
- "cl_decay_steps": 2000,
- "filter_type": "dual_random_walk",
- "horizon": 12,
- "feas_dim": 1,
- "input_dim": 2,
- "ll_decay": 0,
- "num_nodes": 207,
- "max_diffusion_step": 2,
- "num_rnn_layers": 1,
- "output_dim": 1,
- "rnn_units": 64,
- "seq_len": 12,
- "use_curriculum_learning": True,
- "embedding_size": 256,
- "kernel_size": 12,
- "freq": 288,
- "requires_graph": 2
- })
-
-}
-CFG.MODEL.SETUP_GRAPH = True
-CFG.MODEL.FORWARD_FEATURES = [0, 1]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.003,
- "eps": 1e-3
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [20, 40],
- "gamma": 0.1
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/BGSLF/arch/__init__.py b/baselines/BGSLF/arch/__init__.py
deleted file mode 100644
index 72d76943..00000000
--- a/baselines/BGSLF/arch/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .model import BGSLF
\ No newline at end of file
diff --git a/baselines/BGSLF/arch/cell.py b/baselines/BGSLF/arch/cell.py
deleted file mode 100644
index 166cdb94..00000000
--- a/baselines/BGSLF/arch/cell.py
+++ /dev/null
@@ -1,208 +0,0 @@
-import numpy as np
-import torch
-from torch.autograd import Function
-
-class SSU(Function): # Smooth Sparse Units (SSU)
-
- @staticmethod
- def forward(ctx, input, alpha, epsilon): # ctx:"context" input:x_tensor
-
- # t = f(1-x)/f(x) x>0
- t = torch.where((input > 0.) & (input < 1.), torch.exp(1. / input - 1. / (1 - input)), torch.zeros_like(input))
- # tx = 1/pow(x,2) + 1/pow(1-x,2)
- tx = torch.where(t > 0., 1. / pow(input, 2) + 1. / pow(1 - input, 2), torch.zeros_like(input))
-
- output = torch.where(input <= 0., torch.zeros_like(input), input)
- output = torch.where(input >= 1., torch.ones_like(input), output)
- output = torch.where(t > 0., alpha / (alpha + t), output)
-
- ctx.save_for_backward(t, tx, output, alpha, epsilon)
-
- return output
-
- @staticmethod
- def backward(ctx, grad_output):
-        # chain rule: dloss/dx = (dloss/doutput) * (doutput/dx)
-        # dloss/doutput is the incoming grad_output argument
-
- t, tx, output, alpha, epsilon = ctx.saved_tensors
-
- grad_input = alpha * pow(output, 2) * t * tx * grad_output.clone()
-
- sup = alpha * epsilon / (1 - epsilon)
- inf = alpha * (1 - epsilon) / epsilon
- grad_input[t > inf] = grad_output[t > inf]
- grad_input[(t < sup) & (t > 0)] = grad_output[(t < sup) & (t > 0)]
-
- return grad_input, None, None
-
-
-def SmoothSparseUnit(x, alpha, epsilon=0.05):
- alpha = torch.tensor(alpha)
- epsilon = torch.tensor(epsilon)
- return SSU.apply(x, alpha, epsilon)
-
-
-class LayerParams:
- def __init__(self, rnn_network: torch.nn.Module, layer_type: str, device):
- self._rnn_network = rnn_network
- self._params_dict = {}
- self._biases_dict = {}
- self._type = layer_type
- self.device = device
-
- def get_weights(self, shape):
- if shape not in self._params_dict:
- nn_param = torch.nn.Parameter(torch.empty(*shape, device=self.device))
- torch.nn.init.xavier_normal_(nn_param)
- self._params_dict[shape] = nn_param
- self._rnn_network.register_parameter('{}_weight_{}'.format(self._type, str(shape)),
- nn_param)
- return self._params_dict[shape]
-
- def get_biases(self, length, bias_start=0.0):
- if length not in self._biases_dict:
- biases = torch.nn.Parameter(torch.empty(length, device=self.device))
- torch.nn.init.constant_(biases, bias_start)
- self._biases_dict[length] = biases
- self._rnn_network.register_parameter('{}_biases_{}'.format(self._type, str(length)),
- biases)
-
- return self._biases_dict[length]
-
-
-class DCGRUCell(torch.nn.Module):
- def __init__(self, num_units, max_diffusion_step, num_nodes, nonlinearity='tanh',
- filter_type="laplacian", use_gc_for_ru=True, device='cuda'):
- """
- :param num_units:
- :param adj_mx:
- :param max_diffusion_step:
- :param num_nodes:
- :param nonlinearity:
- :param filter_type: "laplacian", "random_walk", "dual_random_walk".
- :param use_gc_for_ru: whether to use Graph convolution to calculate the reset and update gates.
- """
-
- super().__init__()
- self._activation = torch.tanh if nonlinearity == 'tanh' else torch.relu
- # support other nonlinearities up here?
- self._num_nodes = num_nodes
- self._num_units = num_units
- self._max_diffusion_step = max_diffusion_step
- self._supports = []
- self._use_gc_for_ru = use_gc_for_ru
-
- self.device = device
- self._fc_params = LayerParams(self, 'fc', self.device)
- self._gconv_params = LayerParams(self, 'gconv', self.device)
-
- @staticmethod
- def _build_sparse_matrix(L):
- L = L.tocoo()
- indices = np.column_stack((L.row, L.col))
- # this is to ensure row-major ordering to equal torch.sparse.sparse_reorder(L)
- indices = indices[np.lexsort((indices[:, 0], indices[:, 1]))]
- L = torch.sparse_coo_tensor(indices.T, L.data, L.shape, device=self.device)
- return L
-
- def _calculate_random_walk_matrix(self, adj_mx):
-
- # tf.Print(adj_mx, [adj_mx], message="This is adj: ")
-
- adj_mx = adj_mx + torch.eye(int(adj_mx.shape[0])).to(self.device)
- d = torch.sum(adj_mx, 1)
- d_inv = 1. / d
- d_inv = torch.where(torch.isinf(d_inv), torch.zeros(d_inv.shape).to(self.device), d_inv)
- d_mat_inv = torch.diag(d_inv)
- random_walk_mx = torch.mm(d_mat_inv, adj_mx)
- return random_walk_mx
-
- def forward(self, inputs, hx, adj):
- """Gated recurrent unit (GRU) with Graph Convolution.
- :param inputs: (B, num_nodes * input_dim)
- :param hx: (B, num_nodes * rnn_units)
- :return
- - Output: A `2-D` tensor with shape `(B, num_nodes * rnn_units)`.
- """
- adj_mx = self._calculate_random_walk_matrix(adj).t()
- output_size = 2 * self._num_units
- if self._use_gc_for_ru:
- fn = self._gconv
- else:
- fn = self._fc
- value = torch.sigmoid(fn(inputs, adj_mx, hx, output_size, bias_start=1.0))
- value = torch.reshape(value, (-1, self._num_nodes, output_size))
- r, u = torch.split(tensor=value, split_size_or_sections=self._num_units, dim=-1)
- r = torch.reshape(r, (-1, self._num_nodes * self._num_units))
- u = torch.reshape(u, (-1, self._num_nodes * self._num_units))
-
- c = self._gconv(inputs, adj_mx, r * hx, self._num_units)
- if self._activation is not None:
- c = self._activation(c)
-
- new_state = u * hx + (1.0 - u) * c
- return new_state
-
- @staticmethod
- def _concat(x, x_):
- x_ = x_.unsqueeze(0)
- return torch.cat([x, x_], dim=0)
-
- def _fc(self, inputs, state, output_size, bias_start=0.0):
- batch_size = inputs.shape[0]
- inputs = torch.reshape(inputs, (batch_size * self._num_nodes, -1))
- state = torch.reshape(state, (batch_size * self._num_nodes, -1))
- inputs_and_state = torch.cat([inputs, state], dim=-1)
- input_size = inputs_and_state.shape[-1]
- weights = self._fc_params.get_weights((input_size, output_size))
- value = torch.sigmoid(torch.matmul(inputs_and_state, weights))
- biases = self._fc_params.get_biases(output_size, bias_start)
- value += biases
- return value
-
- def _gconv(self, inputs, adj_mx, state, output_size, bias_start=0.0):
- # Reshape input and state to (batch_size, num_nodes, input_dim/state_dim)
- batch_size = inputs.shape[0]
- inputs = torch.reshape(inputs, (batch_size, self._num_nodes, -1))
- state = torch.reshape(state, (batch_size, self._num_nodes, -1))
- inputs_and_state = torch.cat([inputs, state], dim=2)
- input_size = inputs_and_state.size(2)
-
- x = inputs_and_state
- x0 = x.permute(1, 2, 0) # (num_nodes, total_arg_size, batch_size)
- x0 = torch.reshape(x0, shape=[self._num_nodes, input_size * batch_size])
- x = torch.unsqueeze(x0, 0)
-
- if self._max_diffusion_step == 0:
- pass
- else:
- x1 = torch.mm(adj_mx, x0)
- x = self._concat(x, x1)
-
- for k in range(2, self._max_diffusion_step + 1):
- x2 = 2 * torch.mm(adj_mx, x1) - x0
- x = self._concat(x, x2)
- x1, x0 = x2, x1
- '''
- Option:
- for support in self._supports:
- x1 = torch.sparse.mm(support, x0)
- x = self._concat(x, x1)
- for k in range(2, self._max_diffusion_step + 1):
- x2 = 2 * torch.sparse.mm(support, x1) - x0
- x = self._concat(x, x2)
- x1, x0 = x2, x1
- '''
- num_matrices = self._max_diffusion_step + 1 # Adds for x itself.
- x = torch.reshape(x, shape=[num_matrices, self._num_nodes, input_size, batch_size])
- x = x.permute(3, 1, 2, 0) # (batch_size, num_nodes, input_size, order)
- x = torch.reshape(x, shape=[batch_size * self._num_nodes, input_size * num_matrices])
-
- weights = self._gconv_params.get_weights((input_size * num_matrices, output_size))
- x = torch.matmul(x, weights) # (batch_size * self._num_nodes, output_size)
-
- biases = self._gconv_params.get_biases(output_size, bias_start)
- x += biases
- # Reshape res back to 2D: (batch_size, num_node, state_dim) -> (batch_size, num_node * state_dim)
- return torch.reshape(x, [batch_size, self._num_nodes * output_size])
diff --git a/baselines/BGSLF/arch/model.py b/baselines/BGSLF/arch/model.py
deleted file mode 100644
index 042ae85f..00000000
--- a/baselines/BGSLF/arch/model.py
+++ /dev/null
@@ -1,258 +0,0 @@
-import torch
-import torch.nn as nn
-from torch.nn import functional as F
-from .cell import DCGRUCell, SmoothSparseUnit
-import numpy as np
-
-def count_parameters(model):
- return sum(p.numel() for p in model.parameters() if p.requires_grad)
-
-def cosine_similarity_torch(x1, x2=None, eps=1e-8):
- x2 = x1 if x2 is None else x2
- w1 = x1.norm(p=2, dim=1, keepdim=True)
- w2 = w1 if x2 is x1 else x2.norm(p=2, dim=1, keepdim=True)
- return torch.mm(x1, x2.t()) / (w1 * w2.t()).clamp(min=eps)
-
-class Adjacency_generator(nn.Module):
- def __init__(self, embedding_size, num_nodes, time_series, kernel_size, freq, requires_graph, seq_len, feas_dim, input_dim, device, reduction_ratio=16):
- super(Adjacency_generator, self).__init__()
- self.freq = freq
- self.kernel_size = kernel_size
- self.num_nodes = num_nodes
- self.embedding = embedding_size
- self.time_series = time_series
- self.seq_len = seq_len
- self.feas_dim = feas_dim
- self.input_dim = input_dim
- self.segm = int((self.time_series.shape[0]-1) // self.freq)
- self.graphs = requires_graph
- self.delta_series = torch.zeros_like(self.time_series).to(device)
- self.conv1d = nn.Conv1d(in_channels=self.segm * self.feas_dim, out_channels=self.graphs, kernel_size=kernel_size, padding=0)
- self.fc_1 = nn.Linear(self.freq - self.kernel_size + 1, self.embedding)
- self.fc_2 = nn.Linear(self.embedding, self.embedding // reduction_ratio)
- self.fc_3 = nn.Linear(self.embedding // reduction_ratio, self.num_nodes)
- self.process()
- self.device = device
-
- def process(self):
-
- for i in range(self.time_series.shape[0]):
- if i == 0:
- self.delta_series[i] = self.time_series[i]
- else:
- self.delta_series[i] = self.time_series[i]-self.time_series[i-1]
- times = []
- for i in range(self.segm):
- time_seg = self.delta_series[i*self.freq + 1 : (i+1)*self.freq + 1]
- times.append(time_seg)
-
- t = torch.stack(times, dim=0).reshape(self.segm, self.freq, self.num_nodes, self.feas_dim) # (num_segment, freq, num_nodes, feas_dim)
- self.t = t
-
- def forward(self, node_feas): # input: (seq_len, batch_size, num_sensor * input_dim)
- t = self.t.permute(2, 0, 3, 1)
- self.times = t.reshape(self.num_nodes, -1, self.freq)
- mid_input = self.conv1d(self.times).permute(1,0,2) # (graphs, num_nodes, freq-kernel_size+1)
- mid_output = torch.stack([F.relu(self.fc_1(mid_input[i,...])) for i in range(self.graphs)], dim=0)
- out_put = F.relu(self.fc_2(mid_output))
- output = torch.sigmoid(self.fc_3(out_put))
- # cos_similarity
- max_similarity = -999999
- seq_len = node_feas.shape[0]
- batch_size = node_feas.shape[1]
- node_feas = node_feas.reshape(seq_len, batch_size, self.num_nodes, -1)
- node_feas = node_feas.permute(1, 2, 3, 0)
- node_feas = node_feas.reshape(batch_size, self.num_nodes, -1)
- nodes_feature = torch.zeros(node_feas.shape[1], node_feas.shape[2]).to(self.device)
- for i in range(node_feas.shape[0]):
- nodes_feature += node_feas[i, ...]
- node_feas = torch.matmul(nodes_feature, nodes_feature.T)
- select = -1
- for graph_idx in range(output.shape[0]):
- x_1 = node_feas.reshape(1, -1)
- x_2 = output[graph_idx, :, :].reshape(1, -1)
- similarity = cosine_similarity_torch(x_1, x_2, eps=1e-20)
- if similarity > max_similarity:
- max_similarity = similarity
- select = graph_idx
-
- return output[select]
-
-class Seq2SeqAttrs:
- def __init__(self, args):
- #self.adj_mx = adj_mx
- self.max_diffusion_step = args.max_diffusion_step
- self.cl_decay_steps = args.cl_decay_steps
- self.filter_type = args.filter_type
- self.num_nodes = args.num_nodes
- self.num_rnn_layers = args.num_rnn_layers
- self.rnn_units = args.rnn_units
- self.hidden_state_size = self.num_nodes * self.rnn_units
-
-
-class EncoderModel(nn.Module, Seq2SeqAttrs):
- def __init__(self, args):
- nn.Module.__init__(self)
- Seq2SeqAttrs.__init__(self, args)
- self.input_dim = args.input_dim
- self.seq_len = args.seq_len # for the encoder
- self.device = args.device
- self.dcgru_layers = nn.ModuleList(
- [DCGRUCell(self.rnn_units, self.max_diffusion_step, self.num_nodes,
- filter_type=self.filter_type, device=self.device) for _ in range(self.num_rnn_layers)])
-
-
- def forward(self, inputs, adj, hidden_state=None):
- """
- Encoder forward pass.
- :param inputs: shape (batch_size, self.num_nodes * self.input_dim)
- :param hidden_state: (num_layers, batch_size, self.hidden_state_size)
- optional, zeros if not provided
- :return: output: # shape (batch_size, self.hidden_state_size)
- hidden_state # shape (num_layers, batch_size, self.hidden_state_size)
- (lower indices mean lower layers)
- """
- batch_size, _ = inputs.size()
- if hidden_state is None:
- hidden_state = torch.zeros((self.num_rnn_layers, batch_size, self.hidden_state_size),
- device=self.device)
- hidden_states = []
- output = inputs
- for layer_num, dcgru_layer in enumerate(self.dcgru_layers):
- next_hidden_state = dcgru_layer(output, hidden_state[layer_num], adj)
- hidden_states.append(next_hidden_state)
- output = next_hidden_state
-
- return output, torch.stack(hidden_states) # runs in O(num_layers) so not too slow
-
-
-class DecoderModel(nn.Module, Seq2SeqAttrs):
- def __init__(self, args):
- # super().__init__(is_training, adj_mx, **model_kwargs)
- nn.Module.__init__(self)
- Seq2SeqAttrs.__init__(self, args)
- self.output_dim = args.output_dim
- self.horizon = args.horizon # for the decoder
- self.projection_layer = nn.Linear(self.rnn_units, self.output_dim)
- self.device = args.device
- self.dcgru_layers = nn.ModuleList(
- [DCGRUCell(self.rnn_units, self.max_diffusion_step, self.num_nodes,
- filter_type=self.filter_type, device=self.device) for _ in range(self.num_rnn_layers)])
-
- def forward(self, inputs, adj, hidden_state=None):
- """
- :param inputs: shape (batch_size, self.num_nodes * self.output_dim)
- :param hidden_state: (num_layers, batch_size, self.hidden_state_size)
- optional, zeros if not provided
- :return: output: # shape (batch_size, self.num_nodes * self.output_dim)
- hidden_state # shape (num_layers, batch_size, self.hidden_state_size)
- (lower indices mean lower layers)
- """
- hidden_states = []
- output = inputs
- for layer_num, dcgru_layer in enumerate(self.dcgru_layers):
- next_hidden_state = dcgru_layer(output, hidden_state[layer_num], adj)
- hidden_states.append(next_hidden_state)
- output = next_hidden_state
-
- projected = self.projection_layer(output.view(-1, self.rnn_units))
- output = projected.view(-1, self.num_nodes * self.output_dim)
-
- return output, torch.stack(hidden_states)
-
-
-class BGSLF(nn.Module, Seq2SeqAttrs):
- """
- Paper:
- Balanced Spatial-Temporal Graph Structure Learning for Multivariate Time Series Forecasting: A Trade-off between Efficiency and Flexibility
- https://proceedings.mlr.press/v189/chen23a.html
-
- Official Codes:
- https://github.com/onceCWJ/BGSLF
- """
- def __init__(self, node_feas, temperature, args):
- super().__init__()
- Seq2SeqAttrs.__init__(self, args)
- self.args = args
- self.encoder_model = EncoderModel(args)
- self.decoder_model = DecoderModel(args)
- self.cl_decay_steps = args.cl_decay_steps
- self.use_curriculum_learning = args.use_curriculum_learning
- self.temperature = temperature
- self.embedding_size = args.embedding_size
- self.seq_len = args.seq_len
- self.feas_dim = args.feas_dim
- self.input_dim = args.input_dim
- self.kernel_size = args.kernel_size
- self.freq = args.freq
- self.requires_graph = args.requires_graph
- self.Adjacency_generator = Adjacency_generator(embedding_size=self.embedding_size, num_nodes = self.num_nodes, time_series = node_feas,
- kernel_size=self.kernel_size, freq=self.freq, requires_graph = self.requires_graph,
- seq_len=self.seq_len, feas_dim=self.feas_dim, input_dim = self.input_dim, device = args.device)
- self.device = args.device
-
-
- def _compute_sampling_threshold(self, batches_seen):
- return self.cl_decay_steps / (
- self.cl_decay_steps + np.exp(batches_seen / self.cl_decay_steps))
-
- def encoder(self, inputs, adj):
- """
- Encoder forward pass
- :param inputs: shape (seq_len, batch_size, num_sensor * input_dim)
- :return: encoder_hidden_state: (num_layers, batch_size, self.hidden_state_size)
- """
- encoder_hidden_state = None
- for t in range(self.args.seq_len):
- _, encoder_hidden_state = self.encoder_model(inputs[t], adj, encoder_hidden_state)
-
- return encoder_hidden_state
-
- def decoder(self, encoder_hidden_state, adj, labels=None, batches_seen=None):
- """
- Decoder forward pass
- :param encoder_hidden_state: (num_layers, batch_size, self.hidden_state_size)
- :param labels: (self.horizon, batch_size, self.num_nodes * self.output_dim) [optional, not exist for inference]
- :param batches_seen: global step [optional, not exist for inference]
- :return: output: (self.horizon, batch_size, self.num_nodes * self.output_dim)
- """
- batch_size = encoder_hidden_state.size(1)
- go_symbol = torch.zeros((batch_size, self.num_nodes * self.decoder_model.output_dim),
- device=self.device)
- decoder_hidden_state = encoder_hidden_state
- decoder_input = go_symbol
-
- outputs = []
-
- for t in range(self.decoder_model.horizon):
- decoder_output, decoder_hidden_state = self.decoder_model(decoder_input, adj,
- decoder_hidden_state)
- decoder_input = decoder_output
- outputs.append(decoder_output)
- if self.training and self.use_curriculum_learning:
- c = np.random.uniform(0, 1)
- if c < self._compute_sampling_threshold(batches_seen):
- decoder_input = labels[t]
- outputs = torch.stack(outputs)
- return outputs
-
- def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_seen: int, epoch: int, train: bool, **kwargs) -> torch.Tensor:
- # history_data (torch.Tensor): shape [B, L, N, C]
- batch_size, seq_len, num_nodes, input_dim = history_data.shape
- inputs = history_data.permute(1, 0, 2, 3).contiguous().view(seq_len, -1, num_nodes * input_dim)
- if future_data is not None:
- labels = future_data[..., [0]].permute(1, 0, 2, 3).contiguous().view(seq_len, -1, num_nodes * 1)
- else:
- labels = None
-
- adj = SmoothSparseUnit(self.Adjacency_generator(inputs), 1, 0.02)
- # adj = F.relu(self.Adjacency_generator(inputs))
-
- encoder_hidden_state = self.encoder(inputs, adj)
- # print("Encoder complete, starting decoder")
- outputs = self.decoder(encoder_hidden_state, adj, labels, batches_seen=batch_seen)
- # print("Decoder complete")
- if batch_seen == 0:
- print( "Total trainable parameters {}".format(count_parameters(self)))
-
- return outputs.permute(1, 0, 2).unsqueeze(-1)
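Note on the decoder removed above: it relies on scheduled sampling, where during training the ground-truth label replaces the model's own prediction with probability given by an inverse-sigmoid decay. A minimal sketch of that schedule, assuming cl_decay_steps = 2000 (a placeholder; the real value comes from args.cl_decay_steps):

import numpy as np

def sampling_threshold(batches_seen, cl_decay_steps=2000):
    # Inverse-sigmoid decay, as in BGSLF._compute_sampling_threshold above:
    # stays near 1 (mostly teacher forcing) early on, then decays toward 0.
    return cl_decay_steps / (cl_decay_steps + np.exp(batches_seen / cl_decay_steps))

print(sampling_threshold(0))      # ~0.9995 (almost always use ground truth)
print(sampling_threshold(15000))  # ~0.53
print(sampling_threshold(30000))  # ~0.0006 (almost always use own predictions)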
diff --git a/baselines/Crossformer/ETTh1.py b/baselines/Crossformer/ETTh1.py
index 546d9aad..a9cb7e83 100644
--- a/baselines/Crossformer/ETTh1.py
+++ b/baselines/Crossformer/ETTh1.py
@@ -1,43 +1,34 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Crossformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Crossformer model configuration "
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Crossformer"
-CFG.MODEL.ARCH = Crossformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+INPUT_LEN = 720 # 720 yields better results than the regular setting of 336 (regular_settings['INPUT_LEN'])
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Crossformer
NUM_NODES = 7
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"data_dim": NUM_NODES,
- "in_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
+ "in_len": INPUT_LEN,
+ "out_len": OUTPUT_LEN,
"seg_len": 24,
"win_size": 2,
# default parameters
@@ -49,69 +40,103 @@
"dropout": 0.2,
"baseline": False
}
-CFG.MODEL.FORWARD_FEATURES = [0]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'Crossformer on ETTh1'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+    'MAE': masked_mae,
+    'MSE': masked_mse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.00005
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 5],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
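The CFG.SCALER block introduced above replaces the old in-dataset normalization: ZScoreScaler fits its statistics on the training split only ('train_ratio': TRAIN_VAL_TEST_RATIO[0]) and, with 'rescale', denormalizes predictions before metrics are computed. A minimal numpy sketch of the z-score logic it presumably implements (the real class in basicts.scaler additionally handles norm_each_channel and NULL_VAL masking):

import numpy as np

data = np.random.rand(1000, 7)            # [time, channel]; ETTh1 has 7 channels
train = data[: int(len(data) * 0.6)]      # fit on the training split only (no leakage)
mean, std = train.mean(axis=0), train.std(axis=0)

normalized = (data - mean) / std          # applied before the model sees the data
restored = normalized * std + mean        # inverse transform before metric computation
assert np.allclose(restored, data)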
diff --git a/baselines/Crossformer/ETTh2.py b/baselines/Crossformer/ETTh2.py
deleted file mode 100644
index 61f7e907..00000000
--- a/baselines/Crossformer/ETTh2.py
+++ /dev/null
@@ -1,117 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-
-from .arch import Crossformer
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Crossformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Crossformer"
-CFG.MODEL.ARCH = Crossformer
-NUM_NODES = 7
-CFG.MODEL.PARAM = {
- "data_dim": NUM_NODES,
- "in_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
- "seg_len": 24,
- "win_size": 2,
- # default parameters
- "factor": 10,
- "d_model": 256,
- "d_ff": 512,
- "n_heads": 4,
- "e_layers": 3,
- "dropout": 0.2,
- "baseline": False
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.00001
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 5],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Crossformer/ETTm1.py b/baselines/Crossformer/ETTm1.py
index a47a7a1b..1ac2bccf 100644
--- a/baselines/Crossformer/ETTm1.py
+++ b/baselines/Crossformer/ETTm1.py
@@ -1,45 +1,35 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Crossformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Crossformer model configuration "
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 720
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Crossformer"
-CFG.MODEL.ARCH = Crossformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Crossformer
NUM_NODES = 7
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"data_dim": NUM_NODES,
- "in_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
+ "in_len": INPUT_LEN,
+ "out_len": OUTPUT_LEN,
"seg_len": 12,
- "win_size": 2,
+ "win_size": 3,
# default parameters
"factor": 10,
"d_model": 256,
@@ -49,72 +39,103 @@
"dropout": 0.2,
"baseline": False
}
-CFG.MODEL.FORWARD_FEATURES = [0]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'Crossformer on ETTm1'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+    'MAE': masked_mae,
+    'MSE': masked_mse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0005
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1],
+ "milestones": [1, 5],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/Crossformer/ETTm2.py b/baselines/Crossformer/ETTm2.py
deleted file mode 100644
index 35f7612e..00000000
--- a/baselines/Crossformer/ETTm2.py
+++ /dev/null
@@ -1,120 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-
-from .arch import Crossformer
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Crossformer model configuration "
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Crossformer"
-CFG.MODEL.ARCH = Crossformer
-NUM_NODES = 7
-CFG.MODEL.PARAM = {
- "data_dim": NUM_NODES,
- "in_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
- "seg_len": 12,
- "win_size": 2,
- # default parameters
- "factor": 10,
- "d_model": 256,
- "d_ff": 512,
- "n_heads": 4,
- "e_layers": 3,
- "dropout": 0.2,
- "baseline": False
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.00005
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Crossformer/Electricity.py b/baselines/Crossformer/Electricity.py
index 9545897f..718bc5e2 100644
--- a/baselines/Crossformer/Electricity.py
+++ b/baselines/Crossformer/Electricity.py
@@ -1,43 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Crossformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Crossformer model configuration "
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Electricity"
-CFG.DATASET_TYPE = "Electricity"
-CFG.DATASET_INPUT_LEN = 192
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Crossformer"
-CFG.MODEL.ARCH = Crossformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Electricity' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Crossformer
NUM_NODES = 321
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"data_dim": NUM_NODES,
- "in_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
+ "in_len": INPUT_LEN,
+ "out_len": OUTPUT_LEN,
"seg_len": 24,
"win_size": 2,
# default parameters
@@ -49,69 +39,103 @@
"dropout": 0.2,
"baseline": False
}
-CFG.MODEL.FORWARD_FEATURES = [0]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'Crossformer on Electricity'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+    'MAE': masked_mae,
+    'MSE': masked_mse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 20, 40, 60, 80, 100, 150],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 200
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 64
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
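Configs in the new format are plain Python modules exposing a CFG object. A hypothetical launch, assuming the basicts.launch_training entry point keeps its cfg/gpus arguments (the training scripts updated elsewhere in this patch are authoritative):

# Hypothetical usage; see the updated experiments/train.py for the exact interface.
import basicts

basicts.launch_training(cfg='baselines/Crossformer/Electricity.py', gpus='0')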
diff --git a/baselines/Crossformer/ExchangeRate.py b/baselines/Crossformer/ExchangeRate.py
deleted file mode 100644
index 75d6ba76..00000000
--- a/baselines/Crossformer/ExchangeRate.py
+++ /dev/null
@@ -1,117 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-
-from .arch import Crossformer
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Crossformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ExchangeRate"
-CFG.DATASET_TYPE = "Exchange Rate"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Crossformer"
-CFG.MODEL.ARCH = Crossformer
-NUM_NODES = 8
-CFG.MODEL.PARAM = {
- "data_dim": NUM_NODES,
- "in_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
- "seg_len": 24,
- "win_size": 2,
- # default parameters
- "factor": 10,
- "d_model": 64,
- "d_ff": 128,
- "n_heads": 2,
- "e_layers": 3,
- "dropout": 0.2,
- "baseline": False
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0001
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 5],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 20
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Crossformer/PEMS04.py b/baselines/Crossformer/PEMS04.py
deleted file mode 100644
index 2fad132b..00000000
--- a/baselines/Crossformer/PEMS04.py
+++ /dev/null
@@ -1,119 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-
-from .arch import Crossformer
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Crossformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 192
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Crossformer"
-CFG.MODEL.ARCH = Crossformer
-NUM_NODES = 307
-CFG.MODEL.PARAM = {
- "data_dim": NUM_NODES,
- "in_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
- "seg_len": 24,
- "win_size": 2,
- # default parameters
- "factor": 10,
- "d_model": 256,
- "d_ff": 512,
- "n_heads": 4,
- "e_layers": 3,
- "dropout": 0.2,
- "baseline": False
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0002,
- "weight_decay": 0.0005,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 5],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 8
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Crossformer/PEMS04_LTSF.py b/baselines/Crossformer/PEMS04_LTSF.py
new file mode 100644
index 00000000..572472df
--- /dev/null
+++ b/baselines/Crossformer/PEMS04_LTSF.py
@@ -0,0 +1,145 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import Crossformer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 192 # long-term time series forecasting (LTSF) setting
+OUTPUT_LEN = 336 # LTSF setting
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Crossformer
+NUM_NODES = 307
+MODEL_PARAM = {
+ "data_dim": NUM_NODES,
+ "in_len": INPUT_LEN,
+ "out_len": OUTPUT_LEN,
+ "seg_len": 24,
+ "win_size": 2,
+ # default parameters
+ "factor": 10,
+ "d_model": 256,
+ "d_ff": 512,
+ "n_heads": 4,
+ "e_layers": 3,
+ "dropout": 0.2,
+ "baseline": False
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'Crossformer on PEMS04 (LTSF)'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+    'MAE': masked_mae,
+    'MAPE': masked_mape,
+    'RMSE': masked_rmse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.0002,
+ "weight_decay": 0.0005,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 5],
+ "gamma": 0.5
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = False # Whether to use GPU for evaluation (set to False here; default: True)
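FORWARD_FEATURES and TARGET_FEATURES above select channels from the [batch, length, node, channel] tensors the runner feeds the model. A sketch with hypothetical shapes for PEMS04 (raw flow plus two temporal encodings, 307 nodes):

import torch

history = torch.randn(16, 192, 307, 3)    # [B, L, N, C]
future = torch.randn(16, 336, 307, 3)

model_input = history[..., [0, 1, 2]]     # FORWARD_FEATURES: all three channels go in
target = future[..., [0]]                 # TARGET_FEATURES: only traffic flow is predicted
print(model_input.shape, target.shape)    # [16, 192, 307, 3] [16, 336, 307, 1]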
diff --git a/baselines/Crossformer/PEMS08.py b/baselines/Crossformer/PEMS08.py
deleted file mode 100644
index efba2c1e..00000000
--- a/baselines/Crossformer/PEMS08.py
+++ /dev/null
@@ -1,119 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-
-from .arch import Crossformer
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Crossformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Crossformer"
-CFG.MODEL.ARCH = Crossformer
-NUM_NODES = 170
-CFG.MODEL.PARAM = {
- "data_dim": NUM_NODES,
- "in_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
- "seg_len": 24,
- "win_size": 2,
- # default parameters
- "factor": 10,
- "d_model": 256,
- "d_ff": 512,
- "n_heads": 4,
- "e_layers": 3,
- "dropout": 0.2,
- "baseline": False
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0002,
- "weight_decay": 0.0005,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 5],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 8
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Crossformer/PEMS08_LTSF.py b/baselines/Crossformer/PEMS08_LTSF.py
new file mode 100644
index 00000000..d0d404f4
--- /dev/null
+++ b/baselines/Crossformer/PEMS08_LTSF.py
@@ -0,0 +1,145 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import Crossformer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 336 # long-term time series forecasting (LTSF) setting
+OUTPUT_LEN = 336 # LTSF setting
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Crossformer
+NUM_NODES = 170
+MODEL_PARAM = {
+ "data_dim": NUM_NODES,
+ "in_len": INPUT_LEN,
+ "out_len": OUTPUT_LEN,
+ "seg_len": 24,
+ "win_size": 2,
+ # default parameters
+ "factor": 10,
+ "d_model": 256,
+ "d_ff": 512,
+ "n_heads": 4,
+ "e_layers": 3,
+ "dropout": 0.2,
+ "baseline": False
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'Crossformer on PEMS08 (LTSF)'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+    'MAE': masked_mae,
+    'MAPE': masked_mape,
+    'RMSE': masked_rmse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.0002,
+ "weight_decay": 0.0005,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 5],
+ "gamma": 0.5
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = False # Whether to use GPU for evaluation (set to False here; default: True)
diff --git a/baselines/Crossformer/Weather.py b/baselines/Crossformer/Weather.py
index d096ee9a..ec45d99f 100644
--- a/baselines/Crossformer/Weather.py
+++ b/baselines/Crossformer/Weather.py
@@ -1,43 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Crossformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Crossformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Weather"
-CFG.DATASET_TYPE = "Weather"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Crossformer"
-CFG.MODEL.ARCH = Crossformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Weather' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Crossformer
NUM_NODES = 21
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"data_dim": NUM_NODES,
- "in_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
+ "in_len": INPUT_LEN,
+ "out_len": OUTPUT_LEN,
"seg_len": 24,
"win_size": 2,
# default parameters
@@ -49,70 +39,103 @@
"dropout": 0.2,
"baseline": False
}
-CFG.MODEL.FORWARD_FEATURES = [0]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'Crossformer on Weather'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+    'MAE': masked_mae,
+    'MSE': masked_mse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.00005,
- "weight_decay": 0.0005,
+ "lr": 0.00005
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 5],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
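CFG.EVAL.HORIZONS asks the runner to report metrics at individual prediction steps in addition to the average over the whole window. A sketch of that per-horizon slicing (steps are 1-indexed), assuming masked_mae's default null-value handling:

import torch
from basicts.metrics import masked_mae

prediction = torch.randn(16, 336, 21, 1)   # [batch, horizon, node, channel]
target = torch.randn(16, 336, 21, 1)
for h in [12, 24, 48, 96, 192, 288, 336]:
    # slice out step h (hence h - 1) and score it separately
    print(h, masked_mae(prediction[:, h - 1], target[:, h - 1]).item())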
diff --git a/baselines/Crossformer/arch/attn.py b/baselines/Crossformer/arch/attn.py
index 2c77ab0b..80e4c0cc 100644
--- a/baselines/Crossformer/arch/attn.py
+++ b/baselines/Crossformer/arch/attn.py
@@ -14,7 +14,7 @@ def __init__(self, scale=None, attention_dropout=0.1):
super(FullAttention, self).__init__()
self.scale = scale
self.dropout = nn.Dropout(attention_dropout)
-
+
def forward(self, queries, keys, values):
B, L, H, E = queries.shape
_, S, _, D = values.shape
@@ -23,7 +23,7 @@ def forward(self, queries, keys, values):
scores = torch.einsum("blhe,bshe->bhls", queries, keys)
A = self.dropout(torch.softmax(scale * scores, dim=-1))
V = torch.einsum("bhls,bshd->blhd", A, values)
-
+
return V.contiguous()
@@ -77,7 +77,7 @@ def __init__(self, seg_num, factor, d_model, n_heads, d_ff = None, dropout=0.1):
self.dim_sender = AttentionLayer(d_model, n_heads, dropout = dropout)
self.dim_receiver = AttentionLayer(d_model, n_heads, dropout = dropout)
self.router = nn.Parameter(torch.randn(seg_num, factor, d_model))
-
+
self.dropout = nn.Dropout(dropout)
self.norm1 = nn.LayerNorm(d_model)
diff --git a/baselines/Crossformer/arch/cross_decoder.py b/baselines/Crossformer/arch/cross_decoder.py
index c4bb910e..5e981498 100644
--- a/baselines/Crossformer/arch/cross_decoder.py
+++ b/baselines/Crossformer/arch/cross_decoder.py
@@ -11,7 +11,7 @@ class DecoderLayer(nn.Module):
def __init__(self, seg_len, d_model, n_heads, d_ff=None, dropout=0.1, out_seg_num = 10, factor = 10):
super(DecoderLayer, self).__init__()
self.self_attention = TwoStageAttentionLayer(out_seg_num, factor, d_model, n_heads, \
- d_ff, dropout)
+ d_ff, dropout)
self.cross_attention = AttentionLayer(d_model, n_heads, dropout = dropout)
self.norm1 = nn.LayerNorm(d_model)
self.norm2 = nn.LayerNorm(d_model)
@@ -30,7 +30,7 @@ def forward(self, x, cross):
batch = x.shape[0]
x = self.self_attention(x)
x = rearrange(x, 'b ts_d out_seg_num d_model -> (b ts_d) out_seg_num d_model')
-
+
cross = rearrange(cross, 'b ts_d in_seg_num d_model -> (b ts_d) in_seg_num d_model')
tmp = self.cross_attention(
x, cross, cross,
@@ -39,7 +39,7 @@ def forward(self, x, cross):
y = x = self.norm1(x)
y = self.MLP1(y)
dec_output = self.norm2(x+y)
-
+
dec_output = rearrange(dec_output, '(b ts_d) seg_dec_num d_model -> b ts_d seg_dec_num d_model', b = batch)
layer_predict = self.linear_pred(dec_output)
layer_predict = rearrange(layer_predict, 'b out_d seg_num seg_len -> b (out_d seg_num) seg_len')
@@ -73,7 +73,7 @@ def forward(self, x, cross):
else:
final_predict = final_predict + layer_predict
i += 1
-
+
final_predict = rearrange(final_predict, 'b (out_d seg_num) seg_len -> b (seg_num seg_len) out_d', out_d = ts_d)
return final_predict
diff --git a/baselines/Crossformer/arch/cross_embed.py b/baselines/Crossformer/arch/cross_embed.py
index c81ddc0a..138d071b 100644
--- a/baselines/Crossformer/arch/cross_embed.py
+++ b/baselines/Crossformer/arch/cross_embed.py
@@ -18,5 +18,5 @@ def forward(self, x):
x_segment = rearrange(x, 'b (seg_num seg_len) d -> (b d seg_num) seg_len', seg_len = self.seg_len)
x_embed = self.linear(x_segment)
x_embed = rearrange(x_embed, '(b d seg_num) d_model -> b d seg_num d_model', b = batch, d = ts_dim)
-
+
return x_embed
\ No newline at end of file
diff --git a/baselines/Crossformer/arch/cross_encoder.py b/baselines/Crossformer/arch/cross_encoder.py
index 0b602064..aa0827ae 100644
--- a/baselines/Crossformer/arch/cross_encoder.py
+++ b/baselines/Crossformer/arch/cross_encoder.py
@@ -53,22 +53,22 @@ def __init__(self, win_size, d_model, n_heads, d_ff, depth, dropout, \
self.merge_layer = SegMerging(d_model, win_size, nn.LayerNorm)
else:
self.merge_layer = None
-
+
self.encode_layers = nn.ModuleList()
for i in range(depth):
self.encode_layers.append(TwoStageAttentionLayer(seg_num, factor, d_model, n_heads, \
d_ff, dropout))
-
+
def forward(self, x):
_, ts_dim, _, _ = x.shape
if self.merge_layer is not None:
x = self.merge_layer(x)
-
+
for layer in self.encode_layers:
- x = layer(x)
-
+ x = layer(x)
+
return x
class Encoder(nn.Module):
@@ -89,7 +89,7 @@ def __init__(self, e_blocks, win_size, d_model, n_heads, d_ff, block_depth, drop
def forward(self, x):
encode_x = []
encode_x.append(x)
-
+
for block in self.encode_blocks:
x = block(x)
encode_x.append(x)
diff --git a/baselines/Crossformer/arch/crossformer_arch.py b/baselines/Crossformer/arch/crossformer_arch.py
index 6df37b00..28d1f2b4 100644
--- a/baselines/Crossformer/arch/crossformer_arch.py
+++ b/baselines/Crossformer/arch/crossformer_arch.py
@@ -36,7 +36,7 @@ def __init__(self, data_dim, in_len, out_len, seg_len, win_size = 4,
# Encoder
self.encoder = Encoder(e_layers, win_size, d_model, n_heads, d_ff, block_depth = 1, \
dropout = dropout,in_seg_num = (self.pad_in_len // seg_len), factor = factor)
-
+
# Decoder
self.dec_pos_embedding = nn.Parameter(torch.randn(1, data_dim, (self.pad_out_len // seg_len), d_model))
self.decoder = Decoder(seg_len, e_layers + 1, d_model, n_heads, d_ff, dropout, \
@@ -55,7 +55,7 @@ def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_s
x_seq = self.enc_value_embedding(x_seq)
x_seq += self.enc_pos_embedding
x_seq = self.pre_norm(x_seq)
-
+
enc_out = self.encoder(x_seq)
dec_in = repeat(self.dec_pos_embedding, 'b ts_d l d -> (repeat b) ts_d l d', repeat = batch_size)
diff --git a/baselines/Crossformer/run.sh b/baselines/Crossformer/run.sh
deleted file mode 100644
index 4bcfc8fe..00000000
--- a/baselines/Crossformer/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/Crossformer/ETTh1.py --gpus '0'
-python experiments/train.py -c baselines/Crossformer/ETTh2.py --gpus '0'
-python experiments/train.py -c baselines/Crossformer/ETTm1.py --gpus '0'
-python experiments/train.py -c baselines/Crossformer/ETTm2.py --gpus '0'
-python experiments/train.py -c baselines/Crossformer/Electricity.py --gpus '0'
-python experiments/train.py -c baselines/Crossformer/ExchangeRate.py --gpus '0'
-python experiments/train.py -c baselines/Crossformer/Weather.py --gpus '0'
-python experiments/train.py -c baselines/Crossformer/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/Crossformer/PEMS08.py --gpus '0'
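The per-model run.sh launchers are deleted throughout; each experiment is now started directly with the training script and a single config file, using the same command line the removed script contained:

python experiments/train.py -c baselines/Crossformer/ETTh1.py --gpus '0'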
diff --git a/baselines/D2STGNN/METR-LA.py b/baselines/D2STGNN/METR-LA.py
index 716c8e75..d068db2b 100644
--- a/baselines/D2STGNN/METR-LA.py
+++ b/baselines/D2STGNN/METR-LA.py
@@ -1,43 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import D2STGNN
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "D2STGNN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "D2STGNN"
-CFG.MODEL.ARCH = D2STGNN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'METR-LA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = D2STGNN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"num_feat": 1,
"num_hidden": 32,
"dropout": 0.1,
@@ -54,12 +43,71 @@
"time_in_day_size": 288,
"day_in_week_size": 7,
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'D2STGNN on METR-LA'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
@@ -67,66 +115,43 @@
"weight_decay": 1.0e-5,
"eps": 1.0e-8
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 30, 38, 46, 54, 62, 70, 80],
"gamma": 0.5
}
-
-# ================= train ================= #
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# curriculum learning
+# Curriculum learning
CFG.TRAIN.CL = EasyDict()
CFG.TRAIN.CL.WARM_EPOCHS = 0
CFG.TRAIN.CL.CL_EPOCHS = 6
CFG.TRAIN.CL.PREDICTION_LENGTH = 12
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 32
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
+CFG.VAL.DATA.BATCH_SIZE = 64
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 32
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
-# ================= evaluate ================= #
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
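The Hot Parameters block replaces hard-coded constants with per-dataset defaults from get_regular_settings, and CKPT_SAVE_DIR now encodes architecture, dataset, epochs, and horizons (e.g. checkpoints/D2STGNN/METR-LA_100_12_12). A hypothetical illustration of the settings returned for METR-LA, inferred from the keys read above; the 12/12 horizon and the 0.0 null value match the old config, while the split ratio and flags shown here are assumptions:

regular_settings = {
    'INPUT_LEN': 12,                          # was CFG.DATASET_INPUT_LEN
    'OUTPUT_LEN': 12,                         # was CFG.DATASET_OUTPUT_LEN
    'TRAIN_VAL_TEST_RATIO': [0.7, 0.1, 0.2],  # assumed conventional METR-LA split
    'NORM_EACH_CHANNEL': False,               # assumed
    'RESCALE': True,                          # assumed
    'NULL_VAL': 0.0,                          # was CFG.NULL_VAL
}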
diff --git a/baselines/D2STGNN/PEMS-BAY.py b/baselines/D2STGNN/PEMS-BAY.py
index 26f4ef34..3ec11d41 100644
--- a/baselines/D2STGNN/PEMS-BAY.py
+++ b/baselines/D2STGNN/PEMS-BAY.py
@@ -1,43 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import D2STGNN
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "D2STGNN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "D2STGNN"
-CFG.MODEL.ARCH = D2STGNN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS-BAY' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = D2STGNN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"num_feat": 1,
"num_hidden": 32,
"dropout": 0.1,
@@ -54,12 +43,71 @@
"time_in_day_size": 288,
"day_in_week_size": 7,
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'D2STGNN on PEMS-BAY'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
@@ -67,66 +115,43 @@
"weight_decay": 1.0e-5,
"eps": 1.0e-8
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 30, 38, 46, 54, 62, 70, 80],
"gamma": 0.5
}
-
-# ================= train ================= #
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# curriculum learning
+# Curriculum learning
CFG.TRAIN.CL = EasyDict()
CFG.TRAIN.CL.WARM_EPOCHS = 30
CFG.TRAIN.CL.CL_EPOCHS = 3
CFG.TRAIN.CL.PREDICTION_LENGTH = 12
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 32
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
+CFG.VAL.DATA.BATCH_SIZE = 64
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 32
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
-# ================= evaluate ================= #
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
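Normalization moves out of the dataset pipeline and into a scaler object configured by CFG.SCALER. A toy sketch consistent with the parameter names above, assuming the real ZScoreScaler fits its statistics on the training split only; the class name and internals here are illustrative:

import numpy as np

class ZScoreScalerSketch:
    def __init__(self, data: np.ndarray, train_ratio: float,
                 norm_each_channel: bool = True, rescale: bool = True):
        train = data[: int(len(data) * train_ratio)]   # fit on the training split only
        axis = 0 if norm_each_channel else None        # per-channel vs. global statistics
        self.mean, self.std = train.mean(axis=axis), train.std(axis=axis)
        self.rescale = rescale

    def transform(self, x: np.ndarray) -> np.ndarray:
        return (x - self.mean) / self.std

    def inverse_transform(self, x: np.ndarray) -> np.ndarray:
        # De-normalize predictions back to data scale when rescaling is enabled.
        return x * self.std + self.mean if self.rescale else x

scaler = ZScoreScalerSketch(np.random.rand(1000, 3), train_ratio=0.7)
restored = scaler.inverse_transform(scaler.transform(np.random.rand(10, 3)))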
diff --git a/baselines/D2STGNN/PEMS03.py b/baselines/D2STGNN/PEMS03.py
index 4f3abd2e..919b58ce 100644
--- a/baselines/D2STGNN/PEMS03.py
+++ b/baselines/D2STGNN/PEMS03.py
@@ -1,43 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import D2STGNN
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "D2STGNN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS03"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "D2STGNN"
-CFG.MODEL.ARCH = D2STGNN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS03' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = D2STGNN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"num_feat": 1,
"num_hidden": 32,
"dropout": 0.1,
@@ -54,12 +43,71 @@
"time_in_day_size": 288,
"day_in_week_size": 7,
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'D2STGNN on PEMS03'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
@@ -67,66 +115,43 @@
"weight_decay": 1.0e-5,
"eps": 1.0e-8
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 30, 38, 46, 54, 150],
+ "milestones": [1, 30, 38, 46, 54, 62, 70, 80],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 16
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# curriculum learning
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
+# Curriculum learning
CFG.TRAIN.CL = EasyDict()
CFG.TRAIN.CL.WARM_EPOCHS = 30
CFG.TRAIN.CL.CL_EPOCHS = 3
CFG.TRAIN.CL.PREDICTION_LENGTH = 12
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 16
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
+CFG.VAL.DATA.BATCH_SIZE = 64
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 16
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
-# ================= evaluate ================= #
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
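The curriculum-learning knobs (WARM_EPOCHS, CL_EPOCHS, PREDICTION_LENGTH) describe a schedule over the supervised horizon. One common reading, sketched here as an assumption rather than the runner's exact rule:

def cl_prediction_length(epoch: int, warm_epochs: int = 30,
                         cl_epochs: int = 3, max_len: int = 12) -> int:
    # Warm-up supervises the full horizon; afterwards the supervised horizon
    # restarts at one step and grows by one every `cl_epochs` epochs until it
    # reaches PREDICTION_LENGTH again.
    if epoch < warm_epochs:
        return max_len
    return min((epoch - warm_epochs) // cl_epochs + 1, max_len)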
diff --git a/baselines/D2STGNN/PEMS04.py b/baselines/D2STGNN/PEMS04.py
index d9669ce8..f2da9873 100644
--- a/baselines/D2STGNN/PEMS04.py
+++ b/baselines/D2STGNN/PEMS04.py
@@ -1,43 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import D2STGNN
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "D2STGNN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "D2STGNN"
-CFG.MODEL.ARCH = D2STGNN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = D2STGNN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"num_feat": 1,
"num_hidden": 32,
"dropout": 0.1,
@@ -54,12 +43,71 @@
"time_in_day_size": 288,
"day_in_week_size": 7,
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'D2STGNN on PEMS04'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
@@ -67,66 +115,43 @@
"weight_decay": 1.0e-5,
"eps": 1.0e-8
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 30, 38, 46, 54, 150],
+ "milestones": [1, 30, 38, 46, 54, 62, 70, 80],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 16
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# curriculum learning
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
+# Curriculum learning
CFG.TRAIN.CL = EasyDict()
CFG.TRAIN.CL.WARM_EPOCHS = 30
CFG.TRAIN.CL.CL_EPOCHS = 3
CFG.TRAIN.CL.PREDICTION_LENGTH = 12
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 16
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
+CFG.VAL.DATA.BATCH_SIZE = 64
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 16
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
-# ================= evaluate ================= #
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
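FORWARD_FEATURES and TARGET_FEATURES are channel indices into the last axis of the [batch, length, nodes, channels] tensors: the model consumes the value, time-of-day, and day-of-week channels, but is scored on the raw values only. A small sketch (shapes illustrative):

import torch

def select_features(x: torch.Tensor, idx: list) -> torch.Tensor:
    # Data layout is [B, L, N, C]; pick channels along the last axis.
    return x[..., idx]

history = torch.randn(16, 12, 307, 3)             # e.g. PEMS04 with 307 sensors
model_in = select_features(history, [0, 1, 2])    # CFG.MODEL.FORWARD_FEATURES
target = select_features(history, [0])            # CFG.MODEL.TARGET_FEATURES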
diff --git a/baselines/D2STGNN/PEMS07.py b/baselines/D2STGNN/PEMS07.py
index 261ab315..5a360bc8 100644
--- a/baselines/D2STGNN/PEMS07.py
+++ b/baselines/D2STGNN/PEMS07.py
@@ -1,43 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import D2STGNN
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "D2STGNN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS07"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "D2STGNN"
-CFG.MODEL.ARCH = D2STGNN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS07' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = D2STGNN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"num_feat": 1,
"num_hidden": 32,
"dropout": 0.1,
@@ -54,12 +43,71 @@
"time_in_day_size": 288,
"day_in_week_size": 7,
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'D2STGNN on PEMS07'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
@@ -67,66 +115,43 @@
"weight_decay": 1.0e-5,
"eps": 1.0e-8
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 30, 38, 46, 54, 150],
+ "milestones": [1, 30, 38, 46, 54, 62, 70, 80],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 8
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# curriculum learning
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
+# Curriculum learning
CFG.TRAIN.CL = EasyDict()
CFG.TRAIN.CL.WARM_EPOCHS = 30
CFG.TRAIN.CL.CL_EPOCHS = 3
CFG.TRAIN.CL.PREDICTION_LENGTH = 12
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 16
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
+CFG.VAL.DATA.BATCH_SIZE = 64
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 16
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
-# ================= evaluate ================= #
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
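PEMS07 trains with BATCH_SIZE = 8 where the other datasets use 16, presumably because its sensor graph is the largest of the PEMS benchmarks. With the PREFETCH/NUM_WORKERS/PIN_MEMORY knobs dropped from the configs, the runner presumably builds loaders from the two remaining keys plus its own defaults, roughly:

import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.randn(256, 12, 883, 3))     # toy stand-in; PEMS07 has 883 sensors
loader = DataLoader(dataset, batch_size=8, shuffle=True)  # CFG.TRAIN.DATA.BATCH_SIZE / SHUFFLE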
diff --git a/baselines/D2STGNN/PEMS08.py b/baselines/D2STGNN/PEMS08.py
index cc860ba2..065e7980 100644
--- a/baselines/D2STGNN/PEMS08.py
+++ b/baselines/D2STGNN/PEMS08.py
@@ -1,43 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import D2STGNN
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "D2STGNN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "D2STGNN"
-CFG.MODEL.ARCH = D2STGNN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = D2STGNN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"num_feat": 1,
"num_hidden": 32,
"dropout": 0.1,
@@ -54,12 +43,71 @@
"time_in_day_size": 288,
"day_in_week_size": 7,
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'D2STGNN on PEMS08'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
@@ -67,66 +115,43 @@
"weight_decay": 1.0e-5,
"eps": 1.0e-8
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 30, 38, 46, 54, 150],
+ "milestones": [1, 30, 38, 46, 54, 62, 70, 80],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 16
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# curriculum learning
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
+# Curriculum learning
CFG.TRAIN.CL = EasyDict()
CFG.TRAIN.CL.WARM_EPOCHS = 30
CFG.TRAIN.CL.CL_EPOCHS = 3
CFG.TRAIN.CL.PREDICTION_LENGTH = 12
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 16
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
+CFG.VAL.DATA.BATCH_SIZE = 64
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 16
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
-# ================= evaluate ================= #
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
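These hunks also align the PEMS schedules with METR-LA and PEMS-BAY, extending the milestones to [1, 30, 38, 46, 54, 62, 70, 80] in place of the old [1, 30, 38, 46, 54, 150]. The optimizer/scheduler pair maps directly onto PyTorch APIs; a runnable sketch (the learning rate is illustrative, since this hunk does not show it):

import torch
from torch.optim.lr_scheduler import MultiStepLR

model = torch.nn.Linear(4, 4)
optimizer = torch.optim.Adam(model.parameters(), lr=0.002,     # lr illustrative
                             weight_decay=1.0e-5, eps=1.0e-8)  # as in CFG.TRAIN.OPTIM.PARAM
scheduler = MultiStepLR(optimizer, milestones=[1, 30, 38, 46, 54, 62, 70, 80], gamma=0.5)
for epoch in range(100):                                       # NUM_EPOCHS
    optimizer.step()                                           # stand-in for one training epoch
    scheduler.step()                                           # halves the lr at each milestone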
diff --git a/baselines/D2STGNN/arch/d2stgnn_arch.py b/baselines/D2STGNN/arch/d2stgnn_arch.py
index 52d8c4cc..99230425 100644
--- a/baselines/D2STGNN/arch/d2stgnn_arch.py
+++ b/baselines/D2STGNN/arch/d2stgnn_arch.py
@@ -145,7 +145,7 @@ def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_s
Args:
history_data (Tensor): Input data with shape: [B, L, N, C]
-
+
Returns:
torch.Tensor: outputs with shape [B, L, N, C]
"""
diff --git a/baselines/D2STGNN/arch/dynamic_graph_conv/utils/mask.py b/baselines/D2STGNN/arch/dynamic_graph_conv/utils/mask.py
index 1f3b6eca..04e429c8 100644
--- a/baselines/D2STGNN/arch/dynamic_graph_conv/utils/mask.py
+++ b/baselines/D2STGNN/arch/dynamic_graph_conv/utils/mask.py
@@ -5,7 +5,7 @@ class Mask(nn.Module):
def __init__(self, **model_args):
super().__init__()
self.mask = model_args['adjs']
-
+
def _mask(self, index, adj):
mask = self.mask[index] + torch.ones_like(self.mask[index]) * 1e-7
return mask.to(adj.device) * adj
diff --git a/baselines/D2STGNN/run.sh b/baselines/D2STGNN/run.sh
deleted file mode 100644
index 680dbff7..00000000
--- a/baselines/D2STGNN/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/D2STGNN/METR-LA.py --gpus '2'
-python experiments/train.py -c baselines/D2STGNN/PEMS-BAY.py --gpus '2'
-python experiments/train.py -c baselines/D2STGNN/PEMS03.py --gpus '2'
-python experiments/train.py -c baselines/D2STGNN/PEMS04.py --gpus '2'
-python experiments/train.py -c baselines/D2STGNN/PEMS07.py --gpus '2'
-python experiments/train.py -c baselines/D2STGNN/PEMS08.py --gpus '2'
diff --git a/baselines/DCRNN/METR-LA.py b/baselines/DCRNN/METR-LA.py
index 0af9a7d6..654d28e4 100644
--- a/baselines/DCRNN/METR-LA.py
+++ b/baselines/DCRNN/METR-LA.py
@@ -1,50 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
+import random
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import DCRNN
-CFG = EasyDict()
-
-# DCRNN does not allow to load parameters since it creates parameters in the first iteration
-resume = False
-if not resume:
- import random
- _ = random.randint(-1e6, 1e6)
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DCRNN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG._ = _
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DCRNN"
-CFG.MODEL.ARCH = DCRNN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'METR-LA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DCRNN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"cl_decay_steps": 2000,
"horizon": 12,
"input_dim": 2,
@@ -57,74 +40,112 @@
"adj_mx": [torch.tensor(i) for i in adj_mx],
"use_curriculum_learning": True
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'DCRNN on METR-LA'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+# DCRNN cannot load saved parameters at start-up, because it creates its parameters during the first iteration
+CFG._ = random.randint(-10**6, 10**6)  # integer bounds: random.randint rejects float arguments like -1e6 on Python >= 3.12
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
CFG.MODEL.SETUP_GRAPH = True
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.01,
"eps": 1e-3
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [20, 30, 40, 50],
"gamma": 0.1
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
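The ZScoreScaler wired in above fits its statistics on the training split only (hence the 'train_ratio' parameter) and can undo the normalization on model outputs. A minimal sketch of the idea, assuming the real basicts.scaler.ZScoreScaler behaves along these lines:

import numpy as np

class ZScoreScalerSketch:
    """Illustrative only: fit mean/std on the training split, then (de)normalize."""

    def __init__(self, norm_each_channel: bool = True):
        # axis=0 keeps one mean/std per channel; None pools all entries
        self.axis = 0 if norm_each_channel else None
        self.mean = None
        self.std = None

    def fit(self, train_data: np.ndarray) -> None:
        self.mean = train_data.mean(axis=self.axis)
        self.std = train_data.std(axis=self.axis)

    def transform(self, data: np.ndarray) -> np.ndarray:
        return (data - self.mean) / self.std

    def inverse_transform(self, data: np.ndarray) -> np.ndarray:
        # the 'rescale' flag in CFG.SCALER.PARAM plausibly toggles this step
        return data * self.std + self.mean

scaler = ZScoreScalerSketch(norm_each_channel=True)
scaler.fit(np.random.rand(100, 3))           # statistics from the training split only
x = scaler.transform(np.random.rand(10, 3))  # then applied to every split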
diff --git a/baselines/DCRNN/PEMS-BAY.py b/baselines/DCRNN/PEMS-BAY.py
index 6c1c494e..23cba59d 100644
--- a/baselines/DCRNN/PEMS-BAY.py
+++ b/baselines/DCRNN/PEMS-BAY.py
@@ -1,50 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
+import random
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import DCRNN
-CFG = EasyDict()
-
-# DCRNN does not allow to load parameters since it creates parameters in the first iteration
-resume = False
-if not resume:
- import random
- _ = random.randint(-1e6, 1e6)
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DCRNN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG._ = _
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DCRNN"
-CFG.MODEL.ARCH = DCRNN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS-BAY' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DCRNN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"cl_decay_steps": 2000,
"horizon": 12,
"input_dim": 2,
@@ -57,74 +40,112 @@
"adj_mx": [torch.tensor(i) for i in adj_mx],
"use_curriculum_learning": True
}
-CFG.MODEL.SETUP_GRAPH = True
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'DCRNN model configuration (PEMS-BAY)'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+# DCRNN does not allow loading saved parameters, since it creates its parameters in the first iteration
+CFG._ = random.randint(-10**6, 10**6)
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
+CFG.MODEL.SETUP_GRAPH = True
+
+############################## Metrics Configuration ##############################
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
-# ================= optim ================= #
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.01,
"eps": 1e-3
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [20, 30, 40, 50],
"gamma": 0.1
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
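A note on the recurring `CFG._ = random.randint(...)` line: a plausible reading (an assumption; the runner internals are not part of this patch) is that the runner identifies a run by a fingerprint of its config, so a throwaway random field gives every launch a fresh identity and keeps it from resuming stale DCRNN checkpoints. Sketch:

import hashlib
import random

def config_fingerprint(cfg: dict) -> str:
    # hash a stable textual form of the config (hypothetical mechanism)
    return hashlib.md5(repr(sorted(cfg.items())).encode()).hexdigest()

cfg = {'MODEL': 'DCRNN', '_': random.randint(-10**6, 10**6)}
print(config_fingerprint(cfg))  # differs on every launch, so nothing is resumed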
diff --git a/baselines/DCRNN/PEMS03.py b/baselines/DCRNN/PEMS03.py
index 4dd2fbb4..870be996 100644
--- a/baselines/DCRNN/PEMS03.py
+++ b/baselines/DCRNN/PEMS03.py
@@ -1,50 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
+import random
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import DCRNN
-CFG = EasyDict()
-
-# DCRNN does not allow to load parameters since it creates parameters in the first iteration
-resume = False
-if not resume:
- import random
- _ = random.randint(-1e6, 1e6)
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DCRNN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS03"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG._ = _
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DCRNN"
-CFG.MODEL.ARCH = DCRNN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS03' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DCRNN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"cl_decay_steps": 2000,
"horizon": 12,
"input_dim": 2,
@@ -57,74 +40,108 @@
"adj_mx": [torch.tensor(i) for i in adj_mx],
"use_curriculum_learning": True
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'DCRNN model configuration (PEMS03)'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+# DCRNN does not allow loading saved parameters, since it creates its parameters in the first iteration
+CFG._ = random.randint(-10**6, 10**6)
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
CFG.MODEL.SETUP_GRAPH = True
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.003,
"eps": 1e-3
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [80],
"gamma": 0.3
}
-
-# ================= train ================= #
-# CFG.TRAIN.CLIP_GRAD_PARAM = {
-# "max_norm": 5.0
-# }
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
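CFG.MODEL.SETUP_GRAPH = True pairs with the comment about DCRNN creating its parameters in the first iteration: until one batch has passed through, a lazily built module exposes no parameters, so the optimizer must be constructed after a dummy forward pass. A toy illustration (DCRNN's actual lazy construction is more involved):

import torch
import torch.nn as nn

class LazyLinear(nn.Module):
    """Creates its weight on the first forward call, as the config comment says DCRNN does."""

    def __init__(self, out_features: int):
        super().__init__()
        self.out_features = out_features
        self.weight = None  # not yet a registered parameter

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.weight is None:
            self.weight = nn.Parameter(torch.randn(x.shape[-1], self.out_features))
        return x @ self.weight

model = LazyLinear(4)
assert len(list(model.parameters())) == 0  # an optimizer built now would track nothing
model(torch.randn(2, 8))                   # the "setup graph" dummy pass
assert len(list(model.parameters())) == 1  # parameters now exist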
diff --git a/baselines/DCRNN/PEMS04.py b/baselines/DCRNN/PEMS04.py
index efd87640..4741babd 100644
--- a/baselines/DCRNN/PEMS04.py
+++ b/baselines/DCRNN/PEMS04.py
@@ -1,50 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
+import random
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import DCRNN
-CFG = EasyDict()
-
-# DCRNN does not allow to load parameters since it creates parameters in the first iteration
-resume = False
-if not resume:
- import random
- _ = random.randint(-1e6, 1e6)
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DCRNN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG._ = _
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DCRNN"
-CFG.MODEL.ARCH = DCRNN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DCRNN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"cl_decay_steps": 2000,
"horizon": 12,
"input_dim": 2,
@@ -57,74 +40,112 @@
"adj_mx": [torch.tensor(i) for i in adj_mx],
"use_curriculum_learning": True
}
-CFG.MODEL.SETUP_GRAPH = True
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'DCRNN model configuration (PEMS04)'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+# DCRNN does not allow loading saved parameters, since it creates its parameters in the first iteration
+CFG._ = random.randint(-10**6, 10**6)
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
+CFG.MODEL.SETUP_GRAPH = True
+
+############################## Metrics Configuration ##############################
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
-# ================= optim ================= #
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.003,
"eps": 1e-3
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [80],
"gamma": 0.3
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
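To make the PEMS04 schedule concrete: Adam starts at lr 0.003 and MultiStepLR scales it by gamma 0.3 at its single milestone, so from epoch 80 onward the lr is 0.0009. A quick check in plain PyTorch:

import torch

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.Adam([param], lr=0.003, eps=1e-3)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[80], gamma=0.3)

for epoch in range(100):
    optimizer.step()   # gradients omitted; a no-op step for illustration
    scheduler.step()
    if epoch in (78, 79, 80):
        print(epoch, optimizer.param_groups[0]['lr'])
# prints 0.003 at epoch 78, then 0.0009 once 80 scheduler steps have elapsed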
diff --git a/baselines/DCRNN/PEMS07.py b/baselines/DCRNN/PEMS07.py
index 4ba3d280..e68909c0 100644
--- a/baselines/DCRNN/PEMS07.py
+++ b/baselines/DCRNN/PEMS07.py
@@ -1,50 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
+import random
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import DCRNN
-CFG = EasyDict()
-
-# DCRNN does not allow to load parameters since it creates parameters in the first iteration
-resume = False
-if not resume:
- import random
- _ = random.randint(-1e6, 1e6)
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DCRNN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS07"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG._ = _
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DCRNN"
-CFG.MODEL.ARCH = DCRNN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS07' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DCRNN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"cl_decay_steps": 2000,
"horizon": 12,
"input_dim": 2,
@@ -57,74 +40,108 @@
"adj_mx": [torch.tensor(i) for i in adj_mx],
"use_curriculum_learning": True
}
-CFG.MODEL.SETUP_GRAPH = True
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'DCRNN model configuration (PEMS07)'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+# DCRNN does not allow loading saved parameters, since it creates its parameters in the first iteration
+CFG._ = random.randint(-10**6, 10**6)
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
+CFG.MODEL.SETUP_GRAPH = True
+
+############################## Metrics Configuration ##############################
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
-# ================= optim ================= #
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.003,
"eps": 1e-3
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [80],
"gamma": 0.3
}
-
-# ================= train ================= #
-# CFG.TRAIN.CLIP_GRAD_PARAM = {
-# "max_norm": 5.0
-# }
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
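All of these configs route MAE/MAPE/RMSE through masked variants, with CFG.METRICS.NULL_VAL taken from the dataset's regular settings (0.0 for these traffic datasets). The idea, sketched below (the basicts implementations may differ in detail), is that entries equal to the null value are excluded from the average so missing readings do not distort the score:

import torch

def masked_mae_sketch(pred: torch.Tensor, target: torch.Tensor, null_val: float = 0.0) -> torch.Tensor:
    mask = (target != null_val).float()
    mask = mask / mask.mean()             # re-weight so valid entries average to 1
    loss = torch.abs(pred - target) * mask
    return torch.nan_to_num(loss).mean()  # guard against an all-null batch

pred = torch.tensor([1.0, 2.0, 3.0])
target = torch.tensor([0.0, 2.5, 3.5])    # first entry is a null reading
print(masked_mae_sketch(pred, target))    # 0.5: only the two valid entries count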
diff --git a/baselines/DCRNN/PEMS08.py b/baselines/DCRNN/PEMS08.py
index 5f63afef..caceeca2 100644
--- a/baselines/DCRNN/PEMS08.py
+++ b/baselines/DCRNN/PEMS08.py
@@ -1,50 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
+import random
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import DCRNN
-CFG = EasyDict()
-
-# DCRNN does not allow to load parameters since it creates parameters in the first iteration
-resume = False
-if not resume:
- import random
- _ = random.randint(-1e6, 1e6)
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DCRNN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG._ = _
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DCRNN"
-CFG.MODEL.ARCH = DCRNN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DCRNN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"cl_decay_steps": 2000,
"horizon": 12,
"input_dim": 2,
@@ -57,74 +40,108 @@
"adj_mx": [torch.tensor(i) for i in adj_mx],
"use_curriculum_learning": True
}
-CFG.MODEL.SETUP_GRAPH = True
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'DCRNN model configuration (PEMS08)'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+# DCRNN does not allow loading saved parameters, since it creates its parameters in the first iteration
+CFG._ = random.randint(-10**6, 10**6)
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
+CFG.MODEL.SETUP_GRAPH = True
+
+############################## Metrics Configuration ##############################
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
-# ================= optim ================= #
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.003,
"eps": 1e-3
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [80],
"gamma": 0.3
}
-
-# ================= train ================= #
-# CFG.TRAIN.CLIP_GRAD_PARAM = {
-# "max_norm": 5.0
-# }
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
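CFG.EVAL.HORIZONS = [3, 6, 12] requests metrics at selected prediction steps on top of the overall average. Assuming 1-indexed horizons over the 12-step output (the reporting code itself sits outside this patch), the slicing amounts to:

import torch

# (batch, output_len, num_nodes, num_features); sizes are illustrative
prediction = torch.randn(64, 12, 170, 1)
target = torch.randn(64, 12, 170, 1)

for horizon in (3, 6, 12):
    step = horizon - 1  # 1-indexed horizon -> 0-indexed output step
    mae = torch.abs(prediction[:, step] - target[:, step]).mean()
    print(f'horizon {horizon}: MAE {mae.item():.3f}')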
diff --git a/baselines/DCRNN/run.sh b/baselines/DCRNN/run.sh
deleted file mode 100644
index 679b7b88..00000000
--- a/baselines/DCRNN/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/DCRNN/METR-LA.py --gpus '1'
-python experiments/train.py -c baselines/DCRNN/PEMS-BAY.py --gpus '1'
-python experiments/train.py -c baselines/DCRNN/PEMS03.py --gpus '1'
-python experiments/train.py -c baselines/DCRNN/PEMS04.py --gpus '1'
-python experiments/train.py -c baselines/DCRNN/PEMS07.py --gpus '1'
-python experiments/train.py -c baselines/DCRNN/PEMS08.py --gpus '1'
diff --git a/baselines/DGCRN/METR-LA.py b/baselines/DGCRN/METR-LA.py
index 592384b3..26412749 100644
--- a/baselines/DGCRN/METR-LA.py
+++ b/baselines/DGCRN/METR-LA.py
@@ -1,43 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
+import random
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import DGCRN
from .runner import DGCRNRunner
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DGCRN model configuration"
-CFG.RUNNER = DGCRNRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DGCRN"
-CFG.MODEL.ARCH = DGCRN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'METR-LA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DGCRN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"gcn_depth": 2,
"num_nodes": 207,
"predefined_A": [torch.Tensor(_) for _ in adj_mx],
@@ -53,78 +43,117 @@
"rnn_size": 64,
"hyperGNN_dim": 16
}
+NUM_EPOCHS = 150
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'DGCRN model configuration (METR-LA)'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = DGCRNRunner
+# As in the DCRNN configs, the random value below prevents resuming from saved parameters
+CFG._ = random.randint(-10**6, 10**6)
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
+CFG.MODEL.SETUP_GRAPH = True
+
+############################## Metrics Configuration ##############################
-# ================= optim ================= #
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr":0.001,
"weight_decay":0.0001
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones":[100, 150],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 150
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-## curriculum learning
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
+# Curriculum learning
CFG.TRAIN.CL = EasyDict()
CFG.TRAIN.CL.WARM_EPOCHS = 0
CFG.TRAIN.CL.CL_EPOCHS = 6
-CFG.TRAIN.CL.PREDICTION_LENGTH = 12
+CFG.TRAIN.CL.PREDICTION_LENGTH = 12
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
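The DGCRN configs enable curriculum learning through WARM_EPOCHS, CL_EPOCHS and PREDICTION_LENGTH. A common scheme, and a reasonable guess at what these fields drive (the exact formula lives in the runner, not in this patch), is to grow the supervised horizon by one step every CL_EPOCHS epochs after a warm-up, capped at PREDICTION_LENGTH:

def cl_horizon(epoch: int, warm_epochs: int = 0, cl_epochs: int = 6,
               prediction_length: int = 12) -> int:
    """How many leading prediction steps the loss covers at a given epoch."""
    if epoch < warm_epochs:
        return prediction_length  # warm-up trains on the full horizon
    return min((epoch - warm_epochs) // cl_epochs + 1, prediction_length)

print([cl_horizon(e) for e in (0, 5, 6, 12, 66, 80)])
# [1, 1, 2, 3, 12, 12] -> one more step every 6 epochs until all 12 are supervised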
diff --git a/baselines/DGCRN/PEMS-BAY.py b/baselines/DGCRN/PEMS-BAY.py
index b89d3e79..cbc32f1c 100644
--- a/baselines/DGCRN/PEMS-BAY.py
+++ b/baselines/DGCRN/PEMS-BAY.py
@@ -1,43 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
+import random
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import DGCRN
from .runner import DGCRNRunner
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DGCRN model configuration"
-CFG.RUNNER = DGCRNRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DGCRN"
-CFG.MODEL.ARCH = DGCRN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS-BAY' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DGCRN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"gcn_depth": 2,
"num_nodes": 325,
"predefined_A": [torch.Tensor(_) for _ in adj_mx],
@@ -53,78 +43,118 @@
"rnn_size": 64,
"hyperGNN_dim": 16
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'DGCRN model configuration (PEMS-BAY)'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = DGCRNRunner
+# As in the DCRNN configs, the random value below prevents resuming from saved parameters
+CFG._ = random.randint(-10**6, 10**6)
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
+CFG.MODEL.SETUP_GRAPH = True
+
+############################## Metrics Configuration ##############################
-# ================= optim ================= #
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr":0.001,
"weight_decay":0.0001
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones":[100, 150],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-## curriculum learning
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
+# Curriculum learning
CFG.TRAIN.CL = EasyDict()
CFG.TRAIN.CL.WARM_EPOCHS = 0
CFG.TRAIN.CL.CL_EPOCHS = 6
-CFG.TRAIN.CL.PREDICTION_LENGTH = 12
+CFG.TRAIN.CL.PREDICTION_LENGTH = 12
+CFG.MODEL.DDP_FIND_UNUSED_PARAMETERS = True # Controls the `find_unused_parameters` argument of `torch.nn.parallel.DistributedDataParallel`. In distributed training, PyTorch raises a RuntimeError if some parameters are unused in the forward pass; set this to True in that case.
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
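DDP_FIND_UNUSED_PARAMETERS, added for DGCRN on PEMS-BAY, maps onto the standard torch.nn.parallel.DistributedDataParallel argument its comment names. For reference, the vanilla PyTorch usage (this snippet assumes the process group has already been initialised by the launcher):

import torch.nn as nn
from torch.nn.parallel import DistributedDataParallel as DDP

model = nn.Linear(8, 8).cuda()
# Without find_unused_parameters=True, DDP raises a RuntimeError when some
# parameters receive no gradient in a step, e.g. branches skipped under
# curriculum learning.
model = DDP(model, device_ids=[0], find_unused_parameters=True)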
diff --git a/baselines/DGCRN/PEMS03.py b/baselines/DGCRN/PEMS03.py
index cff5df78..fab37c3c 100644
--- a/baselines/DGCRN/PEMS03.py
+++ b/baselines/DGCRN/PEMS03.py
@@ -1,43 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
+import random
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import DGCRN
from .runner import DGCRNRunner
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DGCRN model configuration"
-CFG.RUNNER = DGCRNRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS03"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DGCRN"
-CFG.MODEL.ARCH = DGCRN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS03' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DGCRN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"gcn_depth": 2,
"num_nodes": 358,
"predefined_A": [torch.Tensor(_) for _ in adj_mx],
@@ -53,78 +43,117 @@
"rnn_size": 64,
"hyperGNN_dim": 16
}
+NUM_EPOCHS = 150
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'DGCRN model configuration (PEMS03)'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = DGCRNRunner
+# As in the DCRNN configs, the random value below prevents resuming from saved parameters
+CFG._ = random.randint(-10**6, 10**6)
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
+CFG.MODEL.SETUP_GRAPH = True
+
+############################## Metrics Configuration ##############################
-# ================= optim ================= #
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr":0.001,
"weight_decay":0.0001
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones":[100, 150],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 150
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-## curriculum learning
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
+# Curriculum learning
CFG.TRAIN.CL = EasyDict()
CFG.TRAIN.CL.WARM_EPOCHS = 0
CFG.TRAIN.CL.CL_EPOCHS = 6
-CFG.TRAIN.CL.PREDICTION_LENGTH = 12
+CFG.TRAIN.CL.PREDICTION_LENGTH = 12
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
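Reviewer note on the new "Hot Parameters" pattern used throughout these configs: get_regular_settings(DATA_NAME) returns the per-dataset defaults that each file unpacks into INPUT_LEN, OUTPUT_LEN, and so on. A minimal sketch of such a helper, assuming a plain dict lookup; the split ratio below is an illustrative assumption, while the 12/12 window and 0.0 null value match the values deleted from the old PEMS03 config:

# Hypothetical sketch of a get_regular_settings-style helper; the
# TRAIN_VAL_TEST_RATIO value is an assumption, the rest mirror the old config.
def get_regular_settings_sketch(dataset_name: str) -> dict:
    defaults = {
        'PEMS03': {
            'INPUT_LEN': 12,
            'OUTPUT_LEN': 12,
            'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],  # assumed split
            'NORM_EACH_CHANNEL': False,
            'RESCALE': True,
            'NULL_VAL': 0.0,
        },
    }
    return defaults[dataset_name]

settings = get_regular_settings_sketch('PEMS03')
assert settings['INPUT_LEN'] == 12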
diff --git a/baselines/DGCRN/PEMS04.py b/baselines/DGCRN/PEMS04.py
index 76646d97..b7ce7e92 100644
--- a/baselines/DGCRN/PEMS04.py
+++ b/baselines/DGCRN/PEMS04.py
@@ -1,43 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
+import random
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import DGCRN
from .runner import DGCRNRunner
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DGCRN model configuration"
-CFG.RUNNER = DGCRNRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DGCRN"
-CFG.MODEL.ARCH = DGCRN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DGCRN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"gcn_depth": 2,
"num_nodes": 307,
"predefined_A": [torch.Tensor(_) for _ in adj_mx],
@@ -53,78 +43,117 @@
"rnn_size": 64,
"hyperGNN_dim": 16
}
+NUM_EPOCHS = 150
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = DGCRNRunner
+# DGCRN does not allow loading parameters, since it creates its parameters during the first iteration
+CFG._ = random.randint(-10**6, 10**6)  # randint needs ints; 1e6 is a float
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
+CFG.MODEL.SETUP_GRAPH = True
+
+############################## Metrics Configuration ##############################
-# ================= optim ================= #
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr":0.001,
"weight_decay":0.0001
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones":[100, 150],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 150
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-## curriculum learning
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
+# Curriculum learning
CFG.TRAIN.CL = EasyDict()
CFG.TRAIN.CL.WARM_EPOCHS = 0
CFG.TRAIN.CL.CL_EPOCHS = 6
-CFG.TRAIN.CL.PREDICTION_LENGTH = 12
+CFG.TRAIN.CL.PREDICTION_LENGTH = 12
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
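A note on load_adj(..., "doubletransition"), which every DGCRN config above uses to build predefined_A: the name refers to the pair of forward and backward random-walk transition matrices from the diffusion-convolution literature. A hedged sketch of that normalization; the library's actual helper may additionally transpose the matrices and guard against zero-degree nodes:

import numpy as np

def double_transition_sketch(adj: np.ndarray):
    # Forward transition: row-normalize A; backward transition: row-normalize A^T.
    # DGCRN consumes both, one torch.Tensor per matrix (see the configs above).
    p_fwd = adj / adj.sum(axis=1, keepdims=True)
    p_bwd = adj.T / adj.T.sum(axis=1, keepdims=True)
    return [p_fwd, p_bwd]

adj = np.array([[1.0, 0.5],
                [0.2, 1.0]])
adj_mx = double_transition_sketch(adj)   # -> [torch.Tensor(_) for _ in adj_mx]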
diff --git a/baselines/DGCRN/PEMS07.py b/baselines/DGCRN/PEMS07.py
index 25d68517..97f93698 100644
--- a/baselines/DGCRN/PEMS07.py
+++ b/baselines/DGCRN/PEMS07.py
@@ -1,43 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
+import random
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import DGCRN
from .runner import DGCRNRunner
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DGCRN model configuration"
-CFG.RUNNER = DGCRNRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS07"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DGCRN"
-CFG.MODEL.ARCH = DGCRN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS07' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DGCRN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"gcn_depth": 2,
"num_nodes": 883,
"predefined_A": [torch.Tensor(_) for _ in adj_mx],
@@ -53,78 +43,117 @@
"rnn_size": 64,
"hyperGNN_dim": 16
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = DGCRNRunner
+# DGCRN does not allow loading parameters, since it creates its parameters during the first iteration
+CFG._ = random.randint(-10**6, 10**6)  # randint needs ints; 1e6 is a float
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
+CFG.MODEL.SETUP_GRAPH = True
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr":0.001,
"weight_decay":0.0001
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones":[100, 150],
"gamma": 0.5
}
-
-# ================= train ================= #
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 32
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 24
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-## curriculum learning
+# Curriculum learning
CFG.TRAIN.CL = EasyDict()
CFG.TRAIN.CL.WARM_EPOCHS = 0
CFG.TRAIN.CL.CL_EPOCHS = 6
-CFG.TRAIN.CL.PREDICTION_LENGTH = 12
+CFG.TRAIN.CL.PREDICTION_LENGTH = 12
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
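On the recurring CFG._ = random.randint(...) line: BasicTS-style runners locate previous checkpoints by hashing the config, so injecting a throwaway random field gives each run a fresh hash and keeps the runner from resuming stale parameters, which the comment above notes DGCRN cannot load anyway. A sketch of the mechanism, assuming an MD5-of-serialized-config scheme; the library's exact hashing is an assumption:

import hashlib
import json
import random

def config_fingerprint(cfg: dict) -> str:
    # Hash a stable serialization of the config; a runner can use this
    # to decide whether a matching checkpoint directory already exists.
    payload = json.dumps(cfg, sort_keys=True, default=str)
    return hashlib.md5(payload.encode()).hexdigest()

cfg = {'MODEL': 'DGCRN', 'NUM_EPOCHS': 100}
cfg['_'] = random.randint(-10**6, 10**6)   # fresh value -> fresh fingerprint
print(config_fingerprint(cfg))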
diff --git a/baselines/DGCRN/PEMS08.py b/baselines/DGCRN/PEMS08.py
index 25a56f6d..3fced5c1 100644
--- a/baselines/DGCRN/PEMS08.py
+++ b/baselines/DGCRN/PEMS08.py
@@ -1,43 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
+import random
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import DGCRN
from .runner import DGCRNRunner
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DGCRN model configuration"
-CFG.RUNNER = DGCRNRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DGCRN"
-CFG.MODEL.ARCH = DGCRN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DGCRN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"gcn_depth": 2,
"num_nodes": 170,
"predefined_A": [torch.Tensor(_) for _ in adj_mx],
@@ -53,78 +43,117 @@
"rnn_size": 64,
"hyperGNN_dim": 16
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = DGCRNRunner
+# DGCRN does not allow loading parameters, since it creates its parameters during the first iteration
+CFG._ = random.randint(-10**6, 10**6)  # randint needs ints; 1e6 is a float
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
+CFG.MODEL.SETUP_GRAPH = True
+
+############################## Metrics Configuration ##############################
-# ================= optim ================= #
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr":0.001,
"weight_decay":0.0001
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones":[100, 150],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-## curriculum learning
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
+# Curriculum learning
CFG.TRAIN.CL = EasyDict()
CFG.TRAIN.CL.WARM_EPOCHS = 0
CFG.TRAIN.CL.CL_EPOCHS = 6
-CFG.TRAIN.CL.PREDICTION_LENGTH = 12
+CFG.TRAIN.CL.PREDICTION_LENGTH = 12
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
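The CFG.TRAIN.CL block kept in these configs drives curriculum learning over the prediction horizon: after WARM_EPOCHS of full-horizon training, the number of supervised future steps grows by one every CL_EPOCHS epochs until it reaches PREDICTION_LENGTH. A small sketch of that schedule under WARM_EPOCHS=0, CL_EPOCHS=6, PREDICTION_LENGTH=12; the exact ramp is an assumption based on the usual implementation of this scheme:

def cl_horizon(epoch, warm_epochs=0, cl_epochs=6, prediction_length=12):
    # Supervised future steps at a given (1-indexed) epoch.
    if epoch <= warm_epochs:
        return prediction_length        # warm-up trains on the full horizon
    steps = (epoch - warm_epochs - 1) // cl_epochs + 1
    return min(steps, prediction_length)

print([cl_horizon(e) for e in (1, 6, 7, 13, 72)])   # [1, 1, 2, 3, 12]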
diff --git a/baselines/DGCRN/run.sh b/baselines/DGCRN/run.sh
deleted file mode 100644
index 1b108cb2..00000000
--- a/baselines/DGCRN/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/DGCRN/METR-LA.py --gpus '2'
-python experiments/train.py -c baselines/DGCRN/PEMS-BAY.py --gpus '2'
-python experiments/train.py -c baselines/DGCRN/PEMS03.py --gpus '2'
-python experiments/train.py -c baselines/DGCRN/PEMS04.py --gpus '2'
-python experiments/train.py -c baselines/DGCRN/PEMS07.py --gpus '2'
-python experiments/train.py -c baselines/DGCRN/PEMS08.py --gpus '2'
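CFG.MODEL.SETUP_GRAPH = True, added to every DGCRN config above, flags models that materialize some parameters only during their first forward pass; the runner therefore has to push one dummy batch through the network before building the optimizer or loading weights. A minimal sketch of such a lazily built module (illustrative only, not DGCRN's architecture):

import torch
import torch.nn as nn

class LazyHead(nn.Module):
    # The projection is created on the first forward pass, so
    # model.parameters() is empty until one batch has been seen.
    def __init__(self):
        super().__init__()
        self.proj = None

    def forward(self, x):
        if self.proj is None:
            self.proj = nn.Linear(x.shape[-1], 12)
        return self.proj(x)

model = LazyHead()
model(torch.randn(2, 8))                                    # "setup graph" step
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)   # now non-empty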
diff --git a/baselines/DGCRN/runner/dgcrn_runner.py b/baselines/DGCRN/runner/dgcrn_runner.py
index 29bd4485..4f31375f 100644
--- a/baselines/DGCRN/runner/dgcrn_runner.py
+++ b/baselines/DGCRN/runner/dgcrn_runner.py
@@ -20,7 +20,7 @@ def forward(self, data: tuple, epoch: int = None, iter_num: int = None, train: b
"""
# preprocess
- future_data, history_data = data
+ future_data, history_data = data['target'], data['inputs']
history_data = self.to_running_device(history_data) # B, L, N, C
future_data = self.to_running_device(future_data) # B, L, N, C
batch_size, length, num_nodes, _ = future_data.shape
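The one-line runner change above captures the new dataset contract in 0.4.0: batches arrive as dicts keyed 'inputs' and 'target' instead of positional tuples. A minimal sketch of a dataset honoring that contract, with shapes following the B, L, N, C comments in the runner (class and variable names here are illustrative):

import torch
from torch.utils.data import Dataset

class DictWindowDataset(Dataset):
    # Yields {'inputs': (L_in, N, C), 'target': (L_out, N, C)} per sample,
    # matching the data['inputs'] / data['target'] access in the runner.
    def __init__(self, series: torch.Tensor, input_len: int, output_len: int):
        self.series = series            # (T, N, C)
        self.input_len, self.output_len = input_len, output_len

    def __len__(self):
        return self.series.shape[0] - self.input_len - self.output_len + 1

    def __getitem__(self, i):
        j = i + self.input_len
        return {'inputs': self.series[i:j],
                'target': self.series[j:j + self.output_len]}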
diff --git a/baselines/DLinear/ETTh1.py b/baselines/DLinear/ETTh1.py
index b96bdff2..c63b67e3 100644
--- a/baselines/DLinear/ETTh1.py
+++ b/baselines/DLinear/ETTh1.py
@@ -1,108 +1,135 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DLinear
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DLinear
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "pred_len": OUTPUT_LEN,
+ "individual": False,
+ "enc_in": 7
+}
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "DLinear model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 720
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
-# ================= model ================= #
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DLinear"
-CFG.MODEL.ARCH = DLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 7
-}
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0003,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
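The new SCALER block replaces the normalization that used to live inside the dataset: a ZScoreScaler fits mean and std on the training split (the first TRAIN_VAL_TEST_RATIO[0] fraction) and, when rescale is set, maps predictions back to the original units before metrics are computed. A compact sketch of the behavior these parameters imply; the class internals are assumptions, not the library's code:

import numpy as np

class ZScoreScalerSketch:
    def __init__(self, data, train_ratio, norm_each_channel):
        train = data[: int(len(data) * train_ratio)]
        axis = 0 if norm_each_channel else None   # per-channel vs. global stats
        self.mean = train.mean(axis=axis)
        self.std = train.std(axis=axis)

    def transform(self, x):
        return (x - self.mean) / self.std

    def inverse_transform(self, x):               # applied when RESCALE is on
        return x * self.std + self.mean

data = np.random.rand(1000, 7)                    # (T, channels), e.g. ETTh1
scaler = ZScoreScalerSketch(data, train_ratio=0.6, norm_each_channel=True)
assert np.allclose(scaler.inverse_transform(scaler.transform(data)), data)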
diff --git a/baselines/DLinear/ETTh2.py b/baselines/DLinear/ETTh2.py
index 8e712a34..c6536f29 100644
--- a/baselines/DLinear/ETTh2.py
+++ b/baselines/DLinear/ETTh2.py
@@ -1,108 +1,135 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DLinear
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DLinear
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "pred_len": OUTPUT_LEN,
+ "individual": False,
+ "enc_in": 7
+}
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "DLinear model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
-# ================= model ================= #
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DLinear"
-CFG.MODEL.ARCH = DLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 7
-}
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0003,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
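The metrics now come from basicts.metrics and share a single NULL_VAL convention: positions whose ground truth equals the null value are excluded from the average. A hedged sketch of a masked MAE consistent with that convention; the real masked_mae may differ in details such as how the mask is renormalized:

import torch

def masked_mae_sketch(pred, target, null_val=0.0):
    # Drop entries where the ground truth equals the null sentinel.
    if null_val != null_val:                 # NaN sentinel (NaN != NaN)
        mask = ~torch.isnan(target)
    else:
        mask = target != null_val
    mask = mask.float()
    mask = mask / mask.mean()                # renormalize over kept entries
    loss = torch.abs(pred - target) * mask
    return torch.nan_to_num(loss).mean()

pred, target = torch.rand(4, 12, 170, 1), torch.rand(4, 12, 170, 1)
print(masked_mae_sketch(pred, target, null_val=0.0))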
diff --git a/baselines/DLinear/ETTm1.py b/baselines/DLinear/ETTm1.py
index 40751085..7ebeda41 100644
--- a/baselines/DLinear/ETTm1.py
+++ b/baselines/DLinear/ETTm1.py
@@ -1,108 +1,135 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DLinear
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DLinear
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "pred_len": OUTPUT_LEN,
+ "individual": False,
+ "enc_in": 7
+}
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "DLinear model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
-# ================= model ================= #
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DLinear"
-CFG.MODEL.ARCH = DLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 7
-}
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0003,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
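A small practical consequence of the reworked CKPT_SAVE_DIR: checkpoints are now namespaced by model, dataset, epoch budget, and both window lengths instead of model and epochs alone, so long- and short-horizon runs of the same model no longer collide. Assuming get_regular_settings keeps the 336/336 window the deleted ETTm1 config used, the path resolves as:

import os

# Hypothetical resolution for the DLinear/ETTm1 config above.
path = os.path.join('checkpoints', 'DLinear',
                    '_'.join(['ETTm1', '100', '336', '336']))
print(path)   # checkpoints/DLinear/ETTm1_100_336_336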
diff --git a/baselines/DLinear/ETTm2.py b/baselines/DLinear/ETTm2.py
index 725ed0cb..51404298 100644
--- a/baselines/DLinear/ETTm2.py
+++ b/baselines/DLinear/ETTm2.py
@@ -1,108 +1,135 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DLinear
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DLinear
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "pred_len": OUTPUT_LEN,
+ "individual": False,
+ "enc_in": 7
+}
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "DLinear model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
-# ================= model ================= #
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DLinear"
-CFG.MODEL.ARCH = DLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 7
-}
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0003,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
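CFG.EVAL.HORIZONS asks the runner to report metrics at selected lead times in addition to the overall average; the long-term forecasting configs above cover 12 through 336 steps. A sketch of the slicing this implies, assuming horizon h maps to index h-1 on the length axis:

import torch

def horizon_metrics(pred, target, horizons, metric):
    # pred/target: (B, L, N, C); evaluate the metric at each lead time.
    return {h: metric(pred[:, h - 1], target[:, h - 1]).item() for h in horizons}

pred, target = torch.rand(8, 336, 7, 1), torch.rand(8, 336, 7, 1)
mae = lambda p, t: torch.mean(torch.abs(p - t))
print(horizon_metrics(pred, target, [12, 24, 48, 96, 192, 288, 336], mae))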
diff --git a/baselines/DLinear/Electricity.py b/baselines/DLinear/Electricity.py
index 8769b4f9..5993ac67 100644
--- a/baselines/DLinear/Electricity.py
+++ b/baselines/DLinear/Electricity.py
@@ -1,108 +1,135 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DLinear
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Electricity' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DLinear
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "pred_len": OUTPUT_LEN,
+ "individual": False,
+ "enc_in": 321
+}
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "DLinear model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Electricity"
-CFG.DATASET_TYPE = "Electricity"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
-# ================= model ================= #
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DLinear"
-CFG.MODEL.ARCH = DLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 321
-}
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0003,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
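+# Metrics are additionally reported at each of these horizons (in steps);
+# the last entry (336) matches OUTPUT_LEN here, covering the full prediction window.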
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/DLinear/ExchangeRate.py b/baselines/DLinear/ExchangeRate.py
index ac0dffc8..244d7b9d 100644
--- a/baselines/DLinear/ExchangeRate.py
+++ b/baselines/DLinear/ExchangeRate.py
@@ -1,108 +1,135 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DLinear
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ExchangeRate' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DLinear
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "pred_len": OUTPUT_LEN,
+ "individual": False,
+ "enc_in": 8
+}
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "DLinear model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'DLinear model configuration for ExchangeRate'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ExchangeRate"
-CFG.DATASET_TYPE = "Exchange Rate"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
-# ================= model ================= #
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DLinear"
-CFG.MODEL.ARCH = DLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 8
-}
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0003,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/DLinear/METR-LA.py b/baselines/DLinear/METR-LA.py
deleted file mode 100644
index 0f6b77b5..00000000
--- a/baselines/DLinear/METR-LA.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import DLinear
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DLinear model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic Speed"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DLinear"
-CFG.MODEL.ARCH = DLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 207
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/DLinear/PEMS-BAY.py b/baselines/DLinear/PEMS-BAY.py
deleted file mode 100644
index e80091e2..00000000
--- a/baselines/DLinear/PEMS-BAY.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import DLinear
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DLinear model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic Speed"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DLinear"
-CFG.MODEL.ARCH = DLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 325
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/DLinear/PEMS04.py b/baselines/DLinear/PEMS04.py
deleted file mode 100644
index 9ae9282c..00000000
--- a/baselines/DLinear/PEMS04.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import DLinear
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DLinear model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DLinear"
-CFG.MODEL.ARCH = DLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 307
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/DLinear/PEMS04_LTSF.py b/baselines/DLinear/PEMS04_LTSF.py
new file mode 100644
index 00000000..38367c0d
--- /dev/null
+++ b/baselines/DLinear/PEMS04_LTSF.py
@@ -0,0 +1,138 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import DLinear
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
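+# The regular per-dataset defaults above (typically 12/12 for the PEMS traffic
+# datasets) are overridden with the 336/336 long-term forecasting (LTSF) protocol.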
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DLinear
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "pred_len": OUTPUT_LEN,
+ "individual": False,
+ "enc_in": 307
+}
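+# 'enc_in' is the number of input channels (PEMS04 has 307 sensors); in the
+# reference DLinear implementation it is only used to build one linear head per
+# channel when 'individual' is True.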
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'DLinear model configuration for PEMS04 (LTSF)'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
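+# e.g. checkpoints/DLinear/PEMS04_100_336_336 for this configuration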
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.002,
+ "weight_decay": 0.0001,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/DLinear/PEMS08.py b/baselines/DLinear/PEMS08.py
deleted file mode 100644
index da1ff5af..00000000
--- a/baselines/DLinear/PEMS08.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import DLinear
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DLinear model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DLinear"
-CFG.MODEL.ARCH = DLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 170
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/DLinear/PEMS08_LTSF.py b/baselines/DLinear/PEMS08_LTSF.py
new file mode 100644
index 00000000..321c8d4c
--- /dev/null
+++ b/baselines/DLinear/PEMS08_LTSF.py
@@ -0,0 +1,138 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import DLinear
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DLinear
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "pred_len": OUTPUT_LEN,
+ "individual": False,
+ "enc_in": 170
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'DLinear model configuration for PEMS08 (LTSF)'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.002,
+ "weight_decay": 0.0001,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/DLinear/Weather.py b/baselines/DLinear/Weather.py
index ef82560c..354a3fe3 100644
--- a/baselines/DLinear/Weather.py
+++ b/baselines/DLinear/Weather.py
@@ -1,108 +1,135 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DLinear
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Weather' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DLinear
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "pred_len": OUTPUT_LEN,
+ "individual": False,
+ "enc_in": 21
+}
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "DLinear model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'DLinear model configuration for Weather'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Weather"
-CFG.DATASET_TYPE = "Weather"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
-# ================= model ================= #
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DLinear"
-CFG.MODEL.ARCH = DLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 21
-}
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0003,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/DLinear/run.sh b/baselines/DLinear/run.sh
deleted file mode 100644
index 910b0972..00000000
--- a/baselines/DLinear/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/DLinear/ETTh1.py --gpus '0'
-python experiments/train.py -c baselines/DLinear/ETTh2.py --gpus '0'
-python experiments/train.py -c baselines/DLinear/ETTm1.py --gpus '0'
-python experiments/train.py -c baselines/DLinear/ETTm2.py --gpus '0'
-python experiments/train.py -c baselines/DLinear/Electricity.py --gpus '0'
-python experiments/train.py -c baselines/DLinear/ExchangeRate.py --gpus '0'
-python experiments/train.py -c baselines/DLinear/Weather.py --gpus '0'
-python experiments/train.py -c baselines/DLinear/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/DLinear/PEMS08.py --gpus '0'
diff --git a/baselines/DSFormer/ETTh1.py b/baselines/DSFormer/ETTh1.py
index 1b7bc9e9..1c597552 100644
--- a/baselines/DSFormer/ETTh1.py
+++ b/baselines/DSFormer/ETTh1.py
@@ -1,42 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DSFormer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DSFormer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 3407
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DSFormer"
-CFG.MODEL.ARCH = DSFormer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DSFormer
NUM_NODES = 7
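+# The ETT datasets contain seven variables, hence NUM_NODES = 7.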
-CFG.MODEL.PARAM = {
- "Input_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
+MODEL_PARAM = {
+ "Input_len": INPUT_LEN,
+ "out_len": OUTPUT_LEN,
"num_id": NUM_NODES,
"num_layer": 1,
"dropout": 0.15,
@@ -45,73 +35,107 @@
"IF_node": True,
"IF_REVIN":True
}
-CFG.MODEL.FORWARD_FEATURES = [0]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'DSFormer model configuration for ETTh1'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
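+# Indices 1-4 are presumably the auxiliary temporal encodings added by the new
+# data pipeline; only channel 0 (the raw series) is used as the regression target.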
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1,5,15,25,50],
+ "milestones": [1, 5, 15, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 3.0
+ 'max_norm': 3.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/DSFormer/ETTh2.py b/baselines/DSFormer/ETTh2.py
index 45c91c83..d3be0578 100644
--- a/baselines/DSFormer/ETTh2.py
+++ b/baselines/DSFormer/ETTh2.py
@@ -1,42 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DSFormer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DSFormer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DSFormer"
-CFG.MODEL.ARCH = DSFormer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DSFormer
NUM_NODES = 7
-CFG.MODEL.PARAM = {
- "Input_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
+MODEL_PARAM = {
+ "Input_len": INPUT_LEN,
+ "out_len": OUTPUT_LEN,
"num_id": NUM_NODES,
"num_layer": 1,
"dropout": 0.15,
@@ -45,73 +35,107 @@
"IF_node": True,
"IF_REVIN":True
}
-CFG.MODEL.FORWARD_FEATURES = [0]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'DSFormer model configuration for ETTh2'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1,5,15,25,50],
+ "milestones": [1, 5, 15, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 3.0
+ 'max_norm': 3.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 16
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/DSFormer/ETTm1.py b/baselines/DSFormer/ETTm1.py
index 2066b78f..6cc63708 100644
--- a/baselines/DSFormer/ETTm1.py
+++ b/baselines/DSFormer/ETTm1.py
@@ -1,42 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DSFormer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DSFormer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DSFormer"
-CFG.MODEL.ARCH = DSFormer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DSFormer
NUM_NODES = 7
-CFG.MODEL.PARAM = {
- "Input_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
+MODEL_PARAM = {
+ "Input_len": INPUT_LEN,
+ "out_len": OUTPUT_LEN,
"num_id": NUM_NODES,
"num_layer": 1,
"dropout": 0.15,
@@ -45,72 +35,107 @@
"IF_node": True,
"IF_REVIN":True
}
-CFG.MODEL.FORWARD_FEATURES = [0]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'DSFormer model configuration for ETTm1'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002
+ "lr": 0.002,
+ "weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1,5,15,25,50],
+ "milestones": [1, 5, 15, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 3.0
+ 'max_norm': 3.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/DSFormer/ETTm2.py b/baselines/DSFormer/ETTm2.py
index 2956fe67..7173f7e8 100644
--- a/baselines/DSFormer/ETTm2.py
+++ b/baselines/DSFormer/ETTm2.py
@@ -1,42 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DSFormer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DSFormer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 3407
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DSFormer"
-CFG.MODEL.ARCH = DSFormer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DSFormer
NUM_NODES = 7
-CFG.MODEL.PARAM = {
- "Input_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
+MODEL_PARAM = {
+ "Input_len": INPUT_LEN,
+ "out_len": OUTPUT_LEN,
"num_id": NUM_NODES,
"num_layer": 1,
"dropout": 0.15,
@@ -45,73 +35,107 @@
"IF_node": True,
"IF_REVIN":True
}
-CFG.MODEL.FORWARD_FEATURES = [0]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1,5,15,25,50],
+ "milestones": [1, 5, 15, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 3.0
+ 'max_norm': 3.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 16
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 32
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
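
The SCALER block introduced above replaces the normalization that previously lived inside the dataset. A minimal sketch of the z-score contract these keys imply — statistics from the train split only, optional per-channel normalization, and an inverse transform applied when RESCALE is set. The class below is an illustration under those assumptions, not the actual basicts.scaler.ZScoreScaler:

```python
import numpy as np

class ZScoreScalerSketch:
    """Illustrative stand-in for the configured scaler; not the BasicTS class."""

    def __init__(self, data: np.ndarray, train_ratio: float, norm_each_channel: bool):
        train = data[: int(len(data) * train_ratio)]   # statistics from the train split only
        axis = 0 if norm_each_channel else None        # per-channel vs. global statistics
        self.mean, self.std = train.mean(axis=axis), train.std(axis=axis)

    def transform(self, x: np.ndarray) -> np.ndarray:
        return (x - self.mean) / self.std

    def inverse_transform(self, x: np.ndarray) -> np.ndarray:
        # Applied before metrics when RESCALE is set, so errors are in original units.
        return x * self.std + self.mean
```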
diff --git a/baselines/DSFormer/Electricity.py b/baselines/DSFormer/Electricity.py
index b99f8ed6..da2a4a31 100644
--- a/baselines/DSFormer/Electricity.py
+++ b/baselines/DSFormer/Electricity.py
@@ -1,42 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DSFormer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DSFormer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Electricity"
-CFG.DATASET_TYPE = "Electricity"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 3407
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DSFormer"
-CFG.MODEL.ARCH = DSFormer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Electricity' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DSFormer
NUM_NODES = 321
-CFG.MODEL.PARAM = {
- "Input_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
+MODEL_PARAM = {
+ "Input_len": INPUT_LEN,
+ "out_len": OUTPUT_LEN,
"num_id": NUM_NODES,
"num_layer": 1,
"dropout": 0.15,
@@ -45,73 +35,107 @@
"IF_node": True,
"IF_REVIN":True
}
-CFG.MODEL.FORWARD_FEATURES = [0]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1,15,25,50,75,100],
+ "milestones": [1, 5, 15, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 3.0
+ 'max_norm': 3.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
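
The masked metrics wired into CFG.METRICS.FUNCS pair with CFG.METRICS.NULL_VAL: positions whose ground truth equals the null value are excluded from the average. A sketch of that masking, assuming the conventional DCRNN-style formulation (the real functions live in basicts.metrics):

```python
import math
import torch

def masked_mae_sketch(prediction: torch.Tensor, target: torch.Tensor,
                      null_val: float = float('nan')) -> torch.Tensor:
    # 1 where the ground truth is valid, 0 where it equals the null value.
    if math.isnan(null_val):
        mask = (~torch.isnan(target)).float()
    else:
        mask = (target != null_val).float()
    mask = mask / mask.mean()          # re-weight so masked positions contribute nothing
    mask = torch.nan_to_num(mask)      # guard against an all-null batch (0/0 -> nan)
    loss = torch.abs(prediction - target) * mask
    return torch.nan_to_num(loss).mean()
```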
diff --git a/baselines/DSFormer/ExchangeRate.py b/baselines/DSFormer/ExchangeRate.py
index a62e163e..fbfb8e6f 100644
--- a/baselines/DSFormer/ExchangeRate.py
+++ b/baselines/DSFormer/ExchangeRate.py
@@ -1,42 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DSFormer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DSFormer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ExchangeRate"
-CFG.DATASET_TYPE = "Exchange Rate"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DSFormer"
-CFG.MODEL.ARCH = DSFormer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ExchangeRate' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+INPUT_LEN = 96 # better results than regular_settings['INPUT_LEN'] (336)
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DSFormer
NUM_NODES = 8
-CFG.MODEL.PARAM = {
- "Input_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
+MODEL_PARAM = {
+ "Input_len": INPUT_LEN,
+ "out_len": OUTPUT_LEN,
"num_id": NUM_NODES,
"num_layer": 1,
"dropout": 0.15,
@@ -45,72 +36,106 @@
"IF_node": True,
"IF_REVIN":True
}
-CFG.MODEL.FORWARD_FEATURES = [0]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1,5,15,25,50],
+ "milestones": [1, 5, 15, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 3.0
+ 'max_norm': 3.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
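
Under MultiStepLR, the learning rate is multiplied by gamma each time training passes a milestone epoch, so for this file (lr=0.0002, gamma=0.5, milestones=[1, 5, 15, 25, 50]) the rate at any epoch can be read off directly:

```python
# Epoch-wise learning rate implied by the MultiStepLR settings above.
def lr_at_epoch(base_lr: float, milestones: list, gamma: float, epoch: int) -> float:
    passed = sum(1 for m in milestones if epoch >= m)   # milestones already crossed
    return base_lr * gamma ** passed

for epoch in (0, 1, 5, 25, 99):
    print(epoch, lr_at_epoch(2e-4, [1, 5, 15, 25, 50], 0.5, epoch))
# 0 -> 2.0e-4, 1 -> 1.0e-4, 5 -> 5.0e-5, 25 -> 1.25e-5, 99 -> 6.25e-6
```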
diff --git a/baselines/DSFormer/Illness.py b/baselines/DSFormer/Illness.py
index 22a1827d..7657d620 100644
--- a/baselines/DSFormer/Illness.py
+++ b/baselines/DSFormer/Illness.py
@@ -1,42 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DSFormer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DSFormer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Illness"
-CFG.DATASET_TYPE = "Illness"
-CFG.DATASET_INPUT_LEN = 60
-CFG.DATASET_OUTPUT_LEN = 60
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 3407
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DSFormer"
-CFG.MODEL.ARCH = DSFormer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Illness' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DSFormer
NUM_NODES = 7
-CFG.MODEL.PARAM = {
- "Input_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
+MODEL_PARAM = {
+ "Input_len": INPUT_LEN,
+ "out_len": OUTPUT_LEN,
"num_id": NUM_NODES,
"num_layer": 2,
"dropout": 0.3,
@@ -45,69 +35,106 @@
"IF_node": True,
"IF_REVIN":True
}
-CFG.MODEL.FORWARD_FEATURES = [0]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002
+ "lr": 0.002,
+ "weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1,5,15,25,50,75,100],
+ "milestones": [1, 5, 15, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 3.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 60]
+
+# Evaluation parameters
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
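
These configs widen FORWARD_FEATURES from [0] to [0, 1, 2, 3, 4] while keeping TARGET_FEATURES = [0]: the model sees the raw signal plus auxiliary channels (typically time encodings), but the loss and metrics are computed on channel 0 only. A sketch of the slicing this implies, assuming tensors shaped [batch, time, nodes, channels]:

```python
import torch

history = torch.randn(32, 336, 7, 5)            # [batch, input_len, num_nodes, channels]
future  = torch.randn(32, 336, 7, 5)            # [batch, output_len, num_nodes, channels]

FORWARD_FEATURES = [0, 1, 2, 3, 4]              # channels fed to the model
TARGET_FEATURES  = [0]                          # channels the loss/metrics are computed on

model_input  = history[..., FORWARD_FEATURES]   # -> [32, 336, 7, 5]
ground_truth = future[..., TARGET_FEATURES]     # -> [32, 336, 7, 1]
```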
diff --git a/baselines/DSFormer/METR-LA.py b/baselines/DSFormer/METR-LA.py
deleted file mode 100644
index c64ab583..00000000
--- a/baselines/DSFormer/METR-LA.py
+++ /dev/null
@@ -1,118 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-
-from .arch import DSFormer
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DSFormer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "traffic speed"
-CFG.DATASET_INPUT_LEN = 288
-CFG.DATASET_OUTPUT_LEN = 288
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DSFormer"
-CFG.MODEL.ARCH = DSFormer
-NUM_NODES = 207
-CFG.MODEL.PARAM = {
- "Input_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
- "num_id": NUM_NODES,
- "num_layer": 1,
- "dropout": 0.15,
- "muti_head": 2,
- "num_samp": 3,
- "IF_node": True,
- "IF_REVIN":True
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1,10,25,50,75,100],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 32
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 32
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 48, 96, 144, 192, 288]
diff --git a/baselines/DSFormer/PEMS04.py b/baselines/DSFormer/PEMS04.py
deleted file mode 100644
index f0dd8b56..00000000
--- a/baselines/DSFormer/PEMS04.py
+++ /dev/null
@@ -1,118 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-
-from .arch import DSFormer
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DSFormer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "traffic Flow"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DSFormer"
-CFG.MODEL.ARCH = DSFormer
-NUM_NODES = 307
-CFG.MODEL.PARAM = {
- "Input_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
- "num_id": NUM_NODES,
- "num_layer": 1,
- "dropout": 0.3,
- "muti_head": 2,
- "num_samp": 3,
- "IF_node": True,
- "IF_REVIN":True
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1,10,25,50,75,100],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/DSFormer/PEMS04_LTSF.py b/baselines/DSFormer/PEMS04_LTSF.py
new file mode 100644
index 00000000..3b919560
--- /dev/null
+++ b/baselines/DSFormer/PEMS04_LTSF.py
@@ -0,0 +1,144 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import DSFormer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DSFormer
+NUM_NODES = 307
+MODEL_PARAM = {
+ "Input_len": INPUT_LEN,
+ "out_len": OUTPUT_LEN,
+ "num_id": NUM_NODES,
+ "num_layer": 1,
+ "dropout": 0.3,
+ "muti_head": 2,
+ "num_samp": 3,
+ "IF_node": True,
+ "IF_REVIN":True
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.002,
+ "weight_decay": 0.0001,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 10, 25, 50, 75, 100],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 32
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
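
For this LTSF file the checkpoint path resolves to checkpoints/DSFormer/PEMS04_100_336_336. CFG.TRAIN.CLIP_GRAD_PARAM is presumably forwarded to PyTorch's gradient clipping inside the runner's training step; a plain-PyTorch equivalent of max_norm=5.0 (the batch keys below are hypothetical):

```python
import torch

def train_step(model, optimizer, loss_fn, batch):
    optimizer.zero_grad()
    loss = loss_fn(model(batch['inputs']), batch['target'])   # 'inputs'/'target' are illustrative keys
    loss.backward()
    # Rescale gradients whose global L2 norm exceeds 5.0, mirroring CLIP_GRAD_PARAM above.
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
    optimizer.step()
    return loss.item()
```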
diff --git a/baselines/DSFormer/PEMS08.py b/baselines/DSFormer/PEMS08.py
deleted file mode 100644
index 4326dedd..00000000
--- a/baselines/DSFormer/PEMS08.py
+++ /dev/null
@@ -1,117 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-
-from .arch import DSFormer
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Crossformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "traffic Flow"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DSFormer"
-CFG.MODEL.ARCH = DSFormer
-NUM_NODES = 170
-CFG.MODEL.PARAM = {
- "Input_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
- "num_id": NUM_NODES,
- "num_layer": 1,
- "dropout": 0.3,
- "muti_head": 2,
- "num_samp": 3,
- "IF_node": True,
- "IF_REVIN":True
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1,10,25,50,75,100],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/DSFormer/PEMS08_LTSF.py b/baselines/DSFormer/PEMS08_LTSF.py
new file mode 100644
index 00000000..00e5717d
--- /dev/null
+++ b/baselines/DSFormer/PEMS08_LTSF.py
@@ -0,0 +1,144 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import DSFormer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DSFormer
+NUM_NODES = 170
+MODEL_PARAM = {
+ "Input_len": INPUT_LEN,
+ "out_len": OUTPUT_LEN,
+ "num_id": NUM_NODES,
+ "num_layer": 1,
+ "dropout": 0.3,
+ "muti_head": 2,
+ "num_samp": 3,
+ "IF_node": True,
+ "IF_REVIN":True
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.002,
+ "weight_decay": 0.0001,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 10, 25, 50, 75, 100],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 32
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
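
A TRAIN_VAL_TEST_RATIO such as the (0.6, 0.2, 0.2) commonly used for the PEMS datasets carves the series into contiguous splits; roughly as follows (exact boundary and window handling in BasicTS may differ):

```python
def split_bounds(total_len: int, ratios) -> tuple:
    # Contiguous train/val/test index ranges implied by the split ratios.
    train_end = int(total_len * ratios[0])
    val_end = train_end + int(total_len * ratios[1])
    return (0, train_end), (train_end, val_end), (val_end, total_len)

# PEMS08 has 17856 five-minute steps (62 days x 288):
print(split_bounds(17856, (0.6, 0.2, 0.2)))
# ((0, 10713), (10713, 14284), (14284, 17856))
```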
diff --git a/baselines/DSFormer/Traffic.py b/baselines/DSFormer/Traffic.py
index 773039d5..a568c60b 100644
--- a/baselines/DSFormer/Traffic.py
+++ b/baselines/DSFormer/Traffic.py
@@ -1,42 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DSFormer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DSFormer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Traffic"
-CFG.DATASET_TYPE = "Traffic"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DSFormer"
-CFG.MODEL.ARCH = DSFormer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Traffic' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DSFormer
NUM_NODES = 862
-CFG.MODEL.PARAM = {
- "Input_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
+MODEL_PARAM = {
+ "Input_len": INPUT_LEN,
+ "out_len": OUTPUT_LEN,
"num_id": NUM_NODES,
"num_layer": 1,
"dropout": 0.3,
@@ -45,73 +35,107 @@
"IF_node": True,
"IF_REVIN":True
}
-CFG.MODEL.FORWARD_FEATURES = [0]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1,5,15,25,50,75,100],
+ "milestones": [1, 5, 15, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 3.0
+ 'max_norm': 3.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 16
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 32
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+CFG.VAL.DATA.BATCH_SIZE = 64
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 32
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96,192, 288, 336]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
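
CFG.EVAL.HORIZONS asks the runner to report metrics at specific lead times in addition to the average over the whole output window. A sketch of per-horizon evaluation, assuming 1-indexed horizons over [batch, output_len, nodes, channels] outputs (the indexing convention is an assumption):

```python
import torch

prediction   = torch.randn(64, 336, 862, 1)     # [batch, output_len, num_nodes, target channels]
ground_truth = torch.randn(64, 336, 862, 1)

for h in [12, 24, 48, 96, 192, 288, 336]:
    p, t = prediction[:, h - 1], ground_truth[:, h - 1]   # step h ahead (1-indexed)
    print(f'horizon {h}: MAE {torch.abs(p - t).mean().item():.4f}')
```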
diff --git a/baselines/DSFormer/Weather.py b/baselines/DSFormer/Weather.py
index 65ab24f8..2d2d7de0 100644
--- a/baselines/DSFormer/Weather.py
+++ b/baselines/DSFormer/Weather.py
@@ -1,116 +1,141 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DSFormer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DSFormer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Weather"
-CFG.DATASET_TYPE = "Weather"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DSFormer"
-CFG.MODEL.ARCH = DSFormer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Weather' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DSFormer
NUM_NODES = 21
-CFG.MODEL.PARAM = {
- "Input_len": CFG.DATASET_INPUT_LEN,
- "out_len": CFG.DATASET_OUTPUT_LEN,
+MODEL_PARAM = {
+ "Input_len": INPUT_LEN,
+ "out_len": OUTPUT_LEN,
"num_id": NUM_NODES,
"num_layer": 1,
- "dropout": 0.15,
- "muti_head": 2,
- "num_samp": 2,
+ "dropout": 0.3,
+ "muti_head": 1,
+ "num_samp": 3,
"IF_node": True,
"IF_REVIN":True
}
-CFG.MODEL.FORWARD_FEATURES = [0]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001,
+ "weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [5,15,25,50,75,100],
+ "milestones": [1, 5, 15, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
+ 'max_norm': 3.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
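
The SCALER block above replaces the normalization that older configs baked into the preprocessed dataset files: statistics are fit on the training split only and applied on the fly, with `inverse_transform` available for rescaling predictions. A minimal sketch of the idea, assuming a fit-on-train z-score scaler with `transform`/`inverse_transform` methods (the real `basicts.scaler.ZScoreScaler` API may differ):

```python
import numpy as np

class ZScoreScalerSketch:
    """Hypothetical z-score scaler; the real basicts.scaler.ZScoreScaler may differ.

    Fits mean/std on the training split only, optionally per channel,
    and can undo the normalization when `rescale` is requested.
    """

    def __init__(self, data: np.ndarray, train_ratio: float,
                 norm_each_channel: bool = True, rescale: bool = True):
        train = data[:int(len(data) * train_ratio)]   # never peek past the train split
        axis = 0 if norm_each_channel else None       # per-channel vs. global statistics
        self.mean = train.mean(axis=axis)
        self.std = train.std(axis=axis) + 1e-8        # guard constant channels
        self.rescale = rescale

    def transform(self, x: np.ndarray) -> np.ndarray:
        return (x - self.mean) / self.std

    def inverse_transform(self, x: np.ndarray) -> np.ndarray:
        return x * self.std + self.mean if self.rescale else x
```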
diff --git a/baselines/DSFormer/run.sh b/baselines/DSFormer/run.sh
deleted file mode 100644
index 0f28e838..00000000
--- a/baselines/DSFormer/run.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/DSFormer/ETTh1.py --gpus '0'
-python experiments/train.py -c baselines/DSFormer/ETTh2.py --gpus '0'
-python experiments/train.py -c baselines/DSFormer/ETTm1.py --gpus '0'
-python experiments/train.py -c baselines/DSFormer/ETTm2.py --gpus '0'
-python experiments/train.py -c baselines/DSFormer/Electricity.py --gpus '0'
-python experiments/train.py -c baselines/DSFormer/ExchangeRate.py --gpus '0'
-python experiments/train.py -c baselines/DSFormer/Weather.py --gpus '0'
-python experiments/train.py -c baselines/DSFormer/Illness.py --gpus '0'
-python experiments/train.py -c baselines/DSFormer/Traffic.py --gpus '0'
-python experiments/train.py -c baselines/DSFormer/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/DSFormer/PEMS08.py --gpus '0'
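
With the per-model run.sh scripts removed, individual configs are launched directly through the training entry point, exactly as each line of the deleted script did, e.g. `python experiments/train.py -c baselines/DSFormer/Weather.py --gpus '0'` (assuming the CLI of experiments/train.py is unchanged in 0.4.0).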
diff --git a/baselines/DeepAR/ETTh1.py b/baselines/DeepAR/ETTh1.py
index 94f336f5..400203e0 100644
--- a/baselines/DeepAR/ETTh1.py
+++ b/baselines/DeepAR/ETTh1.py
@@ -1,103 +1,128 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DeepAR
from .runner import DeepARRunner
from .loss import gaussian_loss
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DeepAR
+MODEL_PARAM = {
+ 'cov_feat_size' : 2,
+ 'embedding_size' : 32,
+ 'hidden_size' : 64,
+ 'num_layers': 3,
+ 'use_ts_id' : True,
+ 'id_feat_size': 32,
+ 'num_nodes': 7
+ }
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "DeepAR model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = DeepARRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh1"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
-# ================= model ================= #
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DeepAR"
-CFG.MODEL.ARCH = DeepAR
-CFG.MODEL.PARAM = {
- "cov_feat_size" : 2,
- "embedding_size" : 32,
- "hidden_size" : 64,
- "num_layers": 3,
- "use_ts_id" : True,
- "id_feat_size": 32,
- "num_nodes": 7
- }
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = gaussian_loss
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.003,
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {
+ 'lr': 0.003,
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
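
Unlike the point-forecast baselines, DeepAR trains against `gaussian_loss` from its own `loss` module, i.e. the negative log-likelihood of the predicted distribution. A sketch of that quantity, assuming the model emits a per-step mean and standard deviation (the repo's actual `gaussian_loss` signature and null-value masking may differ):

```python
import torch

def gaussian_nll(mu: torch.Tensor, sigma: torch.Tensor,
                 target: torch.Tensor) -> torch.Tensor:
    """Hypothetical Gaussian negative log-likelihood, the quantity a
    DeepAR-style loss minimizes; the repo's gaussian_loss may differ
    (e.g., it likely also masks NULL_VAL positions)."""
    dist = torch.distributions.Normal(mu, sigma)
    return -dist.log_prob(target).mean()
```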
diff --git a/baselines/DeepAR/ETTm1.py b/baselines/DeepAR/ETTm1.py
index 00533a5c..190b2fbe 100644
--- a/baselines/DeepAR/ETTm1.py
+++ b/baselines/DeepAR/ETTm1.py
@@ -1,103 +1,128 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DeepAR
from .runner import DeepARRunner
from .loss import gaussian_loss
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DeepAR
+MODEL_PARAM = {
+ 'cov_feat_size' : 2,
+ 'embedding_size' : 32,
+ 'hidden_size' : 64,
+ 'num_layers': 3,
+ 'use_ts_id' : True,
+ 'id_feat_size': 32,
+ 'num_nodes': 7
+ }
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "DeepAR model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = DeepARRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm1"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
-# ================= model ================= #
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DeepAR"
-CFG.MODEL.ARCH = DeepAR
-CFG.MODEL.PARAM = {
- "cov_feat_size" : 2,
- "embedding_size" : 32,
- "hidden_size" : 64,
- "num_layers": 3,
- "use_ts_id" : True,
- "id_feat_size": 32,
- "num_nodes": 7
- }
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = gaussian_loss
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.003,
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {
+ 'lr': 0.003,
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
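
Every config in this family pulls INPUT_LEN, OUTPUT_LEN, the split ratios, the normalization flags, and NULL_VAL from `get_regular_settings`, so benchmark-wide defaults live in one table instead of being repeated per file. A sketch of the shape of that table, with illustrative values only (the real `basicts.utils.get_regular_settings` ships its own entries):

```python
# Illustrative only: the real basicts.utils.get_regular_settings ships its own table.
REGULAR_SETTINGS = {
    'ETTm1': {
        'INPUT_LEN': 336,                        # assumed long-term-forecasting default
        'OUTPUT_LEN': 336,
        'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
        'NORM_EACH_CHANNEL': True,
        'RESCALE': False,
        'NULL_VAL': float('nan'),
    },
}

def get_regular_settings(dataset_name: str) -> dict:
    return REGULAR_SETTINGS[dataset_name]
```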
diff --git a/baselines/DeepAR/Electricity.py b/baselines/DeepAR/Electricity.py
index b4eed750..21107948 100644
--- a/baselines/DeepAR/Electricity.py
+++ b/baselines/DeepAR/Electricity.py
@@ -1,103 +1,128 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DeepAR
from .runner import DeepARRunner
from .loss import gaussian_loss
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Electricity' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DeepAR
+MODEL_PARAM = {
+ 'cov_feat_size' : 2,
+ 'embedding_size' : 32,
+ 'hidden_size' : 64,
+ 'num_layers': 3,
+ 'use_ts_id' : True,
+ 'id_feat_size': 32,
+ 'num_nodes': 321
+ }
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "DeepAR model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = DeepARRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Electricity"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
-# ================= model ================= #
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DeepAR"
-CFG.MODEL.ARCH = DeepAR
-CFG.MODEL.PARAM = {
- "cov_feat_size" : 2,
- "embedding_size" : 32,
- "hidden_size" : 64,
- "num_layers": 3,
- "use_ts_id" : True,
- "id_feat_size": 32,
- "num_nodes": 321
- }
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = gaussian_loss
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.003,
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {
+ 'lr': 0.003,
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 15
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 16
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = False # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/DeepAR/ExchangeRate.py b/baselines/DeepAR/ExchangeRate.py
index 2c174d3f..9dae540a 100644
--- a/baselines/DeepAR/ExchangeRate.py
+++ b/baselines/DeepAR/ExchangeRate.py
@@ -1,103 +1,129 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DeepAR
from .runner import DeepARRunner
from .loss import gaussian_loss
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ExchangeRate' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DeepAR
+NUM_NODES = 8
+MODEL_PARAM = {
+ 'cov_feat_size' : 2,
+ 'embedding_size' : 32,
+ 'hidden_size' : 64,
+ 'num_layers': 3,
+ 'use_ts_id' : True,
+ 'id_feat_size': 32,
+ 'num_nodes': NUM_NODES
+ }
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "DeepAR model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = DeepARRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ExchangeRate"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
-# ================= model ================= #
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DeepAR"
-CFG.MODEL.ARCH = DeepAR
-CFG.MODEL.PARAM = {
- "cov_feat_size" : 2,
- "embedding_size" : 32,
- "hidden_size" : 64,
- "num_layers": 3,
- "use_ts_id" : True,
- "id_feat_size": 32,
- "num_nodes": 8
- }
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = gaussian_loss
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.003,
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {
+ 'lr': 0.003,
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 15
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
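
Because MODEL_PARAM is handed to the architecture verbatim, a stale literal such as the `'num_nodes'` value corrected above would silently mis-size DeepAR's per-series embedding for ExchangeRate's 8 series. A sketch of how a runner typically materializes the model from these two fields (the internals of the actual SimpleTimeSeriesForecastingRunner may differ):

```python
from easydict import EasyDict

def build_model(cfg: EasyDict):
    """Hypothetical sketch: instantiate CFG.MODEL.ARCH with CFG.MODEL.PARAM,
    the way BasicTS-style runners typically do."""
    return cfg.MODEL.ARCH(**cfg.MODEL.PARAM)
```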
diff --git a/baselines/DeepAR/METR-LA.py b/baselines/DeepAR/METR-LA.py
deleted file mode 100644
index 61184559..00000000
--- a/baselines/DeepAR/METR-LA.py
+++ /dev/null
@@ -1,104 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-
-from .arch import DeepAR
-from .runner import DeepARRunner
-from .loss import gaussian_loss
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DeepAR model configuration"
-CFG.RUNNER = DeepARRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DeepAR"
-CFG.MODEL.ARCH = DeepAR
-CFG.MODEL.PARAM = {
- "cov_feat_size" : 2,
- "embedding_size" : 32,
- "hidden_size" : 64,
- "num_layers": 3,
- "use_ts_id" : True,
- "id_feat_size": 32,
- "num_nodes": 207
-}
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = gaussian_loss
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.003,
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/DeepAR/PEMS-BAY.py b/baselines/DeepAR/PEMS-BAY.py
deleted file mode 100644
index 22c44e4d..00000000
--- a/baselines/DeepAR/PEMS-BAY.py
+++ /dev/null
@@ -1,104 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-
-from .arch import DeepAR
-from .runner import DeepARRunner
-from .loss import gaussian_loss
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DeepAR model configuration"
-CFG.RUNNER = DeepARRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DeepAR"
-CFG.MODEL.ARCH = DeepAR
-CFG.MODEL.PARAM = {
- "cov_feat_size" : 2,
- "embedding_size" : 32,
- "hidden_size" : 64,
- "num_layers": 3,
- "use_ts_id" : True,
- "id_feat_size": 32,
- "num_nodes": 325
-}
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = gaussian_loss
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.003,
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/DeepAR/PEMS03.py b/baselines/DeepAR/PEMS03.py
deleted file mode 100644
index b97b0f1e..00000000
--- a/baselines/DeepAR/PEMS03.py
+++ /dev/null
@@ -1,104 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-
-from .arch import DeepAR
-from .runner import DeepARRunner
-from .loss import gaussian_loss
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DeepAR model configuration"
-CFG.RUNNER = DeepARRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS03"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DeepAR"
-CFG.MODEL.ARCH = DeepAR
-CFG.MODEL.PARAM = {
- "cov_feat_size" : 2,
- "embedding_size" : 32,
- "hidden_size" : 64,
- "num_layers": 3,
- "use_ts_id" : True,
- "id_feat_size": 32,
- "num_nodes": 358
-}
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = gaussian_loss
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.003,
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/DeepAR/PEMS04.py b/baselines/DeepAR/PEMS04.py
index a74cc390..adf0e7c6 100644
--- a/baselines/DeepAR/PEMS04.py
+++ b/baselines/DeepAR/PEMS04.py
@@ -1,104 +1,129 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DeepAR
from .runner import DeepARRunner
from .loss import gaussian_loss
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DeepAR
+MODEL_PARAM = {
+ 'cov_feat_size' : 2,
+ 'embedding_size' : 32,
+ 'hidden_size' : 64,
+ 'num_layers': 3,
+ 'use_ts_id' : True,
+ 'id_feat_size': 32,
+ 'num_nodes': 307
+ }
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "DeepAR model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = DeepARRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
-# ================= model ================= #
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DeepAR"
-CFG.MODEL.ARCH = DeepAR
-CFG.MODEL.PARAM = {
- "cov_feat_size" : 2,
- "embedding_size" : 32,
- "hidden_size" : 64,
- "num_layers": 3,
- "use_ts_id" : True,
- "id_feat_size": 32,
- "num_nodes": 307
-}
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = gaussian_loss
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.003,
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {
+ 'lr': 0.003,
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
+CFG.VAL.DATA.BATCH_SIZE = 32
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
+CFG.TEST.DATA.BATCH_SIZE = 16
+
+############################## Evaluation Configuration ##############################
-# ================= evaluate ================= #
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [3, 6, 12]
+CFG.EVAL.USE_GPU = False # Whether to use GPU for evaluation. Default: True
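
CFG.METRICS.NULL_VAL threads the dataset's sentinel (0.0 for the PEMS benchmarks) into every metric and into the loss, so padded or missing readings do not count as errors. A sketch of the masking idea, assuming the common convention of renormalizing the mask (the library's `masked_mae` may differ in NaN handling and broadcasting):

```python
import torch

def masked_mae_sketch(pred: torch.Tensor, target: torch.Tensor,
                      null_val: float = 0.0) -> torch.Tensor:
    """Hypothetical null-aware MAE; the real basicts masked_mae may differ."""
    if null_val != null_val:                          # NaN sentinel
        mask = ~torch.isnan(target)
    else:
        mask = target.ne(null_val)
    mask = mask.float()
    mask = torch.nan_to_num(mask / mask.mean())       # renormalize; all-masked batch -> zeros
    loss = torch.abs(pred - target) * mask
    return torch.nan_to_num(loss).mean()
```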
diff --git a/baselines/DeepAR/PEMS04_LTSF.py b/baselines/DeepAR/PEMS04_LTSF.py
index 51e015a0..69218ce3 100644
--- a/baselines/DeepAR/PEMS04_LTSF.py
+++ b/baselines/DeepAR/PEMS04_LTSF.py
@@ -1,100 +1,131 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DeepAR
from .runner import DeepARRunner
from .loss import gaussian_loss
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 96 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DeepAR
+MODEL_PARAM = {
+ 'cov_feat_size' : 2,
+ 'embedding_size' : 32,
+ 'hidden_size' : 64,
+ 'num_layers': 3,
+ 'use_ts_id' : True,
+ 'id_feat_size': 32,
+ 'num_nodes': 307
+ }
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "DeepAR model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = DeepARRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
-# ================= model ================= #
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DeepAR"
-CFG.MODEL.ARCH = DeepAR
-CFG.MODEL.PARAM = {
- "cov_feat_size" : 2,
- "embedding_size" : 32,
- "hidden_size" : 64,
- "num_layers": 3,
- "use_ts_id" : True,
- "id_feat_size": 32,
- "num_nodes": 307
-}
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = gaussian_loss
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.003,
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {
+ 'lr': 0.001,
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 24
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 16
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 16
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
+CFG.VAL.DATA.BATCH_SIZE = 32
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
+CFG.TEST.DATA.BATCH_SIZE = 16
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = False # Whether to use GPU for evaluation. Default: True
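
With the LTSF overrides above, the checkpoint directory now encodes the whole experiment: `os.path.join('checkpoints', MODEL_ARCH.__name__, '_'.join([DATA_NAME, str(NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)]))` resolves to `checkpoints/DeepAR/PEMS04_100_96_336`, so the 12-step and 336-step runs of the same model no longer overwrite each other as they could under the old `checkpoints/DeepAR_50` naming.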
diff --git a/baselines/DeepAR/PEMS07.py b/baselines/DeepAR/PEMS07.py
deleted file mode 100644
index be72c230..00000000
--- a/baselines/DeepAR/PEMS07.py
+++ /dev/null
@@ -1,104 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-
-from .arch import DeepAR
-from .runner import DeepARRunner
-from .loss import gaussian_loss
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "DeepAR model configuration"
-CFG.RUNNER = DeepARRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS07"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DeepAR"
-CFG.MODEL.ARCH = DeepAR
-CFG.MODEL.PARAM = {
- "cov_feat_size" : 2,
- "embedding_size" : 32,
- "hidden_size" : 64,
- "num_layers": 3,
- "use_ts_id" : True,
- "id_feat_size": 32,
- "num_nodes": 883
-}
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = gaussian_loss
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.003,
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/DeepAR/PEMS08.py b/baselines/DeepAR/PEMS08.py
index 47e0f57b..7cd05e68 100644
--- a/baselines/DeepAR/PEMS08.py
+++ b/baselines/DeepAR/PEMS08.py
@@ -1,104 +1,129 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DeepAR
from .runner import DeepARRunner
from .loss import gaussian_loss
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DeepAR
+MODEL_PARAM = {
+    'cov_feat_size': 2,
+    'embedding_size': 32,
+    'hidden_size': 64,
+    'num_layers': 3,
+    'use_ts_id': True,
+ 'id_feat_size': 32,
+ 'num_nodes': 170
+ }
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "DeepAR model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = DeepARRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
-# ================= model ================= #
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DeepAR"
-CFG.MODEL.ARCH = DeepAR
-CFG.MODEL.PARAM = {
- "cov_feat_size" : 2,
- "embedding_size" : 32,
- "hidden_size" : 64,
- "num_layers": 3,
- "use_ts_id" : True,
- "id_feat_size": 32,
- "num_nodes": 170
- }
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = gaussian_loss
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.003,
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {
+    'lr': 0.003,
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 16
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
+CFG.VAL.DATA.BATCH_SIZE = 16
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
+CFG.TEST.DATA.BATCH_SIZE = 16
+
+############################## Evaluation Configuration ##############################
-# ================= evaluate ================= #
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [3, 6, 12]
+CFG.EVAL.USE_GPU = False # Whether to use GPU for evaluation. Default: True
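
A minimal sketch of what a ZScoreScaler-style class does, assuming only the
interface visible in this patch (transform / inverse_transform plus a
'rescale' flag); the class and data below are illustrative, not the BasicTS
implementation:

import numpy as np

class ToyZScoreScaler:
    """Illustrative z-score scaler with the interface the runner relies on."""

    def __init__(self, train_data, norm_each_channel=True, rescale=True):
        # Statistics are fitted on the training split only, per channel or
        # globally, mirroring the NORM_EACH_CHANNEL setting above.
        axis = 0 if norm_each_channel else None
        self.mean = train_data.mean(axis=axis, keepdims=True)
        self.std = train_data.std(axis=axis, keepdims=True) + 1e-8  # avoid division by zero
        self.rescale = rescale  # whether outputs are denormalized at evaluation time

    def transform(self, data):
        return (data - self.mean) / self.std

    def inverse_transform(self, data):
        return data * self.std + self.mean

train = np.random.rand(1000, 170)  # e.g. 1000 steps of the 170 PEMS08 sensors
scaler = ToyZScoreScaler(train)
assert np.allclose(scaler.inverse_transform(scaler.transform(train)), train)
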
diff --git a/baselines/DeepAR/PEMS08_LTSF.py b/baselines/DeepAR/PEMS08_LTSF.py
index 37776690..44af96a1 100644
--- a/baselines/DeepAR/PEMS08_LTSF.py
+++ b/baselines/DeepAR/PEMS08_LTSF.py
@@ -1,100 +1,131 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DeepAR
from .runner import DeepARRunner
from .loss import gaussian_loss
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 96 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DeepAR
+MODEL_PARAM = {
+    'cov_feat_size': 2,
+    'embedding_size': 32,
+    'hidden_size': 64,
+    'num_layers': 3,
+    'use_ts_id': True,
+ 'id_feat_size': 32,
+ 'num_nodes': 170
+ }
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "DeepAR model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = DeepARRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
-# ================= model ================= #
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DeepAR"
-CFG.MODEL.ARCH = DeepAR
-CFG.MODEL.PARAM = {
- "cov_feat_size" : 2,
- "embedding_size" : 32,
- "hidden_size" : 64,
- "num_layers": 3,
- "use_ts_id" : True,
- "id_feat_size": 32,
- "num_nodes": 170
- }
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = gaussian_loss
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.003,
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {
+    'lr': 0.003,
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 16
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
+CFG.VAL.DATA.BATCH_SIZE = 16
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
+CFG.TEST.DATA.BATCH_SIZE = 16
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = False # Whether to use GPU for evaluation. Default: True
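
For reference, the checkpoint directory assembled above resolves as follows
(values taken from this config; a worked example, not extra framework code):

import os

ckpt = os.path.join('checkpoints', 'DeepAR',
                    '_'.join(['PEMS08', str(100), str(96), str(336)]))
print(ckpt)  # checkpoints/DeepAR/PEMS08_100_96_336 (POSIX separator)
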
diff --git a/baselines/DeepAR/Weather.py b/baselines/DeepAR/Weather.py
index 65bf134a..2202f655 100644
--- a/baselines/DeepAR/Weather.py
+++ b/baselines/DeepAR/Weather.py
@@ -1,103 +1,128 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import DeepAR
from .runner import DeepARRunner
from .loss import gaussian_loss
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Weather' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DeepAR
+MODEL_PARAM = {
+    'cov_feat_size': 2,
+    'embedding_size': 32,
+    'hidden_size': 64,
+    'num_layers': 3,
+    'use_ts_id': True,
+ 'id_feat_size': 32,
+ 'num_nodes': 21
+ }
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "DeepAR model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = DeepARRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Weather"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
-# ================= model ================= #
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "DeepAR"
-CFG.MODEL.ARCH = DeepAR
-CFG.MODEL.PARAM = {
- "cov_feat_size" : 2,
- "embedding_size" : 32,
- "hidden_size" : 64,
- "num_layers": 3,
- "use_ts_id" : True,
- "id_feat_size": 32,
- "num_nodes": 21
- }
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = gaussian_loss
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.003,
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {
+    'lr': 0.003,
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 15
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
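
CFG.EVAL.HORIZONS reports metrics at individual forecast steps. A schematic
horizon-wise MAE, assuming the horizons index the prediction steps 1-based (as
in earlier BasicTS versions); shapes follow this config (21 Weather channels,
336-step horizon), and the random tensors are placeholders:

import torch

prediction = torch.randn(8, 336, 21, 1)  # [batch, step, node, channel]
target = torch.randn(8, 336, 21, 1)
for h in [12, 24, 48, 96, 192, 288, 336]:
    mae_h = (prediction[:, h - 1] - target[:, h - 1]).abs().mean()
    print(f'horizon {h}: MAE {mae_h:.4f}')
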
diff --git a/baselines/DeepAR/run.sh b/baselines/DeepAR/run.sh
deleted file mode 100644
index d8c1b259..00000000
--- a/baselines/DeepAR/run.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/bash
-# python experiments/train.py -c baselines/DeepAR/METR-LA.py --gpus '0'
-# python experiments/train.py -c baselines/DeepAR/PEMS-BAY.py --gpus '0'
-# python experiments/train.py -c baselines/DeepAR/PEMS03.py --gpus '0'
-# python experiments/train.py -c baselines/DeepAR/PEMS04.py --gpus '0'
-# python experiments/train.py -c baselines/DeepAR/PEMS07.py --gpus '0'
-# python experiments/train.py -c baselines/DeepAR/PEMS08.py --gpus '0'
-
-# python experiments/train.py -c baselines/DeepAR/ETTh1.py --gpus '0'
-# python experiments/train.py -c baselines/DeepAR/ETTm1.py --gpus '0'
-python experiments/train.py -c baselines/DeepAR/Electricity.py --gpus '0'
-python experiments/train.py -c baselines/DeepAR/Weather.py --gpus '0'
-python experiments/train.py -c baselines/DeepAR/ExchangeRate.py --gpus '0'
-python experiments/train.py -c baselines/DeepAR/PEMS04_LTSF.py --gpus '0'
-python experiments/train.py -c baselines/DeepAR/PEMS08_LTSF.py --gpus '0'
\ No newline at end of file
diff --git a/baselines/DeepAR/runner/deepar_runner.py b/baselines/DeepAR/runner/deepar_runner.py
index 275a30b5..6f132ba8 100644
--- a/baselines/DeepAR/runner/deepar_runner.py
+++ b/baselines/DeepAR/runner/deepar_runner.py
@@ -1,6 +1,10 @@
-from typing import Dict
+import os
+import json
+from typing import Dict, Optional
+
import torch
-from basicts.data.registry import SCALER_REGISTRY
+import numpy as np
+from tqdm import tqdm
from easytorch.utils.dist import master_only
from basicts.runners import BaseTimeSeriesForecastingRunner
@@ -11,7 +15,7 @@ def __init__(self, cfg: dict):
super().__init__(cfg)
self.forward_features = cfg["MODEL"].get("FORWARD_FEATURES", None)
self.target_features = cfg["MODEL"].get("TARGET_FEATURES", None)
- self.output_seq_len = cfg["DATASET_OUTPUT_LEN"]
+ self.output_seq_len = cfg["DATASET"]["PARAM"]["output_len"]
def select_input_features(self, data: torch.Tensor) -> torch.Tensor:
"""Select input features and reshape data to fit the target model.
@@ -42,55 +46,75 @@ def select_target_features(self, data: torch.Tensor) -> torch.Tensor:
data = data[:, :, :, self.target_features]
return data
- def rescale_data(self, input_data: Dict) -> Dict:
- """Rescale data.
+ def postprocessing(self, input_data: Dict) -> Dict:
+ """Postprocess data.
Args:
- data (Dict): Dict of data to be re-scaled.
+ input_data (Dict): Dictionary containing data to be processed.
Returns:
- Dict: Dict re-scaled data.
+ Dict: Processed data.
"""
- if self.if_rescale:
- input_data["inputs"] = SCALER_REGISTRY.get(self.scaler["func"])(input_data["inputs"], **self.scaler["args"])
- input_data["prediction"] = SCALER_REGISTRY.get(self.scaler["func"])(input_data["prediction"], **self.scaler["args"])
- input_data["target"] = SCALER_REGISTRY.get(self.scaler["func"])(input_data["target"], **self.scaler["args"])
+ if self.scaler is not None and self.scaler.rescale:
+ input_data['prediction'] = self.scaler.inverse_transform(input_data['prediction'])
+ input_data['target'] = self.scaler.inverse_transform(input_data['target'])
+ input_data['inputs'] = self.scaler.inverse_transform(input_data['inputs'])
if "mus" in input_data.keys():
- input_data["mus"] = SCALER_REGISTRY.get(self.scaler["func"])(input_data["mus"], **self.scaler["args"])
+ input_data['mus'] = self.scaler.inverse_transform(input_data['mus'])
if "sigmas" in input_data.keys():
- input_data["sigmas"] = SCALER_REGISTRY.get(self.scaler["func"])(input_data["sigmas"], **self.scaler["args"])
+ input_data['sigmas'] = self.scaler.inverse_transform(input_data['sigmas'])
+ # TODO: add more postprocessing steps as needed.
return input_data
@torch.no_grad()
@master_only
- def test(self):
- """Evaluate the model.
-
+ def test(self, train_epoch: Optional[int] = None, save_metrics: bool = False, save_results: bool = False) -> Dict:
+ """Test process.
+
Args:
- train_epoch (int, optional): current epoch if in training process.
+ train_epoch (Optional[int]): Current epoch if in training process.
+ save_metrics (bool): Save the test metrics. Defaults to False.
+ save_results (bool): Save the test results. Defaults to False.
"""
- # test loop
- prediction =[]
- target = []
- inputs = []
- for _, data in enumerate(self.test_data_loader):
+ prediction, target, inputs = [], [], []
+
+ for data in tqdm(self.test_data_loader):
+ data = self.preprocessing(data)
forward_return = self.forward(data, epoch=None, iter_num=None, train=False)
+ forward_return = self.postprocessing(forward_return)
+
if not self.if_evaluate_on_gpu:
- forward_return["prediction"] = forward_return["prediction"].detach().cpu()
- forward_return["target"] = forward_return["target"].detach().cpu()
- forward_return["inputs"] = forward_return["inputs"].detach().cpu()
- prediction.append(forward_return["prediction"])
- target.append(forward_return["target"])
- inputs.append(forward_return["inputs"])
+ forward_return['prediction'] = forward_return['prediction'].detach().cpu()
+ forward_return['target'] = forward_return['target'].detach().cpu()
+ forward_return['inputs'] = forward_return['inputs'].detach().cpu()
+
+ prediction.append(forward_return['prediction'])
+ target.append(forward_return['target'])
+ inputs.append(forward_return['inputs'])
+
prediction = torch.cat(prediction, dim=0)
target = torch.cat(target, dim=0)
inputs = torch.cat(inputs, dim=0)
- # re-scale data
- returns_all = self.rescale_data({"prediction": prediction[:, -self.output_seq_len:, :, :], "target": target[:, -self.output_seq_len:, :, :], "inputs": inputs})
- # evaluate
- self.evaluate(returns_all)
+
+ returns_all = {'prediction': prediction[:, -self.output_seq_len:, :, :],
+ 'target': target[:, -self.output_seq_len:, :, :],
+ 'inputs': inputs}
+ metrics_results = self.compute_evaluation_metrics(returns_all)
+
+ # save
+ if save_results:
+ # save returns_all to self.ckpt_save_dir/test_results.npz
+ test_results = {k: v.cpu().numpy() for k, v in returns_all.items()}
+ np.savez(os.path.join(self.ckpt_save_dir, 'test_results.npz'), **test_results)
+
+ if save_metrics:
+ # save metrics_results to self.ckpt_save_dir/test_metrics.json
+ with open(os.path.join(self.ckpt_save_dir, 'test_metrics.json'), 'w') as f:
+ json.dump(metrics_results, f, indent=4)
+
+ return returns_all
def forward(self, data: tuple, epoch:int = None, iter_num: int = None, train:bool = True, **kwargs) -> tuple:
"""feed forward process for train, val, and test. Note that the outputs are NOT re-scaled.
@@ -105,16 +129,18 @@ def forward(self, data: tuple, epoch:int = None, iter_num: int = None, train:boo
dict: keys that must be included: inputs, prediction, target
"""
- # preprocess
- future_data, history_data = data
- history_data = self.to_running_device(history_data) # B, L, N, C
- future_data = self.to_running_device(future_data) # B, L, N, C
-
+ # Preprocess input data
+ future_data, history_data = data['target'], data['inputs']
+ history_data = self.to_running_device(history_data) # Shape: [B, L, N, C]
+ future_data = self.to_running_device(future_data) # Shape: [B, L, N, C]
+
+ # Select input features
history_data = self.select_input_features(history_data)
future_data_4_dec = self.select_input_features(future_data)
- # model forward
- model_return = self.model(history_data=history_data, future_data=future_data_4_dec, batch_seen=iter_num, epoch=epoch, train=train)
+ # Forward pass through the model
+ model_return = self.model(history_data=history_data, future_data=future_data_4_dec,
+ batch_seen=iter_num, epoch=epoch, train=train)
# parse model return
if isinstance(model_return, torch.Tensor): model_return = {"prediction": model_return}
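
The reworked test() can be driven directly once a runner is fully initialised
(model built, checkpoint loaded, test_data_loader ready, which the experiment
scripts normally take care of); a schematic sketch, not a complete script:

runner = DeepARRunner(CFG)  # CFG as defined in one of the config files above
returns_all = runner.test(save_metrics=True, save_results=True)
# test_metrics.json and test_results.npz are written to runner.ckpt_save_dir;
# returns_all carries the 'prediction', 'target' and 'inputs' tensors.
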
diff --git a/baselines/DeepAR_M4/M4.py b/baselines/DeepAR_M4/M4.py
deleted file mode 100644
index e529685b..00000000
--- a/baselines/DeepAR_M4/M4.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import M4ForecastingDataset
-
-from .arch import DeepAR
-from .loss import gaussian_loss
-from .runner import DeepARRunner
-
-def get_cfg(seasonal_pattern):
- assert seasonal_pattern in ["Yearly", "Quarterly", "Monthly", "Weekly", "Daily", "Hourly"]
- prediction_len = {"Yearly": 6, "Quarterly": 8, "Monthly": 18, "Weekly": 13, "Daily": 14, "Hourly": 48}[seasonal_pattern]
- num_nodes = {"Yearly": 23000, "Quarterly": 24000, "Monthly": 48000, "Weekly": 359, "Daily": 4227, "Hourly": 414}[seasonal_pattern]
- history_size = 2
- history_len = history_size * prediction_len
-
- CFG = EasyDict()
-
- # ================= general ================= #
- CFG.DESCRIPTION = "DeepAR M4"
- CFG.RUNNER = DeepARRunner
- CFG.DATASET_CLS = M4ForecastingDataset
- CFG.DATASET_NAME = "M4_" + seasonal_pattern
- CFG.DATASET_INPUT_LEN = history_len
- CFG.DATASET_OUTPUT_LEN = prediction_len
- CFG.GPU_NUM = 1
-
- # ================= environment ================= #
- CFG.ENV = EasyDict()
- CFG.ENV.SEED = 1
- CFG.ENV.CUDNN = EasyDict()
- CFG.ENV.CUDNN.ENABLED = True
-
- # ================= model ================= #
- CFG.MODEL = EasyDict()
- CFG.MODEL.NAME = "DeepAR"
- CFG.MODEL.ARCH = DeepAR
- CFG.MODEL.PARAM = {
- "cov_feat_size" : 0,
- "embedding_size" : 32,
- "hidden_size" : 64,
- "num_layers": 3,
- "use_ts_id" : False,
- "id_feat_size": None,
- "num_nodes": None
- }
- CFG.MODEL.FORWARD_FEATURES = [0] # values, node id
- CFG.MODEL.TARGET_FEATURES = [0]
-
- # ================= optim ================= #
- CFG.TRAIN = EasyDict()
- CFG.TRAIN.LOSS = gaussian_loss
- CFG.TRAIN.OPTIM = EasyDict()
- CFG.TRAIN.OPTIM.TYPE = "Adam"
- CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0005,
- "weight_decay": 0.0001,
- }
- CFG.TRAIN.LR_SCHEDULER = EasyDict()
- CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
- CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50, 80],
- "gamma": 0.5
- }
-
- # ================= train ================= #
- CFG.TRAIN.CLIP_GRAD_PARAM = {
- 'max_norm': 5.0
- }
- CFG.TRAIN.NUM_EPOCHS = 100
- CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
- )
- # train data
- CFG.TRAIN.DATA = EasyDict()
- # read data
- CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
- # dataloader args, optional
- CFG.TRAIN.DATA.BATCH_SIZE = 64
- CFG.TRAIN.DATA.PREFETCH = False
- CFG.TRAIN.DATA.SHUFFLE = True
- CFG.TRAIN.DATA.NUM_WORKERS = 2
- CFG.TRAIN.DATA.PIN_MEMORY = False
-
- # ================= test ================= #
- CFG.TEST = EasyDict()
- CFG.TEST.INTERVAL = 1
- # test data
- CFG.TEST.DATA = EasyDict()
- # read data
- CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
- # dataloader args, optional
- CFG.TEST.DATA.BATCH_SIZE = 64
- CFG.TEST.DATA.PREFETCH = False
- CFG.TEST.DATA.SHUFFLE = False
- CFG.TEST.DATA.NUM_WORKERS = 2
- CFG.TEST.DATA.PIN_MEMORY = False
-
- # ================= evaluate ================= #
- CFG.EVAL = EasyDict()
- CFG.EVAL.HORIZONS = []
- CFG.EVAL.SAVE_PATH = os.path.abspath(__file__ + "/..")
-
- return CFG
diff --git a/baselines/DeepAR_M4/arch/__init__.py b/baselines/DeepAR_M4/arch/__init__.py
deleted file mode 100644
index 6ec10582..00000000
--- a/baselines/DeepAR_M4/arch/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .deepar import DeepAR
\ No newline at end of file
diff --git a/baselines/DeepAR_M4/arch/deepar.py b/baselines/DeepAR_M4/arch/deepar.py
deleted file mode 100644
index d18c7a66..00000000
--- a/baselines/DeepAR_M4/arch/deepar.py
+++ /dev/null
@@ -1,120 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-from .distributions import Gaussian
-
-
-class DeepAR(nn.Module):
- """
- Paper: DeepAR: Probabilistic Forecasting with Autoregressive Recurrent Networks; Link: https://arxiv.org/abs/1704.04110; Ref Code: https://github.com/jingw2/demand_forecast, https://github.com/husnejahan/DeepAR-pytorch, https://github.com/arrigonialberto86/deepar.
- """
-
- def __init__(self, cov_feat_size, embedding_size, hidden_size, num_layers, use_ts_id, id_feat_size=0, num_nodes=0) -> None:
- """Init DeepAR.
-
- Args:
- cov_feat_size (int): covariate feature size (e.g. time in day, day in week, etc.).
- embedding_size (int): output size of the input embedding layer.
- hidden_size (int): hidden size of the LSTM.
- num_layers (int): number of LSTM layers.
- use_ts_id (bool): whether to use time series id to construct spatial id embedding as additional features.
- id_feat_size (int, optional): size of the spatial id embedding. Defaults to 0.
- num_nodes (int, optional): number of nodes. Defaults to 0.
- """
- super().__init__()
- self.use_ts_id = use_ts_id
- # input embedding layer
- self.input_embed = nn.Linear(1, embedding_size)
- # spatial id embedding layer
- if use_ts_id:
- assert id_feat_size > 0, "id_feat_size must be greater than 0 if use_ts_id is True"
- assert num_nodes > 0, "num_nodes must be greater than 0 if use_ts_id is True"
- self.id_feat = nn.Parameter(torch.empty(num_nodes, id_feat_size))
- nn.init.xavier_uniform_(self.id_feat)
- else:
- id_feat_size = 0
- # the LSTM layer
- self.encoder = nn.LSTM(embedding_size+cov_feat_size+id_feat_size, hidden_size, num_layers, bias=True, batch_first=True)
- # the likelihood function
- self.likelihood_layer = Gaussian(hidden_size, 1)
-
- def gaussian_sample(self, mu, sigma):
- """Sampling.
-
- Args:
- mu (torch.Tensor): mean values of distributions.
- sigma (torch.Tensor): std values of distributions.
- """
- mu = mu.squeeze(1)
- sigma = sigma.squeeze(1)
- gaussian = torch.distributions.Normal(mu, sigma)
- ypred = gaussian.sample([1]).squeeze(0)
- return ypred
-
- def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, train: bool, history_mask: torch.Tensor, future_mask: torch.Tensor, **kwargs) -> torch.Tensor:
- """Feed forward of DeepAR.
- Reference code: https://github.com/jingw2/demand_forecast/blob/master/deepar.py
-
- Args:
- history_data (torch.Tensor): history data. [B, L, N, C].
- future_data (torch.Tensor): future data. [B, L, N, C].
- train (bool): is training or not.
- """
- mask = torch.cat([history_mask, future_mask], dim=1).unsqueeze(-1)[:, 1:, ...]
- # mask = torch.where(mask == 0, torch.ones_like(mask) * 1e-5, mask)
- # mask = torch.ones_like(mask)
-        # normalization
- means = history_data.mean(1, keepdim=True).detach()
- stdev = torch.sqrt(torch.var(history_data, dim=1, keepdim=True, unbiased=False) + 1e-5)
- history_data_normed = history_data - means
- history_data_normed /= stdev
- future_data_normed = future_data - means
- future_data_normed /= stdev
-
- history_next = None
- preds = []
- mus = []
- sigmas = []
- len_in, len_out = history_data.shape[1], future_data.shape[1]
- B, _, N, C = history_data.shape
- input_feat_full_normed = torch.cat([history_data_normed[:, :, :, 0:1], future_data_normed[:, :, :, 0:1]], dim=1) # B, L_in+L_out, N, 1
- input_feat_full = torch.cat([history_data[:, :, :, 0:1], future_data[:, :, :, 0:1]], dim=1) # B, L_in+L_out, N, 1
-
- for t in range(1, len_in + len_out):
-            if not (t > len_in and not train): # not in the decoding stage during inference
- history_next = input_feat_full_normed[:, t-1:t, :, 0:1]
- embed_feat = self.input_embed(history_next)
-            # check for NaNs
-            assert not torch.isnan(history_next).any(), "NaNs in history_next"
-            assert not torch.isnan(self.input_embed.weight).any(), "NaNs in input_embed.weight"
-            assert not torch.isnan(self.input_embed.bias).any(), "NaNs in input_embed.bias"
-            assert not torch.isnan(embed_feat).any(), "NaNs in embed_feat"
- encoder_input = embed_feat
- # lstm
- B, _, N, C = encoder_input.shape # _ is 1
- encoder_input = encoder_input.transpose(1, 2).reshape(B * N, -1, C)
- _, (h, c) = self.encoder(encoder_input) if t == 1 else self.encoder(encoder_input, (h, c))
- # distribution proj
- mu, sigma = self.likelihood_layer(h[-1, :, :])
- history_next = self.gaussian_sample(mu, sigma).view(B, N).view(B, 1, N, 1)
- mus.append(mu.view(B, N, 1).unsqueeze(1))
- sigmas.append(sigma.view(B, N, 1).unsqueeze(1))
- preds.append(history_next)
- assert not torch.isnan(history_next).any()
-
- preds = torch.concat(preds, dim=1)
- mus = torch.concat(mus, dim=1)
- sigmas = torch.concat(sigmas, dim=1)
- reals = input_feat_full[:, -preds.shape[1]:, :, :]
-
-        # check mus and sigmas for NaNs
-        assert not torch.isnan(mus).any(), "NaNs in mus"
-        assert not torch.isnan(sigmas).any(), "NaNs in sigmas"
-
- # denormalization
- preds = preds * stdev + means
- mus = mus * stdev + means
- sigmas = sigmas * stdev + means
-
- return {"prediction": preds * mask, "target": reals * mask, "mus": mus, "sigmas": sigmas, "mask_prior": mask}
diff --git a/baselines/DeepAR_M4/arch/distributions.py b/baselines/DeepAR_M4/arch/distributions.py
deleted file mode 100644
index 0c84d512..00000000
--- a/baselines/DeepAR_M4/arch/distributions.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import torch
-import torch.nn as nn
-
-
-class Gaussian(nn.Module):
-
- def __init__(self, hidden_size, output_size):
- """
- Gaussian Likelihood Supports Continuous Data
- Args:
- input_size (int): hidden h_{i,t} column size
- output_size (int): embedding size
- """
- super(Gaussian, self).__init__()
- self.mu_layer = nn.Linear(hidden_size, output_size)
- self.sigma_layer = nn.Linear(hidden_size, output_size)
-
- def forward(self, h):
- sigma_t = torch.log(1 + torch.exp(self.sigma_layer(h))) + 1e-6
- sigma_t = sigma_t.squeeze(0)
- mu_t = self.mu_layer(h).squeeze(0)
- return mu_t, sigma_t
diff --git a/baselines/DeepAR_M4/loss/__init__.py b/baselines/DeepAR_M4/loss/__init__.py
deleted file mode 100644
index 9b08b8a3..00000000
--- a/baselines/DeepAR_M4/loss/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .gaussian import gaussian_loss
\ No newline at end of file
diff --git a/baselines/DeepAR_M4/loss/gaussian.py b/baselines/DeepAR_M4/loss/gaussian.py
deleted file mode 100644
index 9cf53631..00000000
--- a/baselines/DeepAR_M4/loss/gaussian.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import torch
-import numpy as np
-from basicts.metrics import masked_mae
-
-def masked_mae_loss(prediction, target, pred_len, null_val = np.nan):
- prediction = prediction[:, -pred_len:, :, :]
- target = target[:, -pred_len:, :, :]
- return masked_mae(prediction, target, null_val)
-
-def gaussian_loss(prediction, target, mus, sigmas, mask_prior, null_val = np.nan):
- """Masked gaussian loss. Kindly note that the gaussian loss is calculated based on mu, sigma, and target. The prediction is sampled from N(mu, sigma), and is not used in the loss calculation (it will be used in the metrics calculation).
-
- Args:
- prediction (torch.Tensor): prediction of model. [B, L, N, 1].
- target (torch.Tensor): ground truth. [B, L, N, 1].
- mus (torch.Tensor): the mean of gaussian distribution. [B, L, N, 1].
- sigmas (torch.Tensor): the std of gaussian distribution. [B, L, N, 1]
- null_val (optional): null value. Defaults to np.nan.
- """
- # mask
- if np.isnan(null_val):
- mask = ~torch.isnan(target)
- else:
- eps = 5e-5
- mask = ~torch.isclose(target, torch.tensor(null_val).expand_as(target).to(target.device), atol=eps, rtol=0.)
- mask = mask.float()
- mask /= torch.mean((mask))
- mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
-
- distribution = torch.distributions.Normal(mus, sigmas)
- likelihood = distribution.log_prob(target)
- likelihood = likelihood * mask
- likelihood = likelihood * mask_prior
-    assert not torch.isnan(likelihood).any(), "NaNs in likelihood"
- loss_g = -torch.mean(likelihood)
- return loss_g
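
The per-baseline gaussian_loss kept under baselines/DeepAR/loss follows the
same masked Gaussian negative log-likelihood as the deleted M4 copy above;
condensed here for reference (a sketch distilled from the deleted code, minus
the M4-specific mask_prior):

import numpy as np
import torch

def masked_gaussian_nll(target, mus, sigmas, null_val=np.nan):
    # Mask out null entries, normalise the mask so the loss scale stays
    # comparable across batches, then average the negative log-likelihood.
    if np.isnan(null_val):
        mask = ~torch.isnan(target)
    else:
        mask = ~torch.isclose(target, torch.full_like(target, null_val),
                              atol=5e-5, rtol=0.)
    mask = mask.float()
    mask = mask / torch.mean(mask)
    mask = torch.nan_to_num(mask)
    log_prob = torch.distributions.Normal(mus, sigmas).log_prob(target)
    return -torch.mean(log_prob * mask)
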
diff --git a/baselines/DeepAR_M4/runner/__init__.py b/baselines/DeepAR_M4/runner/__init__.py
deleted file mode 100644
index 1e41b855..00000000
--- a/baselines/DeepAR_M4/runner/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .deepar_runner import DeepARRunner
diff --git a/baselines/DeepAR_M4/runner/deepar_runner.py b/baselines/DeepAR_M4/runner/deepar_runner.py
deleted file mode 100644
index ad5425f5..00000000
--- a/baselines/DeepAR_M4/runner/deepar_runner.py
+++ /dev/null
@@ -1,141 +0,0 @@
-from typing import Dict
-import torch
-from basicts.data.registry import SCALER_REGISTRY
-from easytorch.utils.dist import master_only
-
-from basicts.runners.base_m4_runner import BaseM4Runner
-from basicts.metrics import masked_mae
-from basicts.utils import partial
-from ..loss.gaussian import masked_mae_loss
-
-class DeepARRunner(BaseM4Runner):
- def __init__(self, cfg: dict):
- super().__init__(cfg)
- self.forward_features = cfg["MODEL"].get("FORWARD_FEATURES", None)
- self.target_features = cfg["MODEL"].get("TARGET_FEATURES", None)
- self.output_seq_len = cfg["DATASET_OUTPUT_LEN"]
- self.metrics = cfg.get("METRICS", {"loss": self.loss, "real_mae": partial(masked_mae_loss, pred_len=self.output_seq_len), "full_mae": masked_mae})
-
- def select_input_features(self, data: torch.Tensor) -> torch.Tensor:
- """Select input features and reshape data to fit the target model.
-
- Args:
- data (torch.Tensor): input history data, shape [B, L, N, C].
-
- Returns:
- torch.Tensor: reshaped data
- """
-
- # select feature using self.forward_features
- if self.forward_features is not None:
- data = data[:, :, :, self.forward_features]
- return data
-
- def select_target_features(self, data: torch.Tensor) -> torch.Tensor:
- """Select target features and reshape data back to the BasicTS framework
-
- Args:
- data (torch.Tensor): prediction of the model with arbitrary shape.
-
- Returns:
- torch.Tensor: reshaped data with shape [B, L, N, C]
- """
-
- # select feature using self.target_features
- data = data[:, :, :, self.target_features]
- return data
-
- def rescale_data(self, input_data: Dict) -> Dict:
- """Rescale data.
-
- Args:
- data (Dict): Dict of data to be re-scaled.
-
- Returns:
- Dict: Dict re-scaled data.
- """
-
- if self.if_rescale:
- input_data["inputs"] = SCALER_REGISTRY.get(self.scaler["func"])(input_data["inputs"], **self.scaler["args"])
- input_data["prediction"] = SCALER_REGISTRY.get(self.scaler["func"])(input_data["prediction"], **self.scaler["args"])
- input_data["target"] = SCALER_REGISTRY.get(self.scaler["func"])(input_data["target"], **self.scaler["args"])
- if "mus" in input_data.keys():
- input_data["mus"] = SCALER_REGISTRY.get(self.scaler["func"])(input_data["mus"], **self.scaler["args"])
- if "sigmas" in input_data.keys():
- input_data["sigmas"] = SCALER_REGISTRY.get(self.scaler["func"])(input_data["sigmas"], **self.scaler["args"])
- return input_data
-
- def forward(self, data: tuple, epoch: int = None, iter_num: int = None, train: bool = True, **kwargs) -> tuple:
- """Feed forward process for train, val, and test. Note that the outputs are NOT re-scaled.
-
- Args:
- data (tuple): (future_data, history_data, future_mask, history_mask).
- epoch (int, optional): epoch number. Defaults to None.
- iter_num (int, optional): iteration number. Defaults to None.
- train (bool, optional): if in the training process. Defaults to True.
-
- Returns:
- tuple: (prediction, real_value)
- """
-
- # preprocess
- future_data, history_data, future_mask, history_mask = data
- history_data = self.to_running_device(history_data) # B, L, 1, C
- future_data = self.to_running_device(future_data) # B, L, 1, C
- history_mask = self.to_running_device(history_mask) # B, L, 1
- future_mask = self.to_running_device(future_mask) # B, L, 1
-
- batch_size, length, num_nodes, _ = future_data.shape
-
- history_data = self.select_input_features(history_data)
- future_data_4_dec = self.select_input_features(future_data)
-
- # model forward
- model_return = self.model(history_data=history_data, future_data=future_data_4_dec, history_mask=history_mask, future_mask=future_mask, batch_seen=iter_num, epoch=epoch, train=train)
- if isinstance(model_return, torch.Tensor): model_return = {"prediction": model_return * future_mask.unsqueeze(-1)}
- if "inputs" not in model_return: model_return["inputs"] = self.select_target_features(history_data)
- if "target" not in model_return: model_return["target"] = self.select_target_features(future_data * future_mask.unsqueeze(-1))
- return model_return
-
- @torch.no_grad()
- @master_only
- def test(self):
- """Evaluate the model.
-
- Args:
- train_epoch (int, optional): current epoch if in training process.
- """
-
- # TODO: fix OOM: especially when inputs, targets, and predictions are saved at the same time.
- # test loop
- prediction =[]
- target = []
- inputs = []
- mus = []
- sigmas = []
- mask_priors = []
- for _, data in enumerate(self.test_data_loader):
- forward_return = self.forward(data, epoch=None, iter_num=None, train=False)
- if not self.if_evaluate_on_gpu:
- forward_return["prediction"] = forward_return["prediction"].detach().cpu()
- forward_return["target"] = forward_return["target"].detach().cpu()
- forward_return["inputs"] = forward_return["inputs"].detach().cpu()
- forward_return["mus"] = forward_return["mus"].detach().cpu()
- forward_return["sigmas"] = forward_return["sigmas"].detach().cpu()
- forward_return["mask_prior"] = forward_return["mask_prior"].detach().cpu()
- prediction.append(forward_return["prediction"])
- target.append(forward_return["target"])
- inputs.append(forward_return["inputs"])
- mus.append(forward_return["mus"])
- sigmas.append(forward_return["sigmas"])
- mask_priors.append(forward_return["mask_prior"])
- prediction = torch.cat(prediction, dim=0)
- target = torch.cat(target, dim=0)
- inputs = torch.cat(inputs, dim=0)
- mus = torch.cat(mus, dim=0)
- sigmas = torch.cat(sigmas, dim=0)
- mask_priors = torch.cat(mask_priors, dim=0)
- # re-scale data
- returns_all = self.rescale_data({"prediction": prediction[:, -self.output_seq_len:, :, :], "target": target[:, -self.output_seq_len:, :, :], "inputs": inputs, "mus": mus[:, -self.output_seq_len:, :, :], "sigmas": sigmas[:, -self.output_seq_len:, :, :], "mask_prior": mask_priors[:, -self.output_seq_len:, :, :]})
- # evaluate
- self.save_prediction(returns_all)
diff --git a/baselines/FEDformer/ETTh1.py b/baselines/FEDformer/ETTh1.py
index ff69c80f..25abd8f9 100644
--- a/baselines/FEDformer/ETTh1.py
+++ b/baselines/FEDformer/ETTh1.py
@@ -1,47 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import FEDformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "FEDformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "FEDformer"
-CFG.MODEL.ARCH = FEDformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = FEDformer
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length\
+ "seq_len": INPUT_LEN, # input sequence length
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length\
"d_model": 512,
"version": "Fourier", # for FEDformer, there are two versions to choose, options: [Fourier, Wavelets]
"moving_avg": 24, # window size of moving average
@@ -64,64 +53,99 @@
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0001
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 10
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
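
Under the new layout, CFG.METRICS.FUNCS maps metric names to callables and
CFG.METRICS.TARGET names the one used for model selection. A minimal sketch of
how such a dict is consumed (illustrative, not the runner's actual code):

def evaluate_all(prediction, target, funcs, null_val):
    # Each metric takes (prediction, target, null_val), matching the
    # masked_* signatures imported above.
    return {name: fn(prediction, target, null_val) for name, fn in funcs.items()}

# scores = evaluate_all(pred, tgt, CFG.METRICS.FUNCS, CFG.METRICS.NULL_VAL)
# best-checkpoint tracking follows scores[CFG.METRICS.TARGET]  # here 'MAE'
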
diff --git a/baselines/FEDformer/ETTh2.py b/baselines/FEDformer/ETTh2.py
index d55b4a3e..076b2a97 100644
--- a/baselines/FEDformer/ETTh2.py
+++ b/baselines/FEDformer/ETTh2.py
@@ -1,47 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import FEDformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "FEDformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "FEDformer"
-CFG.MODEL.ARCH = FEDformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = FEDformer
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length\
+ "seq_len": INPUT_LEN, # input sequence length
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length\
"d_model": 512,
"version": "Fourier", # for FEDformer, there are two versions to choose, options: [Fourier, Wavelets]
"moving_avg": 25, # window size of moving average
@@ -64,13 +53,70 @@
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
@@ -83,53 +129,30 @@
"milestones": [1, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
-
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 10
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
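For context on `label_len = INPUT_LEN/2` in these configs: FEDformer inherits the Informer-style decoder, which is primed with the last `label_len` observed steps as start tokens, followed by placeholders for the `pred_len` steps to forecast. A minimal sketch of that assembly, assuming the conventional data flow (the function, tensor names, and zero placeholder are illustrative, not BasicTS's runner code):

import torch

def build_decoder_input(history: torch.Tensor, label_len: int, pred_len: int) -> torch.Tensor:
    # history: [batch, seq_len, channels]; the last label_len steps act as start tokens
    start_tokens = history[:, -label_len:, :]
    # zeros stand in for the steps to be predicted
    placeholder = torch.zeros(history.size(0), pred_len, history.size(2))
    return torch.cat([start_tokens, placeholder], dim=1)  # [batch, label_len + pred_len, channels]

Note that `INPUT_LEN/2` is a float under Python 3; the fedformer_arch.py hunk at the end of this patch shows `int(model_args["pred_len"])`, and presumably `label_len` is cast the same way, so `INPUT_LEN//2` would state the intent more directly.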
diff --git a/baselines/FEDformer/ETTm1.py b/baselines/FEDformer/ETTm1.py
index 2ea5c979..efdaf04d 100644
--- a/baselines/FEDformer/ETTm1.py
+++ b/baselines/FEDformer/ETTm1.py
@@ -1,47 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import FEDformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "FEDformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 192
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "FEDformer"
-CFG.MODEL.ARCH = FEDformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = FEDformer
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length\
+ "seq_len": INPUT_LEN, # input sequence length
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length\
"d_model": 512,
"version": "Fourier", # for FEDformer, there are two versions to choose, options: [Fourier, Wavelets]
"moving_avg": 24, # window size of moving average
@@ -59,69 +48,104 @@
"cross_activation": "tanh", # mwt cross atention activation function tanh or softmax
"activation": "gelu",
"num_time_features": 4, # number of used time features
- "time_of_day_size": 96,
+ "time_of_day_size": 24*4,
"day_of_week_size": 7,
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0001
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 10
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
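The `time_of_day_size` edit above (96 → 24*4) does not change the value — both are 96, the number of 15-minute slots per day in ETTm1 — but the product form documents the sampling rate. Assuming the time-of-day feature is stored normalized to [0, 1), as is conventional for these datasets, the embedding index is recovered like this (a sketch of the convention, not the library's code):

def time_of_day_index(tod_feature: float, time_of_day_size: int = 24 * 4) -> int:
    # tod_feature in [0, 1): e.g. 08:15 maps to (8 * 4 + 1) / 96 = 0.34375
    return int(tod_feature * time_of_day_size)  # index in 0 .. time_of_day_size - 1

assert time_of_day_index(0.34375) == 33  # the 08:15 slot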
diff --git a/baselines/FEDformer/ETTm2.py b/baselines/FEDformer/ETTm2.py
index ae9ab0e8..afeacf1b 100644
--- a/baselines/FEDformer/ETTm2.py
+++ b/baselines/FEDformer/ETTm2.py
@@ -1,47 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import FEDformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "FEDformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "FEDformer"
-CFG.MODEL.ARCH = FEDformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = FEDformer
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length\
+ "seq_len": INPUT_LEN, # input sequence length
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length\
"d_model": 512,
"version": "Fourier", # for FEDformer, there are two versions to choose, options: [Fourier, Wavelets]
"moving_avg": 24, # window size of moving average
@@ -59,69 +48,104 @@
"cross_activation": "tanh", # mwt cross atention activation function tanh or softmax
"activation": "gelu",
"num_time_features": 4, # number of used time features
- "time_of_day_size": 96,
+ "time_of_day_size": 24*4,
"day_of_week_size": 7,
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0001
+ "lr": 0.0002
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 10
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
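The `CFG.SCALER` block passes `train_ratio = TRAIN_VAL_TEST_RATIO[0]` so that normalization statistics are fit on the training split only, keeping validation and test data out of the normalization. A minimal z-score scaler in that spirit (an illustration of the idea; the real basicts.scaler.ZScoreScaler may differ in signature and internals):

import numpy as np

class SimpleZScoreScaler:
    """Fit mean/std on the training prefix; transform/inverse-transform the series."""
    def __init__(self, train_ratio: float, norm_each_channel: bool = True):
        self.train_ratio = train_ratio
        self.norm_each_channel = norm_each_channel

    def fit(self, data: np.ndarray):  # data: [time, num_channels]
        train_len = int(len(data) * self.train_ratio)
        axis = 0 if self.norm_each_channel else None  # per-channel vs. global statistics
        self.mean = data[:train_len].mean(axis=axis)
        self.std = data[:train_len].std(axis=axis)
        return self

    def transform(self, x: np.ndarray) -> np.ndarray:
        return (x - self.mean) / self.std

    def inverse_transform(self, x: np.ndarray) -> np.ndarray:
        return x * self.std + self.mean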
diff --git a/baselines/FEDformer/Electricity.py b/baselines/FEDformer/Electricity.py
index 86abdf00..107a552d 100644
--- a/baselines/FEDformer/Electricity.py
+++ b/baselines/FEDformer/Electricity.py
@@ -1,47 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import FEDformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "FEDformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Electricity"
-CFG.DATASET_TYPE = "Electricity Consumption"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "FEDformer"
-CFG.MODEL.ARCH = FEDformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Electricity' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = FEDformer
NUM_NODES = 321
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length\
+ "seq_len": INPUT_LEN, # input sequence length
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length\
"d_model": 512,
"version": "Fourier", # for FEDformer, there are two versions to choose, options: [Fourier, Wavelets]
"moving_avg": 24, # window size of moving average
@@ -64,64 +53,99 @@
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0001
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 10
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
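The new checkpoint directory encodes model, dataset, epoch count, and horizon lengths, so differently configured runs write to distinct folders (the old scheme keyed only on model name and epoch count). Assuming the regular settings for Electricity keep the 96-in/336-out lengths of the previous config, the expression resolves to:

import os

# MODEL_ARCH.__name__ == 'FEDformer', NUM_EPOCHS == 100; 96/336 assumed from the old config
ckpt_dir = os.path.join('checkpoints', 'FEDformer', '_'.join(['Electricity', '100', '96', '336']))
print(ckpt_dir)  # checkpoints/FEDformer/Electricity_100_96_336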
diff --git a/baselines/FEDformer/ExchangeRate.py b/baselines/FEDformer/ExchangeRate.py
index b0c28d0a..335849e0 100644
--- a/baselines/FEDformer/ExchangeRate.py
+++ b/baselines/FEDformer/ExchangeRate.py
@@ -1,47 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import FEDformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "FEDformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ExchangeRate"
-CFG.DATASET_TYPE = "Exchange Rate"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "FEDformer"
-CFG.MODEL.ARCH = FEDformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ExchangeRate' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = FEDformer
NUM_NODES = 8
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length\
+ "seq_len": INPUT_LEN, # input sequence length
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length\
"d_model": 512,
"version": "Fourier", # for FEDformer, there are two versions to choose, options: [Fourier, Wavelets]
"moving_avg": 24, # window size of moving average
@@ -59,69 +48,104 @@
"cross_activation": "tanh", # mwt cross atention activation function tanh or softmax
"activation": "gelu",
"num_time_features": 4, # number of used time features
- "time_of_day_size": 96,
+ "time_of_day_size": 24*4,
"day_of_week_size": 7,
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0001
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 10
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
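`masked_mae` serves as both the training loss and the target metric, with `CFG.METRICS.NULL_VAL` telling it which entries to ignore (NaN for the LTSF datasets here; 0.0 for the PEMS traffic data below). A sketch of the masking idea, following the usual masked-metric recipe rather than quoting basicts.metrics:

import math
import torch

def masked_mae_sketch(prediction: torch.Tensor, target: torch.Tensor,
                      null_val: float = float('nan')) -> torch.Tensor:
    if math.isnan(null_val):
        mask = ~torch.isnan(target)   # missing entries stored as NaN
    else:
        mask = target != null_val     # missing entries stored as a sentinel, e.g. 0.0
    mask = mask.float()
    mask = mask / mask.mean()         # re-weight so masking does not shrink the mean
    loss = torch.abs(prediction - target) * mask
    return torch.nan_to_num(loss).mean()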
diff --git a/baselines/FEDformer/PEMS04.py b/baselines/FEDformer/PEMS04.py
deleted file mode 100644
index 155d1f73..00000000
--- a/baselines/FEDformer/PEMS04.py
+++ /dev/null
@@ -1,134 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import FEDformer
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "FEDformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 720
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "FEDformer"
-CFG.MODEL.ARCH = FEDformer
-NUM_NODES = 307
-CFG.MODEL.PARAM = EasyDict(
- {
- "enc_in": NUM_NODES, # num nodes
- "dec_in": NUM_NODES,
- "c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length\
- "d_model": 512,
- "version": "Fourier", # for FEDformer, there are two versions to choose, options: [Fourier, Wavelets]
- "moving_avg": 24, # window size of moving average
- "n_heads": 8,
- "e_layers": 2, # num of encoder layers
- "d_layers": 1, # num of decoder layers
- "d_ff": 2048,
- "dropout": 0.05,
- "output_attention": False,
- "embed": "timeF", # [timeF, fixed, learned]
- "mode_select": "random", # for FEDformer, there are two mode selection method, options: [random, low]
- "modes": 64, # modes to be selected random 64
- "base": "legendre", # mwt base
- "L": 3, # ignore level
- "cross_activation": "tanh", # mwt cross atention activation function tanh or softmax
- "activation": "gelu",
- "num_time_features": 2, # number of used time features
- "time_of_day_size": 288,
- "day_of_week_size": 7
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0005,
- "weight_decay": 0.0005,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 10
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/FEDformer/PEMS04_LTSF.py b/baselines/FEDformer/PEMS04_LTSF.py
new file mode 100644
index 00000000..18de037d
--- /dev/null
+++ b/baselines/FEDformer/PEMS04_LTSF.py
@@ -0,0 +1,156 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import FEDformer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 720 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = FEDformer
+NUM_NODES = 307
+MODEL_PARAM = {
+ "enc_in": NUM_NODES, # num nodes
+ "dec_in": NUM_NODES,
+ "c_out": NUM_NODES,
+ "seq_len": INPUT_LEN, # input sequence length
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length\
+ "d_model": 512,
+ "version": "Fourier", # for FEDformer, there are two versions to choose, options: [Fourier, Wavelets]
+ "moving_avg": 24, # window size of moving average
+ "n_heads": 8,
+ "e_layers": 2, # num of encoder layers
+ "d_layers": 1, # num of decoder layers
+ "d_ff": 2048,
+ "dropout": 0.05,
+ "output_attention": False,
+ "embed": "timeF", # [timeF, fixed, learned]
+ "mode_select": "random", # for FEDformer, there are two mode selection method, options: [random, low]
+ "modes": 64, # modes to be selected random 64
+ "base": "legendre", # mwt base
+ "L": 3, # ignore level
+ "cross_activation": "tanh", # mwt cross atention activation function tanh or softmax
+ "activation": "gelu",
+ "num_time_features": 2, # number of used time features
+ "time_of_day_size": 288,
+ "day_of_week_size": 7
+ }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.0005,
+ "weight_decay": 0.0005,
+}
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 50],
+ "gamma": 0.5
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
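`FORWARD_FEATURES = [0, 1, 2]` matches the two time features the PEMS datasets carry (`num_time_features: 2`, time-of-day and day-of-week) plus the raw signal, whereas the LTSF configs above forward five channels; `TARGET_FEATURES = [0]` restricts loss and metrics to the raw signal. Both are plain channel selections on the last axis, assuming the usual [batch, time, nodes, features] layout:

import numpy as np

data = np.random.rand(2, 720, 307, 3)    # [batch, time, nodes, features]
future = np.random.rand(2, 336, 307, 3)

model_input = data[..., [0, 1, 2]]       # raw signal + time_of_day + day_of_week
ground_truth = future[..., [0]]          # only the raw signal is scored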
diff --git a/baselines/FEDformer/PEMS08.py b/baselines/FEDformer/PEMS08.py
deleted file mode 100644
index 3f661d15..00000000
--- a/baselines/FEDformer/PEMS08.py
+++ /dev/null
@@ -1,134 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import FEDformer
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "FEDformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 720
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "FEDformer"
-CFG.MODEL.ARCH = FEDformer
-NUM_NODES = 170
-CFG.MODEL.PARAM = EasyDict(
- {
- "enc_in": NUM_NODES, # num nodes
- "dec_in": NUM_NODES,
- "c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length\
- "d_model": 512,
- "version": "Fourier", # for FEDformer, there are two versions to choose, options: [Fourier, Wavelets]
- "moving_avg": 24, # window size of moving average
- "n_heads": 8,
- "e_layers": 2, # num of encoder layers
- "d_layers": 1, # num of decoder layers
- "d_ff": 2048,
- "dropout": 0.05,
- "output_attention": False,
- "embed": "timeF", # [timeF, fixed, learned]
- "mode_select": "random", # for FEDformer, there are two mode selection method, options: [random, low]
- "modes": 64, # modes to be selected random 64
- "base": "legendre", # mwt base
- "L": 3, # ignore level
- "cross_activation": "tanh", # mwt cross atention activation function tanh or softmax
- "activation": "gelu",
- "num_time_features": 2, # number of used time features
- "time_of_day_size": 288,
- "day_of_week_size": 7
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0005,
- "weight_decay": 0.0005,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 10
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/FEDformer/PEMS08_LTSF.py b/baselines/FEDformer/PEMS08_LTSF.py
new file mode 100644
index 00000000..3f4f47bf
--- /dev/null
+++ b/baselines/FEDformer/PEMS08_LTSF.py
@@ -0,0 +1,156 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import FEDformer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 720 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = FEDformer
+NUM_NODES = 170
+MODEL_PARAM = {
+ "enc_in": NUM_NODES, # num nodes
+ "dec_in": NUM_NODES,
+ "c_out": NUM_NODES,
+ "seq_len": INPUT_LEN, # input sequence length
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length\
+ "d_model": 512,
+ "version": "Fourier", # for FEDformer, there are two versions to choose, options: [Fourier, Wavelets]
+ "moving_avg": 24, # window size of moving average
+ "n_heads": 8,
+ "e_layers": 2, # num of encoder layers
+ "d_layers": 1, # num of decoder layers
+ "d_ff": 2048,
+ "dropout": 0.05,
+ "output_attention": False,
+ "embed": "timeF", # [timeF, fixed, learned]
+ "mode_select": "random", # for FEDformer, there are two mode selection method, options: [random, low]
+ "modes": 64, # modes to be selected random 64
+ "base": "legendre", # mwt base
+ "L": 3, # ignore level
+ "cross_activation": "tanh", # mwt cross atention activation function tanh or softmax
+ "activation": "gelu",
+ "num_time_features": 2, # number of used time features
+ "time_of_day_size": 288,
+ "day_of_week_size": 7
+ }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.0005,
+ "weight_decay": 0.0005,
+}
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 50],
+ "gamma": 0.5
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
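The optimizer, scheduler, and the new `CLIP_GRAD_PARAM` dicts map onto standard PyTorch calls; with milestones [1, 50] and gamma 0.5 the learning rate steps 5e-4 → 2.5e-4 after epoch 1 and 1.25e-4 after epoch 50. Roughly what the runner's training loop amounts to, assuming BasicTS forwards these dicts to PyTorch unchanged (the stand-in model and loader are illustrative):

import torch

model = torch.nn.Linear(170, 170)                     # stand-in for FEDformer
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay=0.0005)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[1, 50], gamma=0.5)

for epoch in range(100):
    for _ in range(10):                               # stand-in for the train loader
        optimizer.zero_grad()
        loss = model(torch.randn(4, 170)).abs().mean()
        loss.backward()
        # CFG.TRAIN.CLIP_GRAD_PARAM = {'max_norm': 5.0}
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
        optimizer.step()
    scheduler.step()                                  # lr: 5e-4 -> 2.5e-4 after epoch 1, 1.25e-4 after epoch 50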
diff --git a/baselines/FEDformer/Weather.py b/baselines/FEDformer/Weather.py
index d1f8fd9e..06e06d21 100644
--- a/baselines/FEDformer/Weather.py
+++ b/baselines/FEDformer/Weather.py
@@ -1,47 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import FEDformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "FEDformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Weather"
-CFG.DATASET_TYPE = "Weather Data"
-CFG.DATASET_INPUT_LEN = 720
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "FEDformer"
-CFG.MODEL.ARCH = FEDformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Weather' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = FEDformer
NUM_NODES = 21
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length\
+ "seq_len": INPUT_LEN, # input sequence length
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length\
"d_model": 512,
"version": "Fourier", # for FEDformer, there are two versions to choose, options: [Fourier, Wavelets]
"moving_avg": 24, # window size of moving average
@@ -64,64 +53,99 @@
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0001
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 10
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/FEDformer/arch/fedformer_arch.py b/baselines/FEDformer/arch/fedformer_arch.py
index d9210ed7..fc509db1 100644
--- a/baselines/FEDformer/arch/fedformer_arch.py
+++ b/baselines/FEDformer/arch/fedformer_arch.py
@@ -28,7 +28,7 @@ def __init__(self, **model_args):
self.pred_len = int(model_args["pred_len"])
self.output_attention = model_args["output_attention"]
-
+
self.time_of_day_size = model_args.get("time_of_day_size", None)
self.day_of_week_size = model_args.get("day_of_week_size", None)
self.day_of_month_size = model_args.get("day_of_month_size", None)
diff --git a/baselines/FEDformer/arch/fourier_correlation.py b/baselines/FEDformer/arch/fourier_correlation.py
index 5e19d97f..7d570117 100644
--- a/baselines/FEDformer/arch/fourier_correlation.py
+++ b/baselines/FEDformer/arch/fourier_correlation.py
@@ -31,7 +31,7 @@ def __init__(self, in_channels, out_channels, seq_len, modes=0, mode_select_meth
print('fourier enhanced block used!')
"""
1D Fourier block. It performs representation learning on frequency domain,
- it does FFT, linear transform, and Inverse FFT.
+ it does FFT, linear transform, and Inverse FFT.
"""
# get modes on frequency domain
self.index = get_frequency_modes(
@@ -71,7 +71,7 @@ def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes=64, m
super(FourierCrossAttention, self).__init__()
print(' fourier enhanced cross attention used!')
"""
- 1D Fourier Cross Attention layer. It does FFT, linear transform, attention mechanism and Inverse FFT.
+ 1D Fourier Cross Attention layer. It does FFT, linear transform, attention mechanism and Inverse FFT.
"""
self.activation = activation
self.in_channels = in_channels
diff --git a/baselines/FEDformer/arch/utils.py b/baselines/FEDformer/arch/utils.py
index abad3835..b72718dc 100644
--- a/baselines/FEDformer/arch/utils.py
+++ b/baselines/FEDformer/arch/utils.py
@@ -23,7 +23,7 @@ def phi_(phi_c, x, lb = 0, ub = 1):
def get_phi_psi(k, base):
-
+
x = Symbol('x')
phi_coeff = np.zeros((k,k))
phi_2x_coeff = np.zeros((k,k))
@@ -33,7 +33,7 @@ def get_phi_psi(k, base):
phi_coeff[ki,:ki+1] = np.flip(np.sqrt(2*ki+1) * np.array(coeff_).astype(np.float64))
coeff_ = Poly(legendre(ki, 4*x-1), x).all_coeffs()
phi_2x_coeff[ki,:ki+1] = np.flip(np.sqrt(2) * np.sqrt(2*ki+1) * np.array(coeff_).astype(np.float64))
-
+
psi1_coeff = np.zeros((k, k))
psi2_coeff = np.zeros((k, k))
for ki in range(k):
@@ -73,7 +73,7 @@ def get_phi_psi(k, base):
phi = [np.poly1d(np.flip(phi_coeff[i,:])) for i in range(k)]
psi1 = [np.poly1d(np.flip(psi1_coeff[i,:])) for i in range(k)]
psi2 = [np.poly1d(np.flip(psi2_coeff[i,:])) for i in range(k)]
-
+
elif base == 'chebyshev':
for ki in range(k):
if ki == 0:
@@ -84,9 +84,9 @@ def get_phi_psi(k, base):
phi_coeff[ki,:ki+1] = np.flip(2/np.sqrt(np.pi) * np.array(coeff_).astype(np.float64))
coeff_ = Poly(chebyshevt(ki, 4*x-1), x).all_coeffs()
phi_2x_coeff[ki,:ki+1] = np.flip(np.sqrt(2) * 2 / np.sqrt(np.pi) * np.array(coeff_).astype(np.float64))
-
+
phi = [partial(phi_, phi_coeff[i,:]) for i in range(k)]
-
+
x = Symbol('x')
kUse = 2*k
roots = Poly(chebyshevt(kUse, 2*x-1)).all_roots()
@@ -94,7 +94,7 @@ def get_phi_psi(k, base):
# x_m[x_m==0.5] = 0.5 + 1e-8 # add small noise to avoid the case of 0.5 belonging to both phi(2x) and phi(2x-1)
# not needed for our purpose here, we use even k always to avoid
wm = np.pi / kUse / 2
-
+
psi1_coeff = np.zeros((k, k))
psi2_coeff = np.zeros((k, k))
@@ -109,7 +109,7 @@ def get_phi_psi(k, base):
psi2_coeff[ki,:] -= proj_ * phi_coeff[i,:]
for j in range(ki):
- proj_ = (wm * psi1[j](x_m) * np.sqrt(2) * phi[ki](2*x_m)).sum()
+ proj_ = (wm * psi1[j](x_m) * np.sqrt(2) * phi[ki](2*x_m)).sum()
psi1_coeff[ki,:] -= proj_ * psi1_coeff[j,:]
psi2_coeff[ki,:] -= proj_ * psi2_coeff[j,:]
@@ -127,19 +127,19 @@ def get_phi_psi(k, base):
psi1[ki] = partial(phi_, psi1_coeff[ki,:], lb = 0, ub = 0.5+1e-16)
psi2[ki] = partial(phi_, psi2_coeff[ki,:], lb = 0.5+1e-16, ub = 1)
-
+
return phi, psi1, psi2
def get_filter(base, k):
-
+
def psi(psi1, psi2, i, inp):
mask = (inp<=0.5) * 1.0
return psi1[i](inp) * mask + psi2[i](inp) * (1-mask)
-
+
if base not in ['legendre', 'chebyshev']:
raise Exception('Base not supported')
-
+
x = Symbol('x')
H0 = np.zeros((k,k))
H1 = np.zeros((k,k))
@@ -152,17 +152,17 @@ def psi(psi1, psi2, i, inp):
roots = Poly(legendre(k, 2*x-1)).all_roots()
x_m = np.array([rt.evalf(20) for rt in roots]).astype(np.float64)
wm = 1/k/legendreDer(k,2*x_m-1)/eval_legendre(k-1,2*x_m-1)
-
+
for ki in range(k):
for kpi in range(k):
H0[ki, kpi] = 1/np.sqrt(2) * (wm * phi[ki](x_m/2) * phi[kpi](x_m)).sum()
G0[ki, kpi] = 1/np.sqrt(2) * (wm * psi(psi1, psi2, ki, x_m/2) * phi[kpi](x_m)).sum()
H1[ki, kpi] = 1/np.sqrt(2) * (wm * phi[ki]((x_m+1)/2) * phi[kpi](x_m)).sum()
G1[ki, kpi] = 1/np.sqrt(2) * (wm * psi(psi1, psi2, ki, (x_m+1)/2) * phi[kpi](x_m)).sum()
-
+
PHI0 = np.eye(k)
PHI1 = np.eye(k)
-
+
elif base == 'chebyshev':
x = Symbol('x')
kUse = 2*k
@@ -181,7 +181,7 @@ def psi(psi1, psi2, i, inp):
PHI0[ki, kpi] = (wm * phi[ki](2*x_m) * phi[kpi](2*x_m)).sum() * 2
PHI1[ki, kpi] = (wm * phi[ki](2*x_m-1) * phi[kpi](2*x_m-1)).sum() * 2
-
+
PHI0[np.abs(PHI0)<1e-8] = 0
PHI1[np.abs(PHI1)<1e-8] = 0
@@ -189,57 +189,57 @@ def psi(psi1, psi2, i, inp):
H1[np.abs(H1)<1e-8] = 0
G0[np.abs(G0)<1e-8] = 0
G1[np.abs(G1)<1e-8] = 0
-
+
return H0, H1, G0, G1, PHI0, PHI1
def train(model, train_loader, optimizer, epoch, device, verbose = 0,
lossFn = None, lr_schedule=None,
post_proc = lambda args: args):
-
+
if lossFn is None:
lossFn = nn.MSELoss()
model.train()
-
+
total_loss = 0.
for batch_idx, (data, target) in enumerate(train_loader):
-
+
bs = len(data)
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
-
+
output = model(data)
-
+
target = post_proc(target)
output = post_proc(output)
loss = lossFn(output.view(bs, -1), target.view(bs, -1))
-
+
loss.backward()
optimizer.step()
total_loss += loss.sum().item()
if lr_schedule is not None: lr_schedule.step()
-
+
if verbose>0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
-
+
return total_loss/len(train_loader.dataset)
def test(model, test_loader, device, verbose=0, lossFn=None,
post_proc = lambda args: args):
-
+
model.eval()
if lossFn is None:
lossFn = nn.MSELoss()
-
-
+
+
total_loss = 0.
predictions = []
-
+
with torch.no_grad():
for data, target in test_loader:
bs = len(data)
@@ -247,10 +247,10 @@ def test(model, test_loader, device, verbose=0, lossFn=None,
data, target = data.to(device), target.to(device)
output = model(data)
output = post_proc(output)
-
+
loss = lossFn(output.view(bs, -1), target.view(bs, -1))
total_loss += loss.sum().item()
-
+
return total_loss/len(test_loader.dataset)
@@ -346,7 +346,7 @@ def decode(self, x):
x = (x - self.b)/self.a
x = x.view(s)
return x
-
+
class LpLoss(object):
def __init__(self, d=2, p=2, size_average=True, reduction=True):
super(LpLoss, self).__init__()
diff --git a/baselines/FEDformer/run.sh b/baselines/FEDformer/run.sh
deleted file mode 100644
index 3b656702..00000000
--- a/baselines/FEDformer/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/FEDformer/ETTh1.py --gpus '2'
-python experiments/train.py -c baselines/FEDformer/ETTh2.py --gpus '2'
-python experiments/train.py -c baselines/FEDformer/ETTm1.py --gpus '2'
-python experiments/train.py -c baselines/FEDformer/ETTm2.py --gpus '2'
-python experiments/train.py -c baselines/FEDformer/Electricity.py --gpus '2'
-python experiments/train.py -c baselines/FEDformer/ExchangeRate.py --gpus '2'
-python experiments/train.py -c baselines/FEDformer/Weather.py --gpus '2'
-python experiments/train.py -c baselines/FEDformer/PEMS04.py --gpus '2'
-python experiments/train.py -c baselines/FEDformer/PEMS08.py --gpus '2'
diff --git a/baselines/GMSDR/METR-LA.py b/baselines/GMSDR/METR-LA.py
deleted file mode 100644
index 468a5203..00000000
--- a/baselines/GMSDR/METR-LA.py
+++ /dev/null
@@ -1,130 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
-
-from .arch import GMSDR
-
-CFG = EasyDict()
-
-# DCRNN does not allow to load parameters since it creates parameters in the first iteration
-resume = False
-if not resume:
- import random
- _ = random.randint(-1e6, 1e6)
-
-# ================= general ================= #
-CFG.DESCRIPTION = "GMSDR model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG._ = _
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "GMSDR"
-CFG.MODEL.ARCH = GMSDR
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
- "/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
- "horizon": 12,
- "input_dim": 1,
- "max_diffusion_step": 1,
- "num_nodes": 207,
- "num_rnn_layers": 2,
- "output_dim": 1,
- "rnn_units": 64,
- "seq_len": 12,
- "adj_mx": [torch.tensor(i) for i in adj_mx],
- "pre_k": 6,
- "pre_v": 1,
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-CFG.MODEL.SETUP_GRAPH = True
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0015,
- "eps": 1e-3
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [30, 50, 70, 80],
- "gamma": 0.2
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/GMSDR/PEMS-BAY.py b/baselines/GMSDR/PEMS-BAY.py
deleted file mode 100644
index 107750f6..00000000
--- a/baselines/GMSDR/PEMS-BAY.py
+++ /dev/null
@@ -1,130 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
-
-from .arch import GMSDR
-
-CFG = EasyDict()
-
-# DCRNN does not allow to load parameters since it creates parameters in the first iteration
-resume = False
-if not resume:
- import random
- _ = random.randint(-1e6, 1e6)
-
-# ================= general ================= #
-CFG.DESCRIPTION = "GMSDR model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG._ = _
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "GMSDR"
-CFG.MODEL.ARCH = GMSDR
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
- "/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
- "horizon": 12,
- "input_dim": 1,
- "max_diffusion_step": 1,
- "num_nodes": 325,
- "num_rnn_layers": 2,
- "output_dim": 1,
- "rnn_units": 64,
- "seq_len": 12,
- "adj_mx": [torch.tensor(i) for i in adj_mx],
- "pre_k": 6,
- "pre_v": 1,
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-CFG.MODEL.SETUP_GRAPH = True
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0015,
- "eps": 1e-3
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [30, 50, 70, 80],
- "gamma": 0.2
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/GMSDR/PEMS03.py b/baselines/GMSDR/PEMS03.py
deleted file mode 100644
index 2e21be03..00000000
--- a/baselines/GMSDR/PEMS03.py
+++ /dev/null
@@ -1,130 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
-
-from .arch import GMSDR
-
-CFG = EasyDict()
-
-# DCRNN does not allow to load parameters since it creates parameters in the first iteration
-resume = False
-if not resume:
- import random
- _ = random.randint(-1e6, 1e6)
-
-# ================= general ================= #
-CFG.DESCRIPTION = "GMSDR model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS03"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG._ = _
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "GMSDR"
-CFG.MODEL.ARCH = GMSDR
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
- "/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
- "horizon": 12,
- "input_dim": 1,
- "max_diffusion_step": 1,
- "num_nodes": 358,
- "num_rnn_layers": 2,
- "output_dim": 1,
- "rnn_units": 64,
- "seq_len": 12,
- "adj_mx": [torch.tensor(i) for i in adj_mx],
- "pre_k": 6,
- "pre_v": 1,
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-CFG.MODEL.SETUP_GRAPH = True
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0015,
- "eps": 1e-3
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [30, 50, 70, 80],
- "gamma": 0.2
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/GMSDR/PEMS04.py b/baselines/GMSDR/PEMS04.py
deleted file mode 100644
index 43f2a010..00000000
--- a/baselines/GMSDR/PEMS04.py
+++ /dev/null
@@ -1,130 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
-
-from .arch import GMSDR
-
-CFG = EasyDict()
-
-# DCRNN does not allow to load parameters since it creates parameters in the first iteration
-resume = False
-if not resume:
- import random
- _ = random.randint(-1e6, 1e6)
-
-# ================= general ================= #
-CFG.DESCRIPTION = "GMSDR model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG._ = _
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "GMSDR"
-CFG.MODEL.ARCH = GMSDR
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
- "/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
- "horizon": 12,
- "input_dim": 1,
- "max_diffusion_step": 1,
- "num_nodes": 307,
- "num_rnn_layers": 2,
- "output_dim": 1,
- "rnn_units": 64,
- "seq_len": 12,
- "adj_mx": [torch.tensor(i) for i in adj_mx],
- "pre_k": 6,
- "pre_v": 1,
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-CFG.MODEL.SETUP_GRAPH = True
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0015,
- "eps": 1e-3
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [30, 50, 70, 80],
- "gamma": 0.2
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/GMSDR/PEMS07.py b/baselines/GMSDR/PEMS07.py
deleted file mode 100644
index 2249225a..00000000
--- a/baselines/GMSDR/PEMS07.py
+++ /dev/null
@@ -1,130 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
-
-from .arch import GMSDR
-
-CFG = EasyDict()
-
-# DCRNN does not allow to load parameters since it creates parameters in the first iteration
-resume = False
-if not resume:
- import random
- _ = random.randint(-1e6, 1e6)
-
-# ================= general ================= #
-CFG.DESCRIPTION = "GMSDR model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS07"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG._ = _
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "GMSDR"
-CFG.MODEL.ARCH = GMSDR
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
- "/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
- "horizon": 12,
- "input_dim": 1,
- "max_diffusion_step": 1,
- "num_nodes": 883,
- "num_rnn_layers": 2,
- "output_dim": 1,
- "rnn_units": 64,
- "seq_len": 12,
- "adj_mx": [torch.tensor(i) for i in adj_mx],
- "pre_k": 6,
- "pre_v": 1,
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-CFG.MODEL.SETUP_GRAPH = True
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0015,
- "eps": 1e-3
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [30, 50, 70, 80],
- "gamma": 0.2
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/GMSDR/PEMS08.py b/baselines/GMSDR/PEMS08.py
deleted file mode 100644
index edd34231..00000000
--- a/baselines/GMSDR/PEMS08.py
+++ /dev/null
@@ -1,130 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
-
-from .arch import GMSDR
-
-CFG = EasyDict()
-
-# DCRNN does not allow to load parameters since it creates parameters in the first iteration
-resume = False
-if not resume:
- import random
- _ = random.randint(-1e6, 1e6)
-
-# ================= general ================= #
-CFG.DESCRIPTION = "GMSDR model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG._ = _
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "GMSDR"
-CFG.MODEL.ARCH = GMSDR
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
- "/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
- "horizon": 12,
- "input_dim": 1,
- "max_diffusion_step": 1,
- "num_nodes": 170,
- "num_rnn_layers": 2,
- "output_dim": 1,
- "rnn_units": 64,
- "seq_len": 12,
- "adj_mx": [torch.tensor(i) for i in adj_mx],
- "pre_k": 6,
- "pre_v": 1,
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-CFG.MODEL.SETUP_GRAPH = True
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0015,
- "eps": 1e-3
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [30, 50, 70, 80],
- "gamma": 0.2
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/GMSDR/arch/__init__.py b/baselines/GMSDR/arch/__init__.py
deleted file mode 100644
index a56c811c..00000000
--- a/baselines/GMSDR/arch/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .gmsdr_arch import GMSDR
-
-__all__ = ['GMSDR']
\ No newline at end of file
diff --git a/baselines/GMSDR/arch/gmsdr_arch.py b/baselines/GMSDR/arch/gmsdr_arch.py
deleted file mode 100644
index 65936375..00000000
--- a/baselines/GMSDR/arch/gmsdr_arch.py
+++ /dev/null
@@ -1,164 +0,0 @@
-import numpy as np
-import torch
-import torch.nn as nn
-
-from .gmsdr_cell import GMSDRCell
-
-def count_parameters(model):
- return sum(p.numel() for p in model.parameters() if p.requires_grad)
-
-
-class Seq2SeqAttrs:
- def __init__(self, adj_mx, **model_kwargs):
- self.adj_mx = adj_mx
- self.max_diffusion_step = int(model_kwargs.get('max_diffusion_step', 2))
- self.num_nodes = int(model_kwargs.get('num_nodes', 1))
- self.num_rnn_layers = int(model_kwargs.get('num_rnn_layers', 1))
- self.rnn_units = int(model_kwargs.get('rnn_units'))
- self.hidden_state_size = self.num_nodes * self.rnn_units
- self.pre_k = int(model_kwargs.get('pre_k', 1))
- self.pre_v = int(model_kwargs.get('pre_v', 1))
- self.input_dim = int(model_kwargs.get('input_dim', 1))
- self.output_dim = int(model_kwargs.get('output_dim', 1))
-
-
-class EncoderModel(nn.Module, Seq2SeqAttrs):
- def __init__(self, adj_mx, **model_kwargs):
- nn.Module.__init__(self)
- Seq2SeqAttrs.__init__(self, adj_mx, **model_kwargs)
- self.input_dim = int(model_kwargs.get('input_dim', 1))
- self.seq_len = int(model_kwargs.get('seq_len')) # for the encoder
- self.mlp = nn.Linear(self.input_dim, self.rnn_units)
- self.gmsdr_layers = nn.ModuleList(
- [GMSDRCell(self.rnn_units, self.input_dim, adj_mx, self.max_diffusion_step, self.num_nodes, self.pre_k, self.pre_v) for _ in range(self.num_rnn_layers)])
-
- def forward(self, inputs, hx_k):
- """
- Encoder forward pass.
-
- :param inputs: shape (batch_size, self.num_nodes * self.input_dim)
- :param hx_k: (num_layers, batch_size, pre_k, self.num_nodes, self.rnn_units)
- optional, zeros if not provided
- :return: output: # shape (batch_size, self.hidden_state_size)
- hx_k # shape (num_layers, batch_size, pre_k, self.num_nodes, self.rnn_units)
- (lower indices mean lower layers)
- """
- hx_ks = []
- batch = inputs.shape[0]
- x = inputs.reshape(batch, self.num_nodes, self.input_dim)
- output = self.mlp(x).view(batch, -1)
- for layer_num, dcgru_layer in enumerate(self.gmsdr_layers):
- next_hidden_state, new_hx_k = dcgru_layer(output, hx_k[layer_num])
- hx_ks.append(new_hx_k)
- output = next_hidden_state
- return output, torch.stack(hx_ks)
-
-
-class DecoderModel(nn.Module, Seq2SeqAttrs):
- def __init__(self, adj_mx, **model_kwargs):
- nn.Module.__init__(self)
- Seq2SeqAttrs.__init__(self, adj_mx, **model_kwargs)
- self.output_dim = int(model_kwargs.get('output_dim', 1))
- self.horizon = int(model_kwargs.get('horizon', 12)) # for the decoder
- self.projection_layer = nn.Linear(self.rnn_units, self.output_dim)
- self.gmsdr_layers = nn.ModuleList(
- [GMSDRCell(self.rnn_units, self.rnn_units, adj_mx, self.max_diffusion_step, self.num_nodes, self.pre_k, self.pre_v
- ) for _ in range(self.num_rnn_layers)])
-
- def forward(self, inputs, hx_k):
- """
- Decoder forward pass.
-
- :param inputs: shape (batch_size, self.num_nodes * self.output_dim)
- :param hx_k: (num_layers, batch_size, pre_k, num_nodes, rnn_units)
- optional, zeros if not provided
- :return: output: # shape (batch_size, self.num_nodes * self.output_dim)
- hidden_state # shape (num_layers, batch_size, self.hidden_state_size)
- (lower indices mean lower layers)
- """
- hx_ks = []
- output = inputs
- for layer_num, dcgru_layer in enumerate(self.gmsdr_layers):
- next_hidden_state, new_hx_k = dcgru_layer(output, hx_k[layer_num])
- hx_ks.append(new_hx_k)
- output = next_hidden_state
-
- projected = self.projection_layer(output.view(-1, self.rnn_units))
- output = projected.view(-1, self.num_nodes * self.output_dim)
-
- return output, torch.stack(hx_ks)
-
-
-class GMSDR(nn.Module, Seq2SeqAttrs):
- """
- Paper: MSDR: Multi-Step Dependency Relation Networks for Spatial Temporal Forecasting
- Link: https://dl.acm.org/doi/abs/10.1145/3534678.3539397
- Reference Code: https://github.com/dcliu99/MSDR
- """
- def __init__(self, adj_mx, **model_kwargs):
- super().__init__()
- Seq2SeqAttrs.__init__(self, adj_mx, **model_kwargs)
- self.encoder_model = EncoderModel(adj_mx, **model_kwargs)
- self.decoder_model = DecoderModel(adj_mx, **model_kwargs)
- self.out = nn.Linear(self.rnn_units, self.decoder_model.output_dim)
-
- def encoder(self, inputs):
- """
- encoder forward pass on t time steps
- :param inputs: shape (seq_len, batch_size, num_sensor * input_dim)
- :return: hx_k: (num_layers, batch_size, pre_k, num_sensor, rnn_units)
- """
- hx_k = torch.zeros(self.num_rnn_layers, inputs.shape[1], self.pre_k, self.num_nodes, self.rnn_units,
- device=inputs.device)
- outputs = []
- for t in range(self.encoder_model.seq_len):
- output, hx_k = self.encoder_model(inputs[t], hx_k)
- outputs.append(output)
- return torch.stack(outputs), hx_k
-
- def decoder(self, inputs, hx_k, labels=None, batches_seen=None):
- """
- Decoder forward pass
- :param inputs: (seq_len, batch_size, num_sensor * rnn_units)
- :param hx_k: (num_layers, batch_size, pre_k, num_sensor, rnn_units)
- :param labels: (self.horizon, batch_size, self.num_nodes * self.output_dim) [optional, not exist for inference]
- :param batches_seen: global step [optional, not exist for inference]
- :return: output: (self.horizon, batch_size, self.num_nodes * self.output_dim)
- """
- decoder_hx_k = hx_k
- decoder_input = inputs
-
- outputs = []
- for t in range(self.decoder_model.horizon):
- decoder_output, decoder_hx_k = self.decoder_model(decoder_input[t],
- decoder_hx_k)
- outputs.append(decoder_output)
- outputs = torch.stack(outputs)
- return outputs
-
- def Loss_l2(self):
- base_params = dict(self.named_parameters())
- loss_l2 = 0
- count = 0
- for key,value in base_params.items():
- if 'bias' not in key:
- loss_l2 += torch.sum(value**2)
- count += value.nelement()
- return loss_l2
-
- def forward(self, history_data, future_data=None, batch_seen=None, **kwargs):
- """
- seq2seq forward pass
- :param inputs: shape (seq_len, batch_size, num_sensor * input_dim)
- :param labels: shape (horizon, batch_size, num_sensor * output)
- :param batches_seen: batches seen till now
- :return: output: (self.horizon, batch_size, self.num_nodes * self.output_dim)
- """
- inputs = history_data.transpose(0,1).reshape(history_data.shape[1],history_data.shape[0],-1)
- encoder_outputs, hx_k = self.encoder(inputs)
- outputs = self.decoder(encoder_outputs, hx_k, future_data, batches_seen=batch_seen)
- if batch_seen == 0:
- print(
- "Total trainable parameters {}".format(count_parameters(self))
- )
- return outputs.transpose(0,1).reshape(history_data.shape[0],history_data.shape[1],history_data.shape[2],-1)
\ No newline at end of file
diff --git a/baselines/GMSDR/arch/gmsdr_cell.py b/baselines/GMSDR/arch/gmsdr_cell.py
deleted file mode 100644
index 0926a495..00000000
--- a/baselines/GMSDR/arch/gmsdr_cell.py
+++ /dev/null
@@ -1,184 +0,0 @@
-import numpy as np
-import torch
-from torch import nn, Tensor
-import torch.nn.functional as F
-
-
-class Seq2SeqAttrs:
- def __init__(self, adj_mx, **model_kwargs):
- self.adj_mx = adj_mx
- self.max_diffusion_step = int(model_kwargs.get('max_diffusion_step', 2))
- self.num_nodes = int(model_kwargs.get('num_nodes', 1))
- self.num_rnn_layers = int(model_kwargs.get('num_rnn_layers', 1))
- self.rnn_units = int(model_kwargs.get('rnn_units'))
- self.hidden_state_size = self.num_nodes * self.rnn_units
- self.pre_k = int(model_kwargs.get('pre_k', 1))
- self.pre_v = int(model_kwargs.get('pre_v', 1))
- self.input_dim = int(model_kwargs.get('input_dim', 2))
- self.output_dim = int(model_kwargs.get('output_dim', 1))
-
-
-class LayerParams:
- def __init__(self, rnn_network: torch.nn.Module, layer_type: str):
- self._rnn_network = rnn_network
- self._params_dict = {}
- self._biases_dict = {}
- self._type = layer_type
-
- def get_weights(self, shape):
- if shape not in self._params_dict:
- nn_param = torch.nn.Parameter(torch.empty(*shape))
- torch.nn.init.xavier_normal_(nn_param)
- self._params_dict[shape] = nn_param
- self._rnn_network.register_parameter('{}_weight_{}'.format(self._type, str(shape)),
- nn_param)
- return self._params_dict[shape]
-
- def get_biases(self, length, bias_start=0.0):
- if length not in self._biases_dict:
- biases = torch.nn.Parameter(torch.empty(length))
- torch.nn.init.constant_(biases, bias_start)
- self._biases_dict[length] = biases
- self._rnn_network.register_parameter('{}_biases_{}'.format(self._type, str(length)),
- biases)
-
- return self._biases_dict[length]
-
-
-class GMSDRCell(torch.nn.Module):
- def __init__(self, num_units, input_dim, adj_mx, max_diffusion_step, num_nodes, pre_k, pre_v, nonlinearity='tanh',
- use_gc_for_ru=True):
- """
-
- :param num_units:
- :param adj_mx:
- :param max_diffusion_step:
- :param num_nodes:
- :param nonlinearity:
- :param filter_type: "laplacian", "random_walk", "dual_random_walk".
- :param use_gc_for_ru: whether to use Graph convolution to calculate the reset and update gates.
- """
-
- super().__init__()
- self._activation = torch.tanh if nonlinearity == 'tanh' else torch.relu
- # support other nonlinearities up here?
- self._num_nodes = num_nodes
- self._num_units = num_units
- self._max_diffusion_step = max_diffusion_step
- self._supports = []
- self._use_gc_for_ru = use_gc_for_ru
- self.pre_k = pre_k
- self.pre_v = pre_v
- self.input_dim = input_dim
- self.nodevec1 = nn.Parameter(torch.randn(num_nodes, 10), requires_grad=True)
- self.nodevec2 = nn.Parameter(torch.randn(10, num_nodes), requires_grad=True)
-
- # supports = []
- # if filter_type == "laplacian":
- # supports.append(utils.calculate_scaled_laplacian(adj_mx, lambda_max=None))
- # elif filter_type == "random_walk":
- # supports.append(utils.calculate_random_walk_matrix(adj_mx).T)
- # elif filter_type == "dual_random_walk":
- # supports.append(utils.calculate_random_walk_matrix(adj_mx).T)
- # supports.append(utils.calculate_random_walk_matrix(adj_mx.T).T)
- # else:
- # supports.append(utils.calculate_scaled_laplacian(adj_mx))
- # for support in supports:
- # self._supports.append(support)
- # # self._supports.append(self._build_sparse_matrix(support))
- self._supports = adj_mx
-
- self._fc_params = LayerParams(self, 'fc')
- self._gconv_params = LayerParams(self, 'gconv')
- self.W = nn.Parameter(torch.zeros(self._num_units, self._num_units), requires_grad=True)
- self.b = nn.Parameter(torch.zeros(num_nodes, self._num_units), requires_grad=True)
- self.R = nn.Parameter(torch.zeros(pre_k, num_nodes, self._num_units), requires_grad=True)
- self.attlinear = nn.Linear(num_nodes * self._num_units, 1)
-
- # @staticmethod
- # def _build_sparse_matrix(L):
- # L = L.tocoo()
- # indices = np.column_stack((L.row, L.col))
- # # this is to ensure row-major ordering to equal torch.sparse.sparse_reorder(L)
- # indices = indices[np.lexsort((indices[:, 0], indices[:, 1]))]
- # L = torch.sparse_coo_tensor(indices.T, L.data, L.shape, device=device)
- # return L
-
- def forward(self, inputs, hx_k):
- """Gated recurrent unit (GRU) with Graph Convolution.
- :param inputs: (B, num_nodes * input_dim)
- :param hx_k: (B, pre_k, num_nodes, rnn_units)
-
- :return
- - Output: A `2-D` tensor with shape `(B, num_nodes * rnn_units)`.
- """
- bs, k, n, d = hx_k.shape
- preH = hx_k[:, -1:]
- for i in range(1, self.pre_v):
- preH = torch.cat([preH, hx_k[:, -(i + 1):-i]], -1)
- preH = preH.reshape(bs, n, d * self.pre_v)
- self.adp = F.softmax(F.relu(torch.mm(self.nodevec1, self.nodevec2)), dim=1)
- convInput = F.leaky_relu_(self._gconv(inputs, preH, d, bias_start=1.0))
- new_states = hx_k + self.R.unsqueeze(0)
- output = torch.matmul(convInput, self.W) + self.b.unsqueeze(0) + self.attention(new_states)
- output = output.unsqueeze(1)
- x = hx_k[:, 1:k]
- hx_k = torch.cat([x, output], dim=1)
- output = output.reshape(bs, n * d)
- return output, hx_k
-
- @staticmethod
- def _concat(x, x_):
- x_ = x_.unsqueeze(0)
- return torch.cat([x, x_], dim=0)
-
- def _gconv(self, inputs, state, output_size, bias_start=0.0):
- # input / state: (batch_size, num_nodes, input_dim/state_dim)
- batch_size = inputs.shape[0]
- inputs = torch.reshape(inputs, (batch_size, self._num_nodes, -1))
- state = torch.reshape(state, (batch_size, self._num_nodes, -1))
- inputs_and_state = torch.cat([inputs, state], dim=2)
- input_size = inputs_and_state.size(2)
-
- x = inputs_and_state
- x0 = x.permute(1, 2, 0) # (num_nodes, total_arg_size, batch_size)
- x0 = torch.reshape(x0, shape=[self._num_nodes, input_size * batch_size])
- x = torch.unsqueeze(x0, 0)
- if self._max_diffusion_step == 0:
- pass
- else:
- for support in self._supports:
- x1 = torch.mm(support.to(x0.device), x0)
- x = self._concat(x, x1)
-
- for k in range(2, self._max_diffusion_step + 1):
- x2 = 2 * torch.mm(support.to(x0.device), x1) - x0
- x = self._concat(x, x2)
- x1, x0 = x2, x1
- x1 = self.adp.mm(x0)
- x = self._concat(x, x1)
- for k in range(2, self._max_diffusion_step + 1):
- x2 = self.adp.mm(x1) - x0
- x = self._concat(x, x2)
- x1, x0 = x2, x1
- num_matrices = (len(self._supports) + 1) * self._max_diffusion_step + 1
- # num_matrices = (len(self._supports)) * self._max_diffusion_step + 1
- x = torch.reshape(x, shape=[num_matrices, self._num_nodes, input_size, batch_size])
- x = x.permute(3, 1, 2, 0) # (batch_size, num_nodes, input_size, order)
- x = torch.reshape(x, shape=[batch_size * self._num_nodes, input_size * num_matrices])
-
- weights = self._gconv_params.get_weights((input_size * num_matrices, output_size)).to(x.device)
- x = torch.matmul(x, weights) # (batch_size * self._num_nodes, output_size)
-
- biases = self._gconv_params.get_biases(output_size, bias_start).to(x.device)
- x += biases
- # Reshape res back to 2D: (batch_size, num_node, state_dim) -> (batch_size, num_node * state_dim)
- return torch.reshape(x, [batch_size, self._num_nodes, output_size])
-
- def attention(self, inputs: Tensor):
- bs, k, n, d = inputs.size()
- x = inputs.reshape(bs, k, -1)
- out = self.attlinear(x)
- weight = F.softmax(out, dim=1)
- outputs = (x * weight).sum(dim=1).reshape(bs, n, d)
- return outputs
\ No newline at end of file
diff --git a/baselines/GMSDR/run.sh b/baselines/GMSDR/run.sh
deleted file mode 100644
index 2f46b383..00000000
--- a/baselines/GMSDR/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/GMSDR/METR-LA.py --gpus '4'
-python experiments/train.py -c baselines/GMSDR/PEMS-BAY.py --gpus '4'
-python experiments/train.py -c baselines/GMSDR/PEMS03.py --gpus '4'
-python experiments/train.py -c baselines/GMSDR/PEMS04.py --gpus '4'
-python experiments/train.py -c baselines/GMSDR/PEMS07.py --gpus '4'
-python experiments/train.py -c baselines/GMSDR/PEMS08.py --gpus '4'
\ No newline at end of file
diff --git a/baselines/GTS/METR-LA.py b/baselines/GTS/METR-LA.py
index 5a704ecb..2ef88276 100644
--- a/baselines/GTS/METR-LA.py
+++ b/baselines/GTS/METR-LA.py
@@ -1,50 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
+import random
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.utils.serialization import load_pkl
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_dataset_desc, \
+ load_adj, load_dataset_data
from .arch import GTS
-from .loss import gts_loss
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'METR-LA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = GTS
+node_feats = load_dataset_data(DATA_NAME)
+train_len = int(node_feats.shape[0] * TRAIN_VAL_TEST_RATIO[0])
+node_feats = node_feats[:train_len, ..., 0]
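+# GTS learns the graph from node features; restrict them to the training split (first
+# channel only) so that graph construction does not see validation/test data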
-# GTS does not allow to load parameters since it creates parameters in the first iteration
-resume = False
-if not resume:
- import random
- _ = random.randint(-1e6, 1e6)
-
-# ================= general ================= #
-CFG.DESCRIPTION = "GTS model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG._ = _
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "GTS"
-CFG.MODEL.ARCH = GTS
-node_feats_full = load_pkl("datasets/{0}/data_in_{1}_out_{2}_rescale_{3}.pkl".format(CFG.DATASET_NAME, CFG.DATASET_INPUT_LEN, CFG.DATASET_OUTPUT_LEN, CFG.get("RESCALE", True)))["processed_data"][..., 0]
-train_index_list = load_pkl("datasets/{0}/index_in_{1}_out_{2}_rescale_{3}.pkl".format(CFG.DATASET_NAME, CFG.DATASET_INPUT_LEN, CFG.DATASET_OUTPUT_LEN, CFG.get("RESCALE", True)))["train"]
-node_feats = node_feats_full[:train_index_list[-1][-1], ...]
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"cl_decay_steps": 2000,
"filter_type": "dual_random_walk",
"horizon": 12,
@@ -57,79 +43,117 @@
"rnn_units": 64,
"seq_len": 12,
"use_curriculum_learning": True,
- "dim_fc": 383664,
+ "dim_fc": 383552,
"node_feats": node_feats,
"temp": 0.5,
"k": 10
}
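+# note: "dim_fc" is the flattened length of the graph learner's conv features and
+# depends on the number of training timesteps, hence the updated value above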
-CFG.MODEL.SETUP_GRAPH = True
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'GTS model configuration'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+# GTS does not allow loading parameters, since it creates some of them in the first
+# iteration; the random value below makes the config unique so training never resumes
+CFG._ = random.randint(-10**6, 10**6)
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
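+# norm_each_channel=True fits a separate mean/std per channel; False uses one global pair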
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
+CFG.MODEL.SETUP_GRAPH = True
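+# SETUP_GRAPH runs one warm-up forward pass so that lazily created parameters exist
+# before the optimizer and checkpoint logic touch the model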
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+    'MAE': masked_mae,
+    'MAPE': masked_mape,
+    'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
-# ================= optim ================= #
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = gts_loss
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
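+# e.g., 'checkpoints/GTS/METR-LA_100_12_12', assuming the regular 12-step input/output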
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.005,
"eps": 1e-3
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [20, 40],
"gamma": 0.1
}
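+# MultiStepLR multiplies the lr by gamma at each milestone: 0.005 -> 5e-4 (epoch 20) -> 5e-5 (epoch 40)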
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/GTS/PEMS-BAY.py b/baselines/GTS/PEMS-BAY.py
index eb3d424c..32a1caf4 100644
--- a/baselines/GTS/PEMS-BAY.py
+++ b/baselines/GTS/PEMS-BAY.py
@@ -1,50 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
+import random
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.utils.serialization import load_pkl
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_dataset_desc, \
+ load_dataset_data
from .arch import GTS
-from .loss import gts_loss
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS-BAY' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = GTS
+node_feats = load_dataset_data(DATA_NAME)
+train_len = int(node_feats.shape[0] * TRAIN_VAL_TEST_RATIO[0])
+node_feats = node_feats[:train_len, ..., 0]
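+# as in the METR-LA config, graph learning uses training-split features only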
-# GTS does not allow to load parameters since it creates parameters in the first iteration
-resume = False
-if not resume:
- import random
- _ = random.randint(-1e6, 1e6)
-
-# ================= general ================= #
-CFG.DESCRIPTION = "GTS model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG._ = _
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "GTS"
-CFG.MODEL.ARCH = GTS
-node_feats_full = load_pkl("datasets/{0}/data_in_{1}_out_{2}_rescale_{3}.pkl".format(CFG.DATASET_NAME, CFG.DATASET_INPUT_LEN, CFG.DATASET_OUTPUT_LEN, CFG.get("RESCALE", True)))["processed_data"][..., 0]
-train_index_list = load_pkl("datasets/{0}/index_in_{1}_out_{2}_rescale_{3}.pkl".format(CFG.DATASET_NAME, CFG.DATASET_INPUT_LEN, CFG.DATASET_OUTPUT_LEN, CFG.get("RESCALE", True)))["train"]
-node_feats = node_feats_full[:train_index_list[-1][-1], ...]
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"cl_decay_steps": 2000,
"filter_type": "dual_random_walk",
"horizon": 12,
@@ -57,79 +43,117 @@
"rnn_units": 128,
"seq_len": 12,
"use_curriculum_learning": True,
- "dim_fc": 583520,
+ "dim_fc": 583408,
"node_feats": node_feats,
"temp": 0.5,
"k": 30
}
-CFG.MODEL.SETUP_GRAPH = True
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'GTS model configuration'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+# GTS does not allow loading parameters, since it creates some of them in the first
+# iteration; the random value below makes the config unique so training never resumes
+CFG._ = random.randint(-10**6, 10**6)
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
+CFG.MODEL.SETUP_GRAPH = True
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
-# ================= optim ================= #
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = gts_loss
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001,
"eps": 1e-3
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [20, 30],
"gamma": 0.1
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/GTS/PEMS03.py b/baselines/GTS/PEMS03.py
index bdb1aef6..3ed23a9a 100644
--- a/baselines/GTS/PEMS03.py
+++ b/baselines/GTS/PEMS03.py
@@ -1,50 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
+import random
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.utils.serialization import load_pkl
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_dataset_desc, \
+ load_dataset_data
from .arch import GTS
-from .loss import gts_loss
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS03' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = GTS
+node_feats = load_dataset_data(DATA_NAME)
+train_len = int(node_feats.shape[0] * TRAIN_VAL_TEST_RATIO[0])
+node_feats = node_feats[:train_len, ..., 0]
-# GTS does not allow to load parameters since it creates parameters in the first iteration
-resume = False
-if not resume:
- import random
- _ = random.randint(-1e6, 1e6)
-
-# ================= general ================= #
-CFG.DESCRIPTION = "GTS model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS03"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG._ = _
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "GTS"
-CFG.MODEL.ARCH = GTS
-node_feats_full = load_pkl("datasets/{0}/data_in_{1}_out_{2}_rescale_{3}.pkl".format(CFG.DATASET_NAME, CFG.DATASET_INPUT_LEN, CFG.DATASET_OUTPUT_LEN, CFG.get("RESCALE", True)))["processed_data"][..., 0]
-train_index_list = load_pkl("datasets/{0}/index_in_{1}_out_{2}_rescale_{3}.pkl".format(CFG.DATASET_NAME, CFG.DATASET_INPUT_LEN, CFG.DATASET_OUTPUT_LEN, CFG.get("RESCALE", True)))["train"]
-node_feats = node_feats_full[:train_index_list[-1][-1], ...]
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"cl_decay_steps": 2000,
"filter_type": "dual_random_walk",
"horizon": 12,
@@ -57,79 +43,117 @@
"rnn_units": 64,
"seq_len": 12,
"use_curriculum_learning": True,
- "dim_fc": 251456,
+ "dim_fc": 251296,
"node_feats": node_feats,
"temp": 0.5,
"k": 30
}
-CFG.MODEL.SETUP_GRAPH = True
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+# GTS does not allow loading parameters, since it creates them during the first forward pass
+CFG._ = random.randint(-10**6, 10**6)
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
+CFG.MODEL.SETUP_GRAPH = True
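+# SETUP_GRAPH makes the runner execute one forward pass first, so parameters created on the fly exist before the optimizer is built.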
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
-# ================= optim ================= #
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = gts_loss
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001,
"eps": 1e-3
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [20, 30],
"gamma": 0.1
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/GTS/PEMS04.py b/baselines/GTS/PEMS04.py
index cbe27243..76193215 100644
--- a/baselines/GTS/PEMS04.py
+++ b/baselines/GTS/PEMS04.py
@@ -1,50 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
+import random
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.utils.serialization import load_pkl
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_dataset_desc, \
+ load_dataset_data
from .arch import GTS
-from .loss import gts_loss
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = GTS
+node_feats = load_dataset_data(DATA_NAME)
+train_len = int(node_feats.shape[0] * TRAIN_VAL_TEST_RATIO[0])
+node_feats = node_feats[:train_len, ..., 0]
-# GTS does not allow to load parameters since it creates parameters in the first iteration
-resume = False
-if not resume:
- import random
- _ = random.randint(-1e6, 1e6)
-
-# ================= general ================= #
-CFG.DESCRIPTION = "GTS model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG._ = _
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "GTS"
-CFG.MODEL.ARCH = GTS
-node_feats_full = load_pkl("datasets/{0}/data_in_{1}_out_{2}_rescale_{3}.pkl".format(CFG.DATASET_NAME, CFG.DATASET_INPUT_LEN, CFG.DATASET_OUTPUT_LEN, CFG.get("RESCALE", True)))["processed_data"][..., 0]
-train_index_list = load_pkl("datasets/{0}/index_in_{1}_out_{2}_rescale_{3}.pkl".format(CFG.DATASET_NAME, CFG.DATASET_INPUT_LEN, CFG.DATASET_OUTPUT_LEN, CFG.get("RESCALE", True)))["train"]
-node_feats = node_feats_full[:train_index_list[-1][-1], ...]
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"cl_decay_steps": 2000,
"filter_type": "dual_random_walk",
"horizon": 12,
@@ -57,79 +43,117 @@
"rnn_units": 64,
"seq_len": 12,
"use_curriculum_learning": True,
- "dim_fc": 162976,
+ "dim_fc": 162832,
"node_feats": node_feats,
"temp": 0.5,
"k": 30
}
-CFG.MODEL.SETUP_GRAPH = True
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+# GTS does not allow loading parameters, since it creates them during the first forward pass
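+# The random value changes the config hash on every run, so the runner never tries to resume from a stale checkpoint.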
+CFG._ = random.randint(-10**6, 10**6)
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
+CFG.MODEL.SETUP_GRAPH = True
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
-# ================= optim ================= #
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = gts_loss
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001,
"eps": 1e-3
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [20, 30],
"gamma": 0.1
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/GTS/PEMS07.py b/baselines/GTS/PEMS07.py
index 1f5d7ad7..3b4cd985 100644
--- a/baselines/GTS/PEMS07.py
+++ b/baselines/GTS/PEMS07.py
@@ -1,50 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
+import random
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.utils.serialization import load_pkl
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_dataset_desc, \
+ load_dataset_data
from .arch import GTS
-from .loss import gts_loss
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS07' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = GTS
+node_feats = load_dataset_data(DATA_NAME)
+train_len = int(node_feats.shape[0] * TRAIN_VAL_TEST_RATIO[0])
+node_feats = node_feats[:train_len, ..., 0]
-# GTS does not allow to load parameters since it creates parameters in the first iteration
-resume = False
-if not resume:
- import random
- _ = random.randint(-1e6, 1e6)
-
-# ================= general ================= #
-CFG.DESCRIPTION = "GTS model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS07"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG._ = _
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "GTS"
-CFG.MODEL.ARCH = GTS
-node_feats_full = load_pkl("datasets/{0}/data_in_{1}_out_{2}_rescale_{3}.pkl".format(CFG.DATASET_NAME, CFG.DATASET_INPUT_LEN, CFG.DATASET_OUTPUT_LEN, CFG.get("RESCALE", True)))["processed_data"][..., 0]
-train_index_list = load_pkl("datasets/{0}/index_in_{1}_out_{2}_rescale_{3}.pkl".format(CFG.DATASET_NAME, CFG.DATASET_INPUT_LEN, CFG.DATASET_OUTPUT_LEN, CFG.get("RESCALE", True)))["train"]
-node_feats = node_feats_full[:train_index_list[-1][-1], ...]
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"cl_decay_steps": 2000,
"filter_type": "dual_random_walk",
"horizon": 12,
@@ -57,79 +43,117 @@
"rnn_units": 64,
"seq_len": 12,
"use_curriculum_learning": True,
- "dim_fc": 270816,
+ "dim_fc": 270656,
"node_feats": node_feats,
"temp": 0.5,
"k": 30
}
-CFG.MODEL.SETUP_GRAPH = True
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+# GTS does not allow loading parameters, since it creates them during the first forward pass
+CFG._ = random.randint(-10**6, 10**6)
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
+CFG.MODEL.SETUP_GRAPH = True
+
+############################## Metrics Configuration ##############################
-# ================= optim ================= #
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
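+# The masked_* metrics skip entries equal to NULL_VAL, so missing sensor readings do not distort the scores.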
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = gts_loss
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001,
"eps": 1e-3
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [20, 30],
"gamma": 0.1
}
-
-# ================= train ================= #
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/GTS/PEMS08.py b/baselines/GTS/PEMS08.py
index 0e8ebf29..dc0f09ee 100644
--- a/baselines/GTS/PEMS08.py
+++ b/baselines/GTS/PEMS08.py
@@ -1,50 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
+import random
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.utils.serialization import load_pkl
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_dataset_desc, \
+ load_dataset_data
from .arch import GTS
-from .loss import gts_loss
-
-CFG = EasyDict()
-# GTS does not allow to load parameters since it creates parameters in the first iteration
-resume = False
-if not resume:
- import random
- _ = random.randint(-1e6, 1e6)
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = GTS
+node_feats = load_dataset_data(DATA_NAME)
+train_len = int(node_feats.shape[0] * TRAIN_VAL_TEST_RATIO[0])
+node_feats = node_feats[:train_len, ..., 0]
-# ================= general ================= #
-CFG.DESCRIPTION = "GTS model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG._ = _
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "GTS"
-CFG.MODEL.ARCH = GTS
-node_feats_full = load_pkl("datasets/{0}/data_in_{1}_out_{2}_rescale_{3}.pkl".format(CFG.DATASET_NAME, CFG.DATASET_INPUT_LEN, CFG.DATASET_OUTPUT_LEN, CFG.get("RESCALE", True)))["processed_data"][..., 0]
-train_index_list = load_pkl("datasets/{0}/index_in_{1}_out_{2}_rescale_{3}.pkl".format(CFG.DATASET_NAME, CFG.DATASET_INPUT_LEN, CFG.DATASET_OUTPUT_LEN, CFG.get("RESCALE", True)))["train"]
-node_feats = node_feats_full[:train_index_list[-1][-1], ...]
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"cl_decay_steps": 2000,
"filter_type": "dual_random_walk",
"horizon": 12,
@@ -57,79 +43,117 @@
"rnn_units": 64,
"seq_len": 12,
"use_curriculum_learning": True,
- "dim_fc": 171280,
+ "dim_fc": 171120,
"node_feats": node_feats,
"temp": 0.5,
"k": 30
}
-CFG.MODEL.SETUP_GRAPH = True
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+# GTS does not allow loading parameters, since it creates them during the first forward pass
+CFG._ = random.randint(-10**6, 10**6)
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
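+# The scaler fits its statistics on the training split only (train_ratio), avoiding test-set leakage.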
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
+CFG.MODEL.SETUP_GRAPH = True
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = gts_loss
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
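+# Checkpoints are saved under checkpoints/<model>/<dataset>_<epochs>_<input_len>_<output_len>/.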
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001,
"eps": 1e-3
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [20, 30],
"gamma": 0.1
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/GTS/arch/gts_arch.py b/baselines/GTS/arch/gts_arch.py
index 6d0a3f96..8be92acb 100644
--- a/baselines/GTS/arch/gts_arch.py
+++ b/baselines/GTS/arch/gts_arch.py
@@ -243,7 +243,7 @@ def forward(self, history_data, future_data=None, batch_seen=None, epoch=None, *
:param batch_seen: batches seen till now
:return: output: (self.horizon, batch_size, self.num_nodes * self.output_dim)
"""
-
+
# reshape data
batch_size, length, num_nodes, channels = history_data.shape
history_data = history_data.reshape(batch_size, length, num_nodes * channels) # [B, L, N*C]
@@ -253,7 +253,7 @@ def forward(self, history_data, future_data=None, batch_seen=None, epoch=None, *
batch_size, length, num_nodes, channels = future_data.shape
future_data = future_data.reshape(batch_size, length, num_nodes * channels) # [B, L, N*C]
future_data = future_data.transpose(0, 1) # [L, B, N*C]
-
+
# GTS
inputs = history_data
labels = future_data
diff --git a/baselines/GTS/run.sh b/baselines/GTS/run.sh
deleted file mode 100644
index a989d902..00000000
--- a/baselines/GTS/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/GTS/METR-LA.py --gpus '1'
-python experiments/train.py -c baselines/GTS/PEMS-BAY.py --gpus '1'
-python experiments/train.py -c baselines/GTS/PEMS03.py --gpus '1'
-python experiments/train.py -c baselines/GTS/PEMS04.py --gpus '1'
-python experiments/train.py -c baselines/GTS/PEMS07.py --gpus '1'
-python experiments/train.py -c baselines/GTS/PEMS08.py --gpus '1'
diff --git a/baselines/GWNet/METR-LA.py b/baselines/GWNet/METR-LA.py
index 1d5939e7..09c70607 100644
--- a/baselines/GWNet/METR-LA.py
+++ b/baselines/GWNet/METR-LA.py
@@ -1,43 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import GraphWaveNet
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Graph WaveNet model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "GWNet"
-CFG.MODEL.ARCH = GraphWaveNet
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'METR-LA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = GraphWaveNet
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"num_nodes": 207,
"supports": [torch.tensor(i) for i in adj_mx],
"dropout": 0.3,
@@ -54,73 +43,109 @@
"blocks": 4,
"layers": 2
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/GWNet/PEMS-BAY.py b/baselines/GWNet/PEMS-BAY.py
index 3bbb0ba1..c2f71fae 100644
--- a/baselines/GWNet/PEMS-BAY.py
+++ b/baselines/GWNet/PEMS-BAY.py
@@ -1,43 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import GraphWaveNet
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Graph WaveNet model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "GWNet"
-CFG.MODEL.ARCH = GraphWaveNet
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS-BAY' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = GraphWaveNet
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"num_nodes": 325,
"supports": [torch.tensor(i) for i in adj_mx],
"dropout": 0.3,
@@ -54,73 +43,109 @@
"blocks": 4,
"layers": 2
}
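+# 'num_nodes' must match the sensor count of the dataset (PEMS-BAY has 325 sensors).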
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/GWNet/PEMS03.py b/baselines/GWNet/PEMS03.py
index d43c10e2..fefbd0ee 100644
--- a/baselines/GWNet/PEMS03.py
+++ b/baselines/GWNet/PEMS03.py
@@ -1,43 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import GraphWaveNet
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Graph WaveNet model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS03"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "GWNet"
-CFG.MODEL.ARCH = GraphWaveNet
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS03' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = GraphWaveNet
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"num_nodes": 358,
"supports": [torch.tensor(i) for i in adj_mx],
"dropout": 0.3,
@@ -54,73 +43,109 @@
"blocks": 4,
"layers": 2
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50, 100],
+ "milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
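+# Only the training loader shuffles; validation and test data are iterated in order.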
+# Gradient clipping settings
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
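
Aside: the new `CFG.SCALER` block replaces the normalization that 0.3.x baked into the data pipeline. A minimal sketch of what a z-score scaler such as `ZScoreScaler` does — fit on the training split only, with `norm_each_channel` switching between per-channel and global statistics (class and method names here are illustrative, not BasicTS internals):

```python
import numpy as np

class ZScoreSketch:
    """Illustrative z-score scaler: fit on the train split, reuse for val/test."""

    def __init__(self, norm_each_channel: bool = True):
        # Per-channel statistics when norm_each_channel, else one global pair.
        self.axis = (0, 1) if norm_each_channel else None
        self.mean = self.std = None

    def fit(self, train_data: np.ndarray) -> "ZScoreSketch":
        # train_data: [num_steps, num_nodes, num_channels]
        keep = self.axis is not None
        self.mean = train_data.mean(axis=self.axis, keepdims=keep)
        self.std = train_data.std(axis=self.axis, keepdims=keep)
        return self

    def transform(self, x: np.ndarray) -> np.ndarray:
        return (x - self.mean) / self.std

    def inverse_transform(self, x: np.ndarray) -> np.ndarray:
        # Applied when RESCALE is set, presumably so metrics see raw values.
        return x * self.std + self.mean

data = np.random.rand(1000, 358, 1)       # a PEMS03-like series, for shape only
train = data[: int(len(data) * 0.6)]      # TRAIN_VAL_TEST_RATIO[0], e.g. 0.6
scaler = ZScoreSketch(norm_each_channel=True).fit(train)
normed = scaler.transform(data)
```
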
diff --git a/baselines/GWNet/PEMS04.py b/baselines/GWNet/PEMS04.py
index 309642cc..8525e87d 100644
--- a/baselines/GWNet/PEMS04.py
+++ b/baselines/GWNet/PEMS04.py
@@ -1,43 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import GraphWaveNet
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Graph WaveNet model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "GWNet"
-CFG.MODEL.ARCH = GraphWaveNet
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = GraphWaveNet
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"num_nodes": 307,
"supports": [torch.tensor(i) for i in adj_mx],
"dropout": 0.3,
@@ -54,73 +43,109 @@
"blocks": 4,
"layers": 2
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50, 100],
+ "milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
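
Aside: checkpoints are now grouped by architecture, and the run directory encodes dataset, epoch budget, and sequence lengths. A quick sketch of the path this PEMS04 config produces (the 12-in/12-out lengths are the usual regular settings for the PEMS benchmarks, assumed here):

```python
import os

DATA_NAME, NUM_EPOCHS = 'PEMS04', 100
INPUT_LEN = OUTPUT_LEN = 12  # assumed regular settings for PEMS04

ckpt_dir = os.path.join(
    'checkpoints',
    'GraphWaveNet',  # MODEL_ARCH.__name__
    '_'.join([DATA_NAME, str(NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
)
print(ckpt_dir)  # checkpoints/GraphWaveNet/PEMS04_100_12_12
```
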
diff --git a/baselines/GWNet/PEMS07.py b/baselines/GWNet/PEMS07.py
index acd8312a..9aa808e9 100644
--- a/baselines/GWNet/PEMS07.py
+++ b/baselines/GWNet/PEMS07.py
@@ -1,43 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import GraphWaveNet
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Graph WaveNet model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS07"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "GWNet"
-CFG.MODEL.ARCH = GraphWaveNet
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS07' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = GraphWaveNet
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"num_nodes": 883,
"supports": [torch.tensor(i) for i in adj_mx],
"dropout": 0.3,
@@ -54,73 +43,109 @@
"blocks": 4,
"layers": 2
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50, 100],
+ "milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# validating data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
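
Aside: the scheduler milestones drop the final `100` entry; with a 100-epoch budget that last decay would have fired (if at all) only on the terminal epoch, so the effective schedule is unchanged. A sketch of what `MultiStepLR([1, 50], gamma=0.5)` does to the 0.002 base LR:

```python
import torch

model = torch.nn.Linear(1, 1)
optim = torch.optim.Adam(model.parameters(), lr=0.002, weight_decay=0.0001)
sched = torch.optim.lr_scheduler.MultiStepLR(optim, milestones=[1, 50], gamma=0.5)

for epoch in range(1, 101):      # NUM_EPOCHS = 100
    optim.step()                 # stand-in for one training epoch
    sched.step()
    if epoch in (1, 50):
        print(epoch, optim.param_groups[0]['lr'])
# epoch 1 -> 0.001 (halved once), epoch 50 -> 0.0005 (halved again)
```
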
diff --git a/baselines/GWNet/PEMS08.py b/baselines/GWNet/PEMS08.py
index 05a3ceb1..d9dbbcd3 100644
--- a/baselines/GWNet/PEMS08.py
+++ b/baselines/GWNet/PEMS08.py
@@ -1,43 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import GraphWaveNet
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Graph WaveNet model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "GWNet"
-CFG.MODEL.ARCH = GraphWaveNet
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = GraphWaveNet
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "doubletransition")
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"num_nodes": 170,
"supports": [torch.tensor(i) for i in adj_mx],
"dropout": 0.3,
@@ -54,73 +43,109 @@
"blocks": 4,
"layers": 2
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50, 100],
+ "milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
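
Aside: the former `basicts.losses` helpers now live in `basicts.metrics`, and `CFG.METRICS.NULL_VAL` feeds every masked metric. A minimal sketch of the masking convention, assuming (as the PEMS configs suggest) that missing readings are stored as the null value:

```python
import torch

def masked_mae_sketch(pred: torch.Tensor, target: torch.Tensor,
                      null_val: float = 0.0) -> torch.Tensor:
    # Ignore positions where the ground truth equals the null value.
    mask = (target != null_val).float()
    mask = mask / mask.mean()            # re-weight so the mean stays unbiased
    loss = torch.abs(pred - target) * mask
    return torch.nan_to_num(loss).mean()

pred = torch.tensor([1.0, 2.0, 3.0])
target = torch.tensor([1.5, 0.0, 3.0])   # middle reading is missing
print(masked_mae_sketch(pred, target))   # averages the two valid entries -> 0.25
```

Note that `masked_mae` does double duty: it is the training loss (`CFG.TRAIN.LOSS`) and, via `CFG.METRICS.TARGET = 'MAE'`, the metric used for model selection.
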
diff --git a/baselines/GWNet/run.sh b/baselines/GWNet/run.sh
deleted file mode 100644
index e2e08381..00000000
--- a/baselines/GWNet/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/GWNet/METR-LA.py --gpus '1'
-python experiments/train.py -c baselines/GWNet/PEMS-BAY.py --gpus '1'
-python experiments/train.py -c baselines/GWNet/PEMS03.py --gpus '1'
-python experiments/train.py -c baselines/GWNet/PEMS04.py --gpus '1'
-python experiments/train.py -c baselines/GWNet/PEMS07.py --gpus '1'
-python experiments/train.py -c baselines/GWNet/PEMS08.py --gpus '1'
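
Aside: with `run.sh` removed, sweeping GWNet over all six datasets takes a small driver. A sketch assuming `experiments/train.py` keeps the `-c`/`--gpus` flags used above:

```python
import subprocess

for ds in ['METR-LA', 'PEMS-BAY', 'PEMS03', 'PEMS04', 'PEMS07', 'PEMS08']:
    subprocess.run(
        ['python', 'experiments/train.py', '-c', f'baselines/GWNet/{ds}.py',
         '--gpus', '1'],
        check=True,  # stop the sweep on the first failed run
    )
```
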
diff --git a/baselines/HI/HI_METR-LA_in96_out96.py b/baselines/HI/HI_METR-LA_in96_out96.py
deleted file mode 100644
index 3aa48ffb..00000000
--- a/baselines/HI/HI_METR-LA_in96_out96.py
+++ /dev/null
@@ -1,106 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.losses import masked_mae
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import NoBPRunner
-
-from .arch import HINetwork
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "HI model configuration"
-CFG.RUNNER = NoBPRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "HINetwork"
-CFG.MODEL.ARCH = HINetwork
-CFG.MODEL.PARAM = {
- "input_length": CFG.DATASET_INPUT_LEN,
- "output_length": CFG.DATASET_OUTPUT_LEN
-}
-CFG.MODEL.FORWARD_FEATURES = [0, 1]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.005,
- "weight_decay": 1.0e-5,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [50],
- "gamma": 0.1
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 1
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.HORIZONS = [12, 24, 48, 96]
-CFG.TEST.INTERVAL = 1
-# validating data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
diff --git a/baselines/HI/METR-LA.py b/baselines/HI/METR-LA.py
new file mode 100644
index 00000000..b2e4daa3
--- /dev/null
+++ b/baselines/HI/METR-LA.py
@@ -0,0 +1,138 @@
+import os
+import sys
+import torch
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import NoBPRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
+
+from .arch import HINetwork
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'METR-LA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = HINetwork
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
+ "/adj_mx.pkl", "doubletransition")
+MODEL_PARAM = {
+ "input_length": INPUT_LEN,
+ "output_length": OUTPUT_LEN
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = NoBPRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.002,
+ "weight_decay": 0.0001,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 50],
+ "gamma": 0.5
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
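
Aside: HI keeps the `NoBPRunner` because Historical Inertia has nothing to backpropagate — it replays the most recent observations as the forecast (see the paper linked in `hi_arch.py`). A sketch of the idea, not the repo's exact implementation:

```python
import torch

def historical_inertia(history: torch.Tensor, output_length: int) -> torch.Tensor:
    # history: [batch, input_length, num_nodes, num_channels]
    # Forecast = the last `output_length` observed steps, replayed verbatim.
    return history[:, -output_length:, ...]

x = torch.randn(8, 12, 207, 1)     # METR-LA has 207 sensors; 12-step window assumed
y_hat = historical_inertia(x, 12)  # [8, 12, 207, 1]
```
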
diff --git a/baselines/HI/arch/hi_arch.py b/baselines/HI/arch/hi_arch.py
index cfc240e1..21459a58 100644
--- a/baselines/HI/arch/hi_arch.py
+++ b/baselines/HI/arch/hi_arch.py
@@ -10,7 +10,7 @@ class HINetwork(nn.Module):
Paper: Historical Inertia: A Neglected but Powerful Baseline for Long Sequence Time-series Forecasting
Link: https://arxiv.org/abs/2103.16349
"""
-
+
def __init__(self, input_length: int, output_length: int, channel=None, reverse=False):
"""
Init HI.
diff --git a/baselines/Informer/ETTh1.py b/baselines/Informer/ETTh1.py
index 2b94a53a..858b4217 100644
--- a/baselines/Informer/ETTh1.py
+++ b/baselines/Informer/ETTh1.py
@@ -1,47 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Informer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Informer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 96 # the best in {96, 192, 336, 720}
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Informer"
-CFG.MODEL.ARCH = Informer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Informer
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "out_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length\
+ "seq_len": INPUT_LEN, # input sequence length
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "out_len": OUTPUT_LEN, # prediction sequence length\
"factor": 3, # probsparse attn factor
"d_model": 512,
"n_heads": 8,
@@ -62,74 +51,107 @@
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
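
Aside: `label_len = INPUT_LEN/2` is the Informer decoder's start token — the decoder sees the last `label_len` observed steps followed by placeholders for the horizon. (Note `INPUT_LEN/2` is float division in Python 3; the arch evidently tolerates it.) A shape-level sketch of how such a decoder input is commonly assembled; BasicTS may do this inside the model itself:

```python
import torch

seq_len, label_len, out_len = 96, 48, 336   # label_len = INPUT_LEN / 2
enc_x = torch.randn(4, seq_len, 7)          # [batch, seq_len, enc_in]

# Decoder input: last label_len observations, then zeros for the horizon.
dec_x = torch.cat(
    [enc_x[:, -label_len:, :], torch.zeros(4, out_len, 7)],
    dim=1,
)                                           # [4, label_len + out_len, 7]
```
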
diff --git a/baselines/Informer/ETTh2.py b/baselines/Informer/ETTh2.py
index f8fba75f..2b5bf185 100644
--- a/baselines/Informer/ETTh2.py
+++ b/baselines/Informer/ETTh2.py
@@ -1,47 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Informer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Informer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336 # the best in {96, 192, 336, 720}
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Informer"
-CFG.MODEL.ARCH = Informer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Informer
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "out_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length\
+ "seq_len": INPUT_LEN, # input sequence length
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "out_len": OUTPUT_LEN, # prediction sequence length\
"factor": 5, # probsparse attn factor
"d_model": 512,
"n_heads": 8,
@@ -62,74 +51,107 @@
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
+ "milestones": [1, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
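
Aside: `FORWARD_FEATURES = [0, 1, 2, 3, 4]` feeds the raw series plus the four calendar encodings to the model, while `TARGET_FEATURES = [0]` restricts the loss and metrics to the raw channel. The channel slicing this implies, on illustrative shapes:

```python
import torch

data = torch.randn(4, 336, 7, 5)    # [batch, steps, nodes, channels]
FORWARD_FEATURES = [0, 1, 2, 3, 4]  # raw + time_of_day/day_of_week/day_of_month/day_of_year
TARGET_FEATURES = [0]

model_input = data[..., FORWARD_FEATURES]  # all five channels go into the model
target = data[..., TARGET_FEATURES]        # loss/metrics only see the raw series
```
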
diff --git a/baselines/Informer/ETTm1.py b/baselines/Informer/ETTm1.py
index b8cf20a3..79bdc4d8 100644
--- a/baselines/Informer/ETTm1.py
+++ b/baselines/Informer/ETTm1.py
@@ -1,47 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Informer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Informer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 96 # the best in {96, 192, 336, 720}
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Informer"
-CFG.MODEL.ARCH = Informer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Informer
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "out_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length\
+ "seq_len": INPUT_LEN, # input sequence length
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "out_len": OUTPUT_LEN, # prediction sequence length\
"factor": 3, # probsparse attn factor
"d_model": 512,
"n_heads": 8,
@@ -57,79 +46,112 @@
"distil": True, # whether to use distilling in encoder, using this argument means not using distilling
"mix": True, # use mix attention in generative decoder
"num_time_features": 4, # number of used time features [time_of_day, day_of_week, day_of_month, day_of_year]
- "time_of_day_size": 24,
+ "time_of_day_size": 24*4,
"day_of_week_size": 7,
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
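
Aside: bumping `time_of_day_size` from 24 to `24*4` fixes the temporal embedding for the ETTm datasets, which are sampled every 15 minutes and therefore have 96 time-of-day slots rather than 24. A sketch of the slot index such an embedding is fed:

```python
from datetime import datetime

TIME_OF_DAY_SIZE = 24 * 4  # 96 fifteen-minute slots per day for ETTm datasets

def time_of_day_slot(ts: datetime) -> int:
    minutes = ts.hour * 60 + ts.minute
    return minutes * TIME_OF_DAY_SIZE // (24 * 60)

print(time_of_day_slot(datetime(2016, 7, 1, 0, 15)))   # -> 1
print(time_of_day_slot(datetime(2016, 7, 1, 23, 45)))  # -> 95
```
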
diff --git a/baselines/Informer/ETTm2.py b/baselines/Informer/ETTm2.py
index 7b8e7dc5..985e4b7d 100644
--- a/baselines/Informer/ETTm2.py
+++ b/baselines/Informer/ETTm2.py
@@ -1,47 +1,37 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Informer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Informer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 96 # the best in {96, 192, 336, 720}
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Informer"
-CFG.MODEL.ARCH = Informer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+INPUT_LEN = 96 # better results than regular_settings['INPUT_LEN'] (336)
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Informer
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "out_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length\
+ "seq_len": INPUT_LEN, # input sequence length
+ "label_len": INPUT_LEN/2, # start token length used in decoder
+ "out_len": OUTPUT_LEN, # prediction sequence length\
"factor": 3, # probsparse attn factor
"d_model": 512,
"n_heads": 8,
@@ -57,79 +47,112 @@
"distil": True, # whether to use distilling in encoder, using this argument means not using distilling
"mix": True, # use mix attention in generative decoder
"num_time_features": 4, # number of used time features [time_of_day, day_of_week, day_of_month, day_of_year]
- "time_of_day_size": 24,
+ "time_of_day_size": 24*4,
"day_of_week_size": 7,
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
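+# The scaler is fitted on the training split only (train_ratio below), so val/test statistics never leak into normalization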
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
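+# TARGET selects the metric used to pick the best checkpoint; masked metrics skip entries equal to NULL_VAL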
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/Informer/Electricity.py b/baselines/Informer/Electricity.py
index 7053127a..0caf522c 100644
--- a/baselines/Informer/Electricity.py
+++ b/baselines/Informer/Electricity.py
@@ -1,47 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
from basicts.metrics import masked_mae, masked_mse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Informer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Informer model configuration "
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Electricity"
-CFG.DATASET_TYPE = "Electricity Consumption"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Informer"
-CFG.MODEL.ARCH = Informer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Electricity' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Informer
NUM_NODES = 321
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "out_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length\
+    "seq_len": INPUT_LEN, # input sequence length
+    "label_len": INPUT_LEN // 2, # start token length used in decoder
+    "out_len": OUTPUT_LEN, # prediction sequence length
"factor": 3, # probsparse attn factor
"d_model": 512,
"n_heads": 8,
@@ -61,64 +50,97 @@
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
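+# The runner ties the dataset, scaler, model, and metrics together and drives the train/val/test loop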
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
+ "weight_decay": 0.0005,
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/Informer/ExchangeRate.py b/baselines/Informer/ExchangeRate.py
index de6bae8f..e6644ff0 100644
--- a/baselines/Informer/ExchangeRate.py
+++ b/baselines/Informer/ExchangeRate.py
@@ -1,47 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
from basicts.metrics import masked_mae, masked_mse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Informer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Informer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ExchangeRate"
-CFG.DATASET_TYPE = "Exchange Rate"
-CFG.DATASET_INPUT_LEN = 96 # the best in {96, 192, 336, 720}
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Informer"
-CFG.MODEL.ARCH = Informer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ExchangeRate' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Informer
NUM_NODES = 8
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "out_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length\
+    "seq_len": INPUT_LEN, # input sequence length
+    "label_len": INPUT_LEN // 2, # start token length used in decoder
+    "out_len": OUTPUT_LEN, # prediction sequence length
"factor": 3, # probsparse attn factor
"d_model": 512,
"n_heads": 8,
@@ -62,74 +51,107 @@
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/Informer/METR-LA.py b/baselines/Informer/METR-LA.py
deleted file mode 100644
index 11e00f2b..00000000
--- a/baselines/Informer/METR-LA.py
+++ /dev/null
@@ -1,136 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import Informer
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Informer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic Speed"
-CFG.DATASET_INPUT_LEN = 96 # not tested yet
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Informer"
-CFG.MODEL.ARCH = Informer
-NUM_NODES = 207
-CFG.MODEL.PARAM = EasyDict(
- {
- "enc_in": NUM_NODES, # num nodes
- "dec_in": NUM_NODES,
- "c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "out_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length\
- "factor": 3, # probsparse attn factor
- "d_model": 512,
- "n_heads": 8,
- "e_layers": 2, # num of encoder layers
- # "e_layers": [4, 2, 1], # for InformerStack
- "d_layers": 1, # num of decoder layers
- "d_ff": 2048,
- "dropout": 0.05,
- "attn": 'prob', # attention used in encoder, options:[prob, full]
- "embed": "timeF", # [timeF, fixed, learned]
- "activation": "gelu",
- "output_attention": False,
- "distil": True, # whether to use distilling in encoder, using this argument means not using distilling
- "mix": True, # use mix attention in generative decoder
- "num_time_features": 2, # number of used time features [time_of_day, day_of_week, day_of_month, day_of_year]
- "time_of_day_size": 288,
- "day_of_week_size": 7,
- "day_of_month_size": 31,
- "day_of_year_size": 366
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0005,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- 'max_norm': 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Informer/PEMS-BAY.py b/baselines/Informer/PEMS-BAY.py
deleted file mode 100644
index a45af96e..00000000
--- a/baselines/Informer/PEMS-BAY.py
+++ /dev/null
@@ -1,136 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import Informer
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Informer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic Speed"
-CFG.DATASET_INPUT_LEN = 96 # not tested yet
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Informer"
-CFG.MODEL.ARCH = Informer
-NUM_NODES = 325
-CFG.MODEL.PARAM = EasyDict(
- {
- "enc_in": NUM_NODES, # num nodes
- "dec_in": NUM_NODES,
- "c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "out_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length\
- "factor": 3, # probsparse attn factor
- "d_model": 512,
- "n_heads": 8,
- "e_layers": 2, # num of encoder layers
- # "e_layers": [4, 2, 1], # for InformerStack
- "d_layers": 1, # num of decoder layers
- "d_ff": 2048,
- "dropout": 0.05,
- "attn": 'prob', # attention used in encoder, options:[prob, full]
- "embed": "timeF", # [timeF, fixed, learned]
- "activation": "gelu",
- "output_attention": False,
- "distil": True, # whether to use distilling in encoder, using this argument means not using distilling
- "mix": True, # use mix attention in generative decoder
- "num_time_features": 2, # number of used time features [time_of_day, day_of_week, day_of_month, day_of_year]
- "time_of_day_size": 288,
- "day_of_week_size": 7,
- "day_of_month_size": 31,
- "day_of_year_size": 366
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0005,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- 'max_norm': 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Informer/PEMS04.py b/baselines/Informer/PEMS04.py
deleted file mode 100644
index 9905a4cb..00000000
--- a/baselines/Informer/PEMS04.py
+++ /dev/null
@@ -1,136 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import Informer
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Informer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 96 # the best in {96, 192, 336, 720}
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Informer"
-CFG.MODEL.ARCH = Informer
-NUM_NODES = 307
-CFG.MODEL.PARAM = EasyDict(
- {
- "enc_in": NUM_NODES, # num nodes
- "dec_in": NUM_NODES,
- "c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "out_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length\
- "factor": 3, # probsparse attn factor
- "d_model": 512,
- "n_heads": 8,
- "e_layers": 2, # num of encoder layers
- # "e_layers": [4, 2, 1], # for InformerStack
- "d_layers": 1, # num of decoder layers
- "d_ff": 2048,
- "dropout": 0.05,
- "attn": 'prob', # attention used in encoder, options:[prob, full]
- "embed": "timeF", # [timeF, fixed, learned]
- "activation": "gelu",
- "output_attention": False,
- "distil": True, # whether to use distilling in encoder, using this argument means not using distilling
- "mix": True, # use mix attention in generative decoder
- "num_time_features": 2, # number of used time features [time_of_day, day_of_week, day_of_month, day_of_year]
- "time_of_day_size": 288,
- "day_of_week_size": 7,
- "day_of_month_size": 31,
- "day_of_year_size": 366
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0005,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25, 50],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- 'max_norm': 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Informer/PEMS04_LTSF.py b/baselines/Informer/PEMS04_LTSF.py
new file mode 100644
index 00000000..d54aa976
--- /dev/null
+++ b/baselines/Informer/PEMS04_LTSF.py
@@ -0,0 +1,160 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import Informer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 96 # LTSF
+OUTPUT_LEN = 336 # LTSF
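+# LTSF (long-term time series forecasting) setting: a fixed 96-step input and 336-step horizon instead of the dataset's regular short-term lengths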
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Informer
+NUM_NODES = 307
+MODEL_PARAM = {
+ "enc_in": NUM_NODES, # num nodes
+ "dec_in": NUM_NODES,
+ "c_out": NUM_NODES,
+    "seq_len": INPUT_LEN, # input sequence length
+    "label_len": INPUT_LEN // 2, # start token length used in decoder
+    "out_len": OUTPUT_LEN, # prediction sequence length
+ "factor": 3, # probsparse attn factor
+ "d_model": 512,
+ "n_heads": 8,
+ "e_layers": 2, # num of encoder layers
+ # "e_layers": [4, 2, 1], # for InformerStack
+ "d_layers": 1, # num of decoder layers
+ "d_ff": 2048,
+ "dropout": 0.05,
+ "attn": 'prob', # attention used in encoder, options:[prob, full]
+ "embed": "timeF", # [timeF, fixed, learned]
+ "activation": "gelu",
+ "output_attention": False,
+    "distil": True, # whether to use distilling (convolutional down-sampling) between encoder layers
+    "mix": True, # use mix attention in generative decoder
+    "num_time_features": 2, # number of time features used: [time_of_day, day_of_week]
+ "time_of_day_size": 288,
+ "day_of_week_size": 7,
+ "day_of_month_size": 31,
+ "day_of_year_size": 366
+ }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # [raw_data, time_of_day, day_of_week]
+CFG.MODEL.TARGET_FEATURES = [0]
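+# Only the raw traffic flow (feature 0) is predicted; the time features are model inputs only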
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
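+# resolves to checkpoints/Informer/PEMS04_100_96_336 for this configuration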
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.002,
+ "weight_decay": 0.0005,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25, 50],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/Informer/PEMS08.py b/baselines/Informer/PEMS08.py
deleted file mode 100644
index ee8d7ce3..00000000
--- a/baselines/Informer/PEMS08.py
+++ /dev/null
@@ -1,136 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import Informer
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Informer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 96 # the best in {96, 192, 336, 720}
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Informer"
-CFG.MODEL.ARCH = Informer
-NUM_NODES = 170
-CFG.MODEL.PARAM = EasyDict(
- {
- "enc_in": NUM_NODES, # num nodes
- "dec_in": NUM_NODES,
- "c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "out_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length\
- "factor": 3, # probsparse attn factor
- "d_model": 512,
- "n_heads": 8,
- "e_layers": 2, # num of encoder layers
- # "e_layers": [4, 2, 1], # for InformerStack
- "d_layers": 1, # num of decoder layers
- "d_ff": 2048,
- "dropout": 0.05,
- "attn": 'prob', # attention used in encoder, options:[prob, full]
- "embed": "timeF", # [timeF, fixed, learned]
- "activation": "gelu",
- "output_attention": False,
- "distil": True, # whether to use distilling in encoder, using this argument means not using distilling
- "mix": True, # use mix attention in generative decoder
- "num_time_features": 2, # number of used time features [time_of_day, day_of_week, day_of_month, day_of_year]
- "time_of_day_size": 288,
- "day_of_week_size": 7,
- "day_of_month_size": 31,
- "day_of_year_size": 366
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0005,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25, 50],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- 'max_norm': 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Informer/PEMS08_LTSF.py b/baselines/Informer/PEMS08_LTSF.py
new file mode 100644
index 00000000..cce2a7e5
--- /dev/null
+++ b/baselines/Informer/PEMS08_LTSF.py
@@ -0,0 +1,160 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import Informer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 96 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Informer
+NUM_NODES = 170
+MODEL_PARAM = {
+ "enc_in": NUM_NODES, # num nodes
+ "dec_in": NUM_NODES,
+ "c_out": NUM_NODES,
+    "seq_len": INPUT_LEN, # input sequence length
+    "label_len": INPUT_LEN // 2, # start token length used in decoder
+    "out_len": OUTPUT_LEN, # prediction sequence length
+ "factor": 3, # probsparse attn factor
+ "d_model": 512,
+ "n_heads": 8,
+ "e_layers": 2, # num of encoder layers
+ # "e_layers": [4, 2, 1], # for InformerStack
+ "d_layers": 1, # num of decoder layers
+ "d_ff": 2048,
+ "dropout": 0.05,
+ "attn": 'prob', # attention used in encoder, options:[prob, full]
+ "embed": "timeF", # [timeF, fixed, learned]
+ "activation": "gelu",
+ "output_attention": False,
+    "distil": True, # whether to use distilling (convolutional down-sampling) between encoder layers
+    "mix": True, # use mix attention in generative decoder
+    "num_time_features": 2, # number of time features used: [time_of_day, day_of_week]
+ "time_of_day_size": 288,
+ "day_of_week_size": 7,
+ "day_of_month_size": 31,
+ "day_of_year_size": 366
+ }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # [raw_data, time_of_day, day_of_week]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
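+# NULL_VAL is 0.0 for the PEMS traffic datasets (as in the previous configs), so zero readings are treated as missing by the masked metrics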
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.002,
+ "weight_decay": 0.0005,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25, 50],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/Informer/Weather.py b/baselines/Informer/Weather.py
index fb26d8bb..ead04e90 100644
--- a/baselines/Informer/Weather.py
+++ b/baselines/Informer/Weather.py
@@ -1,47 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Informer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Informer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Weather"
-CFG.DATASET_TYPE = "Weather"
-CFG.DATASET_INPUT_LEN = 192 # the best in {96, 192, 336, 720}
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Informer"
-CFG.MODEL.ARCH = Informer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Weather' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Informer
NUM_NODES = 21
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "label_len": CFG.DATASET_INPUT_LEN/2, # start token length used in decoder
- "out_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length\
+    "seq_len": INPUT_LEN, # input sequence length
+    "label_len": INPUT_LEN // 2, # start token length used in decoder
+    "out_len": OUTPUT_LEN, # prediction sequence length
"factor": 3, # probsparse attn factor
"d_model": 512,
"n_heads": 8,
@@ -62,74 +51,107 @@
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
+ "milestones": [1, 25, 50],
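+    # the learning rate is halved (gamma=0.5) at epochs 1, 25, and 50 over the 100-epoch run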
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/Informer/arch/informer_arch.py b/baselines/Informer/arch/informer_arch.py
index 60cbb53b..d0368c64 100644
--- a/baselines/Informer/arch/informer_arch.py
+++ b/baselines/Informer/arch/informer_arch.py
@@ -77,7 +77,7 @@ def __init__(self, enc_in, dec_in, c_out, seq_len, label_len, out_len,
# self.end_conv1 = nn.Conv1d(in_channels=label_len+out_len, out_channels=out_len, kernel_size=1, bias=True)
# self.end_conv2 = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=1, bias=True)
self.projection = nn.Linear(d_model, c_out, bias=True)
-
+
def forward_xformer(self, x_enc: torch.Tensor, x_mark_enc: torch.Tensor, x_dec: torch.Tensor, x_mark_dec: torch.Tensor,
enc_self_mask: torch.Tensor=None, dec_self_mask: torch.Tensor=None, dec_enc_mask: torch.Tensor=None) -> torch.Tensor:
"""Feed forward of Informer. Kindly note that `enc_self_mask`, `dec_self_mask`, and `dec_enc_mask` are not actually used in Informer.
@@ -101,9 +101,9 @@ def forward_xformer(self, x_enc: torch.Tensor, x_mark_enc: torch.Tensor, x_dec:
dec_out = self.dec_embedding(x_dec, x_mark_dec)
dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask)
dec_out = self.projection(dec_out)
-
+
return dec_out[:, -self.pred_len:, :].unsqueeze(-1) # [B, L, N, C]
-
+
def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_seen: int, epoch: int, train: bool, **kwargs) -> torch.Tensor:
"""
@@ -179,7 +179,7 @@ def __init__(self, enc_in, dec_in, c_out, seq_len, label_len, out_len,
# self.end_conv1 = nn.Conv1d(in_channels=label_len+out_len, out_channels=out_len, kernel_size=1, bias=True)
# self.end_conv2 = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=1, bias=True)
self.projection = nn.Linear(d_model, c_out, bias=True)
-
+
def forward_xformer(self, x_enc: torch.Tensor, x_mark_enc: torch.Tensor, x_dec: torch.Tensor, x_mark_dec: torch.Tensor,
enc_self_mask: torch.Tensor=None, dec_self_mask: torch.Tensor=None, dec_enc_mask: torch.Tensor=None) -> torch.Tensor:
"""Feed forward of Informer. Kindly note that `enc_self_mask`, `dec_self_mask`, and `dec_enc_mask` are not actually used in Informer.
diff --git a/baselines/Informer/arch/masking.py b/baselines/Informer/arch/masking.py
index 7fd479e0..8ac5942f 100644
--- a/baselines/Informer/arch/masking.py
+++ b/baselines/Informer/arch/masking.py
@@ -18,7 +18,7 @@ def __init__(self, B, H, L, index, scores, device="cpu"):
torch.arange(H)[None, :, None],
index, :].to(device)
self._mask = indicator.view(scores.shape).to(device)
-
+
@property
def mask(self):
return self._mask
\ No newline at end of file
diff --git a/baselines/Informer/run.sh b/baselines/Informer/run.sh
deleted file mode 100644
index db4bd1fa..00000000
--- a/baselines/Informer/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/Informer/ETTh1.py --gpus '0'
-python experiments/train.py -c baselines/Informer/ETTh2.py --gpus '0'
-python experiments/train.py -c baselines/Informer/ETTm1.py --gpus '0'
-python experiments/train.py -c baselines/Informer/ETTm2.py --gpus '0'
-python experiments/train.py -c baselines/Informer/Electricity.py --gpus '0'
-python experiments/train.py -c baselines/Informer/ExchangeRate.py --gpus '0'
-python experiments/train.py -c baselines/Informer/Weather.py --gpus '0'
-python experiments/train.py -c baselines/Informer/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/Informer/PEMS08.py --gpus '0'
diff --git a/baselines/LSTM/CA.py b/baselines/LSTM/CA.py
deleted file mode 100644
index a041ee43..00000000
--- a/baselines/LSTM/CA.py
+++ /dev/null
@@ -1,117 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
-
-from .arch import LSTM
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Graph WaveNet model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "CA"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "LSTM"
-CFG.MODEL.ARCH = LSTM
-CFG.MODEL.PARAM = {
- # input_dim, embed_dim, hidden_dim, end_dim, num_layer, dropout, horizon
- "input_dim": 2,
- "embed_dim": 32,
- "hidden_dim": 64,
- "end_dim": 512,
- "num_layer": 2,
- "dropout": 0.1,
- "horizon": 12
- }
-CFG.MODEL.FORWARD_FEATURES = [0, 1]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50, 100],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/LSTM/GBA.py b/baselines/LSTM/GBA.py
deleted file mode 100644
index 7a002c06..00000000
--- a/baselines/LSTM/GBA.py
+++ /dev/null
@@ -1,117 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
-
-from .arch import LSTM
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Graph WaveNet model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "GBA"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "LSTM"
-CFG.MODEL.ARCH = LSTM
-CFG.MODEL.PARAM = {
- # input_dim, embed_dim, hidden_dim, end_dim, num_layer, dropout, horizon
- "input_dim": 2,
- "embed_dim": 32,
- "hidden_dim": 64,
- "end_dim": 512,
- "num_layer": 2,
- "dropout": 0.1,
- "horizon": 12
- }
-CFG.MODEL.FORWARD_FEATURES = [0, 1]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50, 100],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/LSTM/GLA.py b/baselines/LSTM/GLA.py
deleted file mode 100644
index 7ba651b9..00000000
--- a/baselines/LSTM/GLA.py
+++ /dev/null
@@ -1,117 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
-
-from .arch import LSTM
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Graph WaveNet model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "GLA"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "LSTM"
-CFG.MODEL.ARCH = LSTM
-CFG.MODEL.PARAM = {
- # input_dim, embed_dim, hidden_dim, end_dim, num_layer, dropout, horizon
- "input_dim": 2,
- "embed_dim": 32,
- "hidden_dim": 64,
- "end_dim": 512,
- "num_layer": 2,
- "dropout": 0.1,
- "horizon": 12
- }
-CFG.MODEL.FORWARD_FEATURES = [0, 1]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50, 100],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/LSTM/SD.py b/baselines/LSTM/SD.py
deleted file mode 100644
index 4d5690b0..00000000
--- a/baselines/LSTM/SD.py
+++ /dev/null
@@ -1,117 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
-
-from .arch import LSTM
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Graph WaveNet model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "SD"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "LSTM"
-CFG.MODEL.ARCH = LSTM
-CFG.MODEL.PARAM = {
- # input_dim, embed_dim, hidden_dim, end_dim, num_layer, dropout, horizon
- "input_dim": 2,
- "embed_dim": 32,
- "hidden_dim": 64,
- "end_dim": 512,
- "num_layer": 2,
- "dropout": 0.1,
- "horizon": 12
- }
-CFG.MODEL.FORWARD_FEATURES = [0, 1]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50, 100],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/LSTM/arch/__init__.py b/baselines/LSTM/arch/__init__.py
deleted file mode 100644
index 77096196..00000000
--- a/baselines/LSTM/arch/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .lstm_arch import LSTM
\ No newline at end of file
diff --git a/baselines/LSTM/arch/lstm_arch.py b/baselines/LSTM/arch/lstm_arch.py
deleted file mode 100644
index 456e21d4..00000000
--- a/baselines/LSTM/arch/lstm_arch.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-class LSTM(nn.Module):
- def __init__(self, input_dim, embed_dim, hidden_dim, end_dim, num_layer, dropout, horizon):
- """Init LSTM.
-
- Args:
- input_dim (int): number of input features.
- embed_dim (int): dimension of the input embedding layer (a linear layer).
- hidden_dim (int): hidden size in LSTM.
- end_dim (int): hidden dimension of the output linear layer.
- num_layer (int): number of layers in LSTM.
- dropout (float): dropout rate.
- horizon (int): number of time steps to be predicted.
- """
- super(LSTM, self).__init__()
- self.start_conv = nn.Conv2d(in_channels=input_dim,
- out_channels=embed_dim,
- kernel_size=(1,1))
-
- self.lstm = nn.LSTM(input_size=embed_dim, hidden_size=hidden_dim, num_layers=num_layer, batch_first=True, dropout=dropout)
-
- self.end_linear1 = nn.Linear(hidden_dim, end_dim)
- self.end_linear2 = nn.Linear(end_dim, horizon)
-
- def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_seen: int, epoch: int, train: bool, **kwargs) -> torch.Tensor:
- """Feedforward function of LSTM.
-
- Args:
- history_data (torch.Tensor): shape [B, L, N, C]
-
- Returns:
- torch.Tensor: [B, L, N, 1]
- """
- x = history_data.transpose(1, 3)
- b, c, n, l = x.shape
-
- x = x.transpose(1,2).reshape(b*n, c, 1, l)
- x = self.start_conv(x).squeeze().transpose(1, 2)
-
- out, _ = self.lstm(x)
- x = out[:, -1, :]
-
- x = F.relu(self.end_linear1(x))
- x = self.end_linear2(x)
- x = x.reshape(b, n, l, 1).transpose(1, 2)
- return x
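
The deleted LSTM baseline flattens the node dimension into the batch before the recurrence, then projects the last hidden state to the horizon. A quick shape check, assuming the `LSTM` class from the removed `lstm_arch.py` above is in scope:

```python
import torch

# Parameters match the deleted LSTM config files (CA/GBA/GLA/SD).
model = LSTM(input_dim=2, embed_dim=32, hidden_dim=64, end_dim=512,
             num_layer=2, dropout=0.1, horizon=12)
history = torch.randn(4, 12, 207, 2)  # [B, L, N, C]
out = model(history_data=history, future_data=None,
            batch_seen=0, epoch=0, train=False)
# Note: the final reshape(b, n, l, 1) only works because the input
# length l equals the 12-step horizon in these configs.
print(out.shape)  # torch.Size([4, 12, 207, 1])
```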
diff --git a/baselines/LightGBM/README.md b/baselines/LightGBM/README.md
deleted file mode 100644
index 01f25e8b..00000000
--- a/baselines/LightGBM/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-Requirements:
-
-```
-pip install lightgbm
-```
diff --git a/baselines/LightGBM/Weather.py b/baselines/LightGBM/Weather.py
deleted file mode 100644
index a7931ec2..00000000
--- a/baselines/LightGBM/Weather.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import os
-import sys
-sys.path.append(os.path.abspath(__file__ + "/.."))
-
-# from evaluate_ar import evaluate
-from evaluate import evaluate
-
-import numpy as np
-
-# construct configs
-dataset_name = "Weather"
-input_len = 336
-output_len = 336
-gpu_num = 1
-null_val = np.nan
-train_data_dir = "datasets/" + dataset_name
-rescale = True
-batch_size = 128 # only used for collecting data
-project_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-# print(evaluate(project_dir, train_data_dir, input_len, output_len, rescale, null_val, batch_size, patch_len=1))
-print(evaluate(project_dir, train_data_dir, input_len, output_len, rescale, null_val, batch_size))
diff --git a/baselines/LightGBM/evaluate.py b/baselines/LightGBM/evaluate.py
deleted file mode 100644
index 73cdc87c..00000000
--- a/baselines/LightGBM/evaluate.py
+++ /dev/null
@@ -1,80 +0,0 @@
-import torch
-import lightgbm as lgb
-import os
-import sys
-sys.path.append("/workspace/S22/BasicTS")
-from basicts.utils import load_pkl
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mae, masked_rmse, masked_mape, masked_wape
-from basicts.data import SCALER_REGISTRY
-
-
-def evaluate(project_dir, train_data_dir, input_len, output_len, rescale, null_val, batch_size):
-
- # construct dataset
- data_file_path = project_dir + "/{0}/data_in_{1}_out_{2}_rescale_{3}.pkl".format(train_data_dir, input_len, output_len, rescale)
- index_file_path = project_dir + "/{0}/index_in_{1}_out_{2}_rescale_{3}.pkl".format(train_data_dir, input_len, output_len, rescale)
-
- train_set = TimeSeriesForecastingDataset(data_file_path, index_file_path, mode="train")
- train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
-
- valid_set = TimeSeriesForecastingDataset(data_file_path, index_file_path, mode="valid")
- valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=batch_size, shuffle=False)
-
- test_set = TimeSeriesForecastingDataset(data_file_path, index_file_path, mode="test")
- test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)
-
- # training & validation
- Xs_train = []
- Ys_train = []
- Xs_valid = []
- Ys_valid = []
- Xs_test = []
- Ys_test = []
-
- for i, (target, data) in enumerate(train_loader):
- B, L, N, C = data.shape
- data = data.transpose(1, 2).reshape(B*N, L, C)[:, :, 0]
- target = target.transpose(1, 2).reshape(B*N, L, C)[:, :, 0]
- Xs_train.append(data)
- Ys_train.append(target)
-
- for i, (target, data) in enumerate(valid_loader):
- B, L, N, C = data.shape
- data = data.transpose(1, 2).reshape(B*N, L, C)[:, :, 0]
- target = target.transpose(1, 2).reshape(B*N, L, C)[:, :, 0]
- Xs_valid.append(data)
- Ys_valid.append(target)
-
- for i, (target, data) in enumerate(test_loader):
- B, L, N, C = data.shape
- data = data.transpose(1, 2).reshape(B*N, L, C)[:, :, 0]
- target = target.transpose(1, 2).reshape(B*N, L, C)[:, :, 0]
- Xs_test.append(data)
- Ys_test.append(target)
-
- Xs_train = torch.cat(Xs_train, dim=0).numpy()
- Ys_train = torch.cat(Ys_train, dim=0).numpy()
- Xs_valid = torch.cat(Xs_valid, dim=0).numpy()
- Ys_valid = torch.cat(Ys_valid, dim=0).numpy()
- Xs_test = torch.cat(Xs_test, dim=0).numpy()
- Ys_test = torch.cat(Ys_test, dim=0).numpy()
-
- # direct forecasting
- from sklearn.multioutput import MultiOutputRegressor
-    model = MultiOutputRegressor(lgb.LGBMRegressor(), n_jobs=-1)
- model.fit(Xs_train, Ys_train)
- # inference
- preds_test = model.predict(Xs_test)
- print(preds_test.shape)
- # rescale
- scaler = load_pkl(project_dir + "/{0}/scaler_in_{1}_out_{2}_rescale_{3}.pkl".format(train_data_dir, input_len, output_len, rescale))
- preds_test = torch.Tensor(preds_test).view(-1, N, output_len).transpose(1, 2).unsqueeze(-1)
- Ys_test = torch.Tensor(Ys_test).view(-1, N, output_len).transpose(1, 2).unsqueeze(-1)
- prediction = SCALER_REGISTRY.get(scaler["func"])(preds_test, **scaler["args"])
- real_value = SCALER_REGISTRY.get(scaler["func"])(Ys_test, **scaler["args"])
- # print results
- print("MAE: ", masked_mae(prediction, real_value, null_val).item())
- print("RMSE: ", masked_rmse(prediction, real_value, null_val).item())
- print("MAPE: {:.2f}%".format(masked_mape(prediction, real_value, null_val) * 100))
- print("WAPE: {:.2f}%".format(masked_wape(prediction, real_value, null_val) * 100))
diff --git a/baselines/LightGBM/evaluate_ar.py b/baselines/LightGBM/evaluate_ar.py
deleted file mode 100644
index 8042a847..00000000
--- a/baselines/LightGBM/evaluate_ar.py
+++ /dev/null
@@ -1,96 +0,0 @@
-import torch
-import lightgbm as lgb
-import os
-import sys
-sys.path.append("/workspace/S22/BasicTS")
-import numpy as np
-from tqdm import tqdm
-from basicts.utils import load_pkl
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mae, masked_rmse, masked_mape, masked_wape
-from basicts.data import SCALER_REGISTRY
-
-
-def evaluate(project_dir, train_data_dir, input_len, output_len, rescale, null_val, batch_size, patch_len, down_sampling=1):
- assert output_len % patch_len == 0
- num_steps = output_len // patch_len
- # construct dataset
- data_file_path = project_dir + "/{0}/data_in_{1}_out_{2}_rescale_{3}.pkl".format(train_data_dir, input_len, output_len, rescale)
- index_file_path = project_dir + "/{0}/index_in_{1}_out_{2}_rescale_{3}.pkl".format(train_data_dir, input_len, output_len, rescale)
-
- train_set = TimeSeriesForecastingDataset(data_file_path, index_file_path, mode="train")
- train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
-
- valid_set = TimeSeriesForecastingDataset(data_file_path, index_file_path, mode="valid")
- valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=batch_size, shuffle=False)
-
- test_set = TimeSeriesForecastingDataset(data_file_path, index_file_path, mode="test")
- test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)
-
- # training & validation
- Xs_train = []
- Ys_train = []
- Xs_valid = []
- Ys_valid = []
- Xs_test = []
- Ys_test = []
-
- for i, (target, data) in enumerate(train_loader):
- B, L, N, C = data.shape
- data = data.transpose(1, 2).reshape(B*N, L, C)[:, :, 0]
- B, L, N, C = target.shape
- target = target.transpose(1, 2).reshape(B*N, L, C)[:, :, 0]
- Xs_train.append(data)
- Ys_train.append(target)
-
- Xs_train = torch.cat(Xs_train, dim=0).numpy()[::down_sampling, :]
- Ys_train = torch.cat(Ys_train, dim=0).numpy()[::down_sampling, :][:, :patch_len]
- print("Xs_train: ", Xs_train.shape)
-
- # direct forecasting
- from sklearn.multioutput import MultiOutputRegressor
-    model = MultiOutputRegressor(lgb.LGBMRegressor(), n_jobs=-1)
- model.fit(Xs_train, Ys_train)
-
- import pickle
-    # save model
- with open("model.pkl", "wb") as f:
- pickle.dump(model, f)
-
- for i, (target, data) in enumerate(test_loader):
- B, L, N, C = data.shape
- data = data.transpose(1, 2).reshape(B*N, L, C)[:, :, 0]
- B, L, N, C = target.shape
- target = target.transpose(1, 2).reshape(B*N, L, C)[:, :, 0]
- Xs_test.append(data)
- Ys_test.append(target)
-
- Xs_test = torch.cat(Xs_test, dim=0).numpy()
- Ys_test = torch.cat(Ys_test, dim=0).numpy()
- print("Xs_test: ", Xs_test.shape)
-
- # inference
- preds_test = []
- input_data = Xs_test
-
- for i in tqdm(range(num_steps)):
- # Predict the next step
- pred_step = model.predict(input_data)
- preds_test.append(pred_step)
- # Update input_data to include predicted step for next prediction
- input_data = np.concatenate([input_data[:, patch_len:], pred_step[:, :]], axis=1)
- # concat preds_test
- # preds_test = np.vstack(preds_test).T
- preds_test = np.concatenate(preds_test, axis=1)
-
- # rescale
- scaler = load_pkl(project_dir + "/{0}/scaler_in_{1}_out_{2}_rescale_{3}.pkl".format(train_data_dir, input_len, output_len, rescale))
- preds_test = torch.Tensor(preds_test).view(-1, N, output_len).transpose(1, 2).unsqueeze(-1)
- Ys_test = torch.Tensor(Ys_test).view(-1, N, output_len).transpose(1, 2).unsqueeze(-1)
- prediction = SCALER_REGISTRY.get(scaler["func"])(preds_test, **scaler["args"])
- real_value = SCALER_REGISTRY.get(scaler["func"])(Ys_test, **scaler["args"])
- # print results
- print("MAE: ", masked_mae(prediction, real_value, null_val).item())
- print("RMSE: ", masked_rmse(prediction, real_value, null_val).item())
- print("MAPE: {:.2f}%".format(masked_mape(prediction, real_value, null_val) * 100))
- print("WAPE: {:.2f}%".format(masked_wape(prediction, real_value, null_val) * 100))
diff --git a/baselines/LightGBM/evaluate_m4_ar.py b/baselines/LightGBM/evaluate_m4_ar.py
deleted file mode 100644
index 03768745..00000000
--- a/baselines/LightGBM/evaluate_m4_ar.py
+++ /dev/null
@@ -1,90 +0,0 @@
-import torch
-import lightgbm as lgb
-import os
-import sys
-sys.path.append("/workspace/S22/BasicTS")
-import numpy as np
-from tqdm import tqdm
-from basicts.utils import load_pkl
-from basicts.data import M4ForecastingDataset
-from basicts.metrics import masked_mae, masked_rmse, masked_mape, masked_wape
-from basicts.data import SCALER_REGISTRY
-
-
-def evaluate(project_dir, train_data_dir, input_len, output_len, rescale, null_val, batch_size, patch_len, down_sampling=1, seasonal_pattern=None):
- assert output_len % patch_len == 0
- num_steps = output_len // patch_len
- # construct dataset
- data_file_path = project_dir + "/{0}/data_in_{1}_out_{2}_rescale_{3}.pkl".format(train_data_dir, input_len, output_len, rescale)
- mask_file_path = project_dir + "/{0}/mask_in_{1}_out_{2}_rescale_{3}.pkl".format(train_data_dir, input_len, output_len, rescale)
- index_file_path = project_dir + "/{0}/index_in_{1}_out_{2}_rescale_{3}.pkl".format(train_data_dir, input_len, output_len, rescale)
-
- train_set = M4ForecastingDataset(data_file_path, index_file_path, mask_file_path, mode="train")
- train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
-
- test_set = M4ForecastingDataset(data_file_path, index_file_path, mask_file_path, mode="test")
- test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)
-
- # training & validation
- Xs_train = []
- Ys_train = []
- Xs_test = []
- Ys_test = []
-
- for i, (target, data, future_mask, history_mask) in enumerate(train_loader):
- B, L, N, C = data.shape
- data = data.transpose(1, 2).reshape(B*N, L, C)[:, :, 0]
- B, L, N, C = target.shape
- target = target.transpose(1, 2).reshape(B*N, L, C)[:, :, 0]
- Xs_train.append(data)
- Ys_train.append(target)
-
- Xs_train = torch.cat(Xs_train, dim=0).numpy()[::down_sampling, :]
- Ys_train = torch.cat(Ys_train, dim=0).numpy()[::down_sampling, :][:, :patch_len]
- print("Xs_train: ", Xs_train.shape)
-
- # direct forecasting
- from sklearn.multioutput import MultiOutputRegressor
-    model = MultiOutputRegressor(lgb.LGBMRegressor(), n_jobs=-1)
- model.fit(Xs_train, Ys_train)
-
- for i, (target, data, future_mask, history_mask) in enumerate(test_loader):
- B, L, N, C = data.shape
- data = data.transpose(1, 2).reshape(B*N, L, C)[:, :, 0]
- B, L, N, C = target.shape
- target = target.transpose(1, 2).reshape(B*N, L, C)[:, :, 0]
- Xs_test.append(data)
- Ys_test.append(target)
-
- Xs_test = torch.cat(Xs_test, dim=0).numpy()
- Ys_test = torch.cat(Ys_test, dim=0).numpy()
- print("Xs_test: ", Xs_test.shape)
-
- # inference
- preds_test = []
- input_data = Xs_test
-
- for i in tqdm(range(num_steps)):
- # Predict the next step
- pred_step = model.predict(input_data)
- preds_test.append(pred_step)
- # Update input_data to include predicted step for next prediction
- input_data = np.concatenate([input_data[:, patch_len:], pred_step[:, :]], axis=1)
- # concat preds_test
- # preds_test = np.vstack(preds_test).T
- preds_test = np.concatenate(preds_test, axis=1)
-
- # rescale
- preds_test = torch.Tensor(preds_test).view(-1, N, output_len).transpose(1, 2).unsqueeze(-1)
- Ys_test = torch.Tensor(Ys_test).view(-1, N, output_len).transpose(1, 2).unsqueeze(-1)
- prediction = preds_test
- real_value = Ys_test
- np.save("/workspace/S22/BasicTS/baselines/LightGBM/M4_{0}.npy".format(seasonal_pattern), prediction.unsqueeze(-1).unsqueeze(-1).numpy())
-
- # print results
- print("MAE: ", masked_mae(prediction, real_value, null_val).item())
- print("RMSE: ", masked_rmse(prediction, real_value, null_val).item())
- print("MAPE: {:.2f}%".format(masked_mape(prediction, real_value, null_val) * 100))
- print("WAPE: {:.2f}%".format(masked_wape(prediction, real_value, null_val) * 100))
- # save
-
\ No newline at end of file
diff --git a/baselines/LightGBM/run.sh b/baselines/LightGBM/run.sh
deleted file mode 100644
index a0724000..00000000
--- a/baselines/LightGBM/run.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/bash
-# python baselines/LightGBM/METR-LA.py
-# python baselines/LightGBM/PEMS-BAY.py
-# python baselines/LightGBM/PEMS03.py
-# python baselines/LightGBM/PEMS04.py
-# python baselines/LightGBM/PEMS07.py
-# python baselines/LightGBM/PEMS08.py
-
-# python baselines/LightGBM/ETTh1.py
-# python baselines/LightGBM/ETTm1.py
-# python baselines/LightGBM/Weather.py
-# python baselines/LightGBM/PEMS08_ltsf.py
-# python baselines/LightGBM/PEMS04_ltsf.py
-
-python baselines/LightGBM/Electricity.py
-python baselines/LightGBM/ExchangeRate.py
diff --git a/baselines/Linear/ETTh1.py b/baselines/Linear/ETTh1.py
deleted file mode 100644
index 727cd4a0..00000000
--- a/baselines/Linear/ETTh1.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import Linear
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Linear model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Linear"
-CFG.MODEL.ARCH = Linear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 7
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0003,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
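
The `Linear` arch these deleted configs import is not itself touched in this patch. As a hedged sketch only, the `individual`/`enc_in` parameters likely follow the LTSF-Linear convention: one `seq_len -> pred_len` projection per channel when `individual=True`, a single shared projection otherwise.

```python
import torch
import torch.nn as nn

class LinearSketch(nn.Module):
    """Illustrative stand-in for the Linear arch (an assumption, not the
    repository's actual implementation)."""
    def __init__(self, seq_len: int, pred_len: int, individual: bool, enc_in: int):
        super().__init__()
        self.individual = individual
        if individual:
            self.layers = nn.ModuleList(nn.Linear(seq_len, pred_len) for _ in range(enc_in))
        else:
            self.layer = nn.Linear(seq_len, pred_len)

    def forward(self, x: torch.Tensor) -> torch.Tensor:  # x: [B, L, N]
        x = x.transpose(1, 2)  # [B, N, L]: project along the time axis
        if self.individual:
            x = torch.stack([layer(x[:, i]) for i, layer in enumerate(self.layers)], dim=1)
        else:
            x = self.layer(x)
        return x.transpose(1, 2)  # [B, pred_len, N]

print(LinearSketch(336, 336, individual=False, enc_in=7)(torch.randn(2, 336, 7)).shape)
```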
diff --git a/baselines/Linear/ETTh2.py b/baselines/Linear/ETTh2.py
deleted file mode 100644
index 21bcd8ad..00000000
--- a/baselines/Linear/ETTh2.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import Linear
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Linear model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Linear"
-CFG.MODEL.ARCH = Linear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 7
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0003,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Linear/ETTm1.py b/baselines/Linear/ETTm1.py
deleted file mode 100644
index ab4f84d7..00000000
--- a/baselines/Linear/ETTm1.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import Linear
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Linear model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Linear"
-CFG.MODEL.ARCH = Linear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 7
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0003,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Linear/ETTm2.py b/baselines/Linear/ETTm2.py
deleted file mode 100644
index 36b9499e..00000000
--- a/baselines/Linear/ETTm2.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import Linear
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Linear model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Linear"
-CFG.MODEL.ARCH = Linear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 7
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0003,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Linear/Electricity.py b/baselines/Linear/Electricity.py
deleted file mode 100644
index 866d094b..00000000
--- a/baselines/Linear/Electricity.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import Linear
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Linear model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Electricity"
-CFG.DATASET_TYPE = "Electricity"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Linear"
-CFG.MODEL.ARCH = Linear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 321
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0003,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Linear/ExchangeRate.py b/baselines/Linear/ExchangeRate.py
deleted file mode 100644
index 85ac8426..00000000
--- a/baselines/Linear/ExchangeRate.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import Linear
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Linear model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ExchangeRate"
-CFG.DATASET_TYPE = "Exchange Rate"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Linear"
-CFG.MODEL.ARCH = Linear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 8
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0003,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Linear/METR-LA.py b/baselines/Linear/METR-LA.py
deleted file mode 100644
index f6312927..00000000
--- a/baselines/Linear/METR-LA.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import Linear
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Linear model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic Speed"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Linear"
-CFG.MODEL.ARCH = Linear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 207
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Linear/PEMS-BAY.py b/baselines/Linear/PEMS-BAY.py
deleted file mode 100644
index 0882494c..00000000
--- a/baselines/Linear/PEMS-BAY.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import Linear
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Linear model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic Speed"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Linear"
-CFG.MODEL.ARCH = Linear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 325
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Linear/PEMS04.py b/baselines/Linear/PEMS04.py
deleted file mode 100644
index 9af3b46b..00000000
--- a/baselines/Linear/PEMS04.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import Linear
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Linear model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Linear"
-CFG.MODEL.ARCH = Linear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 307
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Linear/PEMS08.py b/baselines/Linear/PEMS08.py
deleted file mode 100644
index 815f59aa..00000000
--- a/baselines/Linear/PEMS08.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import Linear
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Linear model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Linear"
-CFG.MODEL.ARCH = Linear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 170
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Linear/Weather.py b/baselines/Linear/Weather.py
deleted file mode 100644
index e856e0b3..00000000
--- a/baselines/Linear/Weather.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import Linear
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Linear model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Weather"
-CFG.DATASET_TYPE = "Weather"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Linear"
-CFG.MODEL.ARCH = Linear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 21
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0003,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
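
CFG.EVAL.HORIZONS asks the runner to report metrics at specific prediction steps in addition to the full-horizon average. A sketch of the per-horizon slicing, with hypothetical tensors sized for this Weather setup:

import torch

pred = torch.randn(4, 336, 21, 1)    # [B, L_out, N, C], hypothetical predictions
target = torch.randn(4, 336, 21, 1)
for h in [12, 24, 48, 96, 192, 288, 336]:
    mae_h = (pred[:, h - 1] - target[:, h - 1]).abs().mean().item()
    print(f"horizon {h}: MAE {mae_h:.4f}")  # metric at prediction step h (1-indexed)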
diff --git a/baselines/Linear/arch/__init__.py b/baselines/Linear/arch/__init__.py
deleted file mode 100644
index b632e725..00000000
--- a/baselines/Linear/arch/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .linear import Linear
\ No newline at end of file
diff --git a/baselines/Linear/arch/linear.py b/baselines/Linear/arch/linear.py
deleted file mode 100644
index 67d74d9e..00000000
--- a/baselines/Linear/arch/linear.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import torch
-import torch.nn as nn
-
-class Linear(nn.Module):
- """
- Paper: Are Transformers Effective for Time Series Forecasting?
- Link: https://arxiv.org/abs/2205.13504
- Official Code: https://github.com/cure-lab/DLinear
- """
-
- def __init__(self, **model_args):
- super(Linear, self).__init__()
- self.seq_len = model_args["seq_len"]
- self.pred_len = model_args["pred_len"]
- self.Linear = nn.Linear(self.seq_len, self.pred_len)
-
- def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_seen: int, epoch: int, train: bool, **kwargs) -> torch.Tensor:
-        """Feedforward pass of Linear.
-
- Args:
- history_data (torch.Tensor): history data with shape [B, L, N, C]
-
- Returns:
- torch.Tensor: prediction with shape [B, L, N, C]
- """
-
- assert history_data.shape[-1] == 1 # only use the target feature
- history_data = history_data[..., 0] # B, L, N
- prediction = self.Linear(history_data.permute(0, 2, 1)).permute(0, 2, 1).unsqueeze(-1) # B, L, N, 1
- return prediction
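
The permute/unsqueeze chain in the forward pass is easy to misread; a standalone shape check with hypothetical PEMS-BAY-sized tensors:

import torch
import torch.nn as nn

B, L, N, C = 4, 336, 325, 1            # hypothetical batch: [B, L, N, C]
history = torch.randn(B, L, N, C)
linear = nn.Linear(L, L)               # seq_len -> pred_len (both 336 here)

x = history[..., 0]                    # [B, L, N]: keep only the target feature
y = linear(x.permute(0, 2, 1))         # [B, N, L]: project along the time axis
pred = y.permute(0, 2, 1).unsqueeze(-1)
assert pred.shape == (B, L, N, 1)      # matches the documented output shape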
diff --git a/baselines/Linear/run.sh b/baselines/Linear/run.sh
deleted file mode 100644
index bacf125c..00000000
--- a/baselines/Linear/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/Linear/ETTh1.py --gpus '0'
-python experiments/train.py -c baselines/Linear/ETTh2.py --gpus '0'
-python experiments/train.py -c baselines/Linear/ETTm1.py --gpus '0'
-python experiments/train.py -c baselines/Linear/ETTm2.py --gpus '0'
-python experiments/train.py -c baselines/Linear/Electricity.py --gpus '0'
-python experiments/train.py -c baselines/Linear/ExchangeRate.py --gpus '0'
-python experiments/train.py -c baselines/Linear/Weather.py --gpus '0'
-python experiments/train.py -c baselines/Linear/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/Linear/PEMS08.py --gpus '0'
diff --git a/baselines/MLP/M4.py b/baselines/MLP/M4.py
deleted file mode 100644
index 11e70ca7..00000000
--- a/baselines/MLP/M4.py
+++ /dev/null
@@ -1,105 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.runners import M4ForecastingRunner
-from basicts.losses import masked_mae
-from basicts.data import M4ForecastingDataset
-
-from .mlp_arch import MultiLayerPerceptron
-
-def get_cfg(seasonal_pattern):
- assert seasonal_pattern in ["Yearly", "Quarterly", "Monthly", "Weekly", "Daily", "Hourly"]
- prediction_len = {"Yearly": 6, "Quarterly": 8, "Monthly": 18, "Weekly": 13, "Daily": 14, "Hourly": 48}[seasonal_pattern]
- history_size = 2
- history_len = history_size * prediction_len
-
- CFG = EasyDict()
-
- # ================= general ================= #
-    CFG.DESCRIPTION = "Multi-layer perceptron model configuration"
- CFG.RUNNER = M4ForecastingRunner
- CFG.DATASET_CLS = M4ForecastingDataset
- CFG.DATASET_NAME = "M4_" + seasonal_pattern
- CFG.DATASET_INPUT_LEN = history_len
- CFG.DATASET_OUTPUT_LEN = prediction_len
- CFG.GPU_NUM = 1
-
- # ================= environment ================= #
- CFG.ENV = EasyDict()
- CFG.ENV.SEED = 1
- CFG.ENV.CUDNN = EasyDict()
- CFG.ENV.CUDNN.ENABLED = True
-
- # ================= model ================= #
- CFG.MODEL = EasyDict()
- CFG.MODEL.NAME = "MultiLayerPerceptron"
- CFG.MODEL.ARCH = MultiLayerPerceptron
- CFG.MODEL.PARAM = {
- "history_seq_len": CFG.DATASET_INPUT_LEN,
- "prediction_seq_len": CFG.DATASET_OUTPUT_LEN,
- "hidden_dim": 32
- }
- CFG.MODEL.FORWARD_FEATURES = [0]
- CFG.MODEL.TARGET_FEATURES = [0]
-
- # ================= optim ================= #
- CFG.TRAIN = EasyDict()
- CFG.TRAIN.LOSS = masked_mae
- CFG.TRAIN.OPTIM = EasyDict()
- CFG.TRAIN.OPTIM.TYPE = "Adam"
- CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 1.0e-5,
- "eps": 1.0e-8
- }
- CFG.TRAIN.LR_SCHEDULER = EasyDict()
- CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
- CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 30, 38, 46, 54, 62, 70, 80],
- "gamma": 0.5
- }
-
- # ================= train ================= #
- CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
- }
- CFG.TRAIN.NUM_EPOCHS = 5
- CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
- )
- # train data
- CFG.TRAIN.DATA = EasyDict()
- # read data
- CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
- # dataloader args, optional
- CFG.TRAIN.DATA.BATCH_SIZE = 32
- CFG.TRAIN.DATA.PREFETCH = False
- CFG.TRAIN.DATA.SHUFFLE = True
- CFG.TRAIN.DATA.NUM_WORKERS = 2
- CFG.TRAIN.DATA.PIN_MEMORY = False
-
- # ================= test ================= #
- CFG.TEST = EasyDict()
- CFG.TEST.INTERVAL = CFG.TRAIN.NUM_EPOCHS
-    # evaluation
- # test data
- CFG.TEST.DATA = EasyDict()
- # read data
- CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
- # dataloader args, optional
- CFG.TEST.DATA.BATCH_SIZE = 32
- CFG.TEST.DATA.PREFETCH = False
- CFG.TEST.DATA.SHUFFLE = False
- CFG.TEST.DATA.NUM_WORKERS = 2
- CFG.TEST.DATA.PIN_MEMORY = False
-
- # ================= evaluate ================= #
- CFG.EVAL = EasyDict()
- CFG.EVAL.HORIZONS = []
- CFG.EVAL.SAVE_PATH = os.path.abspath(__file__ + "/..")
-
- return CFG
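
get_cfg is parameterized by the seasonal pattern, so one file covers all six M4 subsets. A sketch of how a caller can enumerate them (the training entry point itself is not part of this file):

for pattern in ["Yearly", "Quarterly", "Monthly", "Weekly", "Daily", "Hourly"]:
    cfg = get_cfg(pattern)
    # each subset forecasts a different horizon with 2x that much history
    print(cfg.DATASET_NAME, cfg.DATASET_INPUT_LEN, cfg.DATASET_OUTPUT_LEN)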
diff --git a/baselines/MLP/MLP_METR-LA.py b/baselines/MLP/MLP_METR-LA.py
deleted file mode 100644
index 5394c89f..00000000
--- a/baselines/MLP/MLP_METR-LA.py
+++ /dev/null
@@ -1,113 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-
-from .mlp_arch import MultiLayerPerceptron
-
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Multi-layer perceptron model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "MultiLayerPerceptron"
-CFG.MODEL.ARCH = MultiLayerPerceptron
-CFG.MODEL.PARAM = {
- "history_seq_len": CFG.DATASET_INPUT_LEN,
- "prediction_seq_len": CFG.DATASET_OUTPUT_LEN,
- "hidden_dim": 32
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 1.0e-5,
- "eps": 1.0e-8
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 30, 38, 46, 54, 62, 70, 80],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 32
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# evaluation
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 32
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
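
CFG.NULL_VAL = 0.0 tells the masked loss to treat zero readings (missing METR-LA sensor values) as absent. A minimal reimplementation of the idea, not the exact basicts masked_mae (which also handles NaN nulls):

import torch

def masked_mae_sketch(pred: torch.Tensor, target: torch.Tensor,
                      null_val: float = 0.0) -> torch.Tensor:
    mask = (target != null_val).float()       # 1 where observed, 0 where missing
    mask = mask / (mask.mean() + 1e-8)        # renormalize so the scale stays stable
    return (torch.abs(pred - target) * mask).mean()

target = torch.tensor([[60.0, 0.0], [55.0, 58.0]])  # 0.0 marks a missing reading
pred = torch.tensor([[58.0, 10.0], [54.0, 60.0]])
print(masked_mae_sketch(pred, target))  # the 0.0 cell contributes nothing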
diff --git a/baselines/MLP/mlp_arch.py b/baselines/MLP/mlp_arch.py
deleted file mode 100644
index d2edba0d..00000000
--- a/baselines/MLP/mlp_arch.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import torch
-from torch import nn
-
-class MultiLayerPerceptron(nn.Module):
-    """Two fully connected layers."""
-
- def __init__(self, history_seq_len: int, prediction_seq_len: int, hidden_dim: int):
- super().__init__()
- self.fc1 = nn.Linear(history_seq_len, hidden_dim)
- self.fc2 = nn.Linear(hidden_dim, prediction_seq_len)
- self.act = nn.ReLU()
-
- def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_seen: int, epoch: int, train: bool, **kwargs) -> torch.Tensor:
-        """Feedforward function of the MLP.
-
- Args:
- history_data (torch.Tensor): inputs with shape [B, L, N, C].
-
- Returns:
- torch.Tensor: outputs with shape [B, L, N, C]
- """
-
- history_data = history_data[..., 0].transpose(1, 2) # B, N, L
- prediction = self.fc2(self.act(self.fc1(history_data))).transpose(1, 2) # B, L, N
- return prediction.unsqueeze(-1) # B, L, N, C
\ No newline at end of file
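
A quick shape check for the MLP above, with a hypothetical METR-LA-sized batch:

import torch

model = MultiLayerPerceptron(history_seq_len=12, prediction_seq_len=12, hidden_dim=32)
history = torch.randn(4, 12, 207, 1)   # [B, L, N, C]
out = model(history, future_data=None, batch_seen=0, epoch=0, train=False)
assert out.shape == (4, 12, 207, 1)    # unused forward args mirror the runner call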
diff --git a/baselines/MTGNN/METR-LA.py b/baselines/MTGNN/METR-LA.py
index fbc66766..b4b96486 100644
--- a/baselines/MTGNN/METR-LA.py
+++ b/baselines/MTGNN/METR-LA.py
@@ -1,49 +1,37 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import MTGNN
from .runner import MTGNNRunner
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "MTGNN model configuration"
-CFG.RUNNER = MTGNNRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "MTGNN"
-CFG.MODEL.ARCH = MTGNN
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'METR-LA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = MTGNN
buildA_true = True
num_nodes = 207
if buildA_true: # self-learned adjacency matrix
adj_mx = None
else: # use predefined adjacency matrix
- _, adj_mx = load_adj("datasets/" + CFG.DATASET_NAME + "/adj_mx.pkl", "doubletransition")
+ _, adj_mx = load_adj("datasets/" + DATA_NAME + "/adj_mx.pkl", "doubletransition")
adj_mx = torch.tensor(adj_mx)-torch.eye(num_nodes)
-
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"gcn_true" : True,
"buildA_true": buildA_true,
"gcn_depth": 2,
@@ -65,77 +53,112 @@
"tanhalpha":3,
"layer_norm_affline":True
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'MTGNN model configuration'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = MTGNNRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
+CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001,
"weight_decay": 0.0001,
}
-
-# ================= train ================= #
-CFG.TRAIN.CUSTOM = EasyDict() # MTGNN custom training args
-CFG.TRAIN.CUSTOM.STEP_SIZE = 100
-CFG.TRAIN.CUSTOM.NUM_NODES = num_nodes
-CFG.TRAIN.CUSTOM.NUM_SPLIT = 1
-
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
## curriculum learning
CFG.TRAIN.CL = EasyDict()
CFG.TRAIN.CL.WARM_EPOCHS = 0
CFG.TRAIN.CL.CL_EPOCHS = 3
CFG.TRAIN.CL.PREDICTION_LENGTH = 12
+# MTGNN custom training args
+CFG.TRAIN.CUSTOM = EasyDict()
+CFG.TRAIN.CUSTOM.STEP_SIZE = 100
+CFG.TRAIN.CUSTOM.NUM_NODES = num_nodes
+CFG.TRAIN.CUSTOM.NUM_SPLIT = 1
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
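
The new CFG.SCALER block takes over the normalization that the old configs baked into the dataset. Conceptually a z-score scaler holds training-set statistics and applies them both ways; this is only an illustrative sketch, not ZScoreScaler's actual API:

import torch

class ZScoreSketch:
    """Illustrative z-score scaler; see basicts.scaler.ZScoreScaler for the real one."""
    def __init__(self, train_data: torch.Tensor, norm_each_channel: bool = True):
        if norm_each_channel:                      # per-channel statistics
            self.mean = train_data.mean(dim=0, keepdim=True)
            self.std = train_data.std(dim=0, keepdim=True)
        else:                                      # one global mean/std
            self.mean = train_data.mean()
            self.std = train_data.std()

    def transform(self, x: torch.Tensor) -> torch.Tensor:
        return (x - self.mean) / self.std          # applied before the model

    def inverse_transform(self, x: torch.Tensor) -> torch.Tensor:
        return x * self.std + self.mean            # applied when RESCALE is set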
diff --git a/baselines/MTGNN/PEMS-BAY.py b/baselines/MTGNN/PEMS-BAY.py
index 2edc8356..38e4290b 100644
--- a/baselines/MTGNN/PEMS-BAY.py
+++ b/baselines/MTGNN/PEMS-BAY.py
@@ -1,49 +1,37 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import MTGNN
from .runner import MTGNNRunner
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "MTGNN model configuration"
-CFG.RUNNER = MTGNNRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "MTGNN"
-CFG.MODEL.ARCH = MTGNN
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS-BAY' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = MTGNN
buildA_true = True
num_nodes = 325
if buildA_true: # self-learned adjacency matrix
adj_mx = None
else: # use predefined adjacency matrix
- _, adj_mx = load_adj("datasets/" + CFG.DATASET_NAME + "/adj_mx.pkl", "doubletransition")
+ _, adj_mx = load_adj("datasets/" + DATA_NAME + "/adj_mx.pkl", "doubletransition")
adj_mx = torch.tensor(adj_mx)-torch.eye(num_nodes)
-
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"gcn_true" : True,
"buildA_true": buildA_true,
"gcn_depth": 2,
@@ -65,77 +53,112 @@
"tanhalpha":3,
"layer_norm_affline":True
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'MTGNN model configuration'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = MTGNNRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
+CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001,
"weight_decay": 0.0001,
}
-
-# ================= train ================= #
-CFG.TRAIN.CUSTOM = EasyDict() # MTGNN custom training args
-CFG.TRAIN.CUSTOM.STEP_SIZE = 100
-CFG.TRAIN.CUSTOM.NUM_NODES = num_nodes
-CFG.TRAIN.CUSTOM.NUM_SPLIT = 1
-
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
## curriculum learning
CFG.TRAIN.CL = EasyDict()
CFG.TRAIN.CL.WARM_EPOCHS = 0
CFG.TRAIN.CL.CL_EPOCHS = 3
CFG.TRAIN.CL.PREDICTION_LENGTH = 12
+# MTGNN custom training args
+CFG.TRAIN.CUSTOM = EasyDict()
+CFG.TRAIN.CUSTOM.STEP_SIZE = 100
+CFG.TRAIN.CUSTOM.NUM_NODES = num_nodes
+CFG.TRAIN.CUSTOM.NUM_SPLIT = 1
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
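
The CFG.TRAIN.CL block drives curriculum learning: after WARM_EPOCHS the loss covers a growing prefix of the prediction horizon, extended every CL_EPOCHS epochs up to PREDICTION_LENGTH. A schedule sketch under that reading (the runner's exact rule may differ):

def cl_length(epoch: int, warm_epochs: int = 0, cl_epochs: int = 3,
              prediction_length: int = 12) -> int:
    if epoch < warm_epochs:
        return prediction_length                   # warm-up uses the full horizon
    steps = (epoch - warm_epochs) // cl_epochs + 1
    return min(steps, prediction_length)           # grow one step per CL period

print([cl_length(e) for e in range(12)])  # [1, 1, 1, 2, 2, 2, 3, ...], capped at 12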
diff --git a/baselines/MTGNN/PEMS03.py b/baselines/MTGNN/PEMS03.py
index f9e4e4d9..7f1acff8 100644
--- a/baselines/MTGNN/PEMS03.py
+++ b/baselines/MTGNN/PEMS03.py
@@ -1,49 +1,37 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import MTGNN
from .runner import MTGNNRunner
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "MTGNN model configuration"
-CFG.RUNNER = MTGNNRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS03"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "MTGNN"
-CFG.MODEL.ARCH = MTGNN
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS03' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = MTGNN
buildA_true = True
num_nodes = 358
if buildA_true: # self-learned adjacency matrix
adj_mx = None
else: # use predefined adjacency matrix
- _, adj_mx = load_adj("datasets/" + CFG.DATASET_NAME + "/adj_mx.pkl", "doubletransition")
+ _, adj_mx = load_adj("datasets/" + DATA_NAME + "/adj_mx.pkl", "doubletransition")
adj_mx = torch.tensor(adj_mx)-torch.eye(num_nodes)
-
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"gcn_true" : True,
"buildA_true": buildA_true,
"gcn_depth": 2,
@@ -65,77 +53,112 @@
"tanhalpha":3,
"layer_norm_affline":True
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'MTGNN model configuration'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = MTGNNRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
+CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001,
"weight_decay": 0.0001,
}
-
-# ================= train ================= #
-CFG.TRAIN.CUSTOM = EasyDict() # MTGNN custom training args
-CFG.TRAIN.CUSTOM.STEP_SIZE = 100
-CFG.TRAIN.CUSTOM.NUM_NODES = num_nodes
-CFG.TRAIN.CUSTOM.NUM_SPLIT = 1
-
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
## curriculum learning
CFG.TRAIN.CL = EasyDict()
CFG.TRAIN.CL.WARM_EPOCHS = 0
CFG.TRAIN.CL.CL_EPOCHS = 3
CFG.TRAIN.CL.PREDICTION_LENGTH = 12
+# MTGNN custom training args
+CFG.TRAIN.CUSTOM = EasyDict()
+CFG.TRAIN.CUSTOM.STEP_SIZE = 100
+CFG.TRAIN.CUSTOM.NUM_NODES = num_nodes
+CFG.TRAIN.CUSTOM.NUM_SPLIT = 1
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
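
When buildA_true is False the predefined graph path runs, and the torch.eye subtraction zeroes the diagonal, presumably because MTGNN handles self-connections internally. In isolation:

import torch

num_nodes = 4                                  # toy graph standing in for load_adj output
adj = torch.rand(num_nodes, num_nodes)
adj.fill_diagonal_(1.0)                        # predefined graphs carry self-loops
adj_no_self = adj - torch.eye(num_nodes)       # the config's eye-subtraction
assert torch.all(adj_no_self.diagonal() == 0)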
diff --git a/baselines/MTGNN/PEMS04.py b/baselines/MTGNN/PEMS04.py
index cd6d0284..09788bbc 100644
--- a/baselines/MTGNN/PEMS04.py
+++ b/baselines/MTGNN/PEMS04.py
@@ -1,49 +1,37 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import MTGNN
from .runner import MTGNNRunner
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "MTGNN model configuration"
-CFG.RUNNER = MTGNNRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "MTGNN"
-CFG.MODEL.ARCH = MTGNN
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = MTGNN
buildA_true = True
num_nodes = 307
if buildA_true: # self-learned adjacency matrix
adj_mx = None
else: # use predefined adjacency matrix
- _, adj_mx = load_adj("datasets/" + CFG.DATASET_NAME + "/adj_mx.pkl", "doubletransition")
+ _, adj_mx = load_adj("datasets/" + DATA_NAME + "/adj_mx.pkl", "doubletransition")
adj_mx = torch.tensor(adj_mx)-torch.eye(num_nodes)
-
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"gcn_true" : True,
"buildA_true": buildA_true,
"gcn_depth": 2,
@@ -65,77 +53,112 @@
"tanhalpha":3,
"layer_norm_affline":True
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'MTGNN model configuration'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = MTGNNRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
+CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001,
"weight_decay": 0.0001,
}
-
-# ================= train ================= #
-CFG.TRAIN.CUSTOM = EasyDict() # MTGNN custom training args
-CFG.TRAIN.CUSTOM.STEP_SIZE = 100
-CFG.TRAIN.CUSTOM.NUM_NODES = num_nodes
-CFG.TRAIN.CUSTOM.NUM_SPLIT = 1
-
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
## curriculum learning
CFG.TRAIN.CL = EasyDict()
CFG.TRAIN.CL.WARM_EPOCHS = 0
CFG.TRAIN.CL.CL_EPOCHS = 3
CFG.TRAIN.CL.PREDICTION_LENGTH = 12
+# MTGNN custom training args
+CFG.TRAIN.CUSTOM = EasyDict()
+CFG.TRAIN.CUSTOM.STEP_SIZE = 100
+CFG.TRAIN.CUSTOM.NUM_NODES = num_nodes
+CFG.TRAIN.CUSTOM.NUM_SPLIT = 1
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
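
FORWARD_FEATURES = [0, 1] feeds the model the raw value plus one temporal feature, while TARGET_FEATURES = [0] keeps only the value channel for the loss; both boil down to indexing the last axis:

import torch

data = torch.randn(4, 12, 307, 3)           # [B, L, N, C]: value + 2 time features
forward_features, target_features = [0, 1], [0]
model_input = data[..., forward_features]   # [B, L, N, 2] goes into MTGNN
labels = data[..., target_features]         # [B, L, N, 1] compared with predictions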
diff --git a/baselines/MTGNN/PEMS07.py b/baselines/MTGNN/PEMS07.py
index b6862571..f6fcdaea 100644
--- a/baselines/MTGNN/PEMS07.py
+++ b/baselines/MTGNN/PEMS07.py
@@ -1,49 +1,37 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import MTGNN
from .runner import MTGNNRunner
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "MTGNN model configuration"
-CFG.RUNNER = MTGNNRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS07"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "MTGNN"
-CFG.MODEL.ARCH = MTGNN
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS07' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = MTGNN
buildA_true = True
num_nodes = 883
if buildA_true: # self-learned adjacency matrix
adj_mx = None
else: # use predefined adjacency matrix
- _, adj_mx = load_adj("datasets/" + CFG.DATASET_NAME + "/adj_mx.pkl", "doubletransition")
+ _, adj_mx = load_adj("datasets/" + DATA_NAME + "/adj_mx.pkl", "doubletransition")
adj_mx = torch.tensor(adj_mx)-torch.eye(num_nodes)
-
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"gcn_true" : True,
"buildA_true": buildA_true,
"gcn_depth": 2,
@@ -65,77 +53,112 @@
"tanhalpha":3,
"layer_norm_affline":True
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'MTGNN model configuration'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = MTGNNRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
+CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001,
"weight_decay": 0.0001,
}
-
-# ================= train ================= #
-CFG.TRAIN.CUSTOM = EasyDict() # MTGNN custom training args
-CFG.TRAIN.CUSTOM.STEP_SIZE = 100
-CFG.TRAIN.CUSTOM.NUM_NODES = num_nodes
-CFG.TRAIN.CUSTOM.NUM_SPLIT = 1
-
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
## curriculum learning
CFG.TRAIN.CL = EasyDict()
CFG.TRAIN.CL.WARM_EPOCHS = 0
CFG.TRAIN.CL.CL_EPOCHS = 3
CFG.TRAIN.CL.PREDICTION_LENGTH = 12
+# MTGNN custom training args
+CFG.TRAIN.CUSTOM = EasyDict()
+CFG.TRAIN.CUSTOM.STEP_SIZE = 100
+CFG.TRAIN.CUSTOM.NUM_NODES = num_nodes
+CFG.TRAIN.CUSTOM.NUM_SPLIT = 1
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
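
CFG.TRAIN.CLIP_GRAD_PARAM is presumably unpacked into torch.nn.utils.clip_grad_norm_; inside a bare training step that reads:

import torch

model = torch.nn.Linear(8, 8)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
clip_grad_param = {"max_norm": 5.0}          # mirrors CFG.TRAIN.CLIP_GRAD_PARAM

loss = model(torch.randn(2, 8)).pow(2).mean()
optimizer.zero_grad()
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), **clip_grad_param)
optimizer.step()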
diff --git a/baselines/MTGNN/PEMS08.py b/baselines/MTGNN/PEMS08.py
index 27c86d31..705bd58c 100644
--- a/baselines/MTGNN/PEMS08.py
+++ b/baselines/MTGNN/PEMS08.py
@@ -1,49 +1,37 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import MTGNN
from .runner import MTGNNRunner
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "MTGNN model configuration"
-CFG.RUNNER = MTGNNRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "MTGNN"
-CFG.MODEL.ARCH = MTGNN
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = MTGNN
buildA_true = True
num_nodes = 170
if buildA_true: # self-learned adjacency matrix
adj_mx = None
else: # use predefined adjacency matrix
- _, adj_mx = load_adj("datasets/" + CFG.DATASET_NAME + "/adj_mx.pkl", "doubletransition")
+ _, adj_mx = load_adj("datasets/" + DATA_NAME + "/adj_mx.pkl", "doubletransition")
adj_mx = torch.tensor(adj_mx)-torch.eye(num_nodes)
-
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"gcn_true" : True,
"buildA_true": buildA_true,
"gcn_depth": 2,
@@ -65,77 +53,112 @@
"tanhalpha":3,
"layer_norm_affline":True
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = MTGNNRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
+CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001,
"weight_decay": 0.0001,
}
-
-# ================= train ================= #
-CFG.TRAIN.CUSTOM = EasyDict() # MTGNN custom training args
-CFG.TRAIN.CUSTOM.STEP_SIZE = 100
-CFG.TRAIN.CUSTOM.NUM_NODES = num_nodes
-CFG.TRAIN.CUSTOM.NUM_SPLIT = 1
-
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
## curriculum learning
CFG.TRAIN.CL = EasyDict()
CFG.TRAIN.CL.WARM_EPOCHS = 0
CFG.TRAIN.CL.CL_EPOCHS = 3
CFG.TRAIN.CL.PREDICTION_LENGTH = 12
+# Custom training settings (MTGNN node-splitting args)
+CFG.TRAIN.CUSTOM = EasyDict()
+CFG.TRAIN.CUSTOM.STEP_SIZE = 100
+CFG.TRAIN.CUSTOM.NUM_NODES = num_nodes
+CFG.TRAIN.CUSTOM.NUM_SPLIT = 1
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
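The `CFG.TRAIN.CL` block (WARM_EPOCHS / CL_EPOCHS / PREDICTION_LENGTH) drives curriculum learning on the prediction horizon. A sketch of the usual schedule, assuming the common convention of growing the supervised horizon by one step every `CL_EPOCHS` epochs after warm-up (the exact runner behavior is not shown in this patch):

    def cl_supervised_length(epoch, warm_epochs=0, cl_epochs=3, prediction_length=12):
        # During warm-up, train on the full horizon; afterwards grow the
        # supervised horizon one step every cl_epochs epochs, capped at the max.
        if epoch < warm_epochs:
            return prediction_length
        return min(prediction_length, (epoch - warm_epochs) // cl_epochs + 1)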
diff --git a/baselines/MTGNN/run.sh b/baselines/MTGNN/run.sh
deleted file mode 100644
index 529202f2..00000000
--- a/baselines/MTGNN/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/MTGNN/METR-LA.py --gpus '3'
-python experiments/train.py -c baselines/MTGNN/PEMS-BAY.py --gpus '3'
-python experiments/train.py -c baselines/MTGNN/PEMS03.py --gpus '3'
-python experiments/train.py -c baselines/MTGNN/PEMS04.py --gpus '3'
-python experiments/train.py -c baselines/MTGNN/PEMS07.py --gpus '3'
-python experiments/train.py -c baselines/MTGNN/PEMS08.py --gpus '3'
diff --git a/baselines/MTGNN/runner/mtgnn_runner.py b/baselines/MTGNN/runner/mtgnn_runner.py
index c6129dd2..04af4bc6 100644
--- a/baselines/MTGNN/runner/mtgnn_runner.py
+++ b/baselines/MTGNN/runner/mtgnn_runner.py
@@ -3,10 +3,10 @@
import torch
import numpy as np
-from basicts.runners import BaseTimeSeriesForecastingRunner
+from basicts.runners import SimpleTimeSeriesForecastingRunner
-class MTGNNRunner(BaseTimeSeriesForecastingRunner):
+class MTGNNRunner(SimpleTimeSeriesForecastingRunner):
def __init__(self, cfg: dict):
super().__init__(cfg)
self.forward_features = cfg["MODEL"].get("FORWARD_FEATURES", None)
@@ -47,22 +47,10 @@ def select_target_features(self, data: torch.Tensor) -> torch.Tensor:
return data
def forward(self, data: tuple, epoch: int = None, iter_num: int = None, train: bool = True, **kwargs) -> tuple:
- """Feed forward process for train, val, and test. Note that the outputs are NOT re-scaled.
-
- Args:
- data (tuple): data (future data, history data). [B, L, N, C] for each of them
- epoch (int, optional): epoch number. Defaults to None.
- iter_num (int, optional): iteration number. Defaults to None.
- train (bool, optional): if in the training process. Defaults to True.
-
- Returns:
- tuple: (prediction, real_value). [B, L, N, C] for each of them.
- """
-
if train:
- future_data, history_data, idx = data
+ future_data, history_data, idx = data['target'], data['inputs'], data['idx']
else:
- future_data, history_data = data
+ future_data, history_data = data['target'], data['inputs']
idx = None
history_data = self.to_running_device(history_data) # B, L, N, C
@@ -83,18 +71,6 @@ def forward(self, data: tuple, epoch: int = None, iter_num: int = None, train: b
return model_return
def train_iters(self, epoch: int, iter_index: int, data: Union[torch.Tensor, Tuple]) -> torch.Tensor:
- """It must be implement to define training detail.
-
- If it returns `loss`, the function ```self.backward``` will be called.
-
- Args:
- epoch (int): current epoch.
- iter_index (int): current iter.
- data (torch.Tensor or tuple): Data provided by DataLoader
-
- Returns:
- loss (torch.Tensor)
- """
if iter_index % self.step_size == 0:
self.perm = np.random.permutation(range(self.num_nodes))
@@ -106,7 +82,11 @@ def train_iters(self, epoch: int, iter_index: int, data: Union[torch.Tensor, Tup
else:
idx = self.perm[j * num_sub:]
idx = torch.tensor(idx)
- future_data, history_data = data
- data = future_data[:, :, idx, :], history_data[:, :, idx, :], idx
+ future_data, history_data = data['target'][:, :, idx, :], data['inputs'][:, :, idx, :]
+ data = {
+ 'target': future_data,
+ 'inputs': history_data,
+ 'idx': idx
+ }
loss = super().train_iters(epoch, iter_index, data)
self.backward(loss)
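The hunk above keeps MTGNN's sub-graph training but adapts it to the new dict-shaped batches. The trick, in isolation: re-shuffle the node order every `STEP_SIZE` iterations, then train each step on one node subset. A self-contained sketch (shapes and values illustrative):

    import numpy as np
    import torch

    num_nodes, num_split = 170, 2
    inputs = torch.randn(8, 12, num_nodes, 2)   # [B, L, N, C]
    target = torch.randn(8, 12, num_nodes, 1)

    perm = np.random.permutation(num_nodes)     # refreshed every STEP_SIZE iters
    num_sub = num_nodes // num_split
    for j in range(num_split):
        end = None if j == num_split - 1 else (j + 1) * num_sub
        idx = torch.tensor(perm[j * num_sub:end])
        sub_batch = {                           # mirrors the runner's dict batches
            'inputs': inputs[:, :, idx, :],
            'target': target[:, :, idx, :],
            'idx': idx,
        }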
diff --git a/baselines/MegaCRN/METR-LA.py b/baselines/MegaCRN/METR-LA.py
new file mode 100644
index 00000000..677915c5
--- /dev/null
+++ b/baselines/MegaCRN/METR-LA.py
@@ -0,0 +1,148 @@
+import os
+import sys
+import torch
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import MegaCRN
+from .loss import megacrn_loss
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'METR-LA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = MegaCRN
+MODEL_PARAM = {
+    "num_nodes": 207,
+    "input_dim": 1,
+    "output_dim": 1,
+    "horizon": 12,
+    "rnn_units": 64,
+    "num_layers": 1,
+    "cheb_k": 3,
+    "ycov_dim": 1,
+    "mem_num": 20,
+    "mem_dim": 64,
+    "cl_decay_steps": 2000,
+    "use_curriculum_learning": True
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = megacrn_loss
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.01,
+ "eps": 1e-3
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [50, 100],
+ "gamma": 0.1
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
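All of these configs hand normalization to the new `ZScoreScaler`, with `norm_each_channel` and `rescale` switches. Conceptually it is plain z-score standardization fitted on the training split only; a sketch of the idea (not the `basicts.scaler` implementation):

    import numpy as np

    class ZScoreSketch:
        def fit(self, train_data, norm_each_channel=True):
            # Per-channel statistics when norm_each_channel is True,
            # otherwise a single global mean/std pair.
            axis = tuple(range(train_data.ndim - 1)) if norm_each_channel else None
            self.mean = train_data.mean(axis=axis)
            self.std = train_data.std(axis=axis)

        def transform(self, x):
            return (x - self.mean) / self.std

        def inverse_transform(self, x):
            # Applied to predictions when RESCALE is enabled, so metrics
            # are computed in the original data units.
            return x * self.std + self.mean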
diff --git a/baselines/MegaCRN/MegaCRN_METR-LA.py b/baselines/MegaCRN/MegaCRN_METR-LA.py
deleted file mode 100644
index a2d533de..00000000
--- a/baselines/MegaCRN/MegaCRN_METR-LA.py
+++ /dev/null
@@ -1,114 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-
-from .arch import MegaCRN
-from .loss import megacrn_loss
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "MegaCRN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "MegaCRN"
-CFG.MODEL.ARCH = MegaCRN
-CFG.MODEL.PARAM = {
- "num_nodes": 207,
- "input_dim": 1,
- "output_dim": 1,
- "horizon": 12,
- "rnn_units": 64,
- "num_layers":1,
- "cheb_k":3,
- "ycov_dim":1,
- "mem_num":20,
- "mem_dim":64,
- "cl_decay_steps":2000,
- "use_curriculum_learning":True
-}
-CFG.MODEL.SETUP_GRAPH = True
-CFG.MODEL.FORWARD_FEATURES = [0, 1]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = megacrn_loss
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.01,
- "eps": 1e-3
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [50, 100],
- "gamma": 0.1
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/MegaCRN/arch/megacrn_arch.py b/baselines/MegaCRN/arch/megacrn_arch.py
index 4d68269f..d49dd857 100644
--- a/baselines/MegaCRN/arch/megacrn_arch.py
+++ b/baselines/MegaCRN/arch/megacrn_arch.py
@@ -12,9 +12,9 @@ def __init__(self, dim_in, dim_out, cheb_k):
self.bias = nn.Parameter(torch.FloatTensor(dim_out))
nn.init.xavier_normal_(self.weights)
nn.init.constant_(self.bias, val=0)
-
+
def forward(self, x, supports):
- x_g = []
+ x_g = []
support_set = []
for support in supports:
support_ks = [torch.eye(support.shape[0]).to(support.device), support]
@@ -26,7 +26,7 @@ def forward(self, x, supports):
x_g = torch.cat(x_g, dim=-1) # B, N, 2 * cheb_k * dim_in
x_gconv = torch.einsum('bni,io->bno', x_g, self.weights) + self.bias # b, N, dim_out
return x_gconv
-
+
class AGCRNCell(nn.Module):
def __init__(self, node_num, dim_in, dim_out, cheb_k):
super(AGCRNCell, self).__init__()
@@ -49,7 +49,7 @@ def forward(self, x, state, supports):
def init_hidden_state(self, batch_size):
return torch.zeros(batch_size, self.node_num, self.hidden_dim)
-
+
class ADCRNN_Encoder(nn.Module):
def __init__(self, node_num, dim_in, dim_out, cheb_k, num_layers):
super(ADCRNN_Encoder, self).__init__()
@@ -81,7 +81,7 @@ def forward(self, x, init_state, supports):
#output_hidden: the last state for each layer: (num_layers, B, N, hidden_dim)
#return current_inputs, torch.stack(output_hidden, dim=0)
return current_inputs, output_hidden
-
+
def init_hidden(self, batch_size):
init_states = []
for i in range(self.num_layers):
@@ -150,7 +150,7 @@ def __init__(self, num_nodes, input_dim, output_dim, horizon, rnn_units, num_lay
self.ycov_dim = ycov_dim
self.cl_decay_steps = cl_decay_steps
self.use_curriculum_learning = use_curriculum_learning
-
+
# memory
self.mem_num = mem_num
self.mem_dim = mem_dim
@@ -158,14 +158,14 @@ def __init__(self, num_nodes, input_dim, output_dim, horizon, rnn_units, num_lay
# encoder
self.encoder = ADCRNN_Encoder(self.num_nodes, self.input_dim, self.rnn_units, self.cheb_k, self.num_layers)
-
+
        # decoder
self.decoder_dim = self.rnn_units + self.mem_dim
self.decoder = ADCRNN_Decoder(self.num_nodes, self.output_dim + self.ycov_dim, self.decoder_dim, self.cheb_k, self.num_layers)
# output
self.proj = nn.Sequential(nn.Linear(self.decoder_dim, self.output_dim, bias=True))
-
+
def compute_sampling_threshold(self, batches_seen):
return self.cl_decay_steps / (self.cl_decay_steps + np.exp(batches_seen / self.cl_decay_steps))
@@ -178,7 +178,7 @@ def construct_memory(self):
for param in memory_dict.values():
nn.init.xavier_normal_(param)
return memory_dict
-
+
def query_memory(self, h_t:torch.Tensor):
query = torch.matmul(h_t, self.memory['Wq']) # (B, N, d)
att_score = torch.softmax(torch.matmul(query, self.memory['Memory'].t()), dim=-1) # alpha: (B, N, M)
@@ -187,7 +187,7 @@ def query_memory(self, h_t:torch.Tensor):
pos = self.memory['Memory'][ind[:, :, 0]] # B, N, d
neg = self.memory['Memory'][ind[:, :, 1]] # B, N, d
return value, query, pos, neg
-
+
def forward(self, history_data, future_data, batch_seen=None, epoch=None, **kwargs):
# def forward(self, x, y_cov, labels=None, batches_seen=None):
x = history_data[..., [0]]
@@ -201,11 +201,11 @@ def forward(self, history_data, future_data, batch_seen=None, epoch=None, **kwar
supports = [g1, g2]
init_state = self.encoder.init_hidden(x.shape[0])
h_en, state_en = self.encoder(x, init_state, supports) # B, T, N, hidden
- h_t = h_en[:, -1, :, :] # B, N, hidden (last state)
-
+ h_t = h_en[:, -1, :, :] # B, N, hidden (last state)
+
h_att, query, pos, neg = self.query_memory(h_t)
h_t = torch.cat([h_t, h_att], dim=-1)
-
+
ht_list = [h_t]*self.num_layers
go = torch.zeros((x.shape[0], self.num_nodes, self.output_dim), device=x.device)
out = []
@@ -218,5 +218,5 @@ def forward(self, history_data, future_data, batch_seen=None, epoch=None, **kwar
if c < self.compute_sampling_threshold(batch_seen):
go = labels[:, t, ...]
output = torch.stack(out, dim=1)
-
+
return {'prediction': output, 'query': query, 'pos': pos, 'neg': neg}
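Two details of the architecture above, written out. `compute_sampling_threshold` is the standard inverse-sigmoid decay for scheduled sampling, and `query_memory` is dot-product attention over the learned memory bank `M` (with the top-2 scoring items reused as the positive/negative prototypes):

    p(\text{batches\_seen}) = \frac{c}{c + \exp(\text{batches\_seen} / c)}, \qquad c = \text{cl\_decay\_steps}

    q = h_t W_q, \qquad \alpha = \operatorname{softmax}(q M^\top), \qquad h_{\text{att}} = \alpha M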
diff --git a/baselines/MegaCRN/loss/loss.py b/baselines/MegaCRN/loss/loss.py
index b7653ee5..54efb144 100644
--- a/baselines/MegaCRN/loss/loss.py
+++ b/baselines/MegaCRN/loss/loss.py
@@ -1,5 +1,5 @@
from torch import nn
-from basicts.losses import masked_mae
+from basicts.metrics import masked_mae
def megacrn_loss(prediction, target, query, pos, neg, null_val):
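Only the import line of `loss.py` changes here, so the body of `megacrn_loss` is not visible. Going by its signature and the MegaCRN paper, a plausible sketch combines the masked forecasting loss with the two memory regularizers; the weights and the exact `masked_mae` signature are assumptions:

    import torch
    from torch import nn
    from basicts.metrics import masked_mae

    def megacrn_loss_sketch(prediction, target, query, pos, neg, null_val):
        base = masked_mae(prediction, target, null_val)          # forecasting term
        # Pull the query toward its best-matching memory item (pos)
        # and away from the runner-up (neg).
        separate = nn.TripletMarginLoss(margin=1.0)(query, pos.detach(), neg.detach())
        compact = nn.MSELoss()(query, pos.detach())              # compactness term
        lam1, lam2 = 0.01, 0.01                                  # assumed weights
        return base + lam1 * separate + lam2 * compact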
diff --git a/baselines/NBeats/ETTh1.py b/baselines/NBeats/ETTh1.py
index f7472eb4..d043c759 100644
--- a/baselines/NBeats/ETTh1.py
+++ b/baselines/NBeats/ETTh1.py
@@ -1,54 +1,104 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import NBeats
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "NBeats"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh1"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NBeats"
-CFG.MODEL.ARCH = NBeats
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NBeats
+NUM_NODES = 7
+MODEL_PARAM = {
"type": "generic",
- "input_size": CFG.DATASET_INPUT_LEN,
- "output_size": CFG.DATASET_OUTPUT_LEN,
+ "input_size": INPUT_LEN,
+ "output_size": OUTPUT_LEN,
"layer_size": 512,
"layers": 4,
"stacks": 10
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
+CFG.TRAIN.OPTIM.PARAM = {
"lr":0.001,
"weight_decay":0,
"eps":1.0e-8,
@@ -60,52 +110,30 @@
"milestones":[20, 40, 60, 80],
"gamma":0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "./checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 128
-CFG.TRAIN.DATA.PREFETCH = True
+CFG.TRAIN.DATA.BATCH_SIZE = 32
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 4
-CFG.TRAIN.DATA.PIN_MEMORY = True
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 128
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 4
-CFG.VAL.DATA.PIN_MEMORY = True
+CFG.VAL.DATA.BATCH_SIZE = 64
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# evluation
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 128
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 4
-CFG.TEST.DATA.PIN_MEMORY = True
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/NBeats/ETTm1.py b/baselines/NBeats/ETTm1.py
index 87aaac50..fc8d483f 100644
--- a/baselines/NBeats/ETTm1.py
+++ b/baselines/NBeats/ETTm1.py
@@ -1,54 +1,104 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import NBeats
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "NBeats"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm1"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NBeats"
-CFG.MODEL.ARCH = NBeats
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NBeats
+NUM_NODES = 7
+MODEL_PARAM = {
"type": "generic",
- "input_size": CFG.DATASET_INPUT_LEN,
- "output_size": CFG.DATASET_OUTPUT_LEN,
+ "input_size": INPUT_LEN,
+ "output_size": OUTPUT_LEN,
"layer_size": 512,
"layers": 4,
"stacks": 10
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
+CFG.TRAIN.OPTIM.PARAM = {
"lr":0.001,
"weight_decay":0,
"eps":1.0e-8,
@@ -60,52 +110,30 @@
"milestones":[20, 40, 60, 80],
"gamma":0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "./checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 128
-CFG.TRAIN.DATA.PREFETCH = True
+CFG.TRAIN.DATA.BATCH_SIZE = 32
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 4
-CFG.TRAIN.DATA.PIN_MEMORY = True
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 128
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 4
-CFG.VAL.DATA.PIN_MEMORY = True
+CFG.VAL.DATA.BATCH_SIZE = 64
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# evluation
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 128
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 4
-CFG.TEST.DATA.PIN_MEMORY = True
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/NBeats/Electricity.py b/baselines/NBeats/Electricity.py
index 92b6d07d..ce935a84 100644
--- a/baselines/NBeats/Electricity.py
+++ b/baselines/NBeats/Electricity.py
@@ -1,54 +1,104 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import NBeats
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "NBeats"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Electricity"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NBeats"
-CFG.MODEL.ARCH = NBeats
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Electricity' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NBeats
+NUM_NODES = 321
+MODEL_PARAM = {
"type": "generic",
- "input_size": CFG.DATASET_INPUT_LEN,
- "output_size": CFG.DATASET_OUTPUT_LEN,
+ "input_size": INPUT_LEN,
+ "output_size": OUTPUT_LEN,
"layer_size": 512,
"layers": 4,
"stacks": 10
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
+CFG.TRAIN.OPTIM.PARAM = {
"lr":0.001,
"weight_decay":0,
"eps":1.0e-8,
@@ -60,57 +110,30 @@
"milestones":[20, 40, 60, 80],
"gamma":0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "./checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = True
+CFG.TRAIN.DATA.BATCH_SIZE = 64
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 4
-CFG.TRAIN.DATA.PIN_MEMORY = True
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 128
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 4
-CFG.VAL.DATA.PIN_MEMORY = True
+CFG.VAL.DATA.BATCH_SIZE = 64
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# evluation
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 128
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 4
-CFG.TEST.DATA.PIN_MEMORY = True
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
-# ================= evaluate ================= #
CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
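Every refactored config reads its split ratios, sequence lengths, and normalization flags from `get_regular_settings(DATA_NAME)` and only then writes them into `CFG`, so a one-off experiment can override a default before it is consumed. For example (values illustrative):

    from basicts.utils import get_regular_settings

    settings = get_regular_settings('Electricity')
    INPUT_LEN = settings['INPUT_LEN']    # framework default for this dataset
    OUTPUT_LEN = 720                     # override before building CFG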
diff --git a/baselines/NBeats/ExchangeRate.py b/baselines/NBeats/ExchangeRate.py
index 6c8fcdec..7b9b1e25 100644
--- a/baselines/NBeats/ExchangeRate.py
+++ b/baselines/NBeats/ExchangeRate.py
@@ -1,54 +1,104 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import NBeats
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "NBeats"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ExchangeRate"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NBeats"
-CFG.MODEL.ARCH = NBeats
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ExchangeRate' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NBeats
+NUM_NODES = 8
+MODEL_PARAM = {
"type": "generic",
- "input_size": CFG.DATASET_INPUT_LEN,
- "output_size": CFG.DATASET_OUTPUT_LEN,
+ "input_size": INPUT_LEN,
+ "output_size": OUTPUT_LEN,
"layer_size": 512,
"layers": 4,
"stacks": 10
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
+CFG.TRAIN.OPTIM.PARAM = {
"lr":0.001,
"weight_decay":0,
"eps":1.0e-8,
@@ -60,52 +110,30 @@
"milestones":[20, 40, 60, 80],
"gamma":0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "./checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 128
-CFG.TRAIN.DATA.PREFETCH = True
+CFG.TRAIN.DATA.BATCH_SIZE = 32
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 4
-CFG.TRAIN.DATA.PIN_MEMORY = True
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 128
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 4
-CFG.VAL.DATA.PIN_MEMORY = True
+CFG.VAL.DATA.BATCH_SIZE = 64
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# evluation
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 128
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 4
-CFG.TEST.DATA.PIN_MEMORY = True
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/NBeats/METR-LA.py b/baselines/NBeats/METR-LA.py
deleted file mode 100644
index 28a60b14..00000000
--- a/baselines/NBeats/METR-LA.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import NBeats
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "NBeats"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic Speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NBeats"
-CFG.MODEL.ARCH = NBeats
-CFG.MODEL.PARAM = {
- "type": "generic",
- "input_size": CFG.DATASET_INPUT_LEN,
- "output_size": CFG.DATASET_OUTPUT_LEN,
- "layer_size": 128,
- "layers": 4,
- "stacks": 10
- }
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.001,
- "weight_decay":0,
- "eps":1.0e-8,
- "betas":(0.9, 0.95)
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM= {
- "milestones":[20, 40, 60, 80],
- "gamma":0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "./checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = True
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 4
-CFG.TRAIN.DATA.PIN_MEMORY = True
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 128
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 4
-CFG.VAL.DATA.PIN_MEMORY = True
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# evluation
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 128
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 4
-CFG.TEST.DATA.PIN_MEMORY = True
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6 ,12]
diff --git a/baselines/NBeats/PEMS-BAY.py b/baselines/NBeats/PEMS-BAY.py
deleted file mode 100644
index f99146b1..00000000
--- a/baselines/NBeats/PEMS-BAY.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import NBeats
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "NBeats"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic Speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NBeats"
-CFG.MODEL.ARCH = NBeats
-CFG.MODEL.PARAM = {
- "type": "generic",
- "input_size": CFG.DATASET_INPUT_LEN,
- "output_size": CFG.DATASET_OUTPUT_LEN,
- "layer_size": 512,
- "layers": 4,
- "stacks": 10
- }
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.001,
- "weight_decay":0,
- "eps":1.0e-8,
- "betas":(0.9, 0.95)
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM= {
- "milestones":[20, 40, 60, 80],
- "gamma":0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "./checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 128
-CFG.TRAIN.DATA.PREFETCH = True
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 4
-CFG.TRAIN.DATA.PIN_MEMORY = True
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 128
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 4
-CFG.VAL.DATA.PIN_MEMORY = True
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# evluation
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 128
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 4
-CFG.TEST.DATA.PIN_MEMORY = True
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6 ,12]
diff --git a/baselines/NBeats/PEMS03.py b/baselines/NBeats/PEMS03.py
deleted file mode 100644
index de5fd489..00000000
--- a/baselines/NBeats/PEMS03.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import NBeats
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "NBeats"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS03"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NBeats"
-CFG.MODEL.ARCH = NBeats
-CFG.MODEL.PARAM = {
- "type": "generic",
- "input_size": CFG.DATASET_INPUT_LEN,
- "output_size": CFG.DATASET_OUTPUT_LEN,
- "layer_size": 512,
- "layers": 4,
- "stacks": 10
- }
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.001,
- "weight_decay":0,
- "eps":1.0e-8,
- "betas":(0.9, 0.95)
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM= {
- "milestones":[20, 40, 60, 80],
- "gamma":0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "./checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 128
-CFG.TRAIN.DATA.PREFETCH = True
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 4
-CFG.TRAIN.DATA.PIN_MEMORY = True
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 128
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 4
-CFG.VAL.DATA.PIN_MEMORY = True
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# evaluation
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 128
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 4
-CFG.TEST.DATA.PIN_MEMORY = True
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/NBeats/PEMS04.py b/baselines/NBeats/PEMS04.py
deleted file mode 100644
index 04c749e1..00000000
--- a/baselines/NBeats/PEMS04.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import NBeats
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "NBeats"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NBeats"
-CFG.MODEL.ARCH = NBeats
-CFG.MODEL.PARAM = {
- "type": "generic",
- "input_size": CFG.DATASET_INPUT_LEN,
- "output_size": CFG.DATASET_OUTPUT_LEN,
- "layer_size": 512,
- "layers": 4,
- "stacks": 10
- }
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.001,
- "weight_decay":0,
- "eps":1.0e-8,
- "betas":(0.9, 0.95)
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM= {
- "milestones":[20, 40, 60, 80],
- "gamma":0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "./checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 128
-CFG.TRAIN.DATA.PREFETCH = True
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 4
-CFG.TRAIN.DATA.PIN_MEMORY = True
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 128
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 4
-CFG.VAL.DATA.PIN_MEMORY = True
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# evaluation
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 128
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 4
-CFG.TEST.DATA.PIN_MEMORY = True
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/NBeats/PEMS04_LTSF.py b/baselines/NBeats/PEMS04_LTSF.py
index 79415e9e..b148b154 100644
--- a/baselines/NBeats/PEMS04_LTSF.py
+++ b/baselines/NBeats/PEMS04_LTSF.py
@@ -1,55 +1,107 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import NBeats
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "NBeats"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NBeats"
-CFG.MODEL.ARCH = NBeats
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NBeats
+NUM_NODES = 307
+MODEL_PARAM = {
"type": "generic",
- "input_size": CFG.DATASET_INPUT_LEN,
- "output_size": CFG.DATASET_OUTPUT_LEN,
+ "input_size": INPUT_LEN,
+ "output_size": OUTPUT_LEN,
"layer_size": 512,
"layers": 4,
"stacks": 10
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'NBeats model configuration'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
+CFG.TRAIN.OPTIM.PARAM = {
"lr":0.001,
"weight_decay":0,
"eps":1.0e-8,
@@ -61,52 +113,30 @@
"milestones":[20, 40, 60, 80],
"gamma":0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "./checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 128
-CFG.TRAIN.DATA.PREFETCH = True
+CFG.TRAIN.DATA.BATCH_SIZE = 32
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 4
-CFG.TRAIN.DATA.PIN_MEMORY = True
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 128
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 4
-CFG.VAL.DATA.PIN_MEMORY = True
+CFG.VAL.DATA.BATCH_SIZE = 64
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# evaluation
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 128
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 4
-CFG.TEST.DATA.PIN_MEMORY = True
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
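
The new Scaler block above takes over the normalization that this release removes from the dataset classes. The actual `basicts.scaler.ZScoreScaler` is not part of this hunk, so the following is only a minimal sketch of the contract the config implies: fit z-score statistics on the training split, with `norm_each_channel` switching between per-channel and global statistics. The class name and constructor signature below are illustrative, not the library's API.

```python
import numpy as np

class ZScoreScalerSketch:
    """Hypothetical stand-in for basicts.scaler.ZScoreScaler (illustration only)."""

    def __init__(self, norm_each_channel: bool = True):
        self.norm_each_channel = norm_each_channel
        self.mean = None
        self.std = None

    def fit(self, train_data: np.ndarray) -> None:
        # train_data: [num_samples, num_channels]; the real class loads the
        # training split itself from dataset_name and train_ratio.
        axis = 0 if self.norm_each_channel else None
        std = train_data.std(axis=axis)
        self.mean = train_data.mean(axis=axis)
        self.std = np.where(std == 0, 1.0, std)  # guard against constant channels

    def transform(self, data: np.ndarray) -> np.ndarray:
        return (data - self.mean) / self.std

    def inverse_transform(self, data: np.ndarray) -> np.ndarray:
        # applied to model outputs before metrics when RESCALE is enabled
        return data * self.std + self.mean
```

With `rescale` enabled, predictions are presumably mapped back to the original scale by `inverse_transform` before MAE/MAPE/RMSE are computed, which is why the regenerated configs carry no separate denormalization step.
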
diff --git a/baselines/NBeats/PEMS07.py b/baselines/NBeats/PEMS07.py
deleted file mode 100644
index f820cd14..00000000
--- a/baselines/NBeats/PEMS07.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import NBeats
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "NBeats"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS07"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NBeats"
-CFG.MODEL.ARCH = NBeats
-CFG.MODEL.PARAM = {
- "type": "generic",
- "input_size": CFG.DATASET_INPUT_LEN,
- "output_size": CFG.DATASET_OUTPUT_LEN,
- "layer_size": 512,
- "layers": 4,
- "stacks": 10
- }
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.001,
- "weight_decay":0,
- "eps":1.0e-8,
- "betas":(0.9, 0.95)
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM= {
- "milestones":[20, 40, 60, 80],
- "gamma":0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "./checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = True
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 4
-CFG.TRAIN.DATA.PIN_MEMORY = True
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 128
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 4
-CFG.VAL.DATA.PIN_MEMORY = True
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# evaluation
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 128
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 4
-CFG.TEST.DATA.PIN_MEMORY = True
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/NBeats/PEMS08.py b/baselines/NBeats/PEMS08.py
deleted file mode 100644
index 5cc51c98..00000000
--- a/baselines/NBeats/PEMS08.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import NBeats
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "NBeats"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NBeats"
-CFG.MODEL.ARCH = NBeats
-CFG.MODEL.PARAM = {
- "type": "generic",
- "input_size": CFG.DATASET_INPUT_LEN,
- "output_size": CFG.DATASET_OUTPUT_LEN,
- "layer_size": 512,
- "layers": 4,
- "stacks": 10
- }
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.001,
- "weight_decay":0,
- "eps":1.0e-8,
- "betas":(0.9, 0.95)
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM= {
- "milestones":[20, 40, 60, 80],
- "gamma":0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "./checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 128
-CFG.TRAIN.DATA.PREFETCH = True
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 4
-CFG.TRAIN.DATA.PIN_MEMORY = True
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 128
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 4
-CFG.VAL.DATA.PIN_MEMORY = True
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# evaluation
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 128
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 4
-CFG.TEST.DATA.PIN_MEMORY = True
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/NBeats/PEMS08_LTSF.py b/baselines/NBeats/PEMS08_LTSF.py
index d6334a83..f4e6595c 100644
--- a/baselines/NBeats/PEMS08_LTSF.py
+++ b/baselines/NBeats/PEMS08_LTSF.py
@@ -1,55 +1,107 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import NBeats
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "NBeats"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NBeats"
-CFG.MODEL.ARCH = NBeats
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NBeats
+NUM_NODES = 170
+MODEL_PARAM = {
"type": "generic",
- "input_size": CFG.DATASET_INPUT_LEN,
- "output_size": CFG.DATASET_OUTPUT_LEN,
+ "input_size": INPUT_LEN,
+ "output_size": OUTPUT_LEN,
"layer_size": 512,
"layers": 4,
"stacks": 10
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'NBeats model configuration'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
+CFG.TRAIN.OPTIM.PARAM = {
"lr":0.001,
"weight_decay":0,
"eps":1.0e-8,
@@ -61,52 +113,30 @@
"milestones":[20, 40, 60, 80],
"gamma":0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "./checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 128
-CFG.TRAIN.DATA.PREFETCH = True
+CFG.TRAIN.DATA.BATCH_SIZE = 32
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 4
-CFG.TRAIN.DATA.PIN_MEMORY = True
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 128
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 4
-CFG.VAL.DATA.PIN_MEMORY = True
+CFG.VAL.DATA.BATCH_SIZE = 64
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# evaluation
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 128
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 4
-CFG.TEST.DATA.PIN_MEMORY = True
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
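
As in the PEMS04 variant, this LTSF config pins INPUT_LEN and OUTPUT_LEN to 336 and takes everything else from `get_regular_settings`. Only the keys of that return value can be inferred from how the configs above consume it; a plausible shape is sketched below, with placeholder values rather than the library's actual defaults.

```python
regular_settings = {
    'INPUT_LEN': 12,                          # overridden to 336 above for the LTSF setting
    'OUTPUT_LEN': 12,                         # overridden to 336 above for the LTSF setting
    'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],  # chronological train/val/test split
    'NORM_EACH_CHANNEL': False,
    'RESCALE': True,
    'NULL_VAL': 0.0,
}
```
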
diff --git a/baselines/NBeats/Weather.py b/baselines/NBeats/Weather.py
index 09f4af20..2ff91985 100644
--- a/baselines/NBeats/Weather.py
+++ b/baselines/NBeats/Weather.py
@@ -1,54 +1,104 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import NBeats
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "NBeats"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Weather"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NBeats"
-CFG.MODEL.ARCH = NBeats
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Weather' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NBeats
+NUM_NODES = 8
+MODEL_PARAM = {
"type": "generic",
- "input_size": CFG.DATASET_INPUT_LEN,
- "output_size": CFG.DATASET_OUTPUT_LEN,
+ "input_size": INPUT_LEN,
+ "output_size": OUTPUT_LEN,
"layer_size": 512,
"layers": 4,
"stacks": 10
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'NBeats model configuration'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
+CFG.TRAIN.OPTIM.PARAM = {
"lr":0.001,
"weight_decay":0,
"eps":1.0e-8,
@@ -60,52 +110,30 @@
"milestones":[20, 40, 60, 80],
"gamma":0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "./checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 128
-CFG.TRAIN.DATA.PREFETCH = True
+CFG.TRAIN.DATA.BATCH_SIZE = 64
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 4
-CFG.TRAIN.DATA.PIN_MEMORY = True
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 128
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 4
-CFG.VAL.DATA.PIN_MEMORY = True
+CFG.VAL.DATA.BATCH_SIZE = 64
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# evaluation
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 128
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 4
-CFG.TEST.DATA.PIN_MEMORY = True
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
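
All of the regenerated configs report metrics at the steps listed in `CFG.EVAL.HORIZONS` in addition to the full window. A sketch of that convention follows, assuming 1-based horizon steps; the exact indexing inside the runner is not shown in this patch.

```python
import torch

def per_horizon_metrics(prediction: torch.Tensor, target: torch.Tensor,
                        horizons, metric):
    # prediction / target: [batch, output_len, num_nodes]
    results = {'overall': metric(prediction, target)}
    for h in horizons:                  # e.g. [12, 24, 48, 96, 192, 288, 336]
        results[f'horizon_{h}'] = metric(prediction[:, h - 1], target[:, h - 1])
    return results
```
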
diff --git a/baselines/NBeats/arch/nbeats.py b/baselines/NBeats/arch/nbeats.py
index 5676e88f..8c856423 100644
--- a/baselines/NBeats/arch/nbeats.py
+++ b/baselines/NBeats/arch/nbeats.py
@@ -92,7 +92,7 @@ def __init__(self, input_size: int, type: str, output_size: int, **kwargs):
seasonality_layers = kwargs["seasonality_layers"]
seasonality_layer_size = kwargs["seasonality_layer_size"]
num_of_harmonics = kwargs["num_of_harmonics"]
-
+
trend_block = NBeatsBlock(input_size=input_size,
theta_size=2 * (degree_of_polynomial + 1),
basis_function=TrendBasis(degree_of_polynomial=degree_of_polynomial,
@@ -115,9 +115,9 @@ def forward(self, history_data: t.Tensor, **kwargs) -> t.Tensor:
B, L, N, C = history_data.shape
history_data = history_data[..., [0]].transpose(1, 2) # [B, N, L, 1]
history_data = history_data.reshape(B*N, L, 1)
-
+
x = history_data.squeeze()
-
+
residuals = x.flip(dims=(1,))
forecast = x[:, -1:]
for i, block in enumerate(self.blocks):
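
The hunk above only strips trailing whitespace, but it touches the heart of N-BEATS: the doubly residual loop in `forward`. For reference, the mechanism it implements, reduced to a self-contained sketch (block internals and shapes simplified):

```python
import torch

def doubly_residual_forecast(blocks, x: torch.Tensor) -> torch.Tensor:
    # x: [batch, input_len]; each block maps residuals -> (backcast, block_forecast)
    residuals = x.flip(dims=(1,))   # newest observation first, as in the code above
    forecast = x[:, -1:]            # start from a naive last-value level
    for block in blocks:
        backcast, block_forecast = block(residuals)
        residuals = residuals - backcast       # each block explains part of the input...
        forecast = forecast + block_forecast   # ...and contributes an additive forecast term
    return forecast
```
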
diff --git a/baselines/NBeats/run.sh b/baselines/NBeats/run.sh
deleted file mode 100644
index 259cba60..00000000
--- a/baselines/NBeats/run.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/NBeats/METR-LA.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/PEMS-BAY.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/PEMS03.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/PEMS04.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/PEMS07.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/PEMS08.py --gpus '1'
-
-python experiments/train.py -c baselines/NBeats/ETTh1.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/ETTm1.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/Electricity.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/Weather.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/ExchangeRate.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/PEMS04_LTSF.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/PEMS08_LTSF.py --gpus '1'
diff --git a/baselines/NBeats_M4/M4.py b/baselines/NBeats_M4/M4.py
deleted file mode 100644
index f347957b..00000000
--- a/baselines/NBeats_M4/M4.py
+++ /dev/null
@@ -1,120 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.losses import masked_mae
-from basicts.data import M4ForecastingDataset
-from basicts.runners import M4ForecastingRunner
-
-from .arch import NBeats
-
-def get_cfg(seasonal_pattern):
- assert seasonal_pattern in ["Yearly", "Quarterly", "Monthly", "Weekly", "Daily", "Hourly"]
- prediction_len = {"Yearly": 6, "Quarterly": 8, "Monthly": 18, "Weekly": 13, "Daily": 14, "Hourly": 48}[seasonal_pattern]
- num_nodes = {"Yearly": 23000, "Quarterly": 24000, "Monthly": 48000, "Weekly": 359, "Daily": 4227, "Hourly": 414}[seasonal_pattern]
- history_size = 2
- history_len = history_size * prediction_len
-
- CFG = EasyDict()
-
- # ================= general ================= #
- CFG.DESCRIPTION = "Multi-layer perceptron model configuration "
- CFG.RUNNER = M4ForecastingRunner
- CFG.DATASET_CLS = M4ForecastingDataset
- CFG.DATASET_NAME = "M4_" + seasonal_pattern
- CFG.DATASET_INPUT_LEN = history_len
- CFG.DATASET_OUTPUT_LEN = prediction_len
- CFG.GPU_NUM = 1
-
- # ================= environment ================= #
- CFG.ENV = EasyDict()
- CFG.ENV.SEED = 1
- CFG.ENV.CUDNN = EasyDict()
- CFG.ENV.CUDNN.ENABLED = True
-
- # ================= model ================= #
- CFG.MODEL = EasyDict()
- CFG.MODEL.NAME = "NBeats"
- CFG.MODEL.ARCH = NBeats
- CFG.MODEL.PARAM = {
- "type": "generic",
- "input_size": CFG.DATASET_INPUT_LEN,
- "output_size": CFG.DATASET_OUTPUT_LEN,
- "layer_size": 512,
- "layers": 4,
- "stacks": 30
- }
- # CFG.MODEL.PARAM = {
- # "type": "interpretable",
- # "input_size": CFG.DATASET_INPUT_LEN,
- # "output_size": CFG.DATASET_OUTPUT_LEN,
- # "seasonality_layer_size": 2048,
- # "seasonality_blocks": 3,
- # "seasonality_layers": 4,
- # "trend_layer_size": 256,
- # "degree_of_polynomial": 2,
- # "trend_blocks": 3,
- # "trend_layers": 4,
- # "num_of_harmonics": 1
- # }
- CFG.MODEL.FORWARD_FEATURES = [0]
- CFG.MODEL.TARGET_FEATURES = [0]
-
- # ================= optim ================= #
- CFG.TRAIN = EasyDict()
- CFG.TRAIN.LOSS = masked_mae
- CFG.TRAIN.OPTIM = EasyDict()
- CFG.TRAIN.OPTIM.TYPE = "Adam"
- CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
- }
- CFG.TRAIN.LR_SCHEDULER = EasyDict()
- CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
- CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50, 80],
- "gamma": 0.5
- }
-
- # ================= train ================= #
- CFG.TRAIN.CLIP_GRAD_PARAM = {
- 'max_norm': 5.0
- }
- CFG.TRAIN.NUM_EPOCHS = 52
- CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
- )
- # train data
- CFG.TRAIN.DATA = EasyDict()
- # read data
- CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
- # dataloader args, optional
- CFG.TRAIN.DATA.BATCH_SIZE = 64
- CFG.TRAIN.DATA.PREFETCH = False
- CFG.TRAIN.DATA.SHUFFLE = True
- CFG.TRAIN.DATA.NUM_WORKERS = 2
- CFG.TRAIN.DATA.PIN_MEMORY = False
-
- # ================= test ================= #
- CFG.TEST = EasyDict()
- CFG.TEST.INTERVAL = 52
- # test data
- CFG.TEST.DATA = EasyDict()
- # read data
- CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
- # dataloader args, optional
- CFG.TEST.DATA.BATCH_SIZE = 64
- CFG.TEST.DATA.PREFETCH = False
- CFG.TEST.DATA.SHUFFLE = False
- CFG.TEST.DATA.NUM_WORKERS = 2
- CFG.TEST.DATA.PIN_MEMORY = False
-
- # ================= evaluate ================= #
- CFG.EVAL = EasyDict()
- CFG.EVAL.HORIZONS = []
- CFG.EVAL.SAVE_PATH = os.path.abspath(__file__ + "/..")
-
- return CFG
diff --git a/baselines/NBeats_M4/arch/__init__.py b/baselines/NBeats_M4/arch/__init__.py
deleted file mode 100644
index b2fe3c8e..00000000
--- a/baselines/NBeats_M4/arch/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .nbeats import NBeats
diff --git a/baselines/NBeats_M4/arch/nbeats.py b/baselines/NBeats_M4/arch/nbeats.py
deleted file mode 100644
index a2d328f5..00000000
--- a/baselines/NBeats_M4/arch/nbeats.py
+++ /dev/null
@@ -1,197 +0,0 @@
-# This source code is provided for the purposes of scientific reproducibility
-# under the following limited license from Element AI Inc. The code is an
-# implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis
-# expansion analysis for interpretable time series forecasting,
-# https://arxiv.org/abs/1905.10437). The copyright to the source code is
-# licensed under the Creative Commons - Attribution-NonCommercial 4.0
-# International license (CC BY-NC 4.0):
-# https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether
-# for the benefit of third parties or internally in production) requires an
-# explicit license. The subject-matter of the N-BEATS model and associated
-# materials are the property of Element AI Inc. and may be subject to patent
-# protection. No license to patents is granted hereunder (whether express or
-# implied). Copyright © 2020 Element AI Inc. All rights reserved.
-
-# Modified from:
-
-"""
-N-BEATS Model.
-"""
-from typing import Tuple
-
-import numpy as np
-import torch as t
-
-
-class NBeatsBlock(t.nn.Module):
- """
- N-BEATS block which takes a basis function as an argument.
- """
- def __init__(self,
- input_size,
- theta_size: int,
- basis_function: t.nn.Module,
- layers: int,
- layer_size: int):
- """
- N-BEATS block.
-
- :param input_size: Insample size.
- :param theta_size: Number of parameters for the basis function.
- :param basis_function: Basis function which takes the parameters and produces backcast and forecast.
- :param layers: Number of layers.
- :param layer_size: Layer size.
- """
- super().__init__()
- self.layers = t.nn.ModuleList([t.nn.Linear(in_features=input_size, out_features=layer_size)] +
- [t.nn.Linear(in_features=layer_size, out_features=layer_size)
- for _ in range(layers - 1)])
- self.basis_parameters = t.nn.Linear(in_features=layer_size, out_features=theta_size)
- self.basis_function = basis_function
-
- def forward(self, x: t.Tensor) -> Tuple[t.Tensor, t.Tensor]:
- block_input = x
- for layer in self.layers:
- block_input = t.relu(layer(block_input))
- basis_parameters = self.basis_parameters(block_input)
- return self.basis_function(basis_parameters)
-
-
-class NBeats(t.nn.Module):
- """
- Paper: N-BEATS: Neural basis expansion analysis for interpretable time series forecasting
- Link: https://arxiv.org/abs/1905.10437
- Official Code:
- https://github.com/ServiceNow/N-BEATS
- https://github.com/philipperemy/n-beats
- """
-
- def __init__(self, input_size: int, type: str, output_size: int, **kwargs):
- super().__init__()
- assert type in ["generic", "interpretable"], "Unknown type of N-Beats model"
- if type == "generic":
- # input_size: int, output_size: int,
- # stacks: int, layers: int, layer_size: int
- stacks = kwargs["stacks"]
- layers = kwargs["layers"]
- layer_size = kwargs["layer_size"]
- self.blocks = t.nn.ModuleList([NBeatsBlock(input_size=input_size,
- theta_size=input_size + output_size,
- basis_function=GenericBasis(backcast_size=input_size,
- forecast_size=output_size),
- layers=layers,
- layer_size=layer_size)
- for _ in range(stacks)])
- pass
- else:
- trend_blocks = kwargs["trend_blocks"]
- trend_layers = kwargs["trend_layers"]
- trend_layer_size = kwargs["trend_layer_size"]
- degree_of_polynomial = kwargs["degree_of_polynomial"]
- seasonality_blocks = kwargs["seasonality_blocks"]
- seasonality_layers = kwargs["seasonality_layers"]
- seasonality_layer_size = kwargs["seasonality_layer_size"]
- num_of_harmonics = kwargs["num_of_harmonics"]
-
- trend_block = NBeatsBlock(input_size=input_size,
- theta_size=2 * (degree_of_polynomial + 1),
- basis_function=TrendBasis(degree_of_polynomial=degree_of_polynomial,
- backcast_size=input_size,
- forecast_size=output_size),
- layers=trend_layers,
- layer_size=trend_layer_size)
- seasonality_block = NBeatsBlock(input_size=input_size,
- theta_size=4 * int(
- np.ceil(num_of_harmonics / 2 * output_size) - (num_of_harmonics - 1)),
- basis_function=SeasonalityBasis(harmonics=num_of_harmonics,
- backcast_size=input_size,
- forecast_size=output_size),
- layers=seasonality_layers,
- layer_size=seasonality_layer_size)
- self.blocks = t.nn.ModuleList(
- [trend_block for _ in range(trend_blocks)] + [seasonality_block for _ in range(seasonality_blocks)])
-
- def forward(self, history_data: t.Tensor, history_mask: t.Tensor, **kwargs) -> t.Tensor:
- x = history_data.squeeze()
- input_mask = history_mask.squeeze()
-
- residuals = x.flip(dims=(1,))
- input_mask = input_mask.flip(dims=(1,))
- forecast = x[:, -1:]
- for i, block in enumerate(self.blocks):
- backcast, block_forecast = block(residuals)
- residuals = (residuals - backcast) * input_mask
- forecast = forecast + block_forecast
- forecast = forecast.unsqueeze(-1).unsqueeze(-1)
- return forecast
-
-
-class GenericBasis(t.nn.Module):
- """
- Generic basis function.
- """
- def __init__(self, backcast_size: int, forecast_size: int):
- super().__init__()
- self.backcast_size = backcast_size
- self.forecast_size = forecast_size
-
- def forward(self, theta: t.Tensor):
- return theta[:, :self.backcast_size], theta[:, -self.forecast_size:]
-
-
-class TrendBasis(t.nn.Module):
- """
- Polynomial function to model trend.
- """
- def __init__(self, degree_of_polynomial: int, backcast_size: int, forecast_size: int):
- super().__init__()
- self.polynomial_size = degree_of_polynomial + 1 # degree of polynomial with constant term
- self.backcast_time = t.nn.Parameter(
- t.tensor(np.concatenate([np.power(np.arange(backcast_size, dtype=np.float) / backcast_size, i)[None, :]
- for i in range(self.polynomial_size)]), dtype=t.float32),
- requires_grad=False)
- self.forecast_time = t.nn.Parameter(
- t.tensor(np.concatenate([np.power(np.arange(forecast_size, dtype=np.float) / forecast_size, i)[None, :]
- for i in range(self.polynomial_size)]), dtype=t.float32), requires_grad=False)
-
- def forward(self, theta: t.Tensor):
- backcast = t.einsum('bp,pt->bt', theta[:, self.polynomial_size:], self.backcast_time)
- forecast = t.einsum('bp,pt->bt', theta[:, :self.polynomial_size], self.forecast_time)
- return backcast, forecast
-
-
-class SeasonalityBasis(t.nn.Module):
- """
- Harmonic functions to model seasonality.
- """
- def __init__(self, harmonics: int, backcast_size: int, forecast_size: int):
- super().__init__()
- self.frequency = np.append(np.zeros(1, dtype=np.float32),
- np.arange(harmonics, harmonics / 2 * forecast_size,
- dtype=np.float32) / harmonics)[None, :]
- backcast_grid = -2 * np.pi * (
- np.arange(backcast_size, dtype=np.float32)[:, None] / forecast_size) * self.frequency
- forecast_grid = 2 * np.pi * (
- np.arange(forecast_size, dtype=np.float32)[:, None] / forecast_size) * self.frequency
- self.backcast_cos_template = t.nn.Parameter(t.tensor(np.transpose(np.cos(backcast_grid)), dtype=t.float32),
- requires_grad=False)
- self.backcast_sin_template = t.nn.Parameter(t.tensor(np.transpose(np.sin(backcast_grid)), dtype=t.float32),
- requires_grad=False)
- self.forecast_cos_template = t.nn.Parameter(t.tensor(np.transpose(np.cos(forecast_grid)), dtype=t.float32),
- requires_grad=False)
- self.forecast_sin_template = t.nn.Parameter(t.tensor(np.transpose(np.sin(forecast_grid)), dtype=t.float32),
- requires_grad=False)
-
- def forward(self, theta: t.Tensor):
- params_per_harmonic = theta.shape[1] // 4
- backcast_harmonics_cos = t.einsum('bp,pt->bt', theta[:, 2 * params_per_harmonic:3 * params_per_harmonic],
- self.backcast_cos_template)
- backcast_harmonics_sin = t.einsum('bp,pt->bt', theta[:, 3 * params_per_harmonic:], self.backcast_sin_template)
- backcast = backcast_harmonics_sin + backcast_harmonics_cos
- forecast_harmonics_cos = t.einsum('bp,pt->bt',
- theta[:, :params_per_harmonic], self.forecast_cos_template)
- forecast_harmonics_sin = t.einsum('bp,pt->bt', theta[:, params_per_harmonic:2 * params_per_harmonic],
- self.forecast_sin_template)
- forecast = forecast_harmonics_sin + forecast_harmonics_cos
-
- return backcast, forecast
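
A side note on the deleted copy: `TrendBasis` above builds its time grids with `np.float`, an alias deprecated in NumPy 1.20 and removed in 1.24, so this file would fail on current NumPy. An equivalent construction with explicit dtypes, for anyone resurrecting the interpretable variant:

```python
import numpy as np

def polynomial_time_grid(size: int, degree_of_polynomial: int) -> np.ndarray:
    # Rows are t^0, t^1, ..., t^degree on the normalized grid t = i / size,
    # matching the concatenate-of-powers construction in the deleted TrendBasis.
    steps = np.arange(size, dtype=np.float32) / size
    return np.stack([steps ** i for i in range(degree_of_polynomial + 1)])  # [degree + 1, size]
```
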
diff --git a/baselines/NHiTS/ETTm2.py b/baselines/NHiTS/ETTm2.py
index 0b212118..446d9565 100644
--- a/baselines/NHiTS/ETTm2.py
+++ b/baselines/NHiTS/ETTm2.py
@@ -1,41 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import NHiTS
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "NHiTS Config "
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 1680
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NHiTS"
-CFG.MODEL.ARCH = NHiTS
-CFG.MODEL.PARAM = {
- "context_length": CFG.DATASET_INPUT_LEN,
- "prediction_length": CFG.DATASET_OUTPUT_LEN,
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NHiTS
+NUM_NODES = 7
+MODEL_PARAM = {
+ "context_length": INPUT_LEN,
+ "prediction_length": OUTPUT_LEN,
"output_size": 1,
"n_blocks": [1, 1, 1],
"n_layers": [2, 2, 2, 2, 2, 2, 2, 2],
@@ -43,16 +34,74 @@
"pooling_sizes": [8, 8, 8],
"downsample_frequencies": [24, 12, 1]
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'NHiTS model configuration'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.001,
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr":0.0005,
"weight_decay":0,
"eps":1.0e-8,
"betas":(0.9, 0.95)
@@ -63,56 +112,30 @@
"milestones":[20, 40, 60, 80],
"gamma":0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "./checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 128
-CFG.TRAIN.DATA.PREFETCH = True
+CFG.TRAIN.DATA.BATCH_SIZE = 32
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 4
-CFG.TRAIN.DATA.PIN_MEMORY = True
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 128
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 4
-CFG.VAL.DATA.PIN_MEMORY = True
+CFG.VAL.DATA.BATCH_SIZE = 64
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# evaluation
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = os.path.join("./datasets", CFG.DATASET_NAME)
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 128
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 4
-CFG.TEST.DATA.PIN_MEMORY = True
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
-# ================= evaluate ================= #
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [6, 12, 24]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
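
The NULL_VAL threaded through `CFG.METRICS.NULL_VAL` here and in the other configs controls masking inside the metric functions. A minimal sketch of that convention; the actual `basicts.metrics.masked_mae` may differ in details such as NaN handling:

```python
import torch

def masked_mae_sketch(prediction: torch.Tensor, target: torch.Tensor,
                      null_val: float = 0.0) -> torch.Tensor:
    mask = (target != null_val).float()
    mask = mask / torch.clamp(mask.mean(), min=1e-5)  # renormalize so valid cells keep unit weight
    return (torch.abs(prediction - target) * mask).mean()
```
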
diff --git a/baselines/NHiTS/arch/nhits.py b/baselines/NHiTS/arch/nhits.py
index 5375d02c..3b455f0b 100644
--- a/baselines/NHiTS/arch/nhits.py
+++ b/baselines/NHiTS/arch/nhits.py
@@ -35,7 +35,7 @@ def __init__(self, context_length: int, prediction_length: int, output_size: int
dropout: float=0.0, activation: str="ReLU", initialization: str="lecun_normal",
batch_normalization: bool=False, shared_weights: bool=False, naive_level: bool=True):
super().__init__()
-
+
self.prediction_length = prediction_length
self.context_length = context_length
self.output_size = output_size
diff --git a/baselines/NLinear/ETTh1.py b/baselines/NLinear/ETTh1.py
index 75f96b7c..8c8b3874 100644
--- a/baselines/NLinear/ETTh1.py
+++ b/baselines/NLinear/ETTh1.py
@@ -1,108 +1,134 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import NLinear
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NLinear
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "pred_len": OUTPUT_LEN,
+ "enc_in": 7
+}
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "NLinear model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'NLinear model configuration'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
-# ================= model ================= #
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NLinear"
-CFG.MODEL.ARCH = NLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 7
-}
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0003,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
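
Note on the new SCALER block above: the config names only the class and its constructor arguments, so for orientation here is a minimal sketch of the normalize/denormalize contract a z-score scaler with this PARAM signature would satisfy. The method names and the fit-on-train-split behavior are assumptions read off the config keys, not the exact basicts implementation.

import numpy as np

class ZScoreScalerSketch:
    """Hypothetical stand-in for basicts.scaler.ZScoreScaler."""
    def __init__(self, dataset_name, train_ratio, norm_each_channel, rescale):
        # dataset_name would locate the raw data on disk; rescale toggles
        # whether predictions are denormalized before metrics are computed.
        self.train_ratio = train_ratio
        self.norm_each_channel = norm_each_channel
        self.rescale = rescale
        self.mean = self.std = None

    def fit(self, data):
        # Fit statistics on the training split only, to avoid leakage.
        train = data[: int(len(data) * self.train_ratio)]
        axis = 0 if self.norm_each_channel else None  # per-channel vs. global
        self.mean, self.std = train.mean(axis=axis), train.std(axis=axis)

    def transform(self, data):
        return (data - self.mean) / self.std

    def inverse_transform(self, data):
        return data * self.std + self.mean

series = np.random.rand(1000, 7)                  # ETT-style [time, channels]
scaler = ZScoreScalerSketch('ETTh1', 0.6, True, False)
scaler.fit(series)
normed = scaler.transform(series)
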
diff --git a/baselines/NLinear/ETTh2.py b/baselines/NLinear/ETTh2.py
index ce5dfd1b..3ba42e65 100644
--- a/baselines/NLinear/ETTh2.py
+++ b/baselines/NLinear/ETTh2.py
@@ -1,108 +1,134 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import NLinear
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NLinear
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "pred_len": OUTPUT_LEN,
+ "enc_in": 7
+}
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "NLinear model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
-# ================= model ================= #
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NLinear"
-CFG.MODEL.ARCH = NLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 7
-}
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0003,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
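
All of these configs read the same six keys from get_regular_settings(DATA_NAME), so its return value behaves like the dictionary sketched below. The 336/336 lengths match the pre-refactor DATASET_INPUT_LEN/DATASET_OUTPUT_LEN; the split ratio and the remaining values are illustrative assumptions only.

def get_regular_settings_sketch(dataset_name: str) -> dict:
    # Hypothetical stand-in for basicts.utils.get_regular_settings; only the
    # key set is implied by the configs above, the values are assumptions.
    regular = {
        'ETTh2': {
            'INPUT_LEN': 336,                          # was DATASET_INPUT_LEN
            'OUTPUT_LEN': 336,                         # was DATASET_OUTPUT_LEN
            'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],   # assumed split
            'NORM_EACH_CHANNEL': True,                 # assumed
            'RESCALE': False,                          # assumed
            'NULL_VAL': float('nan'),                  # assumed
        },
    }
    return regular[dataset_name]

INPUT_LEN = get_regular_settings_sketch('ETTh2')['INPUT_LEN']  # 336
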
diff --git a/baselines/NLinear/ETTm1.py b/baselines/NLinear/ETTm1.py
index 4d70dc31..ac2327bd 100644
--- a/baselines/NLinear/ETTm1.py
+++ b/baselines/NLinear/ETTm1.py
@@ -1,108 +1,134 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import NLinear
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NLinear
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "pred_len": OUTPUT_LEN,
+ "enc_in": 7
+}
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "NLinear model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
-# ================= model ================= #
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NLinear"
-CFG.MODEL.ARCH = NLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 7
-}
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0003,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
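
The inline comment "'mode' is automatically set by the runner" describes the pattern sketched below: CFG.DATASET.TYPE is a class, CFG.DATASET.PARAM its keyword arguments, and the runner fills in the split itself when building each loader. The helper name is hypothetical; only the 'mode' injection is implied by the config.

def build_dataset(cfg, mode):
    # Hypothetical helper mirroring how a runner could consume the new
    # CFG.DATASET.TYPE / CFG.DATASET.PARAM pair.
    assert mode in ('train', 'valid', 'test')
    params = dict(cfg.DATASET.PARAM)   # copy so the shared config stays untouched
    params['mode'] = mode              # the runner, not the user, sets the split
    return cfg.DATASET.TYPE(**params)

# e.g. train_set = build_dataset(CFG, 'train')
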
diff --git a/baselines/NLinear/ETTm2.py b/baselines/NLinear/ETTm2.py
index 6f8d9707..afa88731 100644
--- a/baselines/NLinear/ETTm2.py
+++ b/baselines/NLinear/ETTm2.py
@@ -1,108 +1,134 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import NLinear
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NLinear
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "pred_len": OUTPUT_LEN,
+ "enc_in": 7
+}
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "NLinear model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
-# ================= model ================= #
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NLinear"
-CFG.MODEL.ARCH = NLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 7
-}
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0003,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
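
CFG.METRICS.NULL_VAL is what makes the masked_* metrics "masked": entries equal to the null value (or NaN) are excluded before averaging. Below is a minimal masked-MAE sketch in PyTorch following the common recipe; the basicts implementation may differ in detail.

import math
import torch

def masked_mae_sketch(prediction, target, null_val=math.nan):
    # Mask out missing entries so they neither contribute error nor count.
    if math.isnan(null_val):
        mask = ~torch.isnan(target)
    else:
        mask = target != null_val
    mask = mask.float()
    mask = mask / mask.mean()                 # re-weight the surviving entries
    loss = torch.abs(prediction - target) * mask
    return torch.nan_to_num(loss).mean()      # guard the all-masked 0/0 case
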
diff --git a/baselines/NLinear/Electricity.py b/baselines/NLinear/Electricity.py
index 46a0d74e..15533710 100644
--- a/baselines/NLinear/Electricity.py
+++ b/baselines/NLinear/Electricity.py
@@ -1,108 +1,134 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import NLinear
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Electricity' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NLinear
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "pred_len": OUTPUT_LEN,
+ "enc_in": 321
+}
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "NLinear model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Electricity"
-CFG.DATASET_TYPE = "Electricity"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
-# ================= model ================= #
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NLinear"
-CFG.MODEL.ARCH = NLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 321
-}
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0003,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
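
The new CKPT_SAVE_DIR encodes dataset, epoch budget, and both sequence lengths, so runs with different horizons no longer collide in one directory. Worked example for this Electricity config, assuming the regular settings return the same 336/336 lengths used before the refactor:

import os

ckpt_dir = os.path.join(
    'checkpoints',
    'NLinear',                                       # MODEL_ARCH.__name__
    '_'.join(['Electricity', str(100), str(336), str(336)])
)
print(ckpt_dir)  # checkpoints/NLinear/Electricity_100_336_336 (POSIX separators)
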
diff --git a/baselines/NLinear/ExchangeRate.py b/baselines/NLinear/ExchangeRate.py
index e1180fa3..863dc10e 100644
--- a/baselines/NLinear/ExchangeRate.py
+++ b/baselines/NLinear/ExchangeRate.py
@@ -1,108 +1,134 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import NLinear
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ExchangeRate' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NLinear
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "pred_len": OUTPUT_LEN,
+ "enc_in": 8
+}
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "NLinear model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ExchangeRate"
-CFG.DATASET_TYPE = "Exchange Rate"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
-# ================= model ================= #
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NLinear"
-CFG.MODEL.ARCH = NLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 8
-}
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0003,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
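
FORWARD_FEATURES and TARGET_FEATURES pick channels from the last dimension of the data tensor: the first controls what the model sees, the second what the loss and metrics are computed on. A small sketch, assuming the usual [batch, time, series, features] layout:

import torch

def select_features(data, feature_indices):
    # data: [batch, time, series, features]; keep only the listed channels.
    return data[..., feature_indices]

x = torch.randn(64, 336, 8, 3)          # value channel plus two time encodings
model_in = select_features(x, [0])      # FORWARD_FEATURES = [0]
print(model_in.shape)                   # torch.Size([64, 336, 8, 1])
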
diff --git a/baselines/NLinear/METR-LA.py b/baselines/NLinear/METR-LA.py
deleted file mode 100644
index 02b4945c..00000000
--- a/baselines/NLinear/METR-LA.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import NLinear
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "NLinear model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic Speed"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NLinear"
-CFG.MODEL.ARCH = NLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 207
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/NLinear/PEMS-BAY.py b/baselines/NLinear/PEMS-BAY.py
deleted file mode 100644
index 4e14c765..00000000
--- a/baselines/NLinear/PEMS-BAY.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import NLinear
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "NLinear model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic Speed"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NLinear"
-CFG.MODEL.ARCH = NLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 325
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/NLinear/PEMS04.py b/baselines/NLinear/PEMS04.py
deleted file mode 100644
index 2107ab66..00000000
--- a/baselines/NLinear/PEMS04.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import NLinear
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "NLinear model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NLinear"
-CFG.MODEL.ARCH = NLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 307
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/NLinear/PEMS04_LTSF.py b/baselines/NLinear/PEMS04_LTSF.py
new file mode 100644
index 00000000..8a3a91c8
--- /dev/null
+++ b/baselines/NLinear/PEMS04_LTSF.py
@@ -0,0 +1,137 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import NLinear
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 336 # Length of input sequence (LTSF setting; overrides the regular value above)
+OUTPUT_LEN = 336 # Length of output sequence (LTSF setting; overrides the regular value above)
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NLinear
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "pred_len": OUTPUT_LEN,
+ "enc_in": 307
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.0003,
+ "weight_decay": 0.0001,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
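
CFG.TRAIN.CLIP_GRAD_PARAM is new in these configs. The natural reading is that the runner forwards it as keyword arguments to PyTorch's norm-based gradient clipping between backward() and the optimizer step, roughly as below; the forwarding itself is an assumption, while clip_grad_norm_ is the standard torch API.

import torch

model = torch.nn.Linear(336, 336)            # stand-in for the NLinear module
clip_grad_param = {'max_norm': 5.0}          # as in CFG.TRAIN.CLIP_GRAD_PARAM

loss = model(torch.randn(4, 336)).abs().mean()
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), **clip_grad_param)
# ... optimizer.step() would follow in the training loop
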
diff --git a/baselines/NLinear/PEMS08.py b/baselines/NLinear/PEMS08.py
deleted file mode 100644
index 9968e6aa..00000000
--- a/baselines/NLinear/PEMS08.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import NLinear
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "NLinear model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NLinear"
-CFG.MODEL.ARCH = NLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 170
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/NLinear/PEMS08_LTSF.py b/baselines/NLinear/PEMS08_LTSF.py
new file mode 100644
index 00000000..5286adba
--- /dev/null
+++ b/baselines/NLinear/PEMS08_LTSF.py
@@ -0,0 +1,137 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import NLinear
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 336 # Length of input sequence (LTSF setting; overrides the regular value above)
+OUTPUT_LEN = 336 # Length of output sequence (LTSF setting; overrides the regular value above)
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NLinear
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "pred_len": OUTPUT_LEN,
+ "enc_in": 170
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.0003,
+ "weight_decay": 0.0001,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
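
For reference, a sketch of the dictionary shape that get_regular_settings(DATA_NAME) is expected to return, inferred from the keys this config reads; the concrete values below are illustrative placeholders, not the actual PEMS08 defaults:

# Hypothetical return shape of get_regular_settings('PEMS08'); values are placeholders.
regular_settings = {
    'INPUT_LEN': 12,                          # default (short-term) input length
    'OUTPUT_LEN': 12,                         # default (short-term) output length
    'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],  # chronological split
    'NORM_EACH_CHANNEL': False,
    'RESCALE': True,
    'NULL_VAL': 0.0,
}

# The LTSF variant keeps these defaults but overrides both sequence lengths,
# exactly as done in the config above:
INPUT_LEN, OUTPUT_LEN = 336, 336
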
diff --git a/baselines/NLinear/Weather.py b/baselines/NLinear/Weather.py
index 8e661bac..6a4b7cb3 100644
--- a/baselines/NLinear/Weather.py
+++ b/baselines/NLinear/Weather.py
@@ -1,108 +1,134 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.metrics import masked_mse, masked_mae
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import NLinear
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Weather' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NLinear
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "pred_len": OUTPUT_LEN,
+ "enc_in": 21
+}
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "NLinear model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'NLinear model configuration for Weather'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Weather"
-CFG.DATASET_TYPE = "Weather"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
-# ================= model ================= #
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "NLinear"
-CFG.MODEL.ARCH = NLinear
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "pred_len": CFG.DATASET_OUTPUT_LEN,
- "individual": False,
- "enc_in": 21
-}
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0003,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
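
A minimal sketch of the z-score scaling that ZScoreScaler presumably performs, assuming it fits mean/std on the leading train_ratio portion of the series and inverts the transform at evaluation time when 'rescale' is set (not the library's actual implementation):

import numpy as np

def fit_zscore(data: np.ndarray, train_ratio: float, norm_each_channel: bool):
    # Fit statistics on the chronological training prefix only.
    train = data[: int(len(data) * train_ratio)]
    axis = 0 if norm_each_channel else None  # per-channel vs. global statistics
    return train.mean(axis=axis), train.std(axis=axis)

data = np.random.rand(1000, 21)              # (time, channel), e.g. Weather's 21 channels
mean, std = fit_zscore(data, train_ratio=0.7, norm_each_channel=True)  # placeholder ratio
normed = (data - mean) / std                 # transform
restored = normed * std + mean               # inverse transform ('rescale')
assert np.allclose(restored, data)
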
diff --git a/baselines/NLinear/run.sh b/baselines/NLinear/run.sh
deleted file mode 100644
index b291051c..00000000
--- a/baselines/NLinear/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/NLinear/ETTh1.py --gpus '0'
-python experiments/train.py -c baselines/NLinear/ETTh2.py --gpus '0'
-python experiments/train.py -c baselines/NLinear/ETTm1.py --gpus '0'
-python experiments/train.py -c baselines/NLinear/ETTm2.py --gpus '0'
-python experiments/train.py -c baselines/NLinear/Electricity.py --gpus '0'
-python experiments/train.py -c baselines/NLinear/ExchangeRate.py --gpus '0'
-python experiments/train.py -c baselines/NLinear/Weather.py --gpus '0'
-python experiments/train.py -c baselines/NLinear/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/NLinear/PEMS08.py --gpus '0'
diff --git a/baselines/PatchTST/ETTh1.py b/baselines/PatchTST/ETTh1.py
index 2f799111..d16fccfd 100644
--- a/baselines/PatchTST/ETTh1.py
+++ b/baselines/PatchTST/ETTh1.py
@@ -1,44 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import PatchTST
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "PatchTST model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "PatchTST"
-CFG.MODEL.ARCH = PatchTST
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = PatchTST
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+ "seq_len": INPUT_LEN, # input sequence length
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"e_layers": 3, # num of encoder layers
"n_heads": 4,
"d_model": 16,
@@ -55,75 +44,108 @@
"subtract_last": 0, # 0: subtract mean; 1: subtract last
"decomposition": 0, # decomposition; True 1 False 0
"kernel_size": 25, # decomposition-kernel
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'PatchTST model configuration for ETTh1'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
- "weight_decay": 0.0005,
+ "weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
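
The metric functions presumably exclude entries equal to NULL_VAL (or NaN) from the average, which is why the config threads NULL_VAL into CFG.METRICS.NULL_VAL. A sketch of that masking, not the library's actual implementation:

import math
import torch

def masked_mae_sketch(pred: torch.Tensor, target: torch.Tensor, null_val: float) -> torch.Tensor:
    # A NaN null_val means "mask NaNs"; otherwise mask exact matches of null_val.
    mask = ~torch.isnan(target) if math.isnan(null_val) else target != null_val
    return (pred - target).abs()[mask].mean()

pred = torch.tensor([1.0, 2.0, 3.0])
target = torch.tensor([1.5, float('nan'), 3.0])
print(masked_mae_sketch(pred, target, float('nan')))  # 0.25, averaged over the 2 observed entries
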
diff --git a/baselines/PatchTST/ETTh2.py b/baselines/PatchTST/ETTh2.py
index 49ab3975..1113541e 100644
--- a/baselines/PatchTST/ETTh2.py
+++ b/baselines/PatchTST/ETTh2.py
@@ -1,44 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import PatchTST
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "PatchTST model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "PatchTST"
-CFG.MODEL.ARCH = PatchTST
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = PatchTST
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+ "seq_len": INPUT_LEN, # input sequence length
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"e_layers": 3, # num of encoder layers
"n_heads": 16,
"d_model": 128,
@@ -55,75 +44,108 @@
"subtract_last": 0, # 0: subtract mean; 1: subtract last
"decomposition": 0, # decomposition; True 1 False 0
"kernel_size": 25, # decomposition-kernel
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'PatchTST model configuration for ETTh2'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
- "weight_decay": 0.0005,
+ "weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
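
With Adam at lr=0.0002 and MultiStepLR(milestones=[1, 25, 50], gamma=0.5), the learning rate halves after epochs 1, 25 and 50. A short check using the standard PyTorch scheduler:

import torch

model = torch.nn.Linear(4, 1)
opt = torch.optim.Adam(model.parameters(), lr=0.0002, weight_decay=0.0001)
sched = torch.optim.lr_scheduler.MultiStepLR(opt, milestones=[1, 25, 50], gamma=0.5)

print(sched.get_last_lr())  # [0.0002] initially
for epoch in range(60):
    opt.step()              # one (empty) training epoch stands in here
    sched.step()
    if epoch in (0, 24, 49):
        print(epoch, sched.get_last_lr())  # halves to 1e-4, 5e-5, 2.5e-5 at the milestones
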
diff --git a/baselines/PatchTST/ETTm1.py b/baselines/PatchTST/ETTm1.py
index eb6d60b3..acce10ed 100644
--- a/baselines/PatchTST/ETTm1.py
+++ b/baselines/PatchTST/ETTm1.py
@@ -1,44 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import PatchTST
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "PatchTST model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "PatchTST"
-CFG.MODEL.ARCH = PatchTST
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = PatchTST
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+ "seq_len": INPUT_LEN, # input sequence length
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"e_layers": 3, # num of encoder layers
"n_heads": 16,
"d_model": 128,
@@ -55,75 +44,108 @@
"subtract_last": 0, # 0: subtract mean; 1: subtract last
"decomposition": 0, # decomposition; True 1 False 0
"kernel_size": 25, # decomposition-kernel
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'PatchTST model configuration for ETTm1'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
- "weight_decay": 0.0005,
+ "weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
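
The checkpoint directory now encodes model, dataset, epoch budget and both sequence lengths, so runs with different settings no longer collide. A quick illustration (the 336/336 lengths are placeholders; the real values come from get_regular_settings('ETTm1') at import time):

import os

DATA_NAME, NUM_EPOCHS = 'ETTm1', 100
INPUT_LEN, OUTPUT_LEN = 336, 336  # placeholders for the regular-settings values

ckpt_dir = os.path.join(
    'checkpoints',
    'PatchTST',                   # MODEL_ARCH.__name__
    '_'.join([DATA_NAME, str(NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
)
print(ckpt_dir)                   # checkpoints/PatchTST/ETTm1_100_336_336
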
diff --git a/baselines/PatchTST/ETTm2.py b/baselines/PatchTST/ETTm2.py
index 30bd3c73..4befeef2 100644
--- a/baselines/PatchTST/ETTm2.py
+++ b/baselines/PatchTST/ETTm2.py
@@ -1,50 +1,39 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import PatchTST
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "PatchTST model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 720
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "PatchTST"
-CFG.MODEL.ARCH = PatchTST
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = PatchTST
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+ "seq_len": INPUT_LEN, # input sequence length
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"e_layers": 3, # num of encoder layers
- "n_heads": 4,
- "d_model": 16,
- "d_ff": 128,
- "dropout": 0.3,
- "fc_dropout": 0.3,
+ "n_heads": 16,
+ "d_model": 128,
+ "d_ff": 256,
+ "dropout": 0.2,
+ "fc_dropout": 0.2,
"head_dropout": 0.0,
"patch_len": 16,
"stride": 8,
@@ -55,75 +44,108 @@
"subtract_last": 0, # 0: subtract mean; 1: subtract last
"decomposition": 0, # decomposition; True 1 False 0
"kernel_size": 25, # decomposition-kernel
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'PatchTST model configuration for ETTm2'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
- "weight_decay": 0.0005,
+ "weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25],
+ "milestones": [1, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
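
Assuming the standard PatchTST tokenization (unfold the series into overlapping patches, with 'end' padding appending stride steps and thus one extra patch), the per-channel token count can be sanity-checked from the hyperparameters above:

def num_patches(seq_len: int, patch_len: int, stride: int, padding_patch: str = 'end') -> int:
    n = (seq_len - patch_len) // stride + 1
    if padding_patch == 'end':  # pad the series end by `stride`, adding one patch
        n += 1
    return n

# patch_len=16, stride=8 as configured above; seq_len assumed 336 under the LTSF protocol
print(num_patches(336, patch_len=16, stride=8))  # 42 tokens per channel
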
diff --git a/baselines/PatchTST/Electricity.py b/baselines/PatchTST/Electricity.py
index a80d8bf0..280ab228 100644
--- a/baselines/PatchTST/Electricity.py
+++ b/baselines/PatchTST/Electricity.py
@@ -1,44 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import PatchTST
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "PatchTST model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Electricity"
-CFG.DATASET_TYPE = "Electricity Consumption"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "PatchTST"
-CFG.MODEL.ARCH = PatchTST
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Electricity' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = PatchTST
NUM_NODES = 321
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+ "seq_len": INPUT_LEN, # input sequence length
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"e_layers": 3, # num of encoder layers
"n_heads": 16,
"d_model": 128,
@@ -55,75 +44,108 @@
"subtract_last": 0, # 0: subtract mean; 1: subtract last
"decomposition": 0, # decomposition; True 1 False 0
"kernel_size": 25, # decomposition-kernel
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'PatchTST model configuration for Electricity'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
- "weight_decay": 0.0005,
+ "weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 16
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = False # Whether to use GPU for evaluation. Default: True
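
FORWARD_FEATURES and TARGET_FEATURES presumably index the last (feature) dimension of the data tensors, so [0] feeds the model only the raw signal and restricts the loss to it, dropping the temporal covariates the old inline comment listed. A sketch of that selection:

import torch

FORWARD_FEATURES = [0]  # raw signal only
TARGET_FEATURES = [0]

# [batch, time, node, feature], feature order per the old comment:
# [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
history = torch.randn(16, 336, 321, 5)
future = torch.randn(16, 336, 321, 5)

model_input = history[..., FORWARD_FEATURES]  # -> [16, 336, 321, 1]
loss_target = future[..., TARGET_FEATURES]    # -> [16, 336, 321, 1]
print(model_input.shape, loss_target.shape)
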
diff --git a/baselines/PatchTST/ExchangeRate.py b/baselines/PatchTST/ExchangeRate.py
index e6b11725..f291b082 100644
--- a/baselines/PatchTST/ExchangeRate.py
+++ b/baselines/PatchTST/ExchangeRate.py
@@ -1,44 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import PatchTST
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "PatchTST model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ExchangeRate"
-CFG.DATASET_TYPE = "Exchange Rate"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "PatchTST"
-CFG.MODEL.ARCH = PatchTST
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ExchangeRate' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = PatchTST
NUM_NODES = 8
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+ "seq_len": INPUT_LEN, # input sequence length
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"e_layers": 3, # num of encoder layers
"n_heads": 4,
"d_model": 16,
@@ -55,75 +44,108 @@
"subtract_last": 0, # 0: subtract mean; 1: subtract last
"decomposition": 0, # decomposition; True 1 False 0
"kernel_size": 25, # decomposition-kernel
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'PatchTST model configuration for ExchangeRate'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
- "weight_decay": 0.0005,
+ "weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
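
EVAL.HORIZONS presumably reports metrics at selected prediction steps in addition to the overall average; with 1-indexed horizons, step h corresponds to index h - 1 on the time axis. A sketch of that slicing:

import torch

HORIZONS = [12, 24, 48, 96, 192, 288, 336]

pred = torch.randn(64, 336, 8, 1)  # [batch, time, node, feature]; 8 = ExchangeRate series
real = torch.randn(64, 336, 8, 1)

for h in HORIZONS:
    mae_h = (pred[:, h - 1] - real[:, h - 1]).abs().mean()
    print(f'horizon {h:3d}: MAE {mae_h:.4f}')
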
diff --git a/baselines/PatchTST/M4.py b/baselines/PatchTST/M4.py
deleted file mode 100644
index edf115a9..00000000
--- a/baselines/PatchTST/M4.py
+++ /dev/null
@@ -1,119 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.losses import masked_mae
-from basicts.data import M4ForecastingDataset
-from basicts.runners import M4ForecastingRunner
-
-from .arch import PatchTST
-
-def get_cfg(seasonal_pattern):
- assert seasonal_pattern in ["Yearly", "Quarterly", "Monthly", "Weekly", "Daily", "Hourly"]
- prediction_len = {"Yearly": 6, "Quarterly": 8, "Monthly": 18, "Weekly": 13, "Daily": 14, "Hourly": 48}[seasonal_pattern]
- history_size = 2
- history_len = history_size * prediction_len
-
- CFG = EasyDict()
-
- # ================= general ================= #
- CFG.DESCRIPTION = "Multi-layer perceptron model configuration "
- CFG.RUNNER = M4ForecastingRunner
- CFG.DATASET_CLS = M4ForecastingDataset
- CFG.DATASET_NAME = "M4_" + seasonal_pattern
- CFG.DATASET_INPUT_LEN = history_len
- CFG.DATASET_OUTPUT_LEN = prediction_len
- CFG.GPU_NUM = 1
-
- # ================= environment ================= #
- CFG.ENV = EasyDict()
- CFG.ENV.SEED = 1
- CFG.ENV.CUDNN = EasyDict()
- CFG.ENV.CUDNN.ENABLED = True
-
- # ================= model ================= #
- CFG.MODEL = EasyDict()
- CFG.MODEL.NAME = "PatchTST"
- CFG.MODEL.ARCH = PatchTST
- CFG.MODEL.PARAM = {
- "enc_in": 1, # num nodes
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
- "e_layers": 3, # num of encoder layers
- "n_heads": 4,
- "d_model": 16,
- "d_ff": 128,
- "dropout": 0.3,
- "fc_dropout": 0.3,
- "head_dropout": 0.0,
- "patch_len": 2,
- "stride": 1,
- "individual": 0, # individual head; True 1 False 0
- "padding_patch": "end", # None: None; end: padding on the end
- "revin": 1, # RevIN; True 1 False 0
- "affine": 0, # RevIN-affine; True 1 False 0
- "subtract_last": 0, # 0: subtract mean; 1: subtract last
- "decomposition": 0, # decomposition; True 1 False 0
- "kernel_size": 2, # decomposition-kernel
- }
- CFG.MODEL.FORWARD_FEATURES = [0]
- CFG.MODEL.TARGET_FEATURES = [0]
-
- # ================= optim ================= #
- CFG.TRAIN = EasyDict()
- CFG.TRAIN.LOSS = masked_mae
- CFG.TRAIN.OPTIM = EasyDict()
- CFG.TRAIN.OPTIM.TYPE = "Adam"
- CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
- }
- CFG.TRAIN.LR_SCHEDULER = EasyDict()
- CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
- CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50, 80],
- "gamma": 0.5
- }
-
- # ================= train ================= #
- CFG.TRAIN.CLIP_GRAD_PARAM = {
- 'max_norm': 5.0
- }
- CFG.TRAIN.NUM_EPOCHS = 100
- CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
- )
- # train data
- CFG.TRAIN.DATA = EasyDict()
- # read data
- CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
- # dataloader args, optional
- CFG.TRAIN.DATA.BATCH_SIZE = 64
- CFG.TRAIN.DATA.PREFETCH = False
- CFG.TRAIN.DATA.SHUFFLE = True
- CFG.TRAIN.DATA.NUM_WORKERS = 2
- CFG.TRAIN.DATA.PIN_MEMORY = False
-
- # ================= test ================= #
- CFG.TEST = EasyDict()
- CFG.TEST.INTERVAL = 5
- # test data
- CFG.TEST.DATA = EasyDict()
- # read data
- CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
- # dataloader args, optional
- CFG.TEST.DATA.BATCH_SIZE = 64
- CFG.TEST.DATA.PREFETCH = False
- CFG.TEST.DATA.SHUFFLE = False
- CFG.TEST.DATA.NUM_WORKERS = 2
- CFG.TEST.DATA.PIN_MEMORY = False
-
- # ================= evaluate ================= #
- CFG.EVAL = EasyDict()
- CFG.EVAL.HORIZONS = []
- CFG.EVAL.SAVE_PATH = os.path.abspath(__file__ + "/..")
-
- return CFG
diff --git a/baselines/PatchTST/PEMS04.py b/baselines/PatchTST/PEMS04.py
deleted file mode 100644
index 39570896..00000000
--- a/baselines/PatchTST/PEMS04.py
+++ /dev/null
@@ -1,130 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import PatchTST
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "PatchTST model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 720
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "PatchTST"
-CFG.MODEL.ARCH = PatchTST
-NUM_NODES = 307
-CFG.MODEL.PARAM = EasyDict(
- {
- "enc_in": NUM_NODES, # num nodes
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
- "e_layers": 3, # num of encoder layers
- "n_heads": 16,
- "d_model": 128,
- "d_ff": 256,
- "dropout": 0.2,
- "fc_dropout": 0.2,
- "head_dropout": 0.0,
- "patch_len": 32,
- "stride": 16,
- "individual": 0, # individual head; True 1 False 0
- "padding_patch": "end", # None: None; end: padding on the end
- "revin": 1, # RevIN; True 1 False 0
- "affine": 0, # RevIN-affine; True 1 False 0
- "subtract_last": 0, # 0: subtract mean; 1: subtract last
- "decomposition": 0, # decomposition; True 1 False 0
- "kernel_size": 25, # decomposition-kernel
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.001,
- "weight_decay": 0.0005,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25, 50],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- 'max_norm': 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 16
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
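
# What CFG.EVAL.HORIZONS controls, sketched under the assumption that the
# evaluator reports metrics at individual prediction steps (1-indexed horizons)
# in addition to the average over the whole output window:
import torch

prediction = torch.randn(64, 336, 307)   # [batch, output_len, num_nodes]
target = torch.randn(64, 336, 307)

for horizon in [12, 24, 48, 96, 192, 288, 336]:
    # horizon h corresponds to time-step index h - 1
    mae_h = (prediction[:, horizon - 1, :] - target[:, horizon - 1, :]).abs().mean()
    print(f'MAE@{horizon}: {mae_h:.4f}')
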
diff --git a/baselines/PatchTST/PEMS04_LTSF.py b/baselines/PatchTST/PEMS04_LTSF.py
new file mode 100644
index 00000000..e49bafab
--- /dev/null
+++ b/baselines/PatchTST/PEMS04_LTSF.py
@@ -0,0 +1,154 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import PatchTST
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = PatchTST
+NUM_NODES = 307
+MODEL_PARAM = {
+ "enc_in": NUM_NODES, # num nodes
+ "seq_len": INPUT_LEN, # input sequence length
+ "pred_len": OUTPUT_LEN, # prediction sequence length
+ "e_layers": 3, # num of encoder layers
+ "n_heads": 16,
+ "d_model": 128,
+ "d_ff": 256,
+ "dropout": 0.2,
+ "fc_dropout": 0.2,
+ "head_dropout": 0.0,
+ "patch_len": 32,
+ "stride": 16,
+ "individual": 0, # individual head; True 1 False 0
+ "padding_patch": "end", # None: None; end: padding on the end
+ "revin": 1, # RevIN; True 1 False 0
+ "affine": 0, # RevIN-affine; True 1 False 0
+ "subtract_last": 0, # 0: subtract mean; 1: subtract last
+ "decomposition": 0, # decomposition; True 1 False 0
+ "kernel_size": 25, # decomposition-kernel
+ }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'PatchTST on PEMS04 (LTSF setting)'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.001,
+ "weight_decay": 0.0005,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25, 50],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
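
# A sketch of the z-score normalization the new Scaler class manages, assuming
# ZScoreScaler fits its statistics on the training split only ('train_ratio',
# here taken as 0.6) and that 'norm_each_channel' toggles per-channel versus
# global statistics; the actual interface lives in basicts.scaler:
import numpy as np

data = np.random.rand(1000, 307, 1)      # [time, num_nodes, features]
train_len = int(1000 * 0.6)              # assumed TRAIN_VAL_TEST_RATIO[0]

mean = data[:train_len].mean(axis=0)     # norm_each_channel=True: per-channel stats
std = data[:train_len].std(axis=0)

normalized = (data - mean) / std         # applied before the model sees the data
restored = normalized * std + mean       # inverse applied before computing metrics
assert np.allclose(restored, data)
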
diff --git a/baselines/PatchTST/PEMS08.py b/baselines/PatchTST/PEMS08.py
deleted file mode 100644
index b592025c..00000000
--- a/baselines/PatchTST/PEMS08.py
+++ /dev/null
@@ -1,130 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import PatchTST
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "PatchTST model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 720
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "PatchTST"
-CFG.MODEL.ARCH = PatchTST
-NUM_NODES = 307
-CFG.MODEL.PARAM = EasyDict(
- {
- "enc_in": NUM_NODES, # num nodes
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
- "e_layers": 3, # num of encoder layers
- "n_heads": 16,
- "d_model": 128,
- "d_ff": 256,
- "dropout": 0.2,
- "fc_dropout": 0.2,
- "head_dropout": 0.0,
- "patch_len": 32,
- "stride": 16,
- "individual": 0, # individual head; True 1 False 0
- "padding_patch": "end", # None: None; end: padding on the end
- "revin": 1, # RevIN; True 1 False 0
- "affine": 0, # RevIN-affine; True 1 False 0
- "subtract_last": 0, # 0: subtract mean; 1: subtract last
- "decomposition": 0, # decomposition; True 1 False 0
- "kernel_size": 25, # decomposition-kernel
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.001,
- "weight_decay": 0.0005,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25, 50],
- "gamma": 0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- 'max_norm': 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 16
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
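
# A sketch of the masked MAE used as the training loss: with NULL_VAL = 0.0,
# entries where the target is zero are treated as missing sensor readings and
# excluded from the average (the real implementation is
# basicts.metrics.masked_mae):
import torch

def masked_mae_sketch(prediction, target, null_val=0.0):
    mask = (~torch.isclose(target, torch.tensor(null_val))).float()
    mask = mask / mask.mean()                       # re-weight the kept entries
    return (torch.abs(prediction - target) * mask).mean()

pred = torch.tensor([[1.0, 2.0, 3.0]])
true = torch.tensor([[1.5, 0.0, 2.0]])              # the zero counts as missing
print(masked_mae_sketch(pred, true))                # tensor(0.7500)
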
diff --git a/baselines/PatchTST/PEMS08_LTSF.py b/baselines/PatchTST/PEMS08_LTSF.py
new file mode 100644
index 00000000..aa578450
--- /dev/null
+++ b/baselines/PatchTST/PEMS08_LTSF.py
@@ -0,0 +1,154 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import PatchTST
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = PatchTST
+NUM_NODES = 170
+MODEL_PARAM = {
+ "enc_in": NUM_NODES, # num nodes
+ "seq_len": INPUT_LEN, # input sequence length
+ "pred_len": OUTPUT_LEN, # prediction sequence length
+ "e_layers": 3, # num of encoder layers
+ "n_heads": 16,
+ "d_model": 128,
+ "d_ff": 256,
+ "dropout": 0.2,
+ "fc_dropout": 0.2,
+ "head_dropout": 0.0,
+ "patch_len": 32,
+ "stride": 16,
+ "individual": 0, # individual head; True 1 False 0
+ "padding_patch": "end", # None: None; end: padding on the end
+ "revin": 1, # RevIN; True 1 False 0
+ "affine": 0, # RevIN-affine; True 1 False 0
+ "subtract_last": 0, # 0: subtract mean; 1: subtract last
+ "decomposition": 0, # decomposition; True 1 False 0
+ "kernel_size": 25, # decomposition-kernel
+ }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'PatchTST on PEMS08 (LTSF setting)'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.001,
+ "weight_decay": 0.0005,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25, 50],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
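
# How the LTSF configs relate to get_regular_settings: the helper returns the
# benchmark defaults read out above (INPUT_LEN, OUTPUT_LEN, TRAIN_VAL_TEST_RATIO,
# NORM_EACH_CHANNEL, RESCALE, NULL_VAL), and a config may override individual
# entries. A sketch with assumed default values for PEMS08:
regular_settings = {
    'INPUT_LEN': 12, 'OUTPUT_LEN': 12,               # assumed short-term defaults
    'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],         # assumed split
    'NORM_EACH_CHANNEL': False, 'RESCALE': True, 'NULL_VAL': 0.0,
}
ltsf_settings = dict(regular_settings, INPUT_LEN=336, OUTPUT_LEN=336)  # LTSF override
print(ltsf_settings['INPUT_LEN'], ltsf_settings['OUTPUT_LEN'])         # 336 336
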
diff --git a/baselines/PatchTST/Weather.py b/baselines/PatchTST/Weather.py
index f84d7363..30557f44 100644
--- a/baselines/PatchTST/Weather.py
+++ b/baselines/PatchTST/Weather.py
@@ -1,44 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import PatchTST
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "PatchTST model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Weather"
-CFG.DATASET_TYPE = "Weather"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "PatchTST"
-CFG.MODEL.ARCH = PatchTST
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Weather' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = PatchTST
NUM_NODES = 21
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
- "seq_len": CFG.DATASET_INPUT_LEN, # input sequence length
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+ "seq_len": INPUT_LEN, # input sequence length
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"e_layers": 3, # num of encoder layers
"n_heads": 16,
"d_model": 128,
@@ -55,75 +44,108 @@
"subtract_last": 0, # 0: subtract mean; 1: subtract last
"decomposition": 0, # decomposition; True 1 False 0
"kernel_size": 25, # decomposition-kernel
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0] # [raw_data, time_of_day, day_of_week, day_of_month, day_of_year]
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'PatchTST on Weather'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
- "weight_decay": 0.0005,
+ "weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
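
# A runnable sketch of the schedule configured above: Adam starts at lr=0.0002
# and MultiStepLR halves the rate at the end of epochs 1 and 25:
import torch

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.Adam([param], lr=0.0002)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[1, 25], gamma=0.5)

for epoch in range(1, 31):
    optimizer.step()
    scheduler.step()
    if epoch in (1, 25):
        print(epoch, optimizer.param_groups[0]['lr'])   # 0.0001, then 5e-05
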
diff --git a/baselines/PatchTST/arch/patchtst_arch.py b/baselines/PatchTST/arch/patchtst_arch.py
index d3d2ceec..076d1b3e 100644
--- a/baselines/PatchTST/arch/patchtst_arch.py
+++ b/baselines/PatchTST/arch/patchtst_arch.py
@@ -24,14 +24,14 @@ def __init__(self, enc_in, seq_len, pred_len, e_layers, n_heads, d_model, d_ff,
pre_norm:bool=False, store_attn:bool=False, pe:str='zeros',
learn_pe:bool=True, pretrain_head:bool=False, head_type = 'flatten',
verbose:bool=False, **kwargs):
-
+
super().__init__()
# load parameters
c_in = enc_in
context_window = seq_len
target_window = pred_len
-
+
n_layers = e_layers
n_heads = n_heads
d_model = d_model
@@ -39,21 +39,21 @@ def __init__(self, enc_in, seq_len, pred_len, e_layers, n_heads, d_model, d_ff,
dropout = dropout
fc_dropout = fc_dropout
head_dropout = head_dropout
-
- individual = individual
+ individual = individual
+
patch_len = patch_len
stride = stride
padding_patch = padding_patch
-
+
revin = revin
affine = affine
subtract_last = subtract_last
-
+
decomposition = decomposition
kernel_size = kernel_size
-
-
+
+
# model
self.decomposition = decomposition
if self.decomposition:
@@ -83,8 +83,8 @@ def __init__(self, enc_in, seq_len, pred_len, e_layers, n_heads, d_model, d_ff,
pe=pe, learn_pe=learn_pe, fc_dropout=fc_dropout, head_dropout=head_dropout, padding_patch = padding_patch,
pretrain_head=pretrain_head, head_type=head_type, individual=individual, revin=revin, affine=affine,
subtract_last=subtract_last, verbose=verbose, **kwargs)
-
-
+
+
def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_seen: int, epoch: int, train: bool, **kwargs) -> torch.Tensor:
"""Feed forward of PatchTST.
@@ -96,7 +96,7 @@ def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_s
"""
assert history_data.shape[-1] == 1 # only use the target feature
x = history_data[..., 0] # B, L, N
-
+
if self.decomposition:
res_init, trend_init = self.decomp_module(x)
res_init, trend_init = res_init.permute(0,2,1), trend_init.permute(0,2,1) # x: [Batch, Channel, Input length]
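
# A minimal sketch of the RevIN behaviour selected by the config flags
# (revin=1, affine=0, subtract_last=0): each series is normalized by its own
# instance statistics before the backbone, and the same statistics undo the
# normalization afterwards (applied to the prediction in the real layer):
import torch

x = torch.randn(8, 21, 336)                  # [batch, channels, seq_len]
mean = x.mean(dim=-1, keepdim=True)          # subtract_last=0: use the mean
std = torch.sqrt(x.var(dim=-1, keepdim=True, unbiased=False) + 1e-5)

z = (x - mean) / std                         # 'norm' step
y = z * std + mean                           # 'denorm' step restores the scale
assert torch.allclose(y, x, atol=1e-5)
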
diff --git a/baselines/PatchTST/arch/patchtst_backbone.py b/baselines/PatchTST/arch/patchtst_backbone.py
index bc588c4d..7cfeec31 100644
--- a/baselines/PatchTST/arch/patchtst_backbone.py
+++ b/baselines/PatchTST/arch/patchtst_backbone.py
@@ -19,13 +19,13 @@ def __init__(self, c_in:int, context_window:int, target_window:int, patch_len:in
pe:str='zeros', learn_pe:bool=True, fc_dropout:float=0., head_dropout = 0, padding_patch = None,
pretrain_head:bool=False, head_type = 'flatten', individual = False, revin = True, affine = True, subtract_last = False,
verbose:bool=False, **kwargs):
-
+
super().__init__()
-
+
# RevIn
self.revin = revin
if self.revin: self.revin_layer = RevIN(c_in, affine=affine, subtract_last=subtract_last)
-
+
# Patching
self.patch_len = patch_len
self.stride = stride
@@ -34,7 +34,7 @@ def __init__(self, c_in:int, context_window:int, target_window:int, patch_len:in
if padding_patch == 'end': # can be modified to general case
self.padding_patch_layer = nn.ReplicationPad1d((0, stride))
patch_num += 1
-
+
# Backbone
self.backbone = TSTiEncoder(c_in, patch_num=patch_num, patch_len=patch_len, max_seq_len=max_seq_len,
n_layers=n_layers, d_model=d_model, n_heads=n_heads, d_k=d_k, d_v=d_v, d_ff=d_ff,
@@ -53,32 +53,32 @@ def __init__(self, c_in:int, context_window:int, target_window:int, patch_len:in
self.head = self.create_pretrain_head(self.head_nf, c_in, fc_dropout) # custom head passed as a partial func with all its kwargs
elif head_type == 'flatten':
self.head = Flatten_Head(self.individual, self.n_vars, self.head_nf, target_window, head_dropout=head_dropout)
-
+
def forward(self, z): # z: [bs x nvars x seq_len]
# norm
if self.revin:
z = z.permute(0,2,1)
z = self.revin_layer(z, 'norm')
z = z.permute(0,2,1)
-
+
# do patching
if self.padding_patch == 'end':
z = self.padding_patch_layer(z)
z = z.unfold(dimension=-1, size=self.patch_len, step=self.stride) # z: [bs x nvars x patch_num x patch_len]
z = z.permute(0,1,3,2) # z: [bs x nvars x patch_len x patch_num]
-
+
# model
z = self.backbone(z) # z: [bs x nvars x d_model x patch_num]
z = self.head(z) # z: [bs x nvars x target_window]
-
+
# denorm
if self.revin:
z = z.permute(0,2,1)
z = self.revin_layer(z, 'denorm')
z = z.permute(0,2,1)
return z
-
+
def create_pretrain_head(self, head_nf, vars, dropout):
return nn.Sequential(nn.Dropout(dropout),
nn.Conv1d(head_nf, vars, 1)
@@ -88,10 +88,10 @@ def create_pretrain_head(self, head_nf, vars, dropout):
class Flatten_Head(nn.Module):
def __init__(self, individual, n_vars, nf, target_window, head_dropout=0):
super().__init__()
-
+
self.individual = individual
self.n_vars = n_vars
-
+
if self.individual:
self.linears = nn.ModuleList()
self.dropouts = nn.ModuleList()
@@ -104,7 +104,7 @@ def __init__(self, individual, n_vars, nf, target_window, head_dropout=0):
self.flatten = nn.Flatten(start_dim=-2)
self.linear = nn.Linear(nf, target_window)
self.dropout = nn.Dropout(head_dropout)
-
+
def forward(self, x): # x: [bs x nvars x d_model x patch_num]
if self.individual:
x_out = []
@@ -119,23 +119,23 @@ def forward(self, x): # x: [bs x nvars x d_model
x = self.linear(x)
x = self.dropout(x)
return x
-
-
+
+
class TSTiEncoder(nn.Module): #i means channel-independent
def __init__(self, c_in, patch_num, patch_len, max_seq_len=1024,
n_layers=3, d_model=128, n_heads=16, d_k=None, d_v=None,
d_ff=256, norm='BatchNorm', attn_dropout=0., dropout=0., act="gelu", store_attn=False,
key_padding_mask='auto', padding_var=None, attn_mask=None, res_attention=True, pre_norm=False,
pe='zeros', learn_pe=True, verbose=False, **kwargs):
-
-
+
+
super().__init__()
-
+
self.patch_num = patch_num
self.patch_len = patch_len
-
+
# Input encoding
q_len = patch_num
self.W_P = nn.Linear(patch_len, d_model) # Eq 1: projection of feature vectors onto a d-dim vector space
@@ -151,9 +151,9 @@ def __init__(self, c_in, patch_num, patch_len, max_seq_len=1024,
self.encoder = TSTEncoder(q_len, d_model, n_heads, d_k=d_k, d_v=d_v, d_ff=d_ff, norm=norm, attn_dropout=attn_dropout, dropout=dropout,
pre_norm=pre_norm, activation=act, res_attention=res_attention, n_layers=n_layers, store_attn=store_attn)
-
+
def forward(self, x) -> Tensor: # x: [bs x nvars x patch_len x patch_num]
-
+
n_vars = x.shape[1]
# Input encoding
x = x.permute(0,1,3,2) # x: [bs x nvars x patch_num x patch_len]
@@ -166,11 +166,11 @@ def forward(self, x) -> Tensor: # x
z = self.encoder(u) # z: [bs * nvars x patch_num x d_model]
z = torch.reshape(z, (-1,n_vars,z.shape[-2],z.shape[-1])) # z: [bs x nvars x patch_num x d_model]
z = z.permute(0,1,3,2) # z: [bs x nvars x d_model x patch_num]
-
- return z
-
-
+ return z
+
+
+
# Cell
class TSTEncoder(nn.Module):
def __init__(self, q_len, d_model, n_heads, d_k=None, d_v=None, d_ff=None,
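
# A runnable sketch of the patching step in patchtst_backbone.py: with
# seq_len=336, patch_len=32, stride=16 unfold yields (336 - 32) // 16 + 1 = 20
# patches, and ReplicationPad1d((0, stride)) adds one more when
# padding_patch == 'end':
import torch
import torch.nn as nn

seq_len, patch_len, stride = 336, 32, 16
z = torch.randn(8, 21, seq_len)                       # [bs x nvars x seq_len]

padded = nn.ReplicationPad1d((0, stride))(z)          # repeat the last value
patches = padded.unfold(dimension=-1, size=patch_len, step=stride)
print(patches.shape)                                  # torch.Size([8, 21, 21, 32])
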
diff --git a/baselines/PatchTST/arch/patchtst_layers.py b/baselines/PatchTST/arch/patchtst_layers.py
index 945d315a..aeb2df72 100644
--- a/baselines/PatchTST/arch/patchtst_layers.py
+++ b/baselines/PatchTST/arch/patchtst_layers.py
@@ -10,14 +10,14 @@ def forward(self, x):
if self.contiguous: return x.transpose(*self.dims).contiguous()
else: return x.transpose(*self.dims)
-
+
def get_activation_fn(activation):
if callable(activation): return activation()
elif activation.lower() == "relu": return nn.ReLU()
elif activation.lower() == "gelu": return nn.GELU()
raise ValueError(f'{activation} is not available. You can use "relu", "gelu", or a callable')
-
-
+
+
# decomposition
class moving_avg(nn.Module):
@@ -51,9 +51,9 @@ def forward(self, x):
moving_mean = self.moving_avg(x)
res = x - moving_mean
return res, moving_mean
-
-
-
+
+
+
# pos_encoding
def PositionalEncoding(q_len, d_model, normalize=True):
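
# A sketch of the moving-average decomposition defined above (used when the
# configs set decomposition=1 with kernel_size=25): the trend is a moving mean
# over a replicate-padded series and the residual is what the trend leaves over:
import torch
import torch.nn as nn

kernel_size = 25
x = torch.randn(8, 336, 21)                  # [batch, seq_len, channels]

front = x[:, :1, :].repeat(1, (kernel_size - 1) // 2, 1)   # pad both ends so the
end = x[:, -1:, :].repeat(1, (kernel_size - 1) // 2, 1)    # output keeps its length
padded = torch.cat([front, x, end], dim=1)

moving_mean = nn.AvgPool1d(kernel_size, stride=1)(padded.permute(0, 2, 1)).permute(0, 2, 1)
res = x - moving_mean                        # res + moving_mean reconstructs x
print(res.shape, moving_mean.shape)          # both torch.Size([8, 336, 21])
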
diff --git a/baselines/PatchTST/run.sh b/baselines/PatchTST/run.sh
deleted file mode 100644
index 5802fb23..00000000
--- a/baselines/PatchTST/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/PatchTST/ETTh1.py --gpus '0'
-python experiments/train.py -c baselines/PatchTST/ETTh2.py --gpus '0'
-python experiments/train.py -c baselines/PatchTST/ETTm1.py --gpus '0'
-python experiments/train.py -c baselines/PatchTST/ETTm2.py --gpus '0'
-python experiments/train.py -c baselines/PatchTST/Electricity.py --gpus '0'
-python experiments/train.py -c baselines/PatchTST/ExchangeRate.py --gpus '0'
-python experiments/train.py -c baselines/PatchTST/Weather.py --gpus '0'
-python experiments/train.py -c baselines/PatchTST/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/PatchTST/PEMS08.py --gpus '0'
diff --git a/baselines/Pyraformer/ETTh1.py b/baselines/Pyraformer/ETTh1.py
index 53d34407..b4b76630 100644
--- a/baselines/Pyraformer/ETTh1.py
+++ b/baselines/Pyraformer/ETTh1.py
@@ -1,48 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-import torch
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Pyraformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Pyraformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Pyraformer"
-CFG.MODEL.ARCH = Pyraformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+INPUT_LEN = 96 # better results than regular_settings['INPUT_LEN'] (336)
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Pyraformer
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "input_size": CFG.DATASET_INPUT_LEN,
- "predict_step": CFG.DATASET_OUTPUT_LEN,
+ "input_size": INPUT_LEN,
+ "predict_step": OUTPUT_LEN,
"d_model": 512,
"d_inner_hid": 512,
"d_k": 128,
@@ -64,65 +52,106 @@
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'Pyraformer on ETTh1'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 10
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
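
# Why time_of_day_size differs across datasets: the time-of-day embedding needs
# one slot per intra-day step, so hourly ETTh data keeps 24 while 15-minute
# ETTm data needs 24*4 = 96 (the change applied in the ETTm configs below).
# A sketch of the index computation, assuming the time-of-day feature is stored
# as a fraction of the day in [0, 1):
tod_fraction = 0.25                     # 06:00
print(int(tod_fraction * 24))           # hourly slot: 6
print(int(tod_fraction * 24 * 4))       # 15-minute slot: 24
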
diff --git a/baselines/Pyraformer/ETTh2.py b/baselines/Pyraformer/ETTh2.py
index 98d7090c..27d503e3 100644
--- a/baselines/Pyraformer/ETTh2.py
+++ b/baselines/Pyraformer/ETTh2.py
@@ -1,48 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-import torch
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Pyraformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Pyraformer model configuration "
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 192
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Pyraformer"
-CFG.MODEL.ARCH = Pyraformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+INPUT_LEN = 96 # better results than regular_settings['INPUT_LEN'] (336)
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Pyraformer
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "input_size": CFG.DATASET_INPUT_LEN,
- "predict_step": CFG.DATASET_OUTPUT_LEN,
+ "input_size": INPUT_LEN,
+ "predict_step": OUTPUT_LEN,
"d_model": 512,
"d_inner_hid": 512,
"d_k": 128,
@@ -64,65 +52,106 @@
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'Pyraformer on ETTh2'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
-# ================= optim ================= #
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 10
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
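
# A sketch of what FORWARD_FEATURES = [0, 1, 2, 3, 4] and TARGET_FEATURES = [0]
# select, assuming the last tensor dimension stacks
# [raw_data, time_of_day, day_of_week, day_of_month, day_of_year] as noted in
# the older configs:
import torch

history_data = torch.randn(8, 96, 7, 5)           # [batch, input_len, nodes, features]
model_input = history_data[..., [0, 1, 2, 3, 4]]  # Pyraformer consumes all five
target = history_data[..., [0]]                   # metrics compare the raw data only
print(model_input.shape, target.shape)            # torch.Size([8, 96, 7, 5]) torch.Size([8, 96, 7, 1])
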
diff --git a/baselines/Pyraformer/ETTm1.py b/baselines/Pyraformer/ETTm1.py
index c4b1e07a..e0eea048 100644
--- a/baselines/Pyraformer/ETTm1.py
+++ b/baselines/Pyraformer/ETTm1.py
@@ -1,48 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-import torch
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Pyraformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Pyraformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Pyraformer"
-CFG.MODEL.ARCH = Pyraformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+INPUT_LEN = 96 # better results than regular_settings['INPUT_LEN'] (336)
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Pyraformer
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "input_size": CFG.DATASET_INPUT_LEN,
- "predict_step": CFG.DATASET_OUTPUT_LEN,
+ "input_size": INPUT_LEN,
+ "predict_step": OUTPUT_LEN,
"d_model": 512,
"d_inner_hid": 512,
"d_k": 128,
@@ -59,70 +47,111 @@
"use_tvm": False,
"embed": "DataEmbedding",
"num_time_features": 4,
- "time_of_day_size": 24,
+ "time_of_day_size": 24*4,
"day_of_week_size": 7,
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'Pyraformer on ETTm1'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 10
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
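
# A sketch of the sliding-window sampling implied by input_len/output_len,
# assuming TimeSeriesForecastingDataset indexes contiguous history/future pairs
# over the raw series (OUTPUT_LEN taken as 336, as in the LTSF configs):
import numpy as np

data = np.arange(500)                    # stand-in for one channel of ETTm1
input_len, output_len = 96, 336

def sample(i):
    history = data[i : i + input_len]
    future = data[i + input_len : i + input_len + output_len]
    return history, future

num_samples = len(data) - input_len - output_len + 1
history, future = sample(0)
print(num_samples, history.shape, future.shape)   # 69 (96,) (336,)
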
diff --git a/baselines/Pyraformer/ETTm2.py b/baselines/Pyraformer/ETTm2.py
index 97e9af10..92d49079 100644
--- a/baselines/Pyraformer/ETTm2.py
+++ b/baselines/Pyraformer/ETTm2.py
@@ -1,48 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-import torch
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Pyraformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Pyraformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 192
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Pyraformer"
-CFG.MODEL.ARCH = Pyraformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+INPUT_LEN = 96 # better results than regular_settings['INPUT_LEN'] (336)
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Pyraformer
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "input_size": CFG.DATASET_INPUT_LEN,
- "predict_step": CFG.DATASET_OUTPUT_LEN,
+ "input_size": INPUT_LEN,
+ "predict_step": OUTPUT_LEN,
"d_model": 512,
"d_inner_hid": 512,
"d_k": 128,
@@ -59,70 +47,111 @@
"use_tvm": False,
"embed": "DataEmbedding",
"num_time_features": 4,
- "time_of_day_size": 24,
+ "time_of_day_size": 24*4,
"day_of_week_size": 7,
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'Pyraformer on ETTm2'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 10
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
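
The hunk above also raises time_of_day_size from 24 to 24*4 for ETTm2: the ETTm datasets are sampled every 15 minutes, so a day spans 96 slots, and a table sized 24 would either be indexed out of range or collapse distinct slots, depending on how the feature is computed. A minimal sketch of how such an index is typically derived (an illustrative helper, not code from this repo):

    # Sketch (not repo code): time-of-day index for an embedding table
    # of size `steps_per_day`; ETTm2 is 15-minute data, so 24*4 = 96.
    import pandas as pd

    def time_of_day_index(ts: pd.DatetimeIndex, steps_per_day: int = 24 * 4):
        seconds = ts.hour * 3600 + ts.minute * 60 + ts.second
        return (seconds / 86400 * steps_per_day).astype(int)  # in [0, steps_per_day)

    idx = time_of_day_index(pd.date_range('2016-07-01', periods=4, freq='15min'))
    print(list(idx))  # [0, 1, 2, 3]
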
diff --git a/baselines/Pyraformer/Electricity.py b/baselines/Pyraformer/Electricity.py
index a36cb9f1..efd8a170 100644
--- a/baselines/Pyraformer/Electricity.py
+++ b/baselines/Pyraformer/Electricity.py
@@ -1,48 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-import torch
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Pyraformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Pyraformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Electricity"
-CFG.DATASET_TYPE = "Electricity Consumption"
-CFG.DATASET_INPUT_LEN = 192
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Pyraformer"
-CFG.MODEL.ARCH = Pyraformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Electricity' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+INPUT_LEN = 96 # 96 yields better results than regular_settings['INPUT_LEN'] (336)
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Pyraformer
NUM_NODES = 321
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "input_size": CFG.DATASET_INPUT_LEN,
- "predict_step": CFG.DATASET_OUTPUT_LEN,
+ "input_size": INPUT_LEN,
+ "predict_step": OUTPUT_LEN,
"d_model": 512,
"d_inner_hid": 512,
"d_k": 128,
@@ -59,69 +47,111 @@
"use_tvm": False,
"embed": "DataEmbedding",
"num_time_features": 4,
- "time_of_day_size": 144,
+ "time_of_day_size": 24,
"day_of_week_size": 7,
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 10
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
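
Each rewritten config now declares normalization explicitly through CFG.SCALER instead of baking it into the dataset. The parameters map onto plain z-score scaling: statistics come from the first train_ratio of the series, per channel when norm_each_channel is set, and rescale controls whether predictions are mapped back to the original units. A minimal sketch of the idea (illustrative only, not the BasicTS ZScoreScaler itself):

    # Sketch (illustrative): z-score scaling fit on the train split only.
    import numpy as np

    class ZScoreSketch:
        def fit(self, data: np.ndarray, train_ratio: float, norm_each_channel: bool = True):
            train = data[: int(len(data) * train_ratio)]
            axis = 0 if norm_each_channel else None
            self.mean, self.std = train.mean(axis=axis), train.std(axis=axis)

        def transform(self, x):          # applied before the model sees the data
            return (x - self.mean) / self.std

        def inverse_transform(self, x):  # applied to predictions when RESCALE is True
            return x * self.std + self.mean

    scaler = ZScoreSketch()
    scaler.fit(np.random.rand(100, 321), train_ratio=0.7)  # e.g. 321 Electricity channels
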
diff --git a/baselines/Pyraformer/ExchangeRate.py b/baselines/Pyraformer/ExchangeRate.py
index cf40c382..19db416a 100644
--- a/baselines/Pyraformer/ExchangeRate.py
+++ b/baselines/Pyraformer/ExchangeRate.py
@@ -1,48 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-import torch
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Pyraformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Pyraformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ExchangeRate"
-CFG.DATASET_TYPE = "Exchange Rate"
-CFG.DATASET_INPUT_LEN = 192
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Pyraformer"
-CFG.MODEL.ARCH = Pyraformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ExchangeRate' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+INPUT_LEN = 96 # 96 yields better results than regular_settings['INPUT_LEN'] (336)
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Pyraformer
NUM_NODES = 8
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "input_size": CFG.DATASET_INPUT_LEN,
- "predict_step": CFG.DATASET_OUTPUT_LEN,
+ "input_size": INPUT_LEN,
+ "predict_step": OUTPUT_LEN,
"d_model": 512,
"d_inner_hid": 512,
"d_k": 128,
@@ -64,64 +52,106 @@
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 10
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
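
The CFG.TRAIN.LR_SCHEDULER block added to every Pyraformer config is plain PyTorch MultiStepLR: with Adam at lr 2e-4 and milestones [1, 25] with gamma 0.5, the rate halves to 1e-4 after the first epoch and again to 5e-5 after epoch 25, assuming the runner steps the scheduler once per epoch. In PyTorch terms:

    # Equivalent PyTorch usage for the scheduler block above.
    import torch

    model = torch.nn.Linear(8, 8)  # stand-in for the actual model
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0002)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[1, 25], gamma=0.5)

    for epoch in range(100):  # NUM_EPOCHS
        # ... train one epoch ...
        scheduler.step()      # 2e-4 -> 1e-4 after epoch 1, -> 5e-5 after epoch 25
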
diff --git a/baselines/Pyraformer/PEMS04.py b/baselines/Pyraformer/PEMS04.py
deleted file mode 100644
index edd265f6..00000000
--- a/baselines/Pyraformer/PEMS04.py
+++ /dev/null
@@ -1,126 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-import torch
-
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import Pyraformer
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Pyraformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Pyraformer"
-CFG.MODEL.ARCH = Pyraformer
-NUM_NODES = 307
-CFG.MODEL.PARAM = EasyDict(
- {
- "enc_in": NUM_NODES, # num nodes
- "dec_in": NUM_NODES,
- "c_out": NUM_NODES,
- "input_size": CFG.DATASET_INPUT_LEN,
- "predict_step": CFG.DATASET_OUTPUT_LEN,
- "d_model": 512,
- "d_inner_hid": 512,
- "d_k": 128,
- "d_v": 128,
- "d_bottleneck": 128,
- "n_head": 4,
- "n_layer": 4,
- "dropout": 0.05,
- "decoder": "FC", # FC or attention
- "window_size": "[2, 2, 2]",
- "inner_size": 5,
- "CSCM": "Bottleneck_Construct",
- "truncate": False,
- "use_tvm": False,
- "embed": "DataEmbedding",
- "num_time_features": 2,
- "time_of_day_size": 288,
- "day_of_week_size": 7,
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0005
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 10
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Pyraformer/PEMS04_LTSF.py b/baselines/Pyraformer/PEMS04_LTSF.py
new file mode 100644
index 00000000..76e1de63
--- /dev/null
+++ b/baselines/Pyraformer/PEMS04_LTSF.py
@@ -0,0 +1,157 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import Pyraformer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 96 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Pyraformer
+NUM_NODES = 307
+MODEL_PARAM = {
+ "enc_in": NUM_NODES, # num nodes
+ "dec_in": NUM_NODES,
+ "c_out": NUM_NODES,
+ "input_size": INPUT_LEN,
+ "predict_step": OUTPUT_LEN,
+ "d_model": 512,
+ "d_inner_hid": 512,
+ "d_k": 128,
+ "d_v": 128,
+ "d_bottleneck": 128,
+ "n_head": 4,
+ "n_layer": 4,
+ "dropout": 0.05,
+ "decoder": "FC", # FC or attention
+ "window_size": "[2, 2, 2]",
+ "inner_size": 5,
+ "CSCM": "Bottleneck_Construct",
+ "truncate": False,
+ "use_tvm": False,
+ "embed": "DataEmbedding",
+ "num_time_features": 2,
+ "time_of_day_size": 288,
+ "day_of_week_size": 7,
+ }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.0005
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
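
PEMS04_LTSF keeps the traffic-style metrics (masked MAE/MAPE/RMSE) rather than the MAE/MSE pair used for the ETT-style datasets, and NULL_VAL now comes from get_regular_settings instead of the hard-coded CFG.NULL_VAL = 0.0 of the deleted file. The masking idea, sketched (illustrative only, not the basicts.metrics implementation):

    # Sketch (illustrative): MAE that ignores entries equal to the null value.
    import torch

    def masked_mae_sketch(pred, target, null_val=0.0):
        mask = (target != null_val).float()
        mask = mask / mask.mean().clamp(min=1e-5)  # keep the loss scale comparable
        return (torch.abs(pred - target) * mask).mean()

    pred, target = torch.rand(4, 336, 307, 1), torch.rand(4, 336, 307, 1)
    print(masked_mae_sketch(pred, target))
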
diff --git a/baselines/Pyraformer/PEMS08.py b/baselines/Pyraformer/PEMS08.py
deleted file mode 100644
index c7fd9f21..00000000
--- a/baselines/Pyraformer/PEMS08.py
+++ /dev/null
@@ -1,126 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-import torch
-
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
-
-from .arch import Pyraformer
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Pyraformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Pyraformer"
-CFG.MODEL.ARCH = Pyraformer
-NUM_NODES = 170
-CFG.MODEL.PARAM = EasyDict(
- {
- "enc_in": NUM_NODES, # num nodes
- "dec_in": NUM_NODES,
- "c_out": NUM_NODES,
- "input_size": CFG.DATASET_INPUT_LEN,
- "predict_step": CFG.DATASET_OUTPUT_LEN,
- "d_model": 512,
- "d_inner_hid": 512,
- "d_k": 128,
- "d_v": 128,
- "d_bottleneck": 128,
- "n_head": 4,
- "n_layer": 4,
- "dropout": 0.05,
- "decoder": "FC", # FC or attention
- "window_size": "[2, 2, 2]",
- "inner_size": 5,
- "CSCM": "Bottleneck_Construct",
- "truncate": False,
- "use_tvm": False,
- "embed": "DataEmbedding",
- "num_time_features": 2,
- "time_of_day_size": 288,
- "day_of_week_size": 7,
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0005
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 10
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Pyraformer/PEMS08_LTSF.py b/baselines/Pyraformer/PEMS08_LTSF.py
new file mode 100644
index 00000000..8c72c447
--- /dev/null
+++ b/baselines/Pyraformer/PEMS08_LTSF.py
@@ -0,0 +1,157 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import Pyraformer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 96 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Pyraformer
+NUM_NODES = 170
+MODEL_PARAM = {
+ "enc_in": NUM_NODES, # num nodes
+ "dec_in": NUM_NODES,
+ "c_out": NUM_NODES,
+ "input_size": INPUT_LEN,
+ "predict_step": OUTPUT_LEN,
+ "d_model": 512,
+ "d_inner_hid": 512,
+ "d_k": 128,
+ "d_v": 128,
+ "d_bottleneck": 128,
+ "n_head": 4,
+ "n_layer": 4,
+ "dropout": 0.05,
+ "decoder": "FC", # FC or attention
+ "window_size": "[2, 2, 2]",
+ "inner_size": 5,
+ "CSCM": "Bottleneck_Construct",
+ "truncate": False,
+ "use_tvm": False,
+ "embed": "DataEmbedding",
+ "num_time_features": 2,
+ "time_of_day_size": 288,
+ "day_of_week_size": 7,
+ }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.0005
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
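
CKPT_SAVE_DIR now encodes the model, dataset, epoch count, and both sequence lengths, so the regular and LTSF variants of the same dataset no longer overwrite each other's checkpoints (the old path was just the model name plus the epoch count). For this file the expression evaluates to:

    import os
    # PEMS08_LTSF.py: DATA_NAME='PEMS08', NUM_EPOCHS=100, INPUT_LEN=96, OUTPUT_LEN=336
    print(os.path.join('checkpoints', 'Pyraformer',
                       '_'.join(['PEMS08', str(100), str(96), str(336)])))
    # -> checkpoints/Pyraformer/PEMS08_100_96_336 (on POSIX)
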
diff --git a/baselines/Pyraformer/Weather.py b/baselines/Pyraformer/Weather.py
index f4ae15b6..c661e32c 100644
--- a/baselines/Pyraformer/Weather.py
+++ b/baselines/Pyraformer/Weather.py
@@ -1,48 +1,36 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-import torch
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Pyraformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Pyraformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Weather"
-CFG.DATASET_TYPE = "Weather Data"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Pyraformer"
-CFG.MODEL.ARCH = Pyraformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Weather' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+INPUT_LEN = 96 # 96 yields better results than regular_settings['INPUT_LEN'] (336)
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Pyraformer
NUM_NODES = 21
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"enc_in": NUM_NODES, # num nodes
"dec_in": NUM_NODES,
"c_out": NUM_NODES,
- "input_size": CFG.DATASET_INPUT_LEN,
- "predict_step": CFG.DATASET_OUTPUT_LEN,
+ "input_size": INPUT_LEN,
+ "predict_step": OUTPUT_LEN,
"d_model": 512,
"d_inner_hid": 512,
"d_k": 128,
@@ -64,64 +52,106 @@
"day_of_month_size": 31,
"day_of_year_size": 366
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 10
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
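
The configs also gain CFG.TRAIN.CLIP_GRAD_PARAM = {'max_norm': 5.0}, which in PyTorch terms is gradient-norm clipping; presumably the runner applies it between the backward pass and the optimizer step. A self-contained illustration on a toy model:

    # Gradient clipping matching CLIP_GRAD_PARAM, shown on a toy model.
    import torch

    model = torch.nn.Linear(4, 1)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0002)
    loss = model(torch.randn(8, 4)).pow(2).mean()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
    optimizer.step()
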
diff --git a/baselines/Pyraformer/run.sh b/baselines/Pyraformer/run.sh
deleted file mode 100644
index b1e61c57..00000000
--- a/baselines/Pyraformer/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/Pyraformer/ETTh1.py --gpus '0'
-python experiments/train.py -c baselines/Pyraformer/ETTh2.py --gpus '0'
-python experiments/train.py -c baselines/Pyraformer/ETTm1.py --gpus '0'
-python experiments/train.py -c baselines/Pyraformer/ETTm2.py --gpus '0'
-python experiments/train.py -c baselines/Pyraformer/Electricity.py --gpus '0'
-python experiments/train.py -c baselines/Pyraformer/ExchangeRate.py --gpus '0'
-python experiments/train.py -c baselines/Pyraformer/Weather.py --gpus '0'
-python experiments/train.py -c baselines/Pyraformer/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/Pyraformer/PEMS08.py --gpus '0'
diff --git a/baselines/STAEformer/METR-LA.py b/baselines/STAEformer/METR-LA.py
index b6b02c83..bd7690b6 100644
--- a/baselines/STAEformer/METR-LA.py
+++ b/baselines/STAEformer/METR-LA.py
@@ -1,112 +1,132 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STAEformer
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'METR-LA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STAEformer
-# ================= general ================= #
-CFG.DESCRIPTION = "STAEformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STAEformer"
-CFG.MODEL.ARCH = STAEformer
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME + "/adj_mx.pkl", "normlap")
-adj_mx = torch.Tensor(adj_mx[0])
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"num_nodes" : 207
}
-CFG.MODEL.FORWARD_FEATURES = [0,1,2]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001,
"weight_decay": 0.0003,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [20, 25],
"gamma": 0.1
}
-
-# ================= train ================= #
-# CFG.TRAIN.CLIP_GRAD_PARAM = {
-# "max_norm": 5.0
-# }
-CFG.TRAIN.NUM_EPOCHS = 30
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 16
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 16
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 16
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
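
The STAEformer configs feed the model channels [0, 1, 2] (the target value plus the two time features its embeddings expect) and score only channel 0. Assuming the usual BasicTS tensor layout of [batch, time, node, channel] (an assumption, not verified here), the selection amounts to:

    # Sketch: feature selection, assuming [batch, time, node, channel] tensors.
    import torch

    history = torch.randn(16, 12, 207, 3)      # METR-LA: 207 nodes, 3 raw channels
    future  = torch.randn(16, 12, 207, 3)
    model_input = history[..., [0, 1, 2]]      # FORWARD_FEATURES
    label       = future[..., [0]]             # TARGET_FEATURES
    print(model_input.shape, label.shape)      # [..., 3] and [..., 1]
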
diff --git a/baselines/STAEformer/PEMS-BAY.py b/baselines/STAEformer/PEMS-BAY.py
index 9a5030cb..8ce154c5 100644
--- a/baselines/STAEformer/PEMS-BAY.py
+++ b/baselines/STAEformer/PEMS-BAY.py
@@ -1,112 +1,132 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STAEformer
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS-BAY' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STAEformer
+
+MODEL_PARAM = {
+ "num_nodes" : 325
+}
+NUM_EPOCHS = 100
-# ================= general ================= #
-CFG.DESCRIPTION = "STAEformer model configuration"
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STAEformer"
-CFG.MODEL.ARCH = STAEformer
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME + "/adj_mx.pkl", "normlap")
-adj_mx = torch.Tensor(adj_mx[0])
-CFG.MODEL.PARAM = {
- "num_nodes" : 207
-}
-CFG.MODEL.FORWARD_FEATURES = [0,1,2]
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001,
"weight_decay": 0.0003,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [20, 25],
"gamma": 0.1
}
-
-# ================= train ================= #
-# CFG.TRAIN.CLIP_GRAD_PARAM = {
-# "max_norm": 5.0
-# }
-CFG.TRAIN.NUM_EPOCHS = 30
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 16
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 16
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 16
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
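
Besides the restructuring, this hunk fixes num_nodes for PEMS-BAY from 207 (carried over from the METR-LA config) to its actual 325 sensors. CFG.EVAL.HORIZONS = [3, 6, 12] on this 5-minute data reports metrics at 15, 30, and 60 minutes; a sketch of the per-horizon slicing, assuming horizons are 1-indexed steps into the 12-step prediction window:

    # Sketch: per-horizon evaluation over a 12-step prediction window.
    import torch

    pred   = torch.randn(16, 12, 325, 1)  # PEMS-BAY: 325 nodes
    target = torch.randn(16, 12, 325, 1)
    for h in [3, 6, 12]:                  # 15 / 30 / 60 minutes at 5-min resolution
        mae_h = torch.abs(pred[:, h - 1] - target[:, h - 1]).mean()
        print(f'horizon {h}: MAE {mae_h.item():.4f}')
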
diff --git a/baselines/STAEformer/PEMS03.py b/baselines/STAEformer/PEMS03.py
index d38d1f52..2a7db9cd 100644
--- a/baselines/STAEformer/PEMS03.py
+++ b/baselines/STAEformer/PEMS03.py
@@ -1,112 +1,132 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STAEformer
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS03' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STAEformer
-# ================= general ================= #
-CFG.DESCRIPTION = "STAEformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS03"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STAEformer"
-CFG.MODEL.ARCH = STAEformer
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME + "/adj_mx.pkl", "normlap")
-adj_mx = torch.Tensor(adj_mx[0])
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"num_nodes" : 358
}
-CFG.MODEL.FORWARD_FEATURES = [0,1,2]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STAEformer model configuration (PEMS03)'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
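+# ZScoreScaler standardizes data as (x - mean) / std, with statistics fitted on the
+# training split only (hence 'train_ratio'); 'rescale' controls whether outputs are
+# mapped back to the original scale.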
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001,
- "weight_decay": 0.0015,
+ "weight_decay": 0.0003,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [25, 45, 65],
+ "milestones": [20, 25],
"gamma": 0.1
}
-
-# ================= train ================= #
-# CFG.TRAIN.CLIP_GRAD_PARAM = {
-# "max_norm": 5.0
-# }
-CFG.TRAIN.NUM_EPOCHS = 70
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 16
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 16
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 16
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
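+# Launch command (per the pre-0.4.0 run.sh convention; the updated training script
+# may accept slightly different flags):
+#   python experiments/train.py -c baselines/STAEformer/PEMS03.py --gpus '0'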
diff --git a/baselines/STAEformer/PEMS04.py b/baselines/STAEformer/PEMS04.py
index c8a2c827..c6e3b126 100644
--- a/baselines/STAEformer/PEMS04.py
+++ b/baselines/STAEformer/PEMS04.py
@@ -1,112 +1,132 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STAEformer
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STAEformer
-# ================= general ================= #
-CFG.DESCRIPTION = "STAEformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STAEformer"
-CFG.MODEL.ARCH = STAEformer
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME + "/adj_mx.pkl", "normlap")
-adj_mx = torch.Tensor(adj_mx[0])
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"num_nodes" : 307
}
-CFG.MODEL.FORWARD_FEATURES = [0,1,2]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STAEformer model configuration (PEMS04)'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001,
- "weight_decay": 0.0015,
+ "weight_decay": 0.0003,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [25, 45, 65],
+ "milestones": [20, 25],
"gamma": 0.1
}
-
-# ================= train ================= #
-# CFG.TRAIN.CLIP_GRAD_PARAM = {
-# "max_norm": 5.0
-# }
-CFG.TRAIN.NUM_EPOCHS = 70
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 16
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 16
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 16
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/STAEformer/PEMS07.py b/baselines/STAEformer/PEMS07.py
index 0c3994a6..664ff3cb 100644
--- a/baselines/STAEformer/PEMS07.py
+++ b/baselines/STAEformer/PEMS07.py
@@ -1,112 +1,132 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STAEformer
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS07' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STAEformer
-# ================= general ================= #
-CFG.DESCRIPTION = "STAEformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS07"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STAEformer"
-CFG.MODEL.ARCH = STAEformer
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME + "/adj_mx.pkl", "normlap")
-adj_mx = torch.Tensor(adj_mx[0])
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"num_nodes" : 883
}
-CFG.MODEL.FORWARD_FEATURES = [0,1,2]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STAEformer model configuration (PEMS07)'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001,
- "weight_decay": 0.0015,
+ "weight_decay": 0.0003,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [25, 45, 65],
+ "milestones": [20, 25],
"gamma": 0.1
}
-
-# ================= train ================= #
-# CFG.TRAIN.CLIP_GRAD_PARAM = {
-# "max_norm": 5.0
-# }
-CFG.TRAIN.NUM_EPOCHS = 70
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 16
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 8 # reduced batch size (PEMS07 is the largest graph here, 883 nodes)
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 16
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 16
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/STAEformer/PEMS08.py b/baselines/STAEformer/PEMS08.py
index 4a3a0c45..0ffcd096 100644
--- a/baselines/STAEformer/PEMS08.py
+++ b/baselines/STAEformer/PEMS08.py
@@ -1,112 +1,132 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STAEformer
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STAEformer
-# ================= general ================= #
-CFG.DESCRIPTION = "STAEformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STAEformer"
-CFG.MODEL.ARCH = STAEformer
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME + "/adj_mx.pkl", "normlap")
-adj_mx = torch.Tensor(adj_mx[0])
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"num_nodes" : 170
}
-CFG.MODEL.FORWARD_FEATURES = [0,1,2]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STAEformer model configuration (PEMS08)'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001,
- "weight_decay": 0.0015,
+ "weight_decay": 0.0003,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [25, 45, 65],
+ "milestones": [20, 25],
"gamma": 0.1
}
-
-# ================= train ================= #
-# CFG.TRAIN.CLIP_GRAD_PARAM = {
-# "max_norm": 5.0
-# }
-CFG.TRAIN.NUM_EPOCHS = 70
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 16
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 16
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 16
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/STAEformer/run.sh b/baselines/STAEformer/run.sh
deleted file mode 100644
index 1e92166e..00000000
--- a/baselines/STAEformer/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/STAEformer/METR-LA.py --gpus '0'
-python experiments/train.py -c baselines/STAEformer/PEMS-BAY.py --gpus '0'
-python experiments/train.py -c baselines/STAEformer/PEMS03.py --gpus '0'
-python experiments/train.py -c baselines/STAEformer/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/STAEformer/PEMS07.py --gpus '0'
-python experiments/train.py -c baselines/STAEformer/PEMS08.py --gpus '0'
diff --git a/baselines/STEP/README.md b/baselines/STEP/README.md
index f8b79f8c..eae8c956 100644
--- a/baselines/STEP/README.md
+++ b/baselines/STEP/README.md
@@ -1,2 +1 @@
-STEP requires a pre-trained TSFormer model. You can download them from [here](https://github.com/zezhishao/STEP/tree/github/tsformer_ckpt) and place them in the `./ckpts/` folder.
-In addition, STEP requires `timm` package. You can install it by `pip install timm`.
+STEP requires the `timm` package; you can install it with `pip install timm`.
diff --git a/baselines/STEP/STEP_METR-LA.py b/baselines/STEP/STEP_METR-LA.py
index 1eb513ff..fb359074 100644
--- a/baselines/STEP/STEP_METR-LA.py
+++ b/baselines/STEP/STEP_METR-LA.py
@@ -1,49 +1,36 @@
import os
import sys
-
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.utils.serialization import load_adj
-
-from .step_arch import STEP
-from .step_runner import STEPRunner
-from .step_loss import step_loss
-from .step_data import ForecastingDataset
-
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STEP(METR-LA) configuration"
-CFG.RUNNER = STEPRunner
-CFG.DATASET_CLS = ForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.DATASET_ARGS = {
- "seq_len": 288 * 7
- }
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.scaler import ZScoreScaler
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.utils import get_regular_settings
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
+from .arch import STEP
+from .loss import step_loss
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STEP"
-CFG.MODEL.ARCH = STEP
-
-CFG.MODEL.PARAM = {
- "dataset_name": CFG.DATASET_NAME,
- "pre_trained_tsformer_path": "baselines/STEP/ckpts/TSFormer_METR-LA.pt",
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'METR-LA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN_SHORT = regular_settings['INPUT_LEN'] # Length of the short-term input sequence
+INPUT_LEN = 288 * 7 # Length of the long-term input sequence (one week at 5-minute granularity)
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STEP
+MODEL_PARAM = {
+ "dataset_name": DATA_NAME,
+ "pre_trained_tsformer_path": "checkpoints/TSFormer/METR-LA_100_2016_12/de9f10ca8535dbe99fb71072aab848ce/TSFormer_best_val_MAE.pt",
+ "short_term_len": INPUT_LEN_SHORT,
+ "long_term_len": INPUT_LEN,
"tsformer_args": {
"patch_size":12,
"in_channel":1,
@@ -75,49 +62,95 @@
"layers" : 2
},
"dgl_args": {
- "dataset_name": CFG.DATASET_NAME,
+ "dataset_name": DATA_NAME,
"k": 10,
- "input_seq_len": CFG.DATASET_INPUT_LEN,
- "output_seq_len": CFG.DATASET_OUTPUT_LEN
+ "input_seq_len": INPUT_LEN,
+ "output_seq_len": OUTPUT_LEN
}
}
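+# Note: 'pre_trained_tsformer_path' expects a checkpoint produced by first training
+# the companion config baselines/STEP/TSFormer_METR-LA.py; the hash segment of the
+# path is generated per run/config and will likely differ in your setup.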
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STEP model configuration (METR-LA)'
+CFG.GPU_NUM = 2 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
CFG.MODEL.DDP_FIND_UNUSED_PARAMETERS = True
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = step_loss
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
+CFG.TRAIN.OPTIM.PARAM = {
"lr":0.005,
"weight_decay":1.0e-5,
"eps":1.0e-8,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM= {
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones":[1, 18, 36, 54, 72],
"gamma":0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 3.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
CFG.TRAIN.DATA.NUM_WORKERS = 2
CFG.TRAIN.DATA.PIN_MEMORY = True
@@ -127,35 +160,26 @@
CFG.TRAIN.CL.CL_EPOCHS = 6
CFG.TRAIN.CL.PREDICTION_LENGTH = 12
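+# Curriculum learning: after WARM_EPOCHS warm-up epochs, the supervised prediction
+# length is increased stepwise every CL_EPOCHS epochs until it reaches PREDICTION_LENGTH.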
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 32
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
CFG.VAL.DATA.NUM_WORKERS = 2
CFG.VAL.DATA.PIN_MEMORY = True
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# evluation
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 32
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
CFG.TEST.DATA.NUM_WORKERS = 2
CFG.TEST.DATA.PIN_MEMORY = True
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = range(1, 13) # 1, 2, ..., 12
+CFG.EVAL.USE_GPU = False # Whether to use GPU for evaluation. Default: True
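+# (CPU evaluation is presumably chosen to avoid GPU memory pressure from the
+# week-long inputs; set USE_GPU back to True if memory allows)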
diff --git a/baselines/STEP/STEP_PEMS-BAY.py b/baselines/STEP/STEP_PEMS-BAY.py
deleted file mode 100644
index b0b0bbe4..00000000
--- a/baselines/STEP/STEP_PEMS-BAY.py
+++ /dev/null
@@ -1,160 +0,0 @@
-import os
-import sys
-
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.utils.serialization import load_adj
-
-from .step_arch import STEP
-from .step_runner import STEPRunner
-from .step_loss import step_loss
-from .step_data import ForecastingDataset
-
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STEP(PEMS-BAY) configuration"
-CFG.RUNNER = STEPRunner
-CFG.DATASET_CLS = ForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.DATASET_ARGS = {
- "seq_len": 288 * 7
- }
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STEP"
-CFG.MODEL.ARCH = STEP
-CFG.MODEL.PARAM = {
- "dataset_name": CFG.DATASET_NAME,
- "pre_trained_tsformer_path": "baselines/STEP/ckpts/TSFormer_PEMS-BAY.pt",
- "tsformer_args": {
- "patch_size":12,
- "in_channel":1,
- "embed_dim":96,
- "num_heads":4,
- "mlp_ratio":4,
- "dropout":0.1,
- "num_token":288 * 7 / 12,
- "mask_ratio":0.75,
- "encoder_depth":4,
- "decoder_depth":1,
- "mode":"forecasting"
- },
- "backend_args": {
- "num_nodes" : 325,
- "support_len" : 2,
- "dropout" : 0.3,
- "gcn_bool" : True,
- "addaptadj" : True,
- "aptinit" : None,
- "in_dim" : 2,
- "out_dim" : 12,
- "residual_channels" : 32,
- "dilation_channels" : 32,
- "skip_channels" : 256,
- "end_channels" : 512,
- "kernel_size" : 2,
- "blocks" : 4,
- "layers" : 2
- },
- "dgl_args": {
- "dataset_name": CFG.DATASET_NAME,
- "k": 10,
- "input_seq_len": CFG.DATASET_INPUT_LEN,
- "output_seq_len": CFG.DATASET_OUTPUT_LEN
- }
-}
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
-CFG.MODEL.TARGET_FEATURES = [0]
-CFG.MODEL.DDP_FIND_UNUSED_PARAMETERS = True
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = step_loss
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.001,
- "weight_decay":1.0e-5,
- "eps":1.0e-8,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM= {
- "milestones":[1, 18, 36, 54, 72],
- "gamma":0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 3.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = True
-# curriculum learning
-CFG.TRAIN.CL = EasyDict()
-CFG.TRAIN.CL.WARM_EPOCHS = 30
-CFG.TRAIN.CL.CL_EPOCHS = 3
-CFG.TRAIN.CL.PREDICTION_LENGTH = 12
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 32
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = True
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# evluation
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 32
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = True
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/STEP/STEP_PEMS03.py b/baselines/STEP/STEP_PEMS03.py
deleted file mode 100644
index b41812ed..00000000
--- a/baselines/STEP/STEP_PEMS03.py
+++ /dev/null
@@ -1,155 +0,0 @@
-import os
-import sys
-
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.utils.serialization import load_adj
-
-from .step_arch import STEP
-from .step_runner import STEPRunner
-from .step_loss import step_loss
-from .step_data import ForecastingDataset
-
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STEP(PEMS03) configuration"
-CFG.RUNNER = STEPRunner
-CFG.DATASET_CLS = ForecastingDataset
-CFG.DATASET_NAME = "PEMS03"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.DATASET_ARGS = {
- "seq_len": 288 * 7 * 2
- }
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STEP"
-CFG.MODEL.ARCH = STEP
-CFG.MODEL.PARAM = {
- "dataset_name": CFG.DATASET_NAME,
- "pre_trained_tsformer_path": "baselines/STEP/ckpts/TSFormer_PEMS03.pt",
- "tsformer_args": {
- "patch_size":12,
- "in_channel":1,
- "embed_dim":96,
- "num_heads":4,
- "mlp_ratio":4,
- "dropout":0.1,
- "num_token":288 * 7 * 2 / 12,
- "mask_ratio":0.75,
- "encoder_depth":4,
- "decoder_depth":1,
- "mode":"forecasting"
- },
- "backend_args": {
- "num_nodes" : 358,
- "support_len" : 2,
- "dropout" : 0.3,
- "gcn_bool" : True,
- "addaptadj" : True,
- "aptinit" : None,
- "in_dim" : 2,
- "out_dim" : 12,
- "residual_channels" : 32,
- "dilation_channels" : 32,
- "skip_channels" : 256,
- "end_channels" : 512,
- "kernel_size" : 2,
- "blocks" : 4,
- "layers" : 2
- },
- "dgl_args": {
- "dataset_name": CFG.DATASET_NAME,
- "k": 10,
- "input_seq_len": CFG.DATASET_INPUT_LEN,
- "output_seq_len": CFG.DATASET_OUTPUT_LEN
- }
-}
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
-CFG.MODEL.TARGET_FEATURES = [0]
-CFG.MODEL.DDP_FIND_UNUSED_PARAMETERS = True
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = step_loss
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.002,
- "weight_decay":1.0e-5,
- "eps":1.0e-8,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM= {
- "milestones":[1, 18, 36, 54, 72],
- "gamma":0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 3.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 8
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = True
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 8
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = True
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# evluation
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 8
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = True
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/STEP/STEP_PEMS04.py b/baselines/STEP/STEP_PEMS04.py
deleted file mode 100644
index 62150592..00000000
--- a/baselines/STEP/STEP_PEMS04.py
+++ /dev/null
@@ -1,155 +0,0 @@
-import os
-import sys
-
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.utils.serialization import load_adj
-
-from .step_arch import STEP
-from .step_runner import STEPRunner
-from .step_loss import step_loss
-from .step_data import ForecastingDataset
-
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STEP(PEMS04) configuration"
-CFG.RUNNER = STEPRunner
-CFG.DATASET_CLS = ForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.DATASET_ARGS = {
- "seq_len": 288 * 7 * 2
- }
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STEP"
-CFG.MODEL.ARCH = STEP
-CFG.MODEL.PARAM = {
- "dataset_name": CFG.DATASET_NAME,
- "pre_trained_tsformer_path": "baselines/STEP/ckpts/TSFormer_PEMS04.pt",
- "tsformer_args": {
- "patch_size":12,
- "in_channel":1,
- "embed_dim":96,
- "num_heads":4,
- "mlp_ratio":4,
- "dropout":0.1,
- "num_token":288 * 7 * 2 / 12,
- "mask_ratio":0.75,
- "encoder_depth":4,
- "decoder_depth":1,
- "mode":"forecasting"
- },
- "backend_args": {
- "num_nodes" : 307,
- "support_len" : 2,
- "dropout" : 0.3,
- "gcn_bool" : True,
- "addaptadj" : True,
- "aptinit" : None,
- "in_dim" : 2,
- "out_dim" : 12,
- "residual_channels" : 32,
- "dilation_channels" : 32,
- "skip_channels" : 256,
- "end_channels" : 512,
- "kernel_size" : 2,
- "blocks" : 4,
- "layers" : 2
- },
- "dgl_args": {
- "dataset_name": CFG.DATASET_NAME,
- "k": 10,
- "input_seq_len": CFG.DATASET_INPUT_LEN,
- "output_seq_len": CFG.DATASET_OUTPUT_LEN
- }
-}
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
-CFG.MODEL.TARGET_FEATURES = [0]
-CFG.MODEL.DDP_FIND_UNUSED_PARAMETERS = True
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = step_loss
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.002,
- "weight_decay":1.0e-5,
- "eps":1.0e-8,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM= {
- "milestones":[1, 18, 36, 54, 72],
- "gamma":0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 3.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 8
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = True
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 8
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = True
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# evluation
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 8
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = True
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/STEP/STEP_PEMS07.py b/baselines/STEP/STEP_PEMS07.py
deleted file mode 100644
index 8c113d53..00000000
--- a/baselines/STEP/STEP_PEMS07.py
+++ /dev/null
@@ -1,155 +0,0 @@
-import os
-import sys
-
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.utils.serialization import load_adj
-
-from .step_arch import STEP
-from .step_runner import STEPRunner
-from .step_loss import step_loss
-from .step_data import ForecastingDataset
-
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STEP(PEMS07) configuration"
-CFG.RUNNER = STEPRunner
-CFG.DATASET_CLS = ForecastingDataset
-CFG.DATASET_NAME = "PEMS07"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.DATASET_ARGS = {
- "seq_len": 288 * 7
- }
-CFG.GPU_NUM = 2
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STEP"
-CFG.MODEL.ARCH = STEP
-CFG.MODEL.PARAM = {
- "dataset_name": CFG.DATASET_NAME,
- "pre_trained_tsformer_path": "baselines/STEP/ckpts/TSFormer_PEMS07.pt",
- "tsformer_args": {
- "patch_size":12,
- "in_channel":1,
- "embed_dim":96,
- "num_heads":4,
- "mlp_ratio":4,
- "dropout":0.1,
- "num_token":288 * 7 / 12,
- "mask_ratio":0.75,
- "encoder_depth":4,
- "decoder_depth":1,
- "mode":"forecasting"
- },
- "backend_args": {
- "num_nodes" : 883,
- "support_len" : 2,
- "dropout" : 0.3,
- "gcn_bool" : True,
- "addaptadj" : True,
- "aptinit" : None,
- "in_dim" : 2,
- "out_dim" : 12,
- "residual_channels" : 32,
- "dilation_channels" : 32,
- "skip_channels" : 256,
- "end_channels" : 512,
- "kernel_size" : 2,
- "blocks" : 4,
- "layers" : 2
- },
- "dgl_args": {
- "dataset_name": CFG.DATASET_NAME,
- "k": 10,
- "input_seq_len": CFG.DATASET_INPUT_LEN,
- "output_seq_len": CFG.DATASET_OUTPUT_LEN
- }
-}
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
-CFG.MODEL.TARGET_FEATURES = [0]
-CFG.MODEL.DDP_FIND_UNUSED_PARAMETERS = True
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = step_loss
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.002,
- "weight_decay":1.0e-5,
- "eps":1.0e-8,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM= {
- "milestones":[1, 18, 36, 54, 72],
- "gamma":0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 3.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-CFG.TRAIN.NULL_VAL = 0.0
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 8
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = True
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 8
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = True
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 100
-# evluation
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 8
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = True
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/STEP/STEP_PEMS08.py b/baselines/STEP/STEP_PEMS08.py
deleted file mode 100644
index d2cc5a9b..00000000
--- a/baselines/STEP/STEP_PEMS08.py
+++ /dev/null
@@ -1,155 +0,0 @@
-import os
-import sys
-
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.utils.serialization import load_adj
-
-from .step_arch import STEP
-from .step_runner import STEPRunner
-from .step_loss import step_loss
-from .step_data import ForecastingDataset
-
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STEP(PEMS08) configuration"
-CFG.RUNNER = STEPRunner
-CFG.DATASET_CLS = ForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.DATASET_ARGS = {
- "seq_len": 288 * 7 * 2
- }
-CFG.GPU_NUM = 4
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STEP"
-CFG.MODEL.ARCH = STEP
-CFG.MODEL.PARAM = {
- "dataset_name": CFG.DATASET_NAME,
- "pre_trained_tsformer_path": "baselines/STEP/ckpts/TSFormer_PEMS08.pt",
- "tsformer_args": {
- "patch_size":12,
- "in_channel":1,
- "embed_dim":96,
- "num_heads":4,
- "mlp_ratio":4,
- "dropout":0.1,
- "num_token":288 * 7 * 2 / 12,
- "mask_ratio":0.75,
- "encoder_depth":4,
- "decoder_depth":1,
- "mode":"forecasting"
- },
- "backend_args": {
- "num_nodes" : 170,
- "support_len" : 2,
- "dropout" : 0.3,
- "gcn_bool" : True,
- "addaptadj" : True,
- "aptinit" : None,
- "in_dim" : 2,
- "out_dim" : 12,
- "residual_channels" : 32,
- "dilation_channels" : 32,
- "skip_channels" : 256,
- "end_channels" : 512,
- "kernel_size" : 2,
- "blocks" : 4,
- "layers" : 2
- },
- "dgl_args": {
- "dataset_name": CFG.DATASET_NAME,
- "k": 10,
- "input_seq_len": CFG.DATASET_INPUT_LEN,
- "output_seq_len": CFG.DATASET_OUTPUT_LEN
- }
-}
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
-CFG.MODEL.TARGET_FEATURES = [0]
-CFG.MODEL.DDP_FIND_UNUSED_PARAMETERS = True
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = step_loss
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.002,
- "weight_decay":1.0e-5,
- "eps":1.0e-8,
-}
-CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
-CFG.TRAIN.LR_SCHEDULER.PARAM= {
- "milestones":[1, 18, 36, 54, 72],
- "gamma":0.5
-}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 3.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 8
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = True
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 8
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = True
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# evluation
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 8
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = True
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/STEP/TSFormer_METR-LA.py b/baselines/STEP/TSFormer_METR-LA.py
new file mode 100644
index 00000000..5dd8a776
--- /dev/null
+++ b/baselines/STEP/TSFormer_METR-LA.py
@@ -0,0 +1,152 @@
+import os
+import sys
+import torch
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.scaler import ZScoreScaler
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.utils import get_regular_settings
+
+from .arch import TSFormer
+from .runner import TSFormerRunner
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'METR-LA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+INPUT_LEN = 288 * 7 # One week of 5-minute steps, used as the long-term input for pre-training
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = TSFormer
+MODEL_PARAM = {
+ "patch_size":12,
+ "in_channel":1,
+ "embed_dim":96,
+ "num_heads":4,
+ "mlp_ratio":4,
+ "dropout":0.1,
+ "num_token":288 * 7 / 12,
+ "mask_ratio":0.75,
+ "encoder_depth":4,
+ "decoder_depth":1,
+ "mode":"pre-train"
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = TSFormerRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr":0.0005,
+ "weight_decay":0,
+ "eps":1.0e-8,
+ "betas":(0.9, 0.95)
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones":[50],
+ "gamma":0.5
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 8
+CFG.TRAIN.DATA.SHUFFLE = True
+CFG.TRAIN.DATA.NUM_WORKERS = 2
+CFG.TRAIN.DATA.PIN_MEMORY = True
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 8
+CFG.VAL.DATA.NUM_WORKERS = 2
+CFG.VAL.DATA.PIN_MEMORY = True
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 8
+CFG.TEST.DATA.NUM_WORKERS = 2
+CFG.TEST.DATA.PIN_MEMORY = True
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
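For orientation (not part of the patch): a config file like the one above is consumed by the repo's training entry point. A minimal launch sketch, assuming the launch_training helper that experiments/train.py wraps; the exact argument names here are assumptions.

import basicts

if __name__ == '__main__':
    # config path, then GPU ids (argument order assumed from the training script)
    basicts.launch_training('baselines/STEP/TSFormer_METR-LA.py', '0')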
diff --git a/baselines/STEP/step_arch/__init__.py b/baselines/STEP/arch/__init__.py
similarity index 100%
rename from baselines/STEP/step_arch/__init__.py
rename to baselines/STEP/arch/__init__.py
diff --git a/baselines/STEP/step_arch/discrete_graph_learning.py b/baselines/STEP/arch/discrete_graph_learning.py
similarity index 97%
rename from baselines/STEP/step_arch/discrete_graph_learning.py
rename to baselines/STEP/arch/discrete_graph_learning.py
index 91bdab00..dfbc40c0 100644
--- a/baselines/STEP/step_arch/discrete_graph_learning.py
+++ b/baselines/STEP/arch/discrete_graph_learning.py
@@ -3,7 +3,7 @@
import numpy as np
from torch import nn
import torch.nn.functional as F
-from basicts.utils import load_pkl
+from basicts.utils import load_dataset_data
from .similarity import batch_cosine_similarity, batch_dot_similarity
@@ -54,7 +54,7 @@ def __init__(self, dataset_name, k, input_seq_len, output_seq_len):
self.k = k # the "k" of knn graph
self.num_nodes = {"METR-LA": 207, "PEMS04": 307, "PEMS03": 358, "PEMS-BAY": 325, "PEMS07": 883, "PEMS08": 170}[dataset_name]
self.train_length = {"METR-LA": 23990, "PEMS04": 13599, "PEMS03": 15303, "PEMS07": 16513, "PEMS-BAY": 36482, "PEMS08": 14284}[dataset_name]
- self.node_feats = torch.from_numpy(load_pkl("datasets/" + dataset_name + "/data_in_{0}_out_{1}_rescale_True.pkl".format(input_seq_len, output_seq_len))["processed_data"]).float()[:self.train_length, :, 0]
+ self.node_feats = torch.from_numpy(load_dataset_data(dataset_name))[:self.train_length, :, 0]
# CNN for global feature extraction
## for the dimension, see https://github.com/zezhishao/STEP/issues/1#issuecomment-1191640023
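For orientation: the `k` above drives a kNN prior graph built from per-node training-series similarity. A minimal sketch of that idea, assuming cosine similarity and hypothetical names (the real code lives in this module and in similarity.py):

import torch
import torch.nn.functional as F

def knn_graph(node_feats: torch.Tensor, k: int) -> torch.Tensor:
    # node_feats: [N, D] training series per node -> cosine similarity [N, N]
    normed = F.normalize(node_feats, dim=-1)
    sim = normed @ normed.T
    # keep an edge to each node's k most similar neighbors
    idx = sim.topk(k, dim=-1).indices
    adj = torch.zeros_like(sim)
    adj.scatter_(-1, idx, 1.0)
    return adj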
diff --git a/baselines/STEP/step_arch/graphwavenet/__init__.py b/baselines/STEP/arch/graphwavenet/__init__.py
similarity index 100%
rename from baselines/STEP/step_arch/graphwavenet/__init__.py
rename to baselines/STEP/arch/graphwavenet/__init__.py
diff --git a/baselines/STEP/step_arch/graphwavenet/model.py b/baselines/STEP/arch/graphwavenet/model.py
similarity index 100%
rename from baselines/STEP/step_arch/graphwavenet/model.py
rename to baselines/STEP/arch/graphwavenet/model.py
diff --git a/baselines/STEP/step_arch/similarity.py b/baselines/STEP/arch/similarity.py
similarity index 100%
rename from baselines/STEP/step_arch/similarity.py
rename to baselines/STEP/arch/similarity.py
diff --git a/baselines/STEP/step_arch/step.py b/baselines/STEP/arch/step.py
similarity index 68%
rename from baselines/STEP/step_arch/step.py
rename to baselines/STEP/arch/step.py
index 13c71df3..41022c39 100644
--- a/baselines/STEP/step_arch/step.py
+++ b/baselines/STEP/arch/step.py
@@ -9,8 +9,12 @@
class STEP(nn.Module):
"""Pre-training Enhanced Spatial-temporal Graph Neural Network for Multivariate Time Series Forecasting"""
- def __init__(self, dataset_name, pre_trained_tsformer_path, tsformer_args, backend_args, dgl_args):
+ def __init__(self, dataset_name, pre_trained_tsformer_path, short_term_len, long_term_len, tsformer_args, backend_args, dgl_args):
super().__init__()
+
+ self.short_term_len = short_term_len
+ self.long_term_len = long_term_len
+
self.dataset_name = dataset_name
self.pre_trained_tsformer_path = pre_trained_tsformer_path
@@ -34,25 +38,11 @@ def load_pre_trained_model(self):
for param in self.tsformer.parameters():
param.requires_grad = False
- def forward(self, history_data: torch.Tensor, long_history_data: torch.Tensor, future_data: torch.Tensor, batch_seen: int, epoch: int, **kwargs) -> torch.Tensor:
- """Feed forward of STEP.
-
- Args:
- history_data (torch.Tensor): Short-term historical data. shape: [B, L, N, 3]
- long_history_data (torch.Tensor): Long-term historical data. shape: [B, L * P, N, 3]
- future_data (torch.Tensor): future data
- batch_seen (int): number of batches that have been seen
- epoch (int): number of epochs
-
- Returns:
- torch.Tensor: prediction with shape [B, N, L].
- torch.Tensor: the Bernoulli distribution parameters with shape [B, N, N].
- torch.Tensor: the kNN graph with shape [B, N, N], which is used to guide the training of the dependency graph.
- """
+ def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_seen: int, epoch: int, **kwargs) -> torch.Tensor:
# reshape
- short_term_history = history_data # [B, L, N, 1]
- long_term_history = long_history_data
+ long_term_history = history_data # [B, L, N, 1]
+ short_term_history = history_data[:, -self.short_term_len:, :, :]
# STEP
batch_size, _, num_nodes, _ = short_term_history.shape
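The reworked forward drops the separate long_history_data argument: the runner now feeds a single long history tensor, and STEP slices the short-term window out of it itself. A tiny sketch of that slicing, with hypothetical shapes:

import torch

long_history = torch.randn(4, 288 * 7 * 2, 170, 1)  # [B, L_long, N, C], shapes hypothetical
short_term_len = 12
short_history = long_history[:, -short_term_len:, :, :]  # last 12 steps -> [B, 12, N, C]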
diff --git a/baselines/STEP/step_arch/tsformer/__init__.py b/baselines/STEP/arch/tsformer/__init__.py
similarity index 100%
rename from baselines/STEP/step_arch/tsformer/__init__.py
rename to baselines/STEP/arch/tsformer/__init__.py
diff --git a/baselines/STEP/step_arch/tsformer/mask.py b/baselines/STEP/arch/tsformer/mask.py
similarity index 100%
rename from baselines/STEP/step_arch/tsformer/mask.py
rename to baselines/STEP/arch/tsformer/mask.py
diff --git a/baselines/STEP/step_arch/tsformer/patch.py b/baselines/STEP/arch/tsformer/patch.py
similarity index 100%
rename from baselines/STEP/step_arch/tsformer/patch.py
rename to baselines/STEP/arch/tsformer/patch.py
diff --git a/baselines/STEP/step_arch/tsformer/positional_encoding.py b/baselines/STEP/arch/tsformer/positional_encoding.py
similarity index 100%
rename from baselines/STEP/step_arch/tsformer/positional_encoding.py
rename to baselines/STEP/arch/tsformer/positional_encoding.py
diff --git a/baselines/STEP/step_arch/tsformer/transformer_layers.py b/baselines/STEP/arch/tsformer/transformer_layers.py
similarity index 100%
rename from baselines/STEP/step_arch/tsformer/transformer_layers.py
rename to baselines/STEP/arch/tsformer/transformer_layers.py
diff --git a/baselines/STEP/step_arch/tsformer/tsformer.py b/baselines/STEP/arch/tsformer/tsformer.py
similarity index 98%
rename from baselines/STEP/step_arch/tsformer/tsformer.py
rename to baselines/STEP/arch/tsformer/tsformer.py
index df54192a..2b57e4ae 100644
--- a/baselines/STEP/step_arch/tsformer/tsformer.py
+++ b/baselines/STEP/arch/tsformer/tsformer.py
@@ -185,7 +185,7 @@ def forward(self, history_data: torch.Tensor, future_data: torch.Tensor = None,
reconstruction_full = self.decoding(hidden_states_unmasked, masked_token_index)
# for subsequent loss computing
reconstruction_masked_tokens, label_masked_tokens = self.get_reconstructed_masked_tokens(reconstruction_full, history_data, unmasked_token_index, masked_token_index)
- return reconstruction_masked_tokens, label_masked_tokens
+ return reconstruction_masked_tokens.unsqueeze(-1), label_masked_tokens.unsqueeze(-1)
else:
hidden_states_full, _, _ = self.encoding(history_data, mask=False)
return hidden_states_full
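The added unsqueeze(-1) keeps the pre-training outputs in the library-wide [..., C] layout, so the shared masked metrics can consume them unchanged. A sketch with hypothetical shapes:

import torch

reconstruction = torch.randn(4, 170, 126)  # [B, N, L_masked], shapes hypothetical
labels = torch.randn(4, 170, 126)
reconstruction, labels = reconstruction.unsqueeze(-1), labels.unsqueeze(-1)  # -> [B, N, L_masked, 1]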
diff --git a/baselines/STEP/step_loss/__init__.py b/baselines/STEP/loss/__init__.py
similarity index 100%
rename from baselines/STEP/step_loss/__init__.py
rename to baselines/STEP/loss/__init__.py
diff --git a/baselines/STEP/step_loss/step_loss.py b/baselines/STEP/loss/step_loss.py
similarity index 93%
rename from baselines/STEP/step_loss/step_loss.py
rename to baselines/STEP/loss/step_loss.py
index 86c009b0..8716b9a5 100644
--- a/baselines/STEP/step_loss/step_loss.py
+++ b/baselines/STEP/loss/step_loss.py
@@ -1,6 +1,6 @@
import numpy as np
from torch import nn
-from basicts.losses import masked_mae
+from basicts.metrics import masked_mae
def step_loss(prediction, target, pred_adj, prior_adj, gsl_coefficient, null_val=np.nan):
# graph structure learning loss
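For orientation: step_loss couples a masked forecasting loss with a graph-structure-learning term. A sketch of that combination under assumed semantics (the real implementation is this file):

import numpy as np
from torch import nn
from basicts.metrics import masked_mae

def step_loss_sketch(prediction, target, pred_adj, prior_adj, gsl_coefficient, null_val=np.nan):
    loss_pred = masked_mae(prediction, target, null_val=null_val)  # forecasting term
    loss_graph = nn.BCELoss()(pred_adj, prior_adj)  # learned Bernoulli graph vs. kNN prior
    return loss_pred + loss_graph * gsl_coefficient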
diff --git a/baselines/STEP/runner/__init__.py b/baselines/STEP/runner/__init__.py
new file mode 100644
index 00000000..59fa81bc
--- /dev/null
+++ b/baselines/STEP/runner/__init__.py
@@ -0,0 +1,3 @@
+from .tsformer_runner import TSFormerRunner
+
+__all__ = ["TSFormerRunner"]
diff --git a/baselines/STEP/step_runner/step_runner.py b/baselines/STEP/runner/tsformer_runner.py
similarity index 53%
rename from baselines/STEP/step_runner/step_runner.py
rename to baselines/STEP/runner/tsformer_runner.py
index c41f928f..ea199c8f 100644
--- a/baselines/STEP/step_runner/step_runner.py
+++ b/baselines/STEP/runner/tsformer_runner.py
@@ -1,10 +1,12 @@
-import torch
+from typing import Optional, Dict
+import torch
+from tqdm import tqdm
+from easytorch.utils.dist import master_only
from basicts.runners import BaseTimeSeriesForecastingRunner
-from basicts.metrics import masked_mae, masked_rmse, masked_mape
-class STEPRunner(BaseTimeSeriesForecastingRunner):
+class TSFormerRunner(BaseTimeSeriesForecastingRunner):
def __init__(self, cfg: dict):
super().__init__(cfg)
self.forward_features = cfg["MODEL"].get("FORWARD_FEATURES", None)
@@ -52,20 +54,34 @@ def forward(self, data: tuple, epoch:int = None, iter_num: int = None, train:boo
tuple: (prediction, real_value)
"""
- # preprocess
- future_data, history_data, long_history_data = data
- history_data = self.to_running_device(history_data) # B, L, N, C
- long_history_data = self.to_running_device(long_history_data) # B, L, N, C
- future_data = self.to_running_device(future_data) # B, L, N, C
+ # Preprocess input data
+ future_data, history_data = data['target'], data['inputs']
+ history_data = self.to_running_device(history_data) # Shape: [B, L, N, C]
+ future_data = self.to_running_device(future_data) # Shape: [B, L, N, C]
+ batch_size, length, num_nodes, _ = future_data.shape
+ # Select input features
history_data = self.select_input_features(history_data)
- long_history_data = self.select_input_features(long_history_data)
# feed forward
- model_return = self.model(history_data=history_data, long_history_data=long_history_data, future_data=None, batch_seen=iter_num, epoch=epoch)
-
- # parse model return
- if isinstance(model_return, torch.Tensor): model_return = {"prediction": model_return}
- model_return["inputs"] = self.select_target_features(history_data)
- model_return["target"] = self.select_target_features(future_data)
- return model_return
+ reconstruction_masked_tokens, label_masked_tokens = self.model(history_data=history_data, future_data=None, batch_seen=iter_num, epoch=epoch)
+ results = {'prediction': reconstruction_masked_tokens, 'target': label_masked_tokens, 'inputs': history_data}
+ return results
+
+ @torch.no_grad()
+ @master_only
+ def test(self, train_epoch: Optional[int] = None, save_metrics: bool = False, save_results: bool = False) -> Dict:
+
+ for data in tqdm(self.test_data_loader):
+ data = self.preprocessing(data)
+ forward_return = self.forward(data=data, epoch=None, iter_num=None, train=False)
+ # re-scale data
+ forward_return = self.postprocessing(forward_return)
+ # metrics
+ if not self.if_evaluate_on_gpu:
+ forward_return['target'] = forward_return['target'].detach().cpu()
+ forward_return['prediction'] = forward_return['prediction'].detach().cpu()
+
+ for metric_name, metric_func in self.metrics.items():
+ metric_item = metric_func(forward_return['prediction'], forward_return['target'], null_val=self.null_val)
+ self.update_epoch_meter("test_"+metric_name, metric_item.item())
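The loop above relies on the masked-metric contract metric_func(prediction, target, null_val). A minimal sketch of those semantics, assuming the usual BasicTS-style masking (basicts.metrics.masked_mae is the real implementation):

import math
import torch

def masked_mae_sketch(prediction: torch.Tensor, target: torch.Tensor, null_val: float = math.nan) -> torch.Tensor:
    # mask out entries equal to the null value (or NaNs when null_val is NaN)
    mask = ~torch.isnan(target) if math.isnan(null_val) else target.ne(null_val)
    mask = mask.float()
    mask = mask / mask.mean()  # re-weight so masked entries don't dilute the mean
    loss = (prediction - target).abs() * mask
    return torch.nan_to_num(loss).mean()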
diff --git a/baselines/STEP/step_data/__init__.py b/baselines/STEP/step_data/__init__.py
deleted file mode 100644
index bc0cdb04..00000000
--- a/baselines/STEP/step_data/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from .pretraining_dataset import PretrainingDataset
-from .forecasting_dataset import ForecastingDataset
-
-__all__ = ["PretrainingDataset", "ForecastingDataset"]
diff --git a/baselines/STEP/step_data/forecasting_dataset.py b/baselines/STEP/step_data/forecasting_dataset.py
deleted file mode 100644
index 99a83771..00000000
--- a/baselines/STEP/step_data/forecasting_dataset.py
+++ /dev/null
@@ -1,80 +0,0 @@
-import os
-
-import torch
-from torch.utils.data import Dataset
-from basicts.utils import load_pkl
-
-
-class ForecastingDataset(Dataset):
- """Time series forecasting dataset."""
-
- def __init__(self, data_file_path: str, index_file_path: str, mode: str, seq_len:int) -> None:
- """Init the dataset in the forecasting stage.
-
- Args:
- data_file_path (str): data file path.
- index_file_path (str): index file path.
- mode (str): train, valid, or test.
- seq_len (int): the length of long term historical data.
- """
-
- super().__init__()
- assert mode in ["train", "valid", "test"], "error mode"
- self._check_if_file_exists(data_file_path, index_file_path)
- # read raw data (normalized)
- data = load_pkl(data_file_path)
- processed_data = data["processed_data"]
- self.data = torch.from_numpy(processed_data).float()
- # read index
- self.index = load_pkl(index_file_path)[mode]
- # length of long term historical data
- self.seq_len = seq_len
- # mask
- self.mask = torch.zeros(self.seq_len, self.data.shape[1], self.data.shape[2])
-
- def _check_if_file_exists(self, data_file_path: str, index_file_path: str):
- """Check if data file and index file exist.
-
- Args:
- data_file_path (str): data file path
- index_file_path (str): index file path
-
- Raises:
- FileNotFoundError: no data file
- FileNotFoundError: no index file
- """
-
- if not os.path.isfile(data_file_path):
- raise FileNotFoundError("BasicTS can not find data file {0}".format(data_file_path))
- if not os.path.isfile(index_file_path):
- raise FileNotFoundError("BasicTS can not find index file {0}".format(index_file_path))
-
- def __getitem__(self, index: int) -> tuple:
- """Get a sample.
-
- Args:
- index (int): the iteration index (not the self.index)
-
- Returns:
- tuple: (future_data, history_data), where the shape of each is L x N x C.
- """
-
- idx = list(self.index[index])
-
- history_data = self.data[idx[0]:idx[1]] # 12
- future_data = self.data[idx[1]:idx[2]] # 12
- if idx[1] - self.seq_len < 0:
- long_history_data = self.mask
- else:
- long_history_data = self.data[idx[1] - self.seq_len:idx[1]] # 11
-
- return future_data, history_data, long_history_data
-
- def __len__(self):
- """Dataset length
-
- Returns:
- int: dataset length
- """
-
- return len(self.index)
diff --git a/baselines/STEP/step_data/pretraining_dataset.py b/baselines/STEP/step_data/pretraining_dataset.py
deleted file mode 100644
index 258f0b78..00000000
--- a/baselines/STEP/step_data/pretraining_dataset.py
+++ /dev/null
@@ -1 +0,0 @@
-from basicts.data import TimeSeriesForecastingDataset as PretrainingDataset
diff --git a/baselines/STEP/step_runner/__init__.py b/baselines/STEP/step_runner/__init__.py
deleted file mode 100644
index dfd4ad9c..00000000
--- a/baselines/STEP/step_runner/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .step_runner import STEPRunner
-
-__all__ = ["STEPRunner"]
diff --git a/baselines/STGCN/METR-LA.py b/baselines/STGCN/METR-LA.py
index e1bcfa63..56441496 100644
--- a/baselines/STGCN/METR-LA.py
+++ b/baselines/STGCN/METR-LA.py
@@ -1,43 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STGCN
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STGCN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STGCN"
-CFG.MODEL.ARCH = STGCN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME + "/adj_mx.pkl", "normlap")
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'METR-LA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STGCN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME + "/adj_mx.pkl", "normlap")
adj_mx = torch.Tensor(adj_mx[0])
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"Ks" : 3,
"Kt" : 3,
"blocks" : [[1], [64, 16, 64], [64, 16, 64], [128, 128], [12]],
@@ -49,73 +38,105 @@
"bias": True,
"droprate" : 0.5
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
+ "lr": 0.0004,
+ "weight_decay": 0.0003,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 16
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
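For orientation: the "normlap" argument to load_adj above requests the symmetrically normalized Laplacian L = I - D^{-1/2} A D^{-1/2}. A minimal NumPy sketch of that transform (the real code is in basicts.utils):

import numpy as np

def sym_norm_laplacian(adj: np.ndarray) -> np.ndarray:
    deg = adj.sum(axis=1)
    d_inv_sqrt = np.zeros_like(deg)
    nz = deg > 0
    d_inv_sqrt[nz] = deg[nz] ** -0.5  # guard zero-degree nodes
    d_mat = np.diag(d_inv_sqrt)
    return np.eye(adj.shape[0]) - d_mat @ adj @ d_mat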
diff --git a/baselines/STGCN/PEMS-BAY.py b/baselines/STGCN/PEMS-BAY.py
index 70efabe1..1e10d9e3 100644
--- a/baselines/STGCN/PEMS-BAY.py
+++ b/baselines/STGCN/PEMS-BAY.py
@@ -1,43 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STGCN
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STGCN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STGCN"
-CFG.MODEL.ARCH = STGCN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME + "/adj_mx.pkl", "normlap")
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS-BAY' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STGCN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME + "/adj_mx.pkl", "normlap")
adj_mx = torch.Tensor(adj_mx[0])
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"Ks" : 3,
"Kt" : 3,
"blocks" : [[1], [64, 16, 64], [64, 16, 64], [128, 128], [12]],
@@ -49,73 +38,105 @@
"bias": True,
"droprate" : 0.5
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
+ "lr": 0.0004,
+ "weight_decay": 0.0003,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 16
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/STGCN/PEMS03.py b/baselines/STGCN/PEMS03.py
index a46305e3..37ec01bb 100644
--- a/baselines/STGCN/PEMS03.py
+++ b/baselines/STGCN/PEMS03.py
@@ -1,43 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STGCN
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STGCN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS03"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STGCN"
-CFG.MODEL.ARCH = STGCN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME + "/adj_mx.pkl", "normlap")
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS03' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STGCN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME + "/adj_mx.pkl", "normlap")
adj_mx = torch.Tensor(adj_mx[0])
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"Ks" : 3,
"Kt" : 3,
"blocks" : [[1], [64, 16, 64], [64, 16, 64], [128, 128], [12]],
@@ -49,73 +38,105 @@
"bias": True,
"droprate" : 0.5
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
+ "lr": 0.0004,
+ "weight_decay": 0.0003,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50, 100],
+ "milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 16
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/STGCN/PEMS04.py b/baselines/STGCN/PEMS04.py
index bd15da31..33b4d8e6 100644
--- a/baselines/STGCN/PEMS04.py
+++ b/baselines/STGCN/PEMS04.py
@@ -1,43 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STGCN
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STGCN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STGCN"
-CFG.MODEL.ARCH = STGCN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME + "/adj_mx.pkl", "normlap")
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STGCN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME + "/adj_mx.pkl", "normlap")
adj_mx = torch.Tensor(adj_mx[0])
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"Ks" : 3,
"Kt" : 3,
"blocks" : [[1], [64, 16, 64], [64, 16, 64], [128, 128], [12]],
@@ -49,73 +38,105 @@
"bias": True,
"droprate" : 0.5
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
+ "lr": 0.0004,
+ "weight_decay": 0.0003,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50, 100],
+ "milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 16
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/STGCN/PEMS07.py b/baselines/STGCN/PEMS07.py
index 60b8e3e6..6ca0b66d 100644
--- a/baselines/STGCN/PEMS07.py
+++ b/baselines/STGCN/PEMS07.py
@@ -1,43 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STGCN
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STGCN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS07"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STGCN"
-CFG.MODEL.ARCH = STGCN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME + "/adj_mx.pkl", "normlap")
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS07' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STGCN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME + "/adj_mx.pkl", "normlap")
adj_mx = torch.Tensor(adj_mx[0])
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"Ks" : 3,
"Kt" : 3,
"blocks" : [[1], [64, 16, 64], [64, 16, 64], [128, 128], [12]],
@@ -49,73 +38,105 @@
"bias": True,
"droprate" : 0.5
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
+ "lr": 0.0004,
+ "weight_decay": 0.0003,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50, 100],
+ "milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 16
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/STGCN/PEMS08.py b/baselines/STGCN/PEMS08.py
index 26f6ba17..b09ae01c 100644
--- a/baselines/STGCN/PEMS08.py
+++ b/baselines/STGCN/PEMS08.py
@@ -1,44 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STGCN
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STGCN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STGCN"
-CFG.MODEL.ARCH = STGCN
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME + "/adj_mx.pkl", "normlap")
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STGCN
+adj_mx, _ = load_adj("datasets/" + DATA_NAME + "/adj_mx.pkl", "normlap")
adj_mx = torch.Tensor(adj_mx[0])
-CFG.MODEL.PARAM = {
- "Ks" : 3,
+MODEL_PARAM = {
+ "Ks" : 3,
"Kt" : 3,
"blocks" : [[1], [64, 16, 64], [64, 16, 64], [128, 128], [12]],
"T" : 12,
@@ -49,73 +38,105 @@
"bias": True,
"droprate" : 0.5
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
+ "lr": 0.0004,
+ "weight_decay": 0.0003,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50, 100],
+ "milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
-}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 16
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
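Note: the restructured config above is a plain Python module; the runner imports CFG and instantiates each section itself. A minimal sketch of that wiring, assuming keyword-compatible constructors (build_from_cfg is a hypothetical helper, not part of basicts):

    def build_from_cfg(cfg):
        # 'mode' is injected by the runner, as noted in CFG.DATASET.PARAM
        dataset = cfg.DATASET.TYPE(**cfg.DATASET.PARAM, mode='train')
        scaler = cfg.SCALER.TYPE(**cfg.SCALER.PARAM)
        model = cfg.MODEL.ARCH(**cfg.MODEL.PARAM)
        return dataset, scaler, model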
diff --git a/baselines/STGCN/run.sh b/baselines/STGCN/run.sh
deleted file mode 100644
index a84201f3..00000000
--- a/baselines/STGCN/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/STGCN/METR-LA.py --gpus '0'
-python experiments/train.py -c baselines/STGCN/PEMS-BAY.py --gpus '0'
-python experiments/train.py -c baselines/STGCN/PEMS03.py --gpus '0'
-python experiments/train.py -c baselines/STGCN/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/STGCN/PEMS07.py --gpus '0'
-python experiments/train.py -c baselines/STGCN/PEMS08.py --gpus '0'
diff --git a/baselines/STGODE/METR-LA.py b/baselines/STGODE/METR-LA.py
index 1efc8319..b2d2df59 100644
--- a/baselines/STGODE/METR-LA.py
+++ b/baselines/STGODE/METR-LA.py
@@ -1,44 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
-from .generate_matrices import generate_dtw_spa_matrix
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import STGODE
+from .generate_matrices import generate_dtw_spa_matrix
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STGODE model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic Speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STGODE"
-CFG.MODEL.ARCH = STGODE
-# read
-A_se_wave, A_sp_wave = generate_dtw_spa_matrix(CFG.DATASET_NAME, CFG.DATASET_INPUT_LEN, CFG.DATASET_OUTPUT_LEN)
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'METR-LA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STGODE
+A_se_wave, A_sp_wave = generate_dtw_spa_matrix(DATA_NAME)
+MODEL_PARAM = {
"num_nodes": 207,
"num_features": 3,
"num_timesteps_input": 12,
@@ -46,73 +34,109 @@
"A_sp_hat" : A_sp_wave,
"A_se_hat" : A_se_wave
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "StepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"step_size": 50,
"gamma": 0.5
}
-
-# ================= train ================= #
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
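Note: CFG.SCALER replaces the old in-dataset normalization; statistics are fit on the training split only (train_ratio = TRAIN_VAL_TEST_RATIO[0]) and predictions are de-normalized before metrics when RESCALE is set. A minimal numpy sketch of the assumed ZScoreScaler semantics, not the basicts implementation:

    import numpy as np

    def fit_zscore(data, train_ratio, norm_each_channel):
        # data: [time, channel]; never use val/test samples for the statistics
        train = data[: int(len(data) * train_ratio)]
        axis = 0 if norm_each_channel else None   # per-channel vs. one global pair
        return train.mean(axis=axis), train.std(axis=axis)

    def transform(x, mean, std):
        return (x - mean) / std

    def inverse_transform(x_hat, mean, std):      # applied when RESCALE is True
        return x_hat * std + mean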
diff --git a/baselines/STGODE/PEMS-BAY.py b/baselines/STGODE/PEMS-BAY.py
index b93ebea8..8546c100 100644
--- a/baselines/STGODE/PEMS-BAY.py
+++ b/baselines/STGODE/PEMS-BAY.py
@@ -1,44 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
-from .generate_matrices import generate_dtw_spa_matrix
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import STGODE
+from .generate_matrices import generate_dtw_spa_matrix
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STGODE model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic Speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STGODE"
-CFG.MODEL.ARCH = STGODE
-# read
-A_se_wave, A_sp_wave = generate_dtw_spa_matrix(CFG.DATASET_NAME, CFG.DATASET_INPUT_LEN, CFG.DATASET_OUTPUT_LEN)
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS-BAY' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STGODE
+A_se_wave, A_sp_wave = generate_dtw_spa_matrix(DATA_NAME)
+MODEL_PARAM = {
"num_nodes": 325,
"num_features": 3,
"num_timesteps_input": 12,
@@ -46,73 +34,109 @@
"A_sp_hat" : A_sp_wave,
"A_se_hat" : A_se_wave
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "StepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"step_size": 50,
"gamma": 0.5
}
-
-# ================= train ================= #
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
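Note: CFG.METRICS.NULL_VAL feeds the masked metrics imported at the top of each config; samples equal to the null value (0.0 for the PEMS datasets) are excluded from the average. A sketch of the assumed masking logic, for illustration only:

    import torch

    def masked_mae_sketch(pred, target, null_val=0.0):
        mask = (target != null_val).float()
        mask = mask / mask.mean()                 # keep the loss on a stable scale
        loss = torch.abs(pred - target) * mask
        return torch.nan_to_num(loss).mean()      # guard against all-masked batches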
diff --git a/baselines/STGODE/PEMS04.py b/baselines/STGODE/PEMS04.py
index 4423e118..ea98cf9d 100644
--- a/baselines/STGODE/PEMS04.py
+++ b/baselines/STGODE/PEMS04.py
@@ -1,44 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
-from .generate_matrices import generate_dtw_spa_matrix
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import STGODE
+from .generate_matrices import generate_dtw_spa_matrix
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STGODE model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STGODE"
-CFG.MODEL.ARCH = STGODE
-# read
-A_se_wave, A_sp_wave = generate_dtw_spa_matrix(CFG.DATASET_NAME, CFG.DATASET_INPUT_LEN, CFG.DATASET_OUTPUT_LEN)
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STGODE
+A_se_wave, A_sp_wave = generate_dtw_spa_matrix(DATA_NAME)
+MODEL_PARAM = {
"num_nodes": 307,
"num_features": 3,
"num_timesteps_input": 12,
@@ -46,73 +34,109 @@
"A_sp_hat" : A_sp_wave,
"A_se_hat" : A_se_wave
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "StepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"step_size": 50,
"gamma": 0.5
}
-
-# ================= train ================= #
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
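Note: CKPT_SAVE_DIR now encodes the dataset, epoch budget, and sequence lengths in the directory name. For the PEMS04 config above (the regular settings resolve to 12-step input and output here), the expression evaluates to:

    import os
    path = os.path.join('checkpoints', 'STGODE', '_'.join(['PEMS04', '100', '12', '12']))
    print(path)  # checkpoints/STGODE/PEMS04_100_12_12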
diff --git a/baselines/STGODE/PEMS08.py b/baselines/STGODE/PEMS08.py
index 5c82ee25..ffd0ba56 100644
--- a/baselines/STGODE/PEMS08.py
+++ b/baselines/STGODE/PEMS08.py
@@ -1,44 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-from basicts.utils import load_adj
-from .generate_matrices import generate_dtw_spa_matrix
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import STGODE
+from .generate_matrices import generate_dtw_spa_matrix
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STGODE model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STGODE"
-CFG.MODEL.ARCH = STGODE
-# read
-A_se_wave, A_sp_wave = generate_dtw_spa_matrix(CFG.DATASET_NAME, CFG.DATASET_INPUT_LEN, CFG.DATASET_OUTPUT_LEN)
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STGODE
+A_se_wave, A_sp_wave = generate_dtw_spa_matrix(DATA_NAME)
+MODEL_PARAM = {
"num_nodes": 170,
"num_features": 3,
"num_timesteps_input": 12,
@@ -46,73 +34,109 @@
"A_sp_hat" : A_sp_wave,
"A_se_hat" : A_se_wave
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "StepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"step_size": 50,
"gamma": 0.5
}
-
-# ================= train ================= #
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
CFG.TRAIN.CLIP_GRAD_PARAM = {
"max_norm": 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
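Note: CFG.EVAL.HORIZONS asks the runner to report metrics at selected prediction steps in addition to the overall average. A sketch of the assumed slicing convention (1-indexed steps along the time axis; the actual slicing lives in the basicts runner):

    def eval_horizons(pred, target, metric, horizons=(3, 6, 12)):
        # pred/target: [batch, time, node, feature]
        return {h: metric(pred[:, h - 1], target[:, h - 1]) for h in horizons}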
diff --git a/baselines/STGODE/generate_matrices.py b/baselines/STGODE/generate_matrices.py
index cfd808a1..d4876cb4 100644
--- a/baselines/STGODE/generate_matrices.py
+++ b/baselines/STGODE/generate_matrices.py
@@ -12,7 +12,7 @@
from tqdm import tqdm
from fastdtw import fastdtw
-from basicts.utils.serialization import load_pkl
+from basicts.utils.serialization import load_pkl, load_dataset_data
def get_normalized_adj(A):
@@ -29,7 +29,7 @@ def get_normalized_adj(A):
return torch.from_numpy(A_reg.astype(np.float32))
-def generate_dtw_spa_matrix(dataset_name, in_len, out_len, sigma1=0.1, thres1=0.6, sigma2=10, thres2=0.5, re_scale=True):
+def generate_dtw_spa_matrix(dataset_name, sigma1=0.1, thres1=0.6, sigma2=10, thres2=0.5):
"""read data, generate spatial adjacency matrix and semantic adjacency matrix by dtw
Args:
@@ -45,9 +45,7 @@ def generate_dtw_spa_matrix(dataset_name, in_len, out_len, sigma1=0.1, thres1=0.
"""
# original STGODE use the full time series to generate the matrices, which is not reasonable since the test set is not available in real world
- data_file = "./datasets/{0}/data_in_{1}_out_{2}_rescale_{3}.pkl".format(dataset_name, in_len, out_len, re_scale)
- with open(data_file, 'rb') as f:
- data = pickle.load(f)["processed_data"]
+ data = load_dataset_data(dataset_name=dataset_name)
num_node = data.shape[1]
if not os.path.exists('{0}/{1}_dtw_distance.npy'.format(os.path.abspath(__file__ + "/.."), dataset_name)):
print("generate dtw distance matrix")
@@ -75,6 +73,7 @@ def generate_dtw_spa_matrix(dataset_name, in_len, out_len, sigma1=0.1, thres1=0.
# STGODE provides the scripts to generate spatial matrix for PEMS03, PEMS04, PEMS07, PEMS08
# For other datasets, we use the original spatial matrix.
if dataset_name in ["PEMS03", "PEMS04", "PEMS07", "PEMS08"]:
+        print("STGODE generates the spatial matrix from the raw data. Please ensure the raw data is placed at `datasets/raw_data/$DATASET_NAME/$DATASET_NAME.csv`.")
if not os.path.exists('{0}/{1}_spatial_distance.npy'.format(os.path.abspath(__file__ + "/.."), dataset_name)):
graph_csv_file_path = "./datasets/raw_data/{0}/{0}.csv".format(dataset_name)
with open(graph_csv_file_path, 'r') as fp:
@@ -111,4 +110,7 @@ def generate_dtw_spa_matrix(dataset_name, in_len, out_len, sigma1=0.1, thres1=0.
if __name__ == "__main__":
parser = argparse.ArgumentParser()
- generate_dtw_spa_matrix("PEMS04", 12, 12, re_scale=True)
+ # generate_dtw_spa_matrix("PEMS04")
+ # generate_dtw_spa_matrix("PEMS08")
+ generate_dtw_spa_matrix("PEMS-BAY")
+ generate_dtw_spa_matrix("METR-LA")
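Note: the sigma1/thres1 pair above drives a thresholded Gaussian kernel over the DTW distances to obtain the binary semantic adjacency; roughly as below (the exact standardization inside generate_dtw_spa_matrix may differ):

    import numpy as np

    def dtw_to_semantic_adj(dist, sigma=0.1, thres=0.6):
        z = (dist - dist.mean()) / dist.std()     # standardize the distance matrix
        sim = np.exp(-z ** 2 / sigma ** 2)        # similarity in (0, 1]
        return (sim > thres).astype(np.float32)   # keep only strongly similar pairs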
diff --git a/baselines/STID/CA.py b/baselines/STID/CA.py
index 3020395e..bef0a901 100644
--- a/baselines/STID/CA.py
+++ b/baselines/STID/CA.py
@@ -1,44 +1,35 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.losses import masked_mae
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STID
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STID model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "CA"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STID"
-CFG.MODEL.ARCH = STID
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'CA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STID
+MODEL_PARAM = {
"num_nodes": 8600,
- "input_len": CFG.DATASET_INPUT_LEN,
+ "input_len": INPUT_LEN,
"input_dim": 3,
"embed_dim": 32,
- "output_len": CFG.DATASET_OUTPUT_LEN,
+ "output_len": OUTPUT_LEN,
"num_layer": 4,
"if_node": True,
"node_dim": 128,
@@ -49,74 +40,108 @@
"time_of_day_size": 96,
"day_of_week_size": 7
}
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # traffic flow, time in day
-CFG.MODEL.TARGET_FEATURES = [0] # traffic flow
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
-# ================= optim ================= #
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 30, 60, 80],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 64
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 50
-# test data
+CFG.TEST.INTERVAL = 1
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.USE_GPU = False
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = False # Whether to use GPU for evaluation. Default: True
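Note: time_of_day_size = 96 reflects the 15-minute sampling of the CA dataset (96 slots per day). STID is assumed to map the fractional time-of-day feature to an embedding index roughly as follows (embedding dimension 32 chosen purely for illustration):

    import torch

    tod_emb = torch.nn.Embedding(96, 32)          # one vector per 15-minute slot
    tod_frac = torch.tensor([0.0, 0.5])           # midnight and noon, in [0, 1)
    slot = (tod_frac * 96).long()                 # tensor([ 0, 48])
    vec = tod_emb(slot)                           # shape [2, 32]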
diff --git a/baselines/STID/ETTh1.py b/baselines/STID/ETTh1.py
index 9939ea26..5b91ceee 100644
--- a/baselines/STID/ETTh1.py
+++ b/baselines/STID/ETTh1.py
@@ -1,46 +1,38 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import STID
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STID model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STID"
-CFG.MODEL.ARCH = STID
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STID
+MODEL_PARAM = {
"num_nodes": 7,
- "input_len": CFG.DATASET_INPUT_LEN,
+ "input_len": INPUT_LEN,
"input_dim": 1,
- "embed_dim": 1024,
- "output_len": CFG.DATASET_OUTPUT_LEN,
+ "embed_dim": 2048,
+ "output_len": OUTPUT_LEN,
"num_layer": 1,
"if_node": True,
- "node_dim": 32,
+ "node_dim": 8,
"if_T_i_D": True,
"if_D_i_W": True,
"temp_dim_tid": 8,
@@ -48,73 +40,107 @@
"time_of_day_size": 24,
"day_of_week_size": 7
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0003,
+ "lr": 0.0005,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25, 50],
- "gamma": 0.5
+ "milestones": [1, 3, 5],
+ "gamma": 0.1
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
\ No newline at end of file
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
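Note: the tightened schedule above (milestones [1, 3, 5], gamma 0.1) decays the LTSF learning rate quickly, from 5e-4 down to 5e-7 by epoch 5 in PyTorch's 0-indexed epoch counting. The decay points can be checked directly:

    import torch

    p = torch.nn.Parameter(torch.zeros(1))
    opt = torch.optim.Adam([p], lr=0.0005)
    sched = torch.optim.lr_scheduler.MultiStepLR(opt, milestones=[1, 3, 5], gamma=0.1)
    for epoch in range(6):
        print(epoch, sched.get_last_lr())  # 5e-4, 5e-5, 5e-5, 5e-6, 5e-6, 5e-7
        opt.step()
        sched.step()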
diff --git a/baselines/STID/ETTh2.py b/baselines/STID/ETTh2.py
index 83f9dbc5..fba18116 100644
--- a/baselines/STID/ETTh2.py
+++ b/baselines/STID/ETTh2.py
@@ -1,46 +1,38 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import STID
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STID model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STID"
-CFG.MODEL.ARCH = STID
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STID
+MODEL_PARAM = {
"num_nodes": 7,
- "input_len": CFG.DATASET_INPUT_LEN,
+ "input_len": INPUT_LEN,
"input_dim": 1,
- "embed_dim": 1024,
- "output_len": CFG.DATASET_OUTPUT_LEN,
+ "embed_dim": 2048,
+ "output_len": OUTPUT_LEN,
"num_layer": 1,
"if_node": True,
- "node_dim": 32,
+ "node_dim": 8,
"if_T_i_D": True,
"if_D_i_W": True,
"temp_dim_tid": 8,
@@ -48,73 +40,107 @@
"time_of_day_size": 24,
"day_of_week_size": 7
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0003,
+ "lr": 0.0005,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25, 50],
- "gamma": 0.5
+ "milestones": [1, 3, 5],
+ "gamma": 0.1
}
-
-# ================= train ================= #
-# CFG.TRAIN.CLIP_GRAD_PARAM = {
-# 'max_norm': 5.0
-# }
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
\ No newline at end of file
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/STID/ETTm1.py b/baselines/STID/ETTm1.py
index fd7f7b2a..3bf4c702 100644
--- a/baselines/STID/ETTm1.py
+++ b/baselines/STID/ETTm1.py
@@ -1,120 +1,146 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import STID
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STID model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STID"
-CFG.MODEL.ARCH = STID
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STID
+MODEL_PARAM = {
"num_nodes": 7,
- "input_len": CFG.DATASET_INPUT_LEN,
+ "input_len": INPUT_LEN,
"input_dim": 1,
- "embed_dim": 1024,
- "output_len": CFG.DATASET_OUTPUT_LEN,
+ "embed_dim": 2048,
+ "output_len": OUTPUT_LEN,
"num_layer": 1,
"if_node": True,
- "node_dim": 32,
+ "node_dim": 8,
"if_T_i_D": True,
"if_D_i_W": True,
"temp_dim_tid": 8,
"temp_dim_diw": 8,
- "time_of_day_size": 24,
+ "time_of_day_size": 24 * 4,
"day_of_week_size": 7
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STID model configuration for ETTm1'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0003,
+ "lr": 0.0005,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25, 50],
- "gamma": 0.5
+ "milestones": [1, 3, 5],
+ "gamma": 0.1
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
\ No newline at end of file
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
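ETTm* is sampled every 15 minutes, which is why this patch replaces time_of_day_size = 24 with 24 * 4 = 96 slots per day. An illustrative mapping from a timestamp to the time-of-day index (a sketch of the convention; BasicTS computes these features during preprocessing):

from datetime import datetime

def time_of_day_index(ts: datetime, steps_per_day: int = 24 * 4) -> int:
    minutes = ts.hour * 60 + ts.minute
    return minutes * steps_per_day // (24 * 60)

print(time_of_day_index(datetime(2016, 7, 1, 0, 15)))   # -> 1
print(time_of_day_index(datetime(2016, 7, 1, 23, 45)))  # -> 95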
diff --git a/baselines/STID/ETTm2.py b/baselines/STID/ETTm2.py
index 39903ad5..c6a5ceda 100644
--- a/baselines/STID/ETTm2.py
+++ b/baselines/STID/ETTm2.py
@@ -1,120 +1,146 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import STID
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STID model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STID"
-CFG.MODEL.ARCH = STID
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STID
+MODEL_PARAM = {
"num_nodes": 7,
- "input_len": CFG.DATASET_INPUT_LEN,
+ "input_len": INPUT_LEN,
"input_dim": 1,
- "embed_dim": 1024,
- "output_len": CFG.DATASET_OUTPUT_LEN,
+ "embed_dim": 2048,
+ "output_len": OUTPUT_LEN,
"num_layer": 1,
"if_node": True,
- "node_dim": 32,
+ "node_dim": 8,
"if_T_i_D": True,
"if_D_i_W": True,
"temp_dim_tid": 8,
"temp_dim_diw": 8,
- "time_of_day_size": 24,
+ "time_of_day_size": 24 * 4,
"day_of_week_size": 7
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STID model configuration for ETTm2'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0003,
+ "lr": 0.0005,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25, 50],
- "gamma": 0.5
+ "milestones": [1, 3, 5],
+ "gamma": 0.1
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
\ No newline at end of file
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
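The new SCALER block takes over the normalization that the old dataset classes performed internally: statistics are fit on the train split only and can be computed per channel. A minimal sketch of the idea (not BasicTS's actual ZScoreScaler; the 0.6 train ratio is assumed for illustration):

import numpy as np

class ZScoreSketch:
    def __init__(self, train_ratio: float, norm_each_channel: bool):
        self.train_ratio = train_ratio
        self.norm_each_channel = norm_each_channel
        self.mean = self.std = None

    def fit(self, data: np.ndarray):                 # data: [time, channels]
        train = data[: int(len(data) * self.train_ratio)]
        axis = 0 if self.norm_each_channel else None
        self.mean, self.std = train.mean(axis=axis), train.std(axis=axis)

    def transform(self, x):
        return (x - self.mean) / self.std

    def inverse_transform(self, x):
        return x * self.std + self.mean

data = np.random.rand(1000, 7)                       # 7 channels, as in ETTm2
scaler = ZScoreSketch(train_ratio=0.6, norm_each_channel=True)
scaler.fit(data)
assert np.allclose(scaler.inverse_transform(scaler.transform(data)), data)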
diff --git a/baselines/STID/Electricity.py b/baselines/STID/Electricity.py
index 1f6588e8..994f448b 100644
--- a/baselines/STID/Electricity.py
+++ b/baselines/STID/Electricity.py
@@ -1,46 +1,38 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import STID
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STID model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Electricity"
-CFG.DATASET_TYPE = "Electricity"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STID"
-CFG.MODEL.ARCH = STID
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Electricity' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STID
+MODEL_PARAM = {
"num_nodes": 321,
- "input_len": CFG.DATASET_INPUT_LEN,
+ "input_len": INPUT_LEN,
"input_dim": 1,
- "embed_dim": 1024,
- "output_len": CFG.DATASET_OUTPUT_LEN,
+ "embed_dim": 2048,
+ "output_len": OUTPUT_LEN,
"num_layer": 1,
"if_node": True,
- "node_dim": 32,
+ "node_dim": 8,
"if_T_i_D": True,
"if_D_i_W": True,
"temp_dim_tid": 8,
@@ -48,74 +40,107 @@
"time_of_day_size": 24,
"day_of_week_size": 7
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STID model configuration for Electricity'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0003,
+ "lr": 0.0005,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25, 50],
+ "milestones": [1, 3, 5],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 16
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
+CFG.VAL.DATA.BATCH_SIZE = 64
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 16
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
-# ================= evaluate ================= #
CFG.EVAL = EasyDict()
-# CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
-CFG.EVAL.HORIZONS = [12, 24, 48, 96]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
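METRICS.NULL_VAL tells the masked metrics which sentinel marks missing values, so gaps never contribute to the loss or the reported scores. An illustrative masked MAE (a sketch of the semantics, not the basicts.metrics implementation):

import torch

def masked_mae_sketch(pred, target, null_val):
    if null_val != null_val:               # NaN sentinel
        mask = ~torch.isnan(target)
    else:
        mask = target != null_val
    mask = mask.float()
    mask = mask / mask.mean()              # re-weight so valid entries average to 1
    return torch.nan_to_num(torch.abs(pred - target) * mask).mean()

target = torch.tensor([1.0, 2.0, 0.0, 4.0])           # 0.0 plays the null sentinel
pred = torch.tensor([1.5, 2.0, 9.9, 3.0])
print(masked_mae_sketch(pred, target, null_val=0.0))  # 0.5, averaged over 3 valid entries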
diff --git a/baselines/STID/ExchangeRate.py b/baselines/STID/ExchangeRate.py
index 0eba3dfa..73161ef7 100644
--- a/baselines/STID/ExchangeRate.py
+++ b/baselines/STID/ExchangeRate.py
@@ -1,43 +1,35 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import STID
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STID model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ExchangeRate"
-CFG.DATASET_TYPE = "Exchange Rate"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STID"
-CFG.MODEL.ARCH = STID
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ExchangeRate' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STID
+MODEL_PARAM = {
"num_nodes": 8,
- "input_len": CFG.DATASET_INPUT_LEN,
+ "input_len": INPUT_LEN,
"input_dim": 1,
- "embed_dim": 1024,
- "output_len": CFG.DATASET_OUTPUT_LEN,
+ "embed_dim": 2048,
+ "output_len": OUTPUT_LEN,
"num_layer": 1,
"if_node": True,
"node_dim": 32,
@@ -48,73 +40,107 @@
"time_of_day_size": 1,
"day_of_week_size": 7
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STID model configuration for ExchangeRate'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0003,
+ "lr": 0.0005,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25, 50],
- "gamma": 0.5
+ "milestones": [1, 3, 5],
+ "gamma": 0.1
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
\ No newline at end of file
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
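The checkpoint path now encodes model, dataset, epoch budget, and sequence lengths, so runs with different settings no longer collide. What the os.path.join above resolves to (the 96/336 lengths are assumed for illustration; the real values come from get_regular_settings):

import os

print(os.path.join('checkpoints', 'STID',
                   '_'.join(['ExchangeRate', str(100), str(96), str(336)])))
# checkpoints/STID/ExchangeRate_100_96_336 (on POSIX)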
diff --git a/baselines/STID/GBA.py b/baselines/STID/GBA.py
index 73e9a37c..964e6925 100644
--- a/baselines/STID/GBA.py
+++ b/baselines/STID/GBA.py
@@ -1,44 +1,35 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.losses import masked_mae
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STID
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STID model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "GBA"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STID"
-CFG.MODEL.ARCH = STID
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'GBA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STID
+MODEL_PARAM = {
"num_nodes": 2352,
- "input_len": CFG.DATASET_INPUT_LEN,
+ "input_len": INPUT_LEN,
"input_dim": 3,
"embed_dim": 32,
- "output_len": CFG.DATASET_OUTPUT_LEN,
+ "output_len": OUTPUT_LEN,
"num_layer": 4,
"if_node": True,
"node_dim": 128,
@@ -49,73 +40,108 @@
"time_of_day_size": 96,
"day_of_week_size": 7
}
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # traffic flow, time in day
-CFG.MODEL.TARGET_FEATURES = [0] # traffic flow
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STID model configuration for GBA'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
-# ================= optim ================= #
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 30, 60, 80],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 64
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = False # Whether to use GPU for evaluation. Default: True
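FORWARD_FEATURES and TARGET_FEATURES index the channel dimension of the [batch, length, nodes, channels] tensors: channels 0..2 (value, time-of-day, day-of-week) feed the model, while only channel 0 is supervised. A sketch of the equivalent slicing (illustration only; the runner does this internally):

import torch

history = torch.rand(8, 12, 2352, 3)       # [batch, input_len, nodes, channels]
future = torch.rand(8, 12, 2352, 3)
model_input = history[..., [0, 1, 2]]      # FORWARD_FEATURES
label = future[..., [0]]                   # TARGET_FEATURES
print(model_input.shape, label.shape)      # [8, 12, 2352, 3] and [8, 12, 2352, 1]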
diff --git a/baselines/STID/GLA.py b/baselines/STID/GLA.py
index 405b1776..7e997b05 100644
--- a/baselines/STID/GLA.py
+++ b/baselines/STID/GLA.py
@@ -1,44 +1,35 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.losses import masked_mae
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STID
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STID model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "GLA"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STID"
-CFG.MODEL.ARCH = STID
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'GLA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STID
+MODEL_PARAM = {
"num_nodes": 3834,
- "input_len": CFG.DATASET_INPUT_LEN,
+ "input_len": INPUT_LEN,
"input_dim": 3,
"embed_dim": 32,
- "output_len": CFG.DATASET_OUTPUT_LEN,
+ "output_len": OUTPUT_LEN,
"num_layer": 4,
"if_node": True,
"node_dim": 128,
@@ -49,73 +40,108 @@
"time_of_day_size": 96,
"day_of_week_size": 7
}
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # traffic flow, time in day
-CFG.MODEL.TARGET_FEATURES = [0] # traffic flow
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STID model configuration for GLA'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
-# ================= optim ================= #
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 30, 60, 80],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 64
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = False # Whether to use GPU for evaluation. Default: True
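EVAL.HORIZONS reports metrics at selected prediction steps in addition to the average over the full window; by the usual convention, horizon h is the h-th output step. An illustrative per-horizon slice (a sketch; the exact reporting lives in the runner):

import torch

pred, real = torch.rand(8, 12, 3834), torch.rand(8, 12, 3834)  # [batch, output_len, nodes]
for h in [3, 6, 12]:
    print(f'horizon {h}: MAE {(pred[:, h - 1] - real[:, h - 1]).abs().mean():.4f}')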
diff --git a/baselines/STID/Illness.py b/baselines/STID/Illness.py
index c8632cff..257b773f 100644
--- a/baselines/STID/Illness.py
+++ b/baselines/STID/Illness.py
@@ -1,43 +1,35 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import STID
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STID model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Illness"
-CFG.DATASET_TYPE = "Illness"
-CFG.DATASET_INPUT_LEN = 168
-CFG.DATASET_OUTPUT_LEN = 96
-CFG.GPU_NUM = 1
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STID"
-CFG.MODEL.ARCH = STID
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Illness' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STID
+MODEL_PARAM = {
"num_nodes": 7,
- "input_len": CFG.DATASET_INPUT_LEN,
+ "input_len": INPUT_LEN,
"input_dim": 1,
- "embed_dim": 1024,
- "output_len": CFG.DATASET_OUTPUT_LEN,
+ "embed_dim": 2048,
+ "output_len": OUTPUT_LEN,
"num_layer": 1,
"if_node": True,
"node_dim": 32,
@@ -48,73 +40,106 @@
"time_of_day_size": 1,
"day_of_week_size": 7
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STID model configuration for Illness'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0002,
+ "lr": 0.0005,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25, 50],
- "gamma": 0.5
+ "milestones": [1, 3, 5],
+ "gamma": 0.1
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+
+# Evaluation parameters
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
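All of these configs start from get_regular_settings, which centralizes the per-dataset defaults the old files hard-coded. A hedged sketch of the shape of its return value (the keys mirror the ones read above; the concrete values are hypothetical):

def get_regular_settings_sketch(dataset_name: str) -> dict:
    # Hypothetical defaults for a traffic dataset; the real table lives in basicts.utils.
    return {
        'INPUT_LEN': 12,
        'OUTPUT_LEN': 12,
        'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
        'NORM_EACH_CHANNEL': False,
        'RESCALE': True,
        'NULL_VAL': 0.0,
    }

settings = get_regular_settings_sketch('METR-LA')
INPUT_LEN, OUTPUT_LEN = settings['INPUT_LEN'], settings['OUTPUT_LEN']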
diff --git a/baselines/STID/METR-LA.py b/baselines/STID/METR-LA.py
index bbd0fcc3..e92acb2a 100644
--- a/baselines/STID/METR-LA.py
+++ b/baselines/STID/METR-LA.py
@@ -1,44 +1,35 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.losses import masked_mae
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STID
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STID model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STID"
-CFG.MODEL.ARCH = STID
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'METR-LA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STID
+MODEL_PARAM = {
"num_nodes": 207,
- "input_len": CFG.DATASET_INPUT_LEN,
+ "input_len": INPUT_LEN,
"input_dim": 3,
"embed_dim": 32,
- "output_len": CFG.DATASET_OUTPUT_LEN,
+ "output_len": OUTPUT_LEN,
"num_layer": 3,
"if_node": True,
"node_dim": 32,
@@ -49,73 +40,108 @@
"time_of_day_size": 288,
"day_of_week_size": 7
}
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # traffic speed, time in day
-CFG.MODEL.TARGET_FEATURES = [0] # traffic speed
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STID model configuration for METR-LA'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
-# ================= optim ================= #
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 50, 80],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
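Traffic datasets set NULL_VAL = 0.0 because a zero reading means a missing sensor sample; MAPE in particular must exclude those zeros to stay finite. Illustrative masked RMSE and MAPE (sketches of the semantics, not the basicts.metrics code):

import torch

def masked_rmse_sketch(pred, target, null_val=0.0):
    mask = (target != null_val).float()
    mask = mask / mask.mean()
    return torch.sqrt(torch.nan_to_num((pred - target) ** 2 * mask).mean())

def masked_mape_sketch(pred, target, null_val=0.0):
    mask = (target != null_val).float()
    mask = mask / mask.mean()
    return torch.nan_to_num((pred - target).abs() / target.abs() * mask).mean()

target = torch.tensor([60.0, 0.0, 30.0])   # 0.0 = missing reading
pred = torch.tensor([57.0, 12.0, 33.0])
print(masked_rmse_sketch(pred, target))    # 3.0
print(masked_mape_sketch(pred, target))    # 0.075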
diff --git a/baselines/STID/PEMS-BAY.py b/baselines/STID/PEMS-BAY.py
index 0f54edc8..53203fd4 100644
--- a/baselines/STID/PEMS-BAY.py
+++ b/baselines/STID/PEMS-BAY.py
@@ -1,44 +1,35 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.losses import masked_mae
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STID
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STID model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STID"
-CFG.MODEL.ARCH = STID
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS-BAY' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STID
+MODEL_PARAM = {
"num_nodes": 325,
- "input_len": CFG.DATASET_INPUT_LEN,
+ "input_len": INPUT_LEN,
"input_dim": 3,
"embed_dim": 32,
- "output_len": CFG.DATASET_OUTPUT_LEN,
+ "output_len": OUTPUT_LEN,
"num_layer": 3,
"if_node": True,
"node_dim": 32,
@@ -49,73 +40,108 @@
"time_of_day_size": 288,
"day_of_week_size": 7
}
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # traffic speed, time in day
-CFG.MODEL.TARGET_FEATURES = [0] # traffic speed
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STID model configuration'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
-# ================= optim ================= #
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # Input features: traffic speed, time of day, day of week
+CFG.MODEL.TARGET_FEATURES = [0] # Target feature: traffic speed
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 50, 80],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
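
The new SCALER block replaces the normalization that used to live inside the dataset class. As a rough illustration of the z-score semantics implied by `norm_each_channel` and `rescale` — not the actual `basicts.scaler.ZScoreScaler` implementation, and the class name below is hypothetical — a minimal sketch:

    import numpy as np

    class ZScoreScalerSketch:
        """Illustrative stand-in for basicts.scaler.ZScoreScaler (name is an assumption)."""

        def __init__(self, train_data: np.ndarray, norm_each_channel: bool = True):
            # train_data: [num_steps, num_channels]; statistics are fit on the train split only
            if norm_each_channel:
                self.mean = train_data.mean(axis=0, keepdims=True)  # one (mean, std) per channel
                self.std = train_data.std(axis=0, keepdims=True)
            else:
                self.mean = train_data.mean()  # a single global (mean, std)
                self.std = train_data.std()

        def transform(self, data: np.ndarray) -> np.ndarray:
            return (data - self.mean) / self.std

        def inverse_transform(self, data: np.ndarray) -> np.ndarray:
            # applied to model outputs when RESCALE is set, so metrics see raw-scale values
            return data * self.std + self.mean
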
diff --git a/baselines/STID/PEMS03.py b/baselines/STID/PEMS03.py
index 332774b9..d70fe32d 100644
--- a/baselines/STID/PEMS03.py
+++ b/baselines/STID/PEMS03.py
@@ -1,44 +1,35 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.losses import masked_mae
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STID
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STID model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS03"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STID"
-CFG.MODEL.ARCH = STID
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS03' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STID
+MODEL_PARAM = {
"num_nodes": 358,
- "input_len": CFG.DATASET_INPUT_LEN,
+ "input_len": INPUT_LEN,
"input_dim": 3,
"embed_dim": 32,
- "output_len": CFG.DATASET_OUTPUT_LEN,
+ "output_len": OUTPUT_LEN,
"num_layer": 3,
"if_node": True,
"node_dim": 32,
@@ -49,73 +40,108 @@
"time_of_day_size": 288,
"day_of_week_size": 7
}
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # traffic flow, time in day
-CFG.MODEL.TARGET_FEATURES = [0] # traffic flow
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STID model configuration'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
-# ================= optim ================= #
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # Input features: traffic flow, time of day, day of week
+CFG.MODEL.TARGET_FEATURES = [0] # Target feature: traffic flow
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 50, 80],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
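
`CFG.METRICS.NULL_VAL` feeds the masked metrics registered above: positions whose ground truth equals the null value (0.0 in the PEMS datasets) are excluded from both the loss and the reported metrics. A simplified sketch of the masking idea (the real `basicts.metrics.masked_mae` may additionally handle NaN null values and other edge cases):

    import torch

    def masked_mae_sketch(prediction: torch.Tensor, target: torch.Tensor,
                          null_val: float = 0.0) -> torch.Tensor:
        # drop positions whose ground truth equals the null value (missing sensor readings)
        mask = (~torch.isclose(target, torch.full_like(target, null_val))).float()
        mask = mask / mask.mean()  # re-weight so the mean runs over valid points only
        loss = torch.abs(prediction - target) * mask
        return torch.nan_to_num(loss).mean()
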
diff --git a/baselines/STID/PEMS04.py b/baselines/STID/PEMS04.py
index 07d3a92f..d3133f96 100644
--- a/baselines/STID/PEMS04.py
+++ b/baselines/STID/PEMS04.py
@@ -1,44 +1,35 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.losses import masked_mae
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STID
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STID model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STID"
-CFG.MODEL.ARCH = STID
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STID
+MODEL_PARAM = {
"num_nodes": 307,
- "input_len": CFG.DATASET_INPUT_LEN,
+ "input_len": INPUT_LEN,
"input_dim": 3,
"embed_dim": 32,
- "output_len": CFG.DATASET_OUTPUT_LEN,
+ "output_len": OUTPUT_LEN,
"num_layer": 3,
"if_node": True,
"node_dim": 32,
@@ -49,73 +40,108 @@
"time_of_day_size": 288,
"day_of_week_size": 7
}
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # traffic flow, time in day
-CFG.MODEL.TARGET_FEATURES = [0] # traffic flow
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STID model configuration'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
-# ================= optim ================= #
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # Input features: traffic flow, time of day, day of week
+CFG.MODEL.TARGET_FEATURES = [0] # Target feature: traffic flow
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 50, 80],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
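
`get_regular_settings` is what lets these configs drop the hard-coded dataset constants. Conceptually it returns a per-dataset dictionary like the sketch below; the 12-in/12-out lengths and null value match the old config, while the split ratios and normalization flags shown here are illustrative guesses, not the library's actual values:

    def get_regular_settings_sketch(dataset_name: str) -> dict:
        # Illustrative table only; the real basicts.utils.get_regular_settings
        # derives these from the dataset's stored metadata.
        settings = {
            'PEMS04': {
                'INPUT_LEN': 12,                        # matches the old DATASET_INPUT_LEN
                'OUTPUT_LEN': 12,                       # matches the old DATASET_OUTPUT_LEN
                'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
                'NORM_EACH_CHANNEL': False,
                'RESCALE': True,
                'NULL_VAL': 0.0,                        # matches the old CFG.NULL_VAL
            },
        }
        return settings[dataset_name]
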
diff --git a/baselines/STID/PEMS07.py b/baselines/STID/PEMS07.py
index 8892fba6..6eddd65c 100644
--- a/baselines/STID/PEMS07.py
+++ b/baselines/STID/PEMS07.py
@@ -1,44 +1,35 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.losses import masked_mae
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STID
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STID model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS07"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STID"
-CFG.MODEL.ARCH = STID
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS07' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STID
+MODEL_PARAM = {
"num_nodes": 883,
- "input_len": CFG.DATASET_INPUT_LEN,
+ "input_len": INPUT_LEN,
"input_dim": 3,
"embed_dim": 32,
- "output_len": CFG.DATASET_OUTPUT_LEN,
+ "output_len": OUTPUT_LEN,
"num_layer": 3,
"if_node": True,
"node_dim": 32,
@@ -49,73 +40,108 @@
"time_of_day_size": 288,
"day_of_week_size": 7
}
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # traffic flow, time in day
-CFG.MODEL.TARGET_FEATURES = [0] # traffic flow
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STID model configuration'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
-# ================= optim ================= #
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # Input features: traffic flow, time of day, day of week
+CFG.MODEL.TARGET_FEATURES = [0] # Target feature: traffic flow
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 50, 80],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
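
`FORWARD_FEATURES`/`TARGET_FEATURES` keep their old semantics: the runner slices the feature axis so the model sees the raw value plus the two time encodings, while the loss and metrics are computed on the raw value only. A sketch of that slicing, where the [batch, seq_len, num_nodes, num_features] layout is an assumption:

    import torch

    def select_features_sketch(data: torch.Tensor,
                               forward_features=(0, 1, 2), target_features=(0,)):
        # data: [batch, seq_len, num_nodes, num_features]
        model_input = data[..., list(forward_features)]  # value + time-of-day + day-of-week
        ground_truth = data[..., list(target_features)]  # evaluate on the raw value only
        return model_input, ground_truth
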
diff --git a/baselines/STID/PEMS08.py b/baselines/STID/PEMS08.py
index 73cf0c02..fee28d8d 100644
--- a/baselines/STID/PEMS08.py
+++ b/baselines/STID/PEMS08.py
@@ -1,44 +1,35 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.losses import masked_mae
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STID
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STID model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STID"
-CFG.MODEL.ARCH = STID
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STID
+MODEL_PARAM = {
"num_nodes": 170,
- "input_len": CFG.DATASET_INPUT_LEN,
+ "input_len": INPUT_LEN,
"input_dim": 3,
"embed_dim": 32,
- "output_len": CFG.DATASET_OUTPUT_LEN,
+ "output_len": OUTPUT_LEN,
"num_layer": 3,
"if_node": True,
"node_dim": 32,
@@ -49,73 +40,108 @@
"time_of_day_size": 288,
"day_of_week_size": 7
}
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # traffic flow, time in day
-CFG.MODEL.TARGET_FEATURES = [0] # traffic flow
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STID model configuration'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
-# ================= optim ================= #
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # Input features: traffic flow, time of day, day of week
+CFG.MODEL.TARGET_FEATURES = [0] # Target feature: traffic flow
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 50, 80],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
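
The new `CKPT_SAVE_DIR` scheme keys checkpoints on model, dataset, epoch count, and both sequence lengths, so runs with different horizon settings no longer overwrite each other. With the values above (and assuming the regular settings keep PEMS08 at 12-in/12-out) it resolves to:

    import os

    # hypothetical concrete values: DATA_NAME='PEMS08', NUM_EPOCHS=100, INPUT_LEN=OUTPUT_LEN=12
    ckpt_dir = os.path.join('checkpoints', 'STID', '_'.join(['PEMS08', '100', '12', '12']))
    print(ckpt_dir)  # checkpoints/STID/PEMS08_100_12_12
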
diff --git a/baselines/STID/SD.py b/baselines/STID/SD.py
index 47b54aaf..9586c124 100644
--- a/baselines/STID/SD.py
+++ b/baselines/STID/SD.py
@@ -1,44 +1,35 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.losses import masked_mae
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STID
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STID model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "SD"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STID"
-CFG.MODEL.ARCH = STID
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'SD' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STID
+MODEL_PARAM = {
"num_nodes": 716,
- "input_len": CFG.DATASET_INPUT_LEN,
+ "input_len": INPUT_LEN,
"input_dim": 3,
"embed_dim": 32,
- "output_len": CFG.DATASET_OUTPUT_LEN,
+ "output_len": OUTPUT_LEN,
"num_layer": 4,
"if_node": True,
"node_dim": 64,
@@ -49,73 +40,110 @@
"time_of_day_size": 96,
"day_of_week_size": 7
}
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # traffic flow, time in day
-CFG.MODEL.TARGET_FEATURES = [0] # traffic flow
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STID model configuration'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
-# ================= optim ================= #
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # Input features: traffic flow, time of day, day of week
+CFG.MODEL.TARGET_FEATURES = [0] # Target feature: traffic flow
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 30, 60, 80],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 32
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 64
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
+# Early stopping
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None (early stopping is disabled if unset)
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = False # Whether to use GPU for evaluation. Default: True
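
SD is the only STID config in this patch that sets `EARLY_STOPPING_PATIENCE`. The mechanism is the usual patience counter on the validation target metric (`MAE`, lower is better); a minimal sketch, assuming the runner checks once per validation round (the runner's actual bookkeeping may differ):

    class EarlyStoppingSketch:
        """Illustrative patience counter for early stopping."""

        def __init__(self, patience: int = 10):
            self.patience = patience
            self.best = float('inf')
            self.bad_rounds = 0

        def step(self, val_metric: float) -> bool:
            # returns True when training should stop
            if val_metric < self.best:
                self.best = val_metric
                self.bad_rounds = 0
            else:
                self.bad_rounds += 1
            return self.bad_rounds >= self.patience
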
diff --git a/baselines/STID/Traffic.py b/baselines/STID/Traffic.py
index 827d5f68..4fe41115 100644
--- a/baselines/STID/Traffic.py
+++ b/baselines/STID/Traffic.py
@@ -1,43 +1,35 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import STID
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STID model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Traffic"
-CFG.DATASET_TYPE = "Traffic"
-CFG.DATASET_INPUT_LEN = 168
-CFG.DATASET_OUTPUT_LEN = 96
-CFG.GPU_NUM = 1
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STID"
-CFG.MODEL.ARCH = STID
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Traffic' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STID
+MODEL_PARAM = {
"num_nodes": 862,
- "input_len": CFG.DATASET_INPUT_LEN,
+ "input_len": INPUT_LEN,
"input_dim": 1,
- "embed_dim": 1024,
- "output_len": CFG.DATASET_OUTPUT_LEN,
+ "embed_dim": 2048,
+ "output_len": OUTPUT_LEN,
"num_layer": 1,
"if_node": True,
"node_dim": 32,
@@ -45,76 +37,110 @@
"if_D_i_W": True,
"temp_dim_tid": 8,
"temp_dim_diw": 8,
- "time_of_day_size": 1,
+ "time_of_day_size": 24,
"day_of_week_size": 7
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STID model configuration'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0002,
+ "lr": 0.0005,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25, 50],
- "gamma": 0.5
+ "milestones": [1, 3, 5],
+ "gamma": 0.1
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/STID/Weather.py b/baselines/STID/Weather.py
index a4170ce2..63af02a8 100644
--- a/baselines/STID/Weather.py
+++ b/baselines/STID/Weather.py
@@ -1,43 +1,35 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.losses import masked_mae, masked_mse
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import STID
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STID model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Weather"
-CFG.DATASET_TYPE = "Weather"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STID"
-CFG.MODEL.ARCH = STID
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Weather' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STID
+MODEL_PARAM = {
"num_nodes": 21,
- "input_len": CFG.DATASET_INPUT_LEN,
+ "input_len": INPUT_LEN,
"input_dim": 1,
- "embed_dim": 1024,
- "output_len": CFG.DATASET_OUTPUT_LEN,
+ "embed_dim": 2048,
+ "output_len": OUTPUT_LEN,
"num_layer": 1,
"if_node": True,
"node_dim": 32,
@@ -48,73 +40,107 @@
"time_of_day_size": 144,
"day_of_week_size": 7
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STID model configuration'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0002,
+ "lr": 0.0005,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 25, 50],
- "gamma": 0.5
+ "milestones": [1, 3, 5],
+ "gamma": 0.1
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
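
For the long-horizon datasets, `CFG.EVAL.HORIZONS` reports metrics at selected prediction steps (12 through 336 here) in addition to the average over all steps. A sketch of the per-horizon slicing, with 1-indexed horizons and an assumed [batch, output_len, num_nodes, 1] layout:

    import torch

    def evaluate_horizons_sketch(prediction: torch.Tensor, target: torch.Tensor,
                                 horizons, metric) -> dict:
        # metric is any callable like masked_mae(prediction, target)
        results = {f'horizon_{h}': metric(prediction[:, h - 1, ...],
                                          target[:, h - 1, ...]).item()
                   for h in horizons}
        results['overall'] = metric(prediction, target).item()
        return results
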
diff --git a/baselines/STID/run.sh b/baselines/STID/run.sh
deleted file mode 100644
index 75941b9e..00000000
--- a/baselines/STID/run.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/STID/METR-LA.py --gpus '0'
-python experiments/train.py -c baselines/STID/PEMS-BAY.py --gpus '0'
-python experiments/train.py -c baselines/STID/PEMS03.py --gpus '0'
-python experiments/train.py -c baselines/STID/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/STID/PEMS07.py --gpus '0'
-python experiments/train.py -c baselines/STID/PEMS08.py --gpus '0'
-
diff --git a/baselines/STID_M4/M4.py b/baselines/STID_M4/M4.py
deleted file mode 100644
index d845540d..00000000
--- a/baselines/STID_M4/M4.py
+++ /dev/null
@@ -1,115 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.losses import masked_mae
-from basicts.data import M4ForecastingDataset
-from basicts.runners import M4ForecastingRunner
-
-from .arch import STID
-
-def get_cfg(seasonal_pattern):
- assert seasonal_pattern in ["Yearly", "Quarterly", "Monthly", "Weekly", "Daily", "Hourly"]
- prediction_len = {"Yearly": 6, "Quarterly": 8, "Monthly": 18, "Weekly": 13, "Daily": 14, "Hourly": 48}[seasonal_pattern]
- num_nodes = {"Yearly": 23000, "Quarterly": 24000, "Monthly": 48000, "Weekly": 359, "Daily": 4227, "Hourly": 414}[seasonal_pattern]
- history_size = 2
- history_len = history_size * prediction_len
-
- CFG = EasyDict()
-
- # ================= general ================= #
- CFG.DESCRIPTION = "Multi-layer perceptron model configuration"
- CFG.RUNNER = M4ForecastingRunner
- CFG.DATASET_CLS = M4ForecastingDataset
- CFG.DATASET_NAME = "M4_" + seasonal_pattern
- CFG.DATASET_INPUT_LEN = history_len
- CFG.DATASET_OUTPUT_LEN = prediction_len
- CFG.GPU_NUM = 1
-
- # ================= environment ================= #
- CFG.ENV = EasyDict()
- CFG.ENV.SEED = 1
- CFG.ENV.CUDNN = EasyDict()
- CFG.ENV.CUDNN.ENABLED = True
-
- # ================= model ================= #
- CFG.MODEL = EasyDict()
- CFG.MODEL.NAME = "STID"
- CFG.MODEL.ARCH = STID
- CFG.MODEL.PARAM = {
- "num_nodes": num_nodes,
- "input_len": CFG.DATASET_INPUT_LEN,
- "input_dim": 1,
- "embed_dim": 256 if seasonal_pattern not in ["Yearly", "Quarterly", "Monthly"] else 128,
- "output_len": CFG.DATASET_OUTPUT_LEN,
- "num_layer": 4,
- "if_node": True,
- "node_dim": 16,
- "if_T_i_D": False, # no temporal features in M4
- "if_D_i_W": False,
- "temp_dim_tid": 32,
- "temp_dim_diw": 32,
- "time_of_day_size": 288,
- "day_of_week_size": 7
- }
- CFG.MODEL.FORWARD_FEATURES = [0, 1] # values, node id
- CFG.MODEL.TARGET_FEATURES = [0]
-
- # ================= optim ================= #
- CFG.TRAIN = EasyDict()
- CFG.TRAIN.LOSS = masked_mae
- CFG.TRAIN.OPTIM = EasyDict()
- CFG.TRAIN.OPTIM.TYPE = "Adam"
- CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
- }
- CFG.TRAIN.LR_SCHEDULER = EasyDict()
- CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
- CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50, 80],
- "gamma": 0.5
- }
-
- # ================= train ================= #
- CFG.TRAIN.CLIP_GRAD_PARAM = {
- 'max_norm': 5.0
- }
- CFG.TRAIN.NUM_EPOCHS = 99
- CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
- )
- # train data
- CFG.TRAIN.DATA = EasyDict()
- # read data
- CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
- # dataloader args, optional
- CFG.TRAIN.DATA.BATCH_SIZE = 64
- CFG.TRAIN.DATA.PREFETCH = False
- CFG.TRAIN.DATA.SHUFFLE = True
- CFG.TRAIN.DATA.NUM_WORKERS = 2
- CFG.TRAIN.DATA.PIN_MEMORY = False
-
- # ================= test ================= #
- CFG.TEST = EasyDict()
- CFG.TEST.INTERVAL = CFG.TRAIN.NUM_EPOCHS
- # test data
- CFG.TEST.DATA = EasyDict()
- # read data
- CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
- # dataloader args, optional
- CFG.TEST.DATA.BATCH_SIZE = 64
- CFG.TEST.DATA.PREFETCH = False
- CFG.TEST.DATA.SHUFFLE = False
- CFG.TEST.DATA.NUM_WORKERS = 2
- CFG.TEST.DATA.PIN_MEMORY = False
-
- # ================= evaluate ================= #
- CFG.EVAL = EasyDict()
- CFG.EVAL.HORIZONS = []
- CFG.EVAL.SAVE_PATH = os.path.abspath(__file__ + "/..")
-
- return CFG
diff --git a/baselines/STID_M4/arch/__init__.py b/baselines/STID_M4/arch/__init__.py
deleted file mode 100644
index 64b16477..00000000
--- a/baselines/STID_M4/arch/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .stid_arch import STID
-
-__all__ = ["STID"]
diff --git a/baselines/STID_M4/arch/mlp.py b/baselines/STID_M4/arch/mlp.py
deleted file mode 100644
index 17fccbc1..00000000
--- a/baselines/STID_M4/arch/mlp.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import torch
-from torch import nn
-
-
-class MultiLayerPerceptron(nn.Module):
- """Multi-Layer Perceptron with residual links."""
-
- def __init__(self, input_dim, hidden_dim) -> None:
- super().__init__()
- self.fc1 = nn.Conv2d(
- in_channels=input_dim, out_channels=hidden_dim, kernel_size=(1, 1), bias=True)
- self.fc2 = nn.Conv2d(
- in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=(1, 1), bias=True)
- self.act = nn.ReLU()
- self.drop = nn.Dropout(p=0.15)
-
- def forward(self, input_data: torch.Tensor) -> torch.Tensor:
- """Feed forward of MLP.
-
- Args:
- input_data (torch.Tensor): input data with shape [B, D, N]
-
- Returns:
- torch.Tensor: latent repr
- """
-
- hidden = self.fc2(self.drop(self.act(self.fc1(input_data)))) # MLP
- hidden = hidden + input_data # residual
- return hidden
diff --git a/baselines/STID_M4/arch/stid_arch.py b/baselines/STID_M4/arch/stid_arch.py
deleted file mode 100644
index 3899376a..00000000
--- a/baselines/STID_M4/arch/stid_arch.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import torch
-from torch import nn
-
-from .mlp import MultiLayerPerceptron
-
-
-class STID(nn.Module):
- """
- Paper: Spatial-Temporal Identity: A Simple yet Effective Baseline for Multivariate Time Series Forecasting
- Link: https://arxiv.org/abs/2208.05233
- Official Code: https://github.com/zezhishao/STID
- """
-
- def __init__(self, **model_args):
- super().__init__()
- # attributes
- self.num_nodes = model_args["num_nodes"]
- self.node_dim = model_args["node_dim"]
- self.input_len = model_args["input_len"]
- self.input_dim = model_args["input_dim"]
- self.embed_dim = model_args["embed_dim"]
- self.output_len = model_args["output_len"]
- self.num_layer = model_args["num_layer"]
- self.temp_dim_tid = model_args["temp_dim_tid"]
- self.temp_dim_diw = model_args["temp_dim_diw"]
- self.time_of_day_size = model_args["time_of_day_size"]
- self.day_of_week_size = model_args["day_of_week_size"]
-
- self.if_time_in_day = model_args["if_T_i_D"]
- self.if_day_in_week = model_args["if_D_i_W"]
- self.if_spatial = model_args["if_node"]
-
- # spatial embeddings
- if self.if_spatial:
- self.node_emb = nn.Parameter(
- torch.empty(self.num_nodes, self.node_dim))
- nn.init.xavier_uniform_(self.node_emb)
- # temporal embeddings
- if self.if_time_in_day:
- self.time_in_day_emb = nn.Parameter(
- torch.empty(self.time_of_day_size, self.temp_dim_tid))
- nn.init.xavier_uniform_(self.time_in_day_emb)
- if self.if_day_in_week:
- self.day_in_week_emb = nn.Parameter(
- torch.empty(self.day_of_week_size, self.temp_dim_diw))
- nn.init.xavier_uniform_(self.day_in_week_emb)
-
- # embedding layer
- self.time_series_emb_layer = nn.Conv2d(
- in_channels=self.input_dim * self.input_len, out_channels=self.embed_dim, kernel_size=(1, 1), bias=True)
-
- # encoding
- self.hidden_dim = self.embed_dim+self.node_dim * \
- int(self.if_spatial)+self.temp_dim_tid*int(self.if_day_in_week) + \
- self.temp_dim_diw*int(self.if_time_in_day)
- self.encoder = nn.Sequential(
- *[MultiLayerPerceptron(self.hidden_dim, self.hidden_dim) for _ in range(self.num_layer)])
-
- # regression
- self.regression_layer = nn.Conv2d(
- in_channels=self.hidden_dim, out_channels=self.output_len, kernel_size=(1, 1), bias=True)
-
- def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_seen: int, epoch: int, train: bool, **kwargs) -> torch.Tensor:
- """Feed forward of STID.
-
- Args:
- history_data (torch.Tensor): history data with shape [B, L, N, C]
-
- Returns:
- torch.Tensor: prediction with shape [B, L, N, C]
- """
-
- # prepare data
- input_data = history_data[..., range(self.input_dim)]
- node_id = history_data[:, 0, 0, 1].type(torch.int64) # the second dimension is node ids
- # no temporal features in M4 (M4 dataset only provides fake date, which is unreliable to extract temporal features)
- time_in_day_emb = None
- day_in_week_emb = None
- node_embed = self.node_emb[node_id]
-
- # time series embedding
- batch_size, _, num_nodes, _ = input_data.shape
- input_data = input_data.transpose(1, 2).contiguous()
- input_data = input_data.view(
- batch_size, num_nodes, -1).transpose(1, 2).unsqueeze(-1)
- time_series_emb = self.time_series_emb_layer(input_data)
-
- node_emb = []
- if self.if_spatial:
- # expand node embeddings
- node_emb.append(node_embed.unsqueeze(-1).unsqueeze(-1))
- # temporal embeddings
- tem_emb = []
- if time_in_day_emb is not None:
- tem_emb.append(time_in_day_emb.transpose(1, 2).unsqueeze(-1))
- if day_in_week_emb is not None:
- tem_emb.append(day_in_week_emb.transpose(1, 2).unsqueeze(-1))
-
- # concate all embeddings
- hidden = torch.cat([time_series_emb] + node_emb + tem_emb, dim=1)
-
- # encoding
- hidden = self.encoder(hidden)
-
- # regression
- prediction = self.regression_layer(hidden)
-
- return prediction
diff --git a/baselines/STNorm/METR-LA.py b/baselines/STNorm/METR-LA.py
index 4394dcfa..f788e729 100644
--- a/baselines/STNorm/METR-LA.py
+++ b/baselines/STNorm/METR-LA.py
@@ -1,39 +1,30 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import STNorm
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STNorm model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STNorm"
-CFG.MODEL.ARCH = STNorm
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'METR-LA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STNorm
+MODEL_PARAM = {
"num_nodes" : 207,
"tnorm_bool": True,
"snorm_bool": True,
@@ -44,73 +35,108 @@
"blocks" : 4,
"layers" : 2,
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STNorm configuration for METR-LA'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+    'MAE': masked_mae,
+    'MAPE': masked_mape,
+    'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
+ 'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
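
Note: normalization has moved out of the dataset classes and into the `CFG.SCALER` block above, so models receive z-scored inputs and predictions can be rescaled before metrics are computed. Below is a minimal sketch of the idea, assuming a `fit`/`transform`/`inverse_transform`-style interface; the method names, the 0.7 train ratio, and the array shapes are illustrative assumptions, not the actual `ZScoreScaler` API.

```python
import numpy as np

class ZScoreSketch:
    """Illustrative z-score scaler: fit on the train split only, then (de)normalize.
    A sketch of the concept behind CFG.SCALER, not the BasicTS class itself."""

    def __init__(self, train_ratio: float, norm_each_channel: bool):
        self.train_ratio = train_ratio
        self.norm_each_channel = norm_each_channel

    def fit(self, data: np.ndarray):  # data: [T, N, C]
        train_len = int(len(data) * self.train_ratio)  # statistics from train split only
        train = data[:train_len]
        # per-channel statistics keep channels with different scales comparable
        axis = (0, 1) if self.norm_each_channel else None
        self.mean = train.mean(axis=axis, keepdims=self.norm_each_channel)
        self.std = train.std(axis=axis, keepdims=self.norm_each_channel)
        return self

    def transform(self, x):
        return (x - self.mean) / self.std

    def inverse_transform(self, x):  # applied before metrics when RESCALE is set
        return x * self.std + self.mean

data = np.random.rand(1000, 207, 1).astype(np.float32)  # toy METR-LA-shaped array
scaler = ZScoreSketch(train_ratio=0.7, norm_each_channel=False).fit(data)
restored = scaler.inverse_transform(scaler.transform(data))
assert np.allclose(restored, data, atol=1e-5)
```
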
diff --git a/baselines/STNorm/PEMS-BAY.py b/baselines/STNorm/PEMS-BAY.py
index f62e1dee..77521a9e 100644
--- a/baselines/STNorm/PEMS-BAY.py
+++ b/baselines/STNorm/PEMS-BAY.py
@@ -1,39 +1,30 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import STNorm
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STNorm model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STNorm"
-CFG.MODEL.ARCH = STNorm
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS-BAY' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STNorm
+MODEL_PARAM = {
"num_nodes" : 325,
"tnorm_bool": True,
"snorm_bool": True,
@@ -44,73 +35,108 @@
"blocks" : 4,
"layers" : 2,
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STNorm configuration for PEMS-BAY'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+    'MAE': masked_mae,
+    'MAPE': masked_mape,
+    'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
+ 'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
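
Note: `get_regular_settings` replaces the constants the old configs hard-coded (input/output length, null value, and so on) with a single per-dataset lookup. A plausible sketch of that lookup follows; the 12-step lengths and 0.0 null value match the deleted settings above, while the split ratio and boolean defaults shown are assumptions.

```python
# Hypothetical sketch of the per-dataset defaults lookup used in these configs.
REGULAR_SETTINGS = {
    'PEMS-BAY': {
        'INPUT_LEN': 12,                           # matches the old DATASET_INPUT_LEN
        'OUTPUT_LEN': 12,                          # matches the old DATASET_OUTPUT_LEN
        'TRAIN_VAL_TEST_RATIO': [0.7, 0.1, 0.2],   # assumed split
        'NORM_EACH_CHANNEL': False,                # assumed default
        'RESCALE': True,                           # assumed default
        'NULL_VAL': 0.0,                           # matches the old CFG.NULL_VAL
    },
}

def get_regular_settings_sketch(dataset_name: str) -> dict:
    return REGULAR_SETTINGS[dataset_name]

settings = get_regular_settings_sketch('PEMS-BAY')
print(settings['INPUT_LEN'], settings['NULL_VAL'])  # 12 0.0
```
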
diff --git a/baselines/STNorm/PEMS03.py b/baselines/STNorm/PEMS03.py
index 2427a09d..eb228613 100644
--- a/baselines/STNorm/PEMS03.py
+++ b/baselines/STNorm/PEMS03.py
@@ -1,39 +1,30 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import STNorm
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STNorm model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS03"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STNorm"
-CFG.MODEL.ARCH = STNorm
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS03' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STNorm
+MODEL_PARAM = {
"num_nodes" : 358,
"tnorm_bool": True,
"snorm_bool": True,
@@ -44,73 +35,108 @@
"blocks" : 4,
"layers" : 2,
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STNorm configuration for PEMS03'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+    'MAE': masked_mae,
+    'MAPE': masked_mape,
+    'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
+ 'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
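
Note: `CFG.METRICS.NULL_VAL` tells the masked metrics which sentinel marks missing readings (0.0 in these traffic datasets) so those positions do not pollute the error. A sketch of the masking idea behind `masked_mae`; the real `basicts.metrics` implementation may differ in detail.

```python
import torch

def masked_mae_sketch(prediction: torch.Tensor, target: torch.Tensor,
                      null_val: float = 0.0) -> torch.Tensor:
    """MAE that ignores positions where the target equals the null value."""
    mask = (target != null_val).float()
    mask = mask / mask.mean()            # re-weight so valid points average to 1
    loss = torch.abs(prediction - target) * mask
    loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
    return loss.mean()

pred = torch.tensor([1.0, 2.0, 3.0])
true = torch.tensor([1.5, 0.0, 3.5])     # the 0.0 entry is treated as missing
print(masked_mae_sketch(pred, true))     # 0.5: error averaged over the two valid points
```
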
diff --git a/baselines/STNorm/PEMS04.py b/baselines/STNorm/PEMS04.py
index decb1bc5..07674792 100644
--- a/baselines/STNorm/PEMS04.py
+++ b/baselines/STNorm/PEMS04.py
@@ -1,39 +1,30 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import STNorm
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STNorm model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STNorm"
-CFG.MODEL.ARCH = STNorm
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STNorm
+MODEL_PARAM = {
"num_nodes" : 307,
"tnorm_bool": True,
"snorm_bool": True,
@@ -44,73 +35,108 @@
"blocks" : 4,
"layers" : 2,
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STNorm configuration for PEMS04'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+    'MAE': masked_mae,
+    'MAPE': masked_mape,
+    'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
+ 'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/STNorm/PEMS07.py b/baselines/STNorm/PEMS07.py
index af56452a..f80d40b9 100644
--- a/baselines/STNorm/PEMS07.py
+++ b/baselines/STNorm/PEMS07.py
@@ -1,39 +1,30 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import STNorm
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STNorm model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS07"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STNorm"
-CFG.MODEL.ARCH = STNorm
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS07' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STNorm
+MODEL_PARAM = {
"num_nodes" : 883,
"tnorm_bool": True,
"snorm_bool": True,
@@ -44,73 +35,108 @@
"blocks" : 4,
"layers" : 2,
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STNorm configuration for PEMS07'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+    'MAE': masked_mae,
+    'MAPE': masked_mape,
+    'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
+ 'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
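
Note: `FORWARD_FEATURES` and `TARGET_FEATURES` index the channel dimension of the `[B, L, N, C]` tensors: the model sees channels `[0, 1]` while the loss is computed on channel 0 only. A small sketch of the slicing the runner presumably applies; treating channel 1 as the time-of-day feature is an assumption here.

```python
import torch

history = torch.rand(64, 12, 883, 3)       # [batch, input_len, num_nodes, channels]
forward_features = [0, 1]                   # channels fed to the model
target_features = [0]                       # channel the loss is computed on

model_input = history[..., forward_features]  # -> [64, 12, 883, 2]
future = torch.rand(64, 12, 883, 3)
target = future[..., target_features]          # -> [64, 12, 883, 1]
print(model_input.shape, target.shape)
```
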
diff --git a/baselines/STNorm/PEMS08.py b/baselines/STNorm/PEMS08.py
index 7839b639..9dfd4f89 100644
--- a/baselines/STNorm/PEMS08.py
+++ b/baselines/STNorm/PEMS08.py
@@ -1,39 +1,30 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import STNorm
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STNorm model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STNorm"
-CFG.MODEL.ARCH = STNorm
-CFG.MODEL.PARAM = {
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STNorm
+MODEL_PARAM = {
"num_nodes" : 170,
"tnorm_bool": True,
"snorm_bool": True,
@@ -44,73 +35,108 @@
"blocks" : 4,
"layers" : 2,
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STNorm configuration for PEMS08'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+    'MAE': masked_mae,
+    'MAPE': masked_mape,
+    'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.002,
"weight_decay": 0.0001,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
+ 'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/STNorm/run.sh b/baselines/STNorm/run.sh
deleted file mode 100644
index 2e33cf70..00000000
--- a/baselines/STNorm/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/STNorm/METR-LA.py --gpus '0'
-python experiments/train.py -c baselines/STNorm/PEMS-BAY.py --gpus '0'
-python experiments/train.py -c baselines/STNorm/PEMS03.py --gpus '0'
-python experiments/train.py -c baselines/STNorm/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/STNorm/PEMS07.py --gpus '0'
-python experiments/train.py -c baselines/STNorm/PEMS08.py --gpus '0'
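
Note: with the per-model run.sh helpers removed, each experiment is launched directly from its config through the generic training entry point, exactly as the deleted script did, e.g. `python experiments/train.py -c baselines/STNorm/METR-LA.py --gpus '0'`.
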
diff --git a/baselines/STWave/METR-LA.py b/baselines/STWave/METR-LA.py
index 54680fec..adb4d593 100644
--- a/baselines/STWave/METR-LA.py
+++ b/baselines/STWave/METR-LA.py
@@ -2,21 +2,23 @@
import sys
import numpy as np
import scipy.sparse as sp
+from easydict import EasyDict
import math
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import dijkstra
# TODO: remove it when basicts can be installed by pip
sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STWave
from .loss import stwave_masked_mae
+
def laplacian(W):
"""Return the Laplacian of the weight matrix."""
# Degree matrix.
@@ -48,34 +50,22 @@ def loadGraph(adj_mx, hs, ls):
adj_gat = np.argpartition(dist_matrix, sampled_nodes_number, -1)[:, :sampled_nodes_number]
return adj_gat, graphwave
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STWave model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STWave"
-CFG.MODEL.ARCH = STWave
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'METR-LA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "original")
adjgat, gwv = loadGraph(_, 128, 1)
-CFG.MODEL.PARAM = {
+MODEL_ARCH = STWave
+MODEL_PARAM = {
"input_dim": 1,
"hidden_size": 128,
"layers": 2,
@@ -89,72 +79,107 @@ def loadGraph(adj_mx, hs, ls):
"wave_type": "db1",
"wave_levels": 1,
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STWave configuration for METR-LA'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+    'MAE': masked_mae,
+    'MAPE': masked_mape,
+    'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = stwave_masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [80, 90, 95],
"gamma": 0.1
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
+ 'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
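
Note: STWave precomputes its graph inputs at config-build time: `load_adj` appears to return the processed adjacency list plus the raw matrix (bound to `_` above), and `loadGraph` turns that raw matrix into spectral graph-wavelet features (`gwv`) and a per-node nearest-neighbor index (`adjgat`) from Dijkstra distances. A toy sketch of the `argpartition` step that builds `adjgat`; the sizes and random distances are illustrative.

```python
import numpy as np

num_nodes, sampled = 6, 2
rng = np.random.default_rng(0)
dist_matrix = rng.random((num_nodes, num_nodes))   # stand-in for Dijkstra distances
np.fill_diagonal(dist_matrix, 0.0)                 # each node is closest to itself

# For every node, take the indices of its `sampled` nearest nodes (unordered),
# mirroring `np.argpartition(dist_matrix, sampled_nodes_number, -1)[:, :sampled_nodes_number]`.
adj_gat = np.argpartition(dist_matrix, sampled, axis=-1)[:, :sampled]
print(adj_gat.shape)   # (6, 2): neighbor indices fed to the attention layers
```
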
diff --git a/baselines/STWave/PEMS-BAY.py b/baselines/STWave/PEMS-BAY.py
index 43af0755..34d4f1a4 100644
--- a/baselines/STWave/PEMS-BAY.py
+++ b/baselines/STWave/PEMS-BAY.py
@@ -2,21 +2,23 @@
import sys
import numpy as np
import scipy.sparse as sp
+from easydict import EasyDict
import math
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import dijkstra
# TODO: remove it when basicts can be installed by pip
sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STWave
from .loss import stwave_masked_mae
+
def laplacian(W):
"""Return the Laplacian of the weight matrix."""
# Degree matrix.
@@ -48,34 +50,22 @@ def loadGraph(adj_mx, hs, ls):
adj_gat = np.argpartition(dist_matrix, sampled_nodes_number, -1)[:, :sampled_nodes_number]
return adj_gat, graphwave
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STWave model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STWave"
-CFG.MODEL.ARCH = STWave
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS-BAY' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "original")
adjgat, gwv = loadGraph(_, 128, 1)
-CFG.MODEL.PARAM = {
+MODEL_ARCH = STWave
+MODEL_PARAM = {
"input_dim": 1,
"hidden_size": 128,
"layers": 2,
@@ -89,72 +79,107 @@ def loadGraph(adj_mx, hs, ls):
"wave_type": "db1",
"wave_levels": 1,
}
+NUM_EPOCHS = 12
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'STWave configuration for PEMS-BAY'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = stwave_masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [9, 10],
"gamma": 0.1
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
+ 'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 12
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
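
Note on the new scaler wiring: the CFG.SCALER block above is how the Scaler class introduced in this release plugs in. The runner instantiates CFG.SCALER.TYPE with CFG.SCALER.PARAM, normalizes inputs before the forward pass, and denormalizes predictions before metrics are computed. A minimal sketch of a z-score scaler with that shape (the class and method names here are assumptions for illustration; see basicts.scaler.ZScoreScaler for the real interface):

    import numpy as np

    class ZScoreScalerSketch:
        """Illustrative z-score scaler, fit on the training split only."""

        def __init__(self, train_data: np.ndarray, norm_each_channel: bool):
            # train_data: (num_steps, num_nodes, num_channels)
            if norm_each_channel:
                # One (mean, std) pair per channel, as NORM_EACH_CHANNEL requests.
                self.mean = train_data.mean(axis=(0, 1), keepdims=True)
                self.std = train_data.std(axis=(0, 1), keepdims=True)
            else:
                # A single (mean, std) pair shared by every channel.
                self.mean = train_data.mean()
                self.std = train_data.std()

        def transform(self, data: np.ndarray) -> np.ndarray:
            return (data - self.mean) / self.std

        def inverse_transform(self, data: np.ndarray) -> np.ndarray:
            return data * self.std + self.mean
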
diff --git a/baselines/STWave/PEMS03.py b/baselines/STWave/PEMS03.py
index 5fb51dfe..3ebf2b08 100644
--- a/baselines/STWave/PEMS03.py
+++ b/baselines/STWave/PEMS03.py
@@ -2,21 +2,23 @@
import sys
import numpy as np
import scipy.sparse as sp
+from easydict import EasyDict
import math
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import dijkstra
# TODO: remove it when basicts can be installed by pip
sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STWave
from .loss import stwave_masked_mae
+
def laplacian(W):
"""Return the Laplacian of the weight matrix."""
# Degree matrix.
@@ -46,34 +48,22 @@ def loadGraph(adj_mx, hs, ls):
adj_gat = np.argpartition(dist_matrix, sampled_nodes_number, -1)[:, :sampled_nodes_number]
return adj_gat, graphwave
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STWave model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS03"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STWave"
-CFG.MODEL.ARCH = STWave
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS03' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "original")
adjgat, gwv = loadGraph(_, 128, 1)
-CFG.MODEL.PARAM = {
+MODEL_ARCH = STWave
+MODEL_PARAM = {
"input_dim": 1,
"hidden_size": 128,
"layers": 2,
@@ -87,72 +77,107 @@ def loadGraph(adj_mx, hs, ls):
"wave_type": "sym2",
"wave_levels": 1,
}
+NUM_EPOCHS = 50
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = stwave_masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [30, 40, 45],
"gamma": 0.1
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
+ 'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
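
Every migrated config now starts from get_regular_settings(DATA_NAME), which centralizes the per-dataset defaults the old configs hard-coded (input/output lengths, split ratios, normalization flags, null value). Conceptually it returns a plain dict; the values below are illustrative for a short-term traffic dataset, not a dump of the real settings:

    regular_settings = {
        'INPUT_LEN': 12,                          # illustrative values only
        'OUTPUT_LEN': 12,
        'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
        'NORM_EACH_CHANNEL': False,
        'RESCALE': True,
        'NULL_VAL': 0.0,
    }
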
diff --git a/baselines/STWave/PEMS04.py b/baselines/STWave/PEMS04.py
index ba877ff5..6ac6637e 100644
--- a/baselines/STWave/PEMS04.py
+++ b/baselines/STWave/PEMS04.py
@@ -2,21 +2,23 @@
import sys
import numpy as np
import scipy.sparse as sp
+from easydict import EasyDict
import math
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import dijkstra
# TODO: remove it when basicts can be installed by pip
sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STWave
from .loss import stwave_masked_mae
+
def laplacian(W):
"""Return the Laplacian of the weight matrix."""
# Degree matrix.
@@ -46,34 +48,22 @@ def loadGraph(adj_mx, hs, ls):
adj_gat = np.argpartition(dist_matrix, sampled_nodes_number, -1)[:, :sampled_nodes_number]
return adj_gat, graphwave
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STWave model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STWave"
-CFG.MODEL.ARCH = STWave
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "original")
adjgat, gwv = loadGraph(_, 128, 1)
-CFG.MODEL.PARAM = {
+MODEL_ARCH = STWave
+MODEL_PARAM = {
"input_dim": 1,
"hidden_size": 128,
"layers": 2,
@@ -87,72 +77,107 @@ def loadGraph(adj_mx, hs, ls):
"wave_type": "sym2",
"wave_levels": 1,
}
+NUM_EPOCHS = 80
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = stwave_masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [65, 70, 75],
"gamma": 0.1
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
+ 'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 80
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
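
A side effect of the new CKPT_SAVE_DIR convention: checkpoints are keyed by dataset, epoch budget, and sequence lengths rather than by model name and epoch count alone, so configs for different datasets no longer collide. For the PEMS04 config above, assuming the regular settings keep the usual 12-in/12-out horizon, the path works out to:

    import os

    # MODEL_ARCH.__name__ is 'STWave' and NUM_EPOCHS is 80 in this config.
    ckpt_dir = os.path.join('checkpoints', 'STWave',
                            '_'.join(['PEMS04', str(80), str(12), str(12)]))
    print(ckpt_dir)  # checkpoints/STWave/PEMS04_80_12_12
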
diff --git a/baselines/STWave/PEMS07.py b/baselines/STWave/PEMS07.py
index e8d32f69..a15e84c3 100644
--- a/baselines/STWave/PEMS07.py
+++ b/baselines/STWave/PEMS07.py
@@ -2,21 +2,23 @@
import sys
import numpy as np
import scipy.sparse as sp
+from easydict import EasyDict
import math
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import dijkstra
# TODO: remove it when basicts can be installed by pip
sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STWave
from .loss import stwave_masked_mae
+
def laplacian(W):
"""Return the Laplacian of the weight matrix."""
# Degree matrix.
@@ -46,34 +48,22 @@ def loadGraph(adj_mx, hs, ls):
adj_gat = np.argpartition(dist_matrix, sampled_nodes_number, -1)[:, :sampled_nodes_number]
return adj_gat, graphwave
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STWave model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS07"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STWave"
-CFG.MODEL.ARCH = STWave
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS07' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "original")
adjgat, gwv = loadGraph(_, 128, 1)
-CFG.MODEL.PARAM = {
+MODEL_ARCH = STWave
+MODEL_PARAM = {
"input_dim": 1,
"hidden_size": 128,
"layers": 2,
@@ -87,72 +77,107 @@ def loadGraph(adj_mx, hs, ls):
"wave_type": "sym2",
"wave_levels": 1,
}
+NUM_EPOCHS = 110
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = stwave_masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [100, 105],
"gamma": 0.1
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
+ 'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 110
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
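
The CFG.METRICS block replaces the old implicit MAE/MAPE/RMSE reporting: every function in CFG.METRICS.FUNCS is evaluated, and CFG.METRICS.TARGET names the score used for model selection. A sketch of how a runner might consume the mapping (the loop and names are assumptions, not the runner's actual code):

    import torch

    def evaluate(prediction: torch.Tensor, target: torch.Tensor, cfg) -> dict:
        # Apply every configured metric with the shared null value.
        results = {name: fn(prediction, target, null_val=cfg.METRICS.NULL_VAL).item()
                   for name, fn in cfg.METRICS.FUNCS.items()}
        # cfg.METRICS.TARGET ('MAE' here) is the score that picks the best checkpoint.
        return results
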
diff --git a/baselines/STWave/PEMS08.py b/baselines/STWave/PEMS08.py
index dd4af92b..6bed806a 100644
--- a/baselines/STWave/PEMS08.py
+++ b/baselines/STWave/PEMS08.py
@@ -2,21 +2,23 @@
import sys
import numpy as np
import scipy.sparse as sp
+from easydict import EasyDict
import math
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import dijkstra
# TODO: remove it when basicts can be installed by pip
sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.utils import load_adj
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
from .arch import STWave
from .loss import stwave_masked_mae
+
def laplacian(W):
"""Return the Laplacian of the weight matrix."""
# Degree matrix.
@@ -46,34 +48,22 @@ def loadGraph(adj_mx, hs, ls):
adj_gat = np.argpartition(dist_matrix, sampled_nodes_number, -1)[:, :sampled_nodes_number]
return adj_gat, graphwave
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "STWave model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "STWave"
-CFG.MODEL.ARCH = STWave
-adj_mx, _ = load_adj("datasets/" + CFG.DATASET_NAME +
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+adj_mx, _ = load_adj("datasets/" + DATA_NAME +
"/adj_mx.pkl", "original")
adjgat, gwv = loadGraph(_, 128, 1)
-CFG.MODEL.PARAM = {
+MODEL_ARCH = STWave
+MODEL_PARAM = {
"input_dim": 1,
"hidden_size": 128,
"layers": 2,
@@ -87,72 +77,107 @@ def loadGraph(adj_mx, hs, ls):
"wave_type": "coif1",
"wave_levels": 2,
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = stwave_masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [80, 90, 95],
+ "milestones": [100, 105],
"gamma": 0.1
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
- "max_norm": 5.0
+ 'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/STWave/arch/stwave_arch.py b/baselines/STWave/arch/stwave_arch.py
index 104987d3..f9747781 100644
--- a/baselines/STWave/arch/stwave_arch.py
+++ b/baselines/STWave/arch/stwave_arch.py
@@ -177,7 +177,7 @@ def __init__(self, hidden_size, kernel_size=2, dropout=0.2, levels=1):
layers += [nn.Sequential(self.conv, self.chomp, self.relu, self.dropout)]
self.tcn = nn.Sequential(*layers)
-
+
def forward(self, xh):
xh = self.tcn(xh.transpose(1,3)).transpose(1,3)
return xh
@@ -208,7 +208,7 @@ def forward(self, xl, xh, te, Mask=True):
valueh = torch.relu(self.vhfc(xh)).permute(0,2,1,3)
attentionh = torch.matmul(query, keyh) # [B,N,T,T]
-
+
if Mask:
batch_size = xl.shape[0]
num_steps = xl.shape[1]
@@ -237,26 +237,26 @@ def __init__(self, hidden_size, log_samples, adj_gat, graphwave):
super(dualEncoder, self).__init__()
self.tcn = temporalConvNet(hidden_size)
self.tatt = temporalAttention(hidden_size)
-
+
self.ssal = sparseSpatialAttention(hidden_size, log_samples)
self.ssah = sparseSpatialAttention(hidden_size, log_samples)
-
+
eigvalue = torch.from_numpy(graphwave[0].astype(np.float32))
self.eigvalue = nn.Parameter(eigvalue, requires_grad=True)
self.eigvec = torch.from_numpy(graphwave[1].astype(np.float32)).transpose(0,1).unsqueeze(-1)
self.adj = torch.from_numpy(adj_gat)
-
+
def forward(self, xl, xh, te):
xl = self.tatt(xl, te)
xh = self.tcn(xh)
-
+
spa_statesl = self.ssal(xl, self.adj.to(xl.device), self.eigvec.to(xl.device), self.eigvalue.to(xl.device))
spa_statesh = self.ssah(xh, self.adj.to(xl.device), self.eigvec.to(xl.device), self.eigvalue.to(xl.device))
xl = spa_statesl + xl
xh = spa_statesh + xh
-
- return xl, xh
+ return xl, xh
+
class STWave(nn.Module):
"""
Paper: When Spatio-Temporal Meet Wavelets: Disentangled Traffic Forecasting via Efficient Spectral Graph Attention Networks
@@ -272,10 +272,10 @@ def __init__(self, input_dim, hidden_size, layers, seq_len, horizon, log_samples
self.dual_encoder = nn.ModuleList([dualEncoder(hidden_size, log_samples, adj_gat, graphwave) for i in range(layers)])
self.adaptive_fusion = adaptiveFusion(hidden_size)
-
+
self.pre_l = nn.Conv2d(seq_len, horizon, (1,1))
self.pre_h = nn.Conv2d(seq_len, horizon, (1,1))
-
+
self.end_emb = FeedForward([hidden_size, hidden_size, input_dim])
self.end_emb_l = FeedForward([hidden_size, hidden_size, input_dim])
@@ -304,17 +304,17 @@ def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_s
for enc in self.dual_encoder:
xl, xh = enc(xl, xh, TE[:,:xl.shape[1],:,:])
-
+
hat_y_l = self.pre_l(xl)
hat_y_h = self.pre_h(xh)
hat_y = self.adaptive_fusion(hat_y_l, hat_y_h, TE[:,xl.shape[1]:,:,:])
hat_y, hat_y_l = self.end_emb(hat_y), self.end_emb_l(hat_y_l)
-
+
if self.training:
label_yl, _ = disentangle(future_data[...,0:1].cpu().numpy(), self.wt, self.wl)
return torch.cat([hat_y, hat_y_l, label_yl.to(x.device)], -1)
-
+
return hat_y
diff --git a/baselines/STWave/loss.py b/baselines/STWave/loss.py
index 35f63f7e..18d4fdbf 100644
--- a/baselines/STWave/loss.py
+++ b/baselines/STWave/loss.py
@@ -1,7 +1,7 @@
import torch
import numpy as np
-from basicts.losses import masked_mae
+from basicts.metrics import masked_mae
def stwave_masked_mae(prediction: list, target: torch.Tensor, null_val: float = np.nan) -> torch.Tensor:
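
stwave_masked_mae accepts the packed prediction because STWave's forward pass concatenates the fused forecast, the low-frequency forecast, and the wavelet label along the last dimension during training (the torch.cat in stwave_arch.py above). A hedged sketch of such a composite masked MAE; the slicing and equal weighting are assumptions for illustration, not this file's actual body:

    import numpy as np
    import torch
    from basicts.metrics import masked_mae

    def composite_masked_mae(prediction: torch.Tensor, target: torch.Tensor,
                             null_val: float = np.nan) -> torch.Tensor:
        # prediction packs [fused forecast, low-freq forecast, low-freq label]
        # along the last dim, mirroring the torch.cat in STWave.forward.
        hat_y, hat_y_l, label_yl = torch.chunk(prediction, 3, dim=-1)
        # Supervise the fused output against ground truth, and the low-frequency
        # branch against its wavelet-decomposed label.
        return masked_mae(hat_y, target, null_val) + masked_mae(hat_y_l, label_yl, null_val)
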
diff --git a/baselines/STWave/run.sh b/baselines/STWave/run.sh
deleted file mode 100644
index 61fd7ca0..00000000
--- a/baselines/STWave/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/STWave/METR-LA.py --gpus '0'
-python experiments/train.py -c baselines/STWave/PEMS-BAY.py --gpus '0'
-python experiments/train.py -c baselines/STWave/PEMS03.py --gpus '0'
-python experiments/train.py -c baselines/STWave/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/STWave/PEMS07.py --gpus '0'
-python experiments/train.py -c baselines/STWave/PEMS08.py --gpus '0'
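
The per-model run.sh helpers are removed in this release. The same experiments can still be launched one config at a time through the training entry point, reusing the flags from the deleted script (assuming the train.py interface is unchanged in 0.4.0):

    python experiments/train.py -c baselines/STWave/PEMS08.py --gpus '0'
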
diff --git a/baselines/StemGNN/METR-LA.py b/baselines/StemGNN/METR-LA.py
index b1ced724..dd268794 100644
--- a/baselines/StemGNN/METR-LA.py
+++ b/baselines/StemGNN/METR-LA.py
@@ -1,40 +1,31 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
-"""Different from the official code, we use Adam as the optimizer and MAE as the loss function since they bring better performance."""
from .arch import StemGNN
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'METR-LA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = StemGNN
-# ================= general ================= #
-CFG.DESCRIPTION = "StemGNN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "METR-LA"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "StemGNN"
-CFG.MODEL.ARCH = StemGNN
-CFG.MODEL.PARAM = {
+MODEL_PARAM = {
"units": 207,
"stack_cnt": 2,
"time_step": 12,
@@ -43,69 +34,105 @@
"dropout_rate": 0.5,
"leaky_rate": 0.2
}
-CFG.MODEL.FORWARD_FEATURES = [0]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.0004
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.0004,
+ "weight_decay": 0.0003,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 16
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
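
Note that FORWARD_FEATURES widens from [0] to [0, 1, 2] for StemGNN, so the model now also receives the time-of-day and day-of-week channels, while TARGET_FEATURES = [0] keeps the loss on the traffic signal alone. The selection this implies, assuming the [batch, seq_len, num_nodes, num_channels] layout used across BasicTS baselines:

    def select_input_features(data, forward_features=(0, 1, 2)):
        # data: [batch, seq_len, num_nodes, num_channels]
        return data[..., list(forward_features)]

    def select_target_features(data, target_features=(0,)):
        return data[..., list(target_features)]
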
diff --git a/baselines/StemGNN/PEMS-BAY.py b/baselines/StemGNN/PEMS-BAY.py
index b5fc7a12..6d6e8c5e 100644
--- a/baselines/StemGNN/PEMS-BAY.py
+++ b/baselines/StemGNN/PEMS-BAY.py
@@ -1,41 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
-"""Different from the official code, we use Adam as the optimizer and MAE as the loss function since they bring better performance."""
from .arch import StemGNN
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS-BAY' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = StemGNN
-# ================= general ================= #
-CFG.DESCRIPTION = "StemGNN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS-BAY"
-CFG.DATASET_TYPE = "Traffic speed"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "StemGNN"
-CFG.MODEL.ARCH = StemGNN
-CFG.MODEL.PARAM = {
- "units": 325,
+MODEL_PARAM = {
+ "units": 325,
"stack_cnt": 2,
"time_step": 12,
"multi_layer": 5,
@@ -43,69 +34,105 @@
"dropout_rate": 0.5,
"leaky_rate": 0.2
}
-CFG.MODEL.FORWARD_FEATURES = [0]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.0004
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.0004,
+ "weight_decay": 0.0003,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 16
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/StemGNN/PEMS03.py b/baselines/StemGNN/PEMS03.py
index c276d6bf..0af8bf5a 100644
--- a/baselines/StemGNN/PEMS03.py
+++ b/baselines/StemGNN/PEMS03.py
@@ -1,41 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
-"""Different from the official code, we use Adam as the optimizer and MAE as the loss function since they bring better performance."""
from .arch import StemGNN
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS03' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = StemGNN
-# ================= general ================= #
-CFG.DESCRIPTION = "StemGNN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS03"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "StemGNN"
-CFG.MODEL.ARCH = StemGNN
-CFG.MODEL.PARAM = {
- "units": 358,
+MODEL_PARAM = {
+ "units": 358,
"stack_cnt": 2,
"time_step": 12,
"multi_layer": 5,
@@ -43,69 +34,105 @@
"dropout_rate": 0.5,
"leaky_rate": 0.2
}
-CFG.MODEL.FORWARD_FEATURES = [0]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.002
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.0004,
+ "weight_decay": 0.0003,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50, 100],
+ "milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 16
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/StemGNN/PEMS04.py b/baselines/StemGNN/PEMS04.py
index dfaad1a4..bb87639e 100644
--- a/baselines/StemGNN/PEMS04.py
+++ b/baselines/StemGNN/PEMS04.py
@@ -1,41 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
-"""Different from the official code, we use MAE as the loss function since they bring better performance."""
from .arch import StemGNN
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = StemGNN
-# ================= general ================= #
-CFG.DESCRIPTION = "StemGNN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "StemGNN"
-CFG.MODEL.ARCH = StemGNN
-CFG.MODEL.PARAM = {
- "units": 307,
+MODEL_PARAM = {
+ "units": 307,
"stack_cnt": 2,
"time_step": 12,
"multi_layer": 5,
@@ -43,69 +34,105 @@
"dropout_rate": 0.5,
"leaky_rate": 0.2
}
-CFG.MODEL.FORWARD_FEATURES = [0]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "RMSprop"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.002
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.0004,
+ "weight_decay": 0.0003,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50, 100],
+ "milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 16
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
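
The revised optimizer block pairs Adam (lr 0.0004, weight_decay 0.0003) with MultiStepLR over milestones [1, 50] instead of the old [1, 50, 100]. A standalone check of the resulting schedule, using plain PyTorch and a stand-in parameter set:

import torch

model = torch.nn.Linear(1, 1)  # stand-in parameters
optim = torch.optim.Adam(model.parameters(), lr=0.0004, weight_decay=0.0003)
sched = torch.optim.lr_scheduler.MultiStepLR(optim, milestones=[1, 50], gamma=0.5)

for epoch in range(100):
    # ... one training epoch ...
    sched.step()
# lr: 0.0004 initially, 0.0002 after epoch 1, 0.0001 after epoch 50
print(sched.get_last_lr())  # [0.0001]
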
diff --git a/baselines/StemGNN/PEMS07.py b/baselines/StemGNN/PEMS07.py
index 4ff7e80a..000f0a49 100644
--- a/baselines/StemGNN/PEMS07.py
+++ b/baselines/StemGNN/PEMS07.py
@@ -1,41 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
-"""Different from the official code, we use Adam as the optimizer and MAE as the loss function since they bring better performance."""
from .arch import StemGNN
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS07' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = StemGNN
-# ================= general ================= #
-CFG.DESCRIPTION = "StemGNN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS07"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "StemGNN"
-CFG.MODEL.ARCH = StemGNN
-CFG.MODEL.PARAM = {
- "units": 883,
+MODEL_PARAM = {
+ "units": 883,
"stack_cnt": 2,
"time_step": 12,
"multi_layer": 5,
@@ -43,69 +34,105 @@
"dropout_rate": 0.5,
"leaky_rate": 0.2
}
-CFG.MODEL.FORWARD_FEATURES = [0]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.002
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.0004,
+ "weight_decay": 0.0003,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50, 100],
+ "milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 16
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
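
Each refactored config pulls its dataset constants from get_regular_settings(DATA_NAME) instead of hard-coding them. Based on the keys read above and the values the old configs carried (12-step input/output, null value 0.0), the returned dict for a PEMS-style dataset looks roughly like the following; the split ratios in particular are an assumption for illustration, not guaranteed by the library:

regular_settings = {
    'INPUT_LEN': 12,                         # short-term forecasting default
    'OUTPUT_LEN': 12,
    'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],  # assumed typical split
    'NORM_EACH_CHANNEL': False,
    'RESCALE': True,
    'NULL_VAL': 0.0,                         # missing readings stored as 0
}
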
diff --git a/baselines/StemGNN/PEMS08.py b/baselines/StemGNN/PEMS08.py
index 6a1d7c96..5f5dd3dc 100644
--- a/baselines/StemGNN/PEMS08.py
+++ b/baselines/StemGNN/PEMS08.py
@@ -1,41 +1,32 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
-"""Different from the official code, we use Adam as the optimizer and MAE as the loss function since they bring better performance."""
from .arch import StemGNN
-CFG = EasyDict()
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = StemGNN
-# ================= general ================= #
-CFG.DESCRIPTION = "StemGNN model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic flow"
-CFG.DATASET_INPUT_LEN = 12
-CFG.DATASET_OUTPUT_LEN = 12
-CFG.GPU_NUM = 1
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "StemGNN"
-CFG.MODEL.ARCH = StemGNN
-CFG.MODEL.PARAM = {
- "units": 170,
+MODEL_PARAM = {
+ "units": 170,
"stack_cnt": 2,
"time_step": 12,
"multi_layer": 5,
@@ -43,69 +34,105 @@
"dropout_rate": 0.5,
"leaky_rate": 0.2
}
-CFG.MODEL.FORWARD_FEATURES = [0]
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM= {
- "lr":0.002
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.0004,
+ "weight_decay": 0.0003,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
- "milestones": [1, 50, 100],
+ "milestones": [1, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 16
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [3, 6, 12]
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
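
Checkpoint directories are now keyed by model, dataset, epoch count, and sequence lengths rather than just model and epochs. For the PEMS08 config above, assuming the regular 12-in/12-out settings, the path resolves as:

import os

ckpt = os.path.join('checkpoints', 'StemGNN',
                    '_'.join(['PEMS08', str(100), str(12), str(12)]))
print(ckpt)  # checkpoints/StemGNN/PEMS08_100_12_12
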
diff --git a/baselines/StemGNN/run.sh b/baselines/StemGNN/run.sh
deleted file mode 100644
index 25cb8053..00000000
--- a/baselines/StemGNN/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/StemGNN/METR-LA.py --gpus '0'
-python experiments/train.py -c baselines/StemGNN/PEMS-BAY.py --gpus '0'
-python experiments/train.py -c baselines/StemGNN/PEMS03.py --gpus '0'
-python experiments/train.py -c baselines/StemGNN/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/StemGNN/PEMS07.py --gpus '0'
-python experiments/train.py -c baselines/StemGNN/PEMS08.py --gpus '0'
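
With run.sh removed, each config is launched individually through the shared entry point. Assuming the training interface from the deleted script is unchanged, a single StemGNN run still looks like

python experiments/train.py -c baselines/StemGNN/PEMS04.py --gpus '0'

so sweeping over datasets is simply a matter of repeating the command per config file.
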
diff --git a/baselines/TimesNet/ETTh1.py b/baselines/TimesNet/ETTh1.py
index 343372fc..5d37971c 100644
--- a/baselines/TimesNet/ETTh1.py
+++ b/baselines/TimesNet/ETTh1.py
@@ -1,43 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import TimesNet
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "TimesNet model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "TimesNet"
-CFG.MODEL.ARCH = TimesNet
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = TimesNet
NUM_NODES = 7
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "label_len": CFG.DATASET_INPUT_LEN // 2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "label_len": INPUT_LEN // 2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"enc_in": NUM_NODES, # num nodes
"c_out": NUM_NODES,
"top_k": 5, # attn factor
@@ -52,74 +42,108 @@
"day_of_week_size": 7,
"day_of_month_size": 31,
"day_of_year_size": 366
- }
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
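
TimesNet forwards five input channels (the raw value plus four time features, FORWARD_FEATURES = [0, 1, 2, 3, 4]) but is supervised only on channel 0. A minimal sketch of what this feature selection amounts to on a [batch, time, node, feature] tensor; BasicTS's runner performs the equivalent internally:

import torch

history = torch.randn(32, 96, 7, 5)   # [batch, input_len, nodes, features]
future  = torch.randn(32, 336, 7, 5)  # [batch, output_len, nodes, features]

forward_features, target_features = [0, 1, 2, 3, 4], [0]
model_input = history[..., forward_features]  # value + 4 time encodings
target      = future[..., target_features]    # score on the raw value only
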
diff --git a/baselines/TimesNet/ETTh2.py b/baselines/TimesNet/ETTh2.py
index 20a0c2b3..594dee42 100644
--- a/baselines/TimesNet/ETTh2.py
+++ b/baselines/TimesNet/ETTh2.py
@@ -1,43 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import TimesNet
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "TimesNet model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "TimesNet"
-CFG.MODEL.ARCH = TimesNet
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = TimesNet
NUM_NODES = 7
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "label_len": CFG.DATASET_INPUT_LEN // 2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "label_len": INPUT_LEN // 2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"enc_in": NUM_NODES, # num nodes
"c_out": NUM_NODES,
"top_k": 5, # attn factor
@@ -52,74 +42,108 @@
"day_of_week_size": 7,
"day_of_month_size": 31,
"day_of_year_size": 366
- }
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
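
All metrics in the new CFG.METRICS block share a NULL_VAL that masks missing points out of both training loss and evaluation. A self-contained sketch of the masking idea, simplified relative to basicts.metrics.masked_mae; a NaN null value (common for ETT data) would test with torch.isnan instead of equality:

import torch

def masked_mae_sketch(pred, target, null_val=0.0):
    # Treat entries equal to null_val as missing and renormalize so the
    # mean is taken only over valid points.
    mask = (target != null_val).float()
    mask = mask / mask.mean()
    return (torch.abs(pred - target) * mask).mean()
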
diff --git a/baselines/TimesNet/ETTm1.py b/baselines/TimesNet/ETTm1.py
index 451de593..61fed8bf 100644
--- a/baselines/TimesNet/ETTm1.py
+++ b/baselines/TimesNet/ETTm1.py
@@ -1,43 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import TimesNet
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "TimesNet model configuration "
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "TimesNet"
-CFG.MODEL.ARCH = TimesNet
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = TimesNet
NUM_NODES = 7
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "label_len": CFG.DATASET_INPUT_LEN // 2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "label_len": INPUT_LEN // 2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"enc_in": NUM_NODES, # num nodes
"c_out": NUM_NODES,
"top_k": 5, # attn factor
@@ -48,78 +38,112 @@
"embed": "timeF", # [timeF, fixed, learned]
"dropout": 0.05,
"num_time_features": 4, # number of used time features
- "time_of_day_size": 24 * 4,
+ "time_of_day_size": 24*4,
"day_of_week_size": 7,
"day_of_month_size": 31,
"day_of_year_size": 366
- }
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
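
The label_len = INPUT_LEN // 2 entry controls how many trailing observed steps seed the decoder in Informer-style architectures such as TimesNet. Conceptually, using the ETT settings (96-in/336-out) and omitting the arch's own details:

import torch

seq_len, label_len, pred_len = 96, 48, 336
x_enc = torch.randn(32, seq_len, 7)

# Decoder input: the last `label_len` observed steps, then zero placeholders
# for the `pred_len` steps to be predicted.
x_dec = torch.cat([x_enc[:, -label_len:, :],
                   torch.zeros(32, pred_len, 7)], dim=1)
assert x_dec.shape == (32, label_len + pred_len, 7)
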
diff --git a/baselines/TimesNet/ETTm2.py b/baselines/TimesNet/ETTm2.py
index 9cf9a233..b6e313c1 100644
--- a/baselines/TimesNet/ETTm2.py
+++ b/baselines/TimesNet/ETTm2.py
@@ -1,43 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import TimesNet
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "TimesNet model configuration "
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "TimesNet"
-CFG.MODEL.ARCH = TimesNet
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = TimesNet
NUM_NODES = 7
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "label_len": CFG.DATASET_INPUT_LEN // 2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "label_len": INPUT_LEN // 2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"enc_in": NUM_NODES, # num nodes
"c_out": NUM_NODES,
"top_k": 5, # attn factor
@@ -48,78 +38,112 @@
"embed": "timeF", # [timeF, fixed, learned]
"dropout": 0.05,
"num_time_features": 4, # number of used time features
- "time_of_day_size": 24 * 4,
+ "time_of_day_size": 24*4,
"day_of_week_size": 7,
"day_of_month_size": 31,
"day_of_year_size": 366
- }
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
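
The time_of_day_size entries encode the sampling rate: ETTm* is 15-minute data, so a day holds 24*4 = 96 slots, whereas an hourly dataset would use 24. A small check of how a timestamp maps to a slot index for the embedding table (illustrative):

time_of_day_size = 24 * 4              # 96 slots per day at 15-minute resolution

minutes_since_midnight = 13 * 60 + 45  # 13:45
slot = minutes_since_midnight // 15
print(slot, slot < time_of_day_size)   # 55 True
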
diff --git a/baselines/TimesNet/Electricity.py b/baselines/TimesNet/Electricity.py
index 24cfbd75..0eb57a9f 100644
--- a/baselines/TimesNet/Electricity.py
+++ b/baselines/TimesNet/Electricity.py
@@ -1,43 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import TimesNet
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "TimesNet model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Electricity"
-CFG.DATASET_TYPE = "Electricity Consumption"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "TimesNet"
-CFG.MODEL.ARCH = TimesNet
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Electricity' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = TimesNet
NUM_NODES = 321
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "label_len": CFG.DATASET_INPUT_LEN // 2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "label_len": INPUT_LEN // 2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"enc_in": NUM_NODES, # num nodes
"c_out": NUM_NODES,
"top_k": 5,
@@ -53,63 +43,107 @@
"day_of_month_size": 31,
"day_of_year_size": 366
}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mse
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0001
+ "lr": 0.0002,
+ "weight_decay": 0.0005,
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25, 50],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 10
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
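
The Electricity config also gains CFG.TRAIN.CLIP_GRAD_PARAM, which is forwarded to gradient clipping each step. With max_norm 5.0 that corresponds to the standard PyTorch call; the kwargs-forwarding shown here is how such dicts are typically consumed and is stated as an assumption:

import torch

model = torch.nn.Linear(8, 8)
loss = model(torch.randn(4, 8)).pow(2).mean()
loss.backward()

clip_grad_param = {'max_norm': 5.0}  # mirrors CFG.TRAIN.CLIP_GRAD_PARAM
torch.nn.utils.clip_grad_norm_(model.parameters(), **clip_grad_param)
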
diff --git a/baselines/TimesNet/ExchangeRate.py b/baselines/TimesNet/ExchangeRate.py
index 163ee044..354681ad 100644
--- a/baselines/TimesNet/ExchangeRate.py
+++ b/baselines/TimesNet/ExchangeRate.py
@@ -1,43 +1,34 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import TimesNet
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "TimesNet model configuration "
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ExchangeRate"
-CFG.DATASET_TYPE = "Exchange Rate"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "TimesNet"
-CFG.MODEL.ARCH = TimesNet
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ExchangeRate' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+INPUT_LEN = 96 # Override: TimesNet is typically run with a 96-step input window
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = TimesNet
NUM_NODES = 8
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "label_len": CFG.DATASET_INPUT_LEN // 2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "label_len": INPUT_LEN // 2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"enc_in": NUM_NODES, # num nodes
"c_out": NUM_NODES,
"top_k": 5, # attn factor
@@ -52,71 +43,108 @@
"day_of_week_size": 7,
"day_of_month_size": 31,
"day_of_year_size": 366
- }
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
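+# PARAM is presumably unpacked as keyword arguments when the runner instantiates
+# CFG.DATASET.TYPE for each of the train/valid/test phases.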
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
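+# e.g. 'checkpoints/TimesNet/ExchangeRate_100_96_336' (dataset_epochs_inlen_outlen)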
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.0002,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/TimesNet/Weather.py b/baselines/TimesNet/Weather.py
index 94297a12..8b846b0d 100644
--- a/baselines/TimesNet/Weather.py
+++ b/baselines/TimesNet/Weather.py
@@ -1,43 +1,33 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mse, masked_mae
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import TimesNet
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "TimesNet model configuration "
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Weather"
-CFG.DATASET_TYPE = "Weather Data"
-CFG.DATASET_INPUT_LEN = 96
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "TimesNet"
-CFG.MODEL.ARCH = TimesNet
-NUM_NODES = 21
-CFG.MODEL.PARAM = {
- "seq_len": CFG.DATASET_INPUT_LEN,
- "label_len": CFG.DATASET_INPUT_LEN // 2, # start token length used in decoder
- "pred_len": CFG.DATASET_OUTPUT_LEN, # prediction sequence length
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Weather' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = TimesNet
+NUM_NODES = 21 # Weather has 21 channels
+MODEL_PARAM = {
+ "seq_len": INPUT_LEN,
+ "label_len": INPUT_LEN // 2, # start token length used in decoder
+ "pred_len": OUTPUT_LEN, # prediction sequence length
"enc_in": NUM_NODES, # num nodes
"c_out": NUM_NODES,
"top_k": 5, # attn factor
@@ -52,72 +42,108 @@
"day_of_week_size": 7,
"day_of_month_size": 31,
"day_of_year_size": 366
- }
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3, 4]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.00001,
"weight_decay": 0.0005,
}
+# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 25, 50],
"gamma": 0.5
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/TimesNet/run.sh b/baselines/TimesNet/run.sh
deleted file mode 100644
index 9df02f42..00000000
--- a/baselines/TimesNet/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/TimesNet/ETTh1.py --gpus '0'
-python experiments/train.py -c baselines/TimesNet/ETTh2.py --gpus '0'
-python experiments/train.py -c baselines/TimesNet/ETTm1.py --gpus '0'
-python experiments/train.py -c baselines/TimesNet/ETTm2.py --gpus '0'
-python experiments/train.py -c baselines/TimesNet/Electricity.py --gpus '0'
-python experiments/train.py -c baselines/TimesNet/ExchangeRate.py --gpus '0'
-python experiments/train.py -c baselines/TimesNet/Weather.py --gpus '0'
-python experiments/train.py -c baselines/TimesNet/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/TimesNet/PEMS08.py --gpus '0'
diff --git a/baselines/Triformer/ETTh1.py b/baselines/Triformer/ETTh1.py
index c7627e97..aa67dff4 100644
--- a/baselines/Triformer/ETTh1.py
+++ b/baselines/Triformer/ETTh1.py
@@ -1,108 +1,140 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
from basicts.metrics import masked_mae, masked_mse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Triformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Triformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Triformer"
-CFG.MODEL.ARCH = Triformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Triformer
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"num_nodes": NUM_NODES,
- "lag": CFG.DATASET_INPUT_LEN,
- "horizon": CFG.DATASET_OUTPUT_LEN,
+ "lag": INPUT_LEN,
+ "horizon": OUTPUT_LEN,
"input_dim": 3,
# default parameters described in the paper
"channels": 32,
"patch_sizes": [7, 4, 3, 2, 2],
"mem_dim": 5
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0001
+ "lr": 0.0002,
+ "weight_decay": 0.0001,
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/Triformer/ETTh2.py b/baselines/Triformer/ETTh2.py
index 4f436621..c4df760e 100644
--- a/baselines/Triformer/ETTh2.py
+++ b/baselines/Triformer/ETTh2.py
@@ -1,108 +1,140 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
from basicts.metrics import masked_mae, masked_mse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Triformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Triformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Triformer"
-CFG.MODEL.ARCH = Triformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Triformer
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"num_nodes": NUM_NODES,
- "lag": CFG.DATASET_INPUT_LEN,
- "horizon": CFG.DATASET_OUTPUT_LEN,
+ "lag": INPUT_LEN,
+ "horizon": OUTPUT_LEN,
"input_dim": 3,
# default parameters described in the paper
"channels": 32,
"patch_sizes": [7, 4, 3, 2, 2],
"mem_dim": 5
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0001
+ "lr": 0.0002,
+ "weight_decay": 0.0001,
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/Triformer/ETTm1.py b/baselines/Triformer/ETTm1.py
index 4ef501cc..a39c29d1 100644
--- a/baselines/Triformer/ETTm1.py
+++ b/baselines/Triformer/ETTm1.py
@@ -1,108 +1,140 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
from basicts.metrics import masked_mae, masked_mse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Triformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Triformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Triformer"
-CFG.MODEL.ARCH = Triformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm1' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Triformer
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"num_nodes": NUM_NODES,
- "lag": CFG.DATASET_INPUT_LEN,
- "horizon": CFG.DATASET_OUTPUT_LEN,
+ "lag": INPUT_LEN,
+ "horizon": OUTPUT_LEN,
"input_dim": 3,
# default parameters described in the paper
"channels": 32,
"patch_sizes": [7, 4, 3, 2, 2],
"mem_dim": 5
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0001
+ "lr": 0.0002,
+ "weight_decay": 0.0001,
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/Triformer/ETTm2.py b/baselines/Triformer/ETTm2.py
index e0463423..06b6a096 100644
--- a/baselines/Triformer/ETTm2.py
+++ b/baselines/Triformer/ETTm2.py
@@ -1,108 +1,140 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
from basicts.metrics import masked_mae, masked_mse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Triformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Triformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTm2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Triformer"
-CFG.MODEL.ARCH = Triformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm2' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Triformer
NUM_NODES = 7
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"num_nodes": NUM_NODES,
- "lag": CFG.DATASET_INPUT_LEN,
- "horizon": CFG.DATASET_OUTPUT_LEN,
+ "lag": INPUT_LEN,
+ "horizon": OUTPUT_LEN,
"input_dim": 3,
# default parameters described in the paper
"channels": 32,
"patch_sizes": [7, 4, 3, 2, 2],
"mem_dim": 5
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0001
+ "lr": 0.0002,
+ "weight_decay": 0.0001,
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/Triformer/Electricity.py b/baselines/Triformer/Electricity.py
index 4c91746d..4bd08a52 100644
--- a/baselines/Triformer/Electricity.py
+++ b/baselines/Triformer/Electricity.py
@@ -1,108 +1,140 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
from basicts.metrics import masked_mae, masked_mse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Triformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Triformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Electricity"
-CFG.DATASET_TYPE = "Electricity Consumption"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Triformer"
-CFG.MODEL.ARCH = Triformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Electricity' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Triformer
NUM_NODES = 321
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"num_nodes": NUM_NODES,
- "lag": CFG.DATASET_INPUT_LEN,
- "horizon": CFG.DATASET_OUTPUT_LEN,
+ "lag": INPUT_LEN,
+ "horizon": OUTPUT_LEN,
"input_dim": 3,
# default parameters described in the paper
"channels": 32,
"patch_sizes": [7, 4, 3, 2, 2],
"mem_dim": 5
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0002
+ "lr": 0.0002,
+ "weight_decay": 0.0001,
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 14
-CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.BATCH_SIZE = 64
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/Triformer/ExchangeRate.py b/baselines/Triformer/ExchangeRate.py
index 95652075..b7406cff 100644
--- a/baselines/Triformer/ExchangeRate.py
+++ b/baselines/Triformer/ExchangeRate.py
@@ -1,108 +1,140 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
from basicts.metrics import masked_mae, masked_mse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Triformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Triformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ExchangeRate"
-CFG.DATASET_TYPE = "Exchange Rate"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Triformer"
-CFG.MODEL.ARCH = Triformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ExchangeRate' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Triformer
NUM_NODES = 8
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"num_nodes": NUM_NODES,
- "lag": CFG.DATASET_INPUT_LEN,
- "horizon": CFG.DATASET_OUTPUT_LEN,
+ "lag": INPUT_LEN,
+ "horizon": OUTPUT_LEN,
"input_dim": 3,
# default parameters described in the paper
"channels": 32,
"patch_sizes": [7, 4, 3, 2, 2],
"mem_dim": 5
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0001
+ "lr": 0.0002,
+ "weight_decay": 0.0001,
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/Triformer/PEMS04.py b/baselines/Triformer/PEMS04.py
deleted file mode 100644
index f2dbc000..00000000
--- a/baselines/Triformer/PEMS04.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mae, masked_mse
-
-from .arch import Triformer
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Triformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS04"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-CFG.NULL_VAL = 0.0
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Triformer"
-CFG.MODEL.ARCH = Triformer
-NUM_NODES = 307
-CFG.MODEL.PARAM = EasyDict(
- {
- "num_nodes": NUM_NODES,
- "lag": CFG.DATASET_INPUT_LEN,
- "horizon": CFG.DATASET_OUTPUT_LEN,
- "input_dim": 3,
- # default parameters described in the paper
- "channels": 32,
- "patch_sizes": [7, 4, 3, 2, 2],
- "mem_dim": 5
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0001
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 8
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 32
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 32
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Triformer/PEMS04_LTSF.py b/baselines/Triformer/PEMS04_LTSF.py
new file mode 100644
index 00000000..7f3b4925
--- /dev/null
+++ b/baselines/Triformer/PEMS04_LTSF.py
@@ -0,0 +1,132 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_rmse, masked_mape
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import Triformer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Triformer
+NUM_NODES = 307
+MODEL_PARAM = {
+ "num_nodes": NUM_NODES,
+ "lag": INPUT_LEN,
+ "horizon": OUTPUT_LEN,
+ "input_dim": 3,
+ # default parameters described in the paper
+ "channels": 32,
+ "patch_sizes": [7, 4, 3, 2, 2],
+ "mem_dim": 5
+ }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.0001
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
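
For reference, the LTSF configs above are launched the same way as before: either through the experiments/train.py entry point that the (now removed) run.sh scripts used, or programmatically via launch_training. A minimal sketch; the config path simply points at the file defined above:

    # Shell entry point, as in the removed baselines/Triformer/run.sh:
    #   python experiments/train.py -c baselines/Triformer/PEMS04_LTSF.py --gpus '0'

    # Equivalent programmatic launch (launch_training accepts a config dict or a path):
    from basicts import launch_training
    launch_training(cfg='baselines/Triformer/PEMS04_LTSF.py', gpus='0')
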
diff --git a/baselines/Triformer/PEMS08.py b/baselines/Triformer/PEMS08.py
deleted file mode 100644
index e0b477ee..00000000
--- a/baselines/Triformer/PEMS08.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.metrics import masked_mae, masked_mse
-
-from .arch import Triformer
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Triformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "PEMS08"
-CFG.DATASET_TYPE = "Traffic Flow"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Triformer"
-CFG.MODEL.ARCH = Triformer
-NUM_NODES = 170
-CFG.MODEL.PARAM = EasyDict(
- {
- "num_nodes": NUM_NODES,
- "lag": CFG.DATASET_INPUT_LEN,
- "horizon": CFG.DATASET_OUTPUT_LEN,
- "input_dim": 3,
- # default parameters described in the paper
- "channels": 32,
- "patch_sizes": [7, 4, 3, 2, 2],
- "mem_dim": 5
- }
-)
-CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0001
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 100
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 16
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/Triformer/PEMS08_LTSF.py b/baselines/Triformer/PEMS08_LTSF.py
new file mode 100644
index 00000000..86b2780f
--- /dev/null
+++ b/baselines/Triformer/PEMS08_LTSF.py
@@ -0,0 +1,132 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_rmse, masked_mape
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import Triformer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Triformer
+NUM_NODES = 170
+MODEL_PARAM = {
+ "num_nodes": NUM_NODES,
+ "lag": INPUT_LEN,
+ "horizon": OUTPUT_LEN,
+ "input_dim": 3,
+ # default parameters described in the paper
+ "channels": 32,
+ "patch_sizes": [7, 4, 3, 2, 2],
+ "mem_dim": 5
+ }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.0001
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/Triformer/Weather.py b/baselines/Triformer/Weather.py
index 4cfb6ab0..dd678a4b 100644
--- a/baselines/Triformer/Weather.py
+++ b/baselines/Triformer/Weather.py
@@ -1,108 +1,140 @@
import os
import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
from basicts.metrics import masked_mae, masked_mse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
from .arch import Triformer
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "Triformer model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "Weather"
-CFG.DATASET_TYPE = "Weather"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-# CFG.RESCALE = False
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 0
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "Triformer"
-CFG.MODEL.ARCH = Triformer
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Weather' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Triformer
NUM_NODES = 21
-CFG.MODEL.PARAM = EasyDict(
- {
+MODEL_PARAM = {
"num_nodes": NUM_NODES,
- "lag": CFG.DATASET_INPUT_LEN,
- "horizon": CFG.DATASET_OUTPUT_LEN,
+ "lag": INPUT_LEN,
+ "horizon": OUTPUT_LEN,
"input_dim": 3,
# default parameters described in the paper
"channels": 32,
"patch_sizes": [7, 4, 3, 2, 2],
"mem_dim": 5
}
-)
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]
-# ================= optim ================= #
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.0001
+ "lr": 0.0002,
+ "weight_decay": 0.0001,
}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 50
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- 'checkpoints',
- '_'.join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 25],
+ "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ 'max_norm': 5.0
+}
+# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-# ================= validate ================= #
+############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
-# validating data
CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-# ================= test ================= #
+############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
-# test data
CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = 'datasets/' + CFG.DATASET_NAME
-# dataloader args, optional
CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-# ================= evaluate ================= #
+############################## Evaluation Configuration ##############################
+
CFG.EVAL = EasyDict()
+
+# Evaluation parameters
CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
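
The SCALER block above replaces the normalization that previously lived in basicts/data/transform.py (deleted later in this patch). As a conceptual sketch only — not the actual ZScoreScaler source — a z-score scaler fits mean and std on the training split and exposes paired transform/inverse_transform operations:

    import numpy as np

    class ZScoreScalerSketch:
        """Illustrative stand-in for basicts.scaler.ZScoreScaler."""
        def fit(self, train_data: np.ndarray, norm_each_channel: bool = True):
            # Statistics come from the training split only, per channel if requested.
            axis = 0 if norm_each_channel else None
            self.mean = train_data.mean(axis=axis, keepdims=norm_each_channel)
            self.std = train_data.std(axis=axis, keepdims=norm_each_channel)
        def transform(self, data: np.ndarray) -> np.ndarray:
            return (data - self.mean) / self.std
        def inverse_transform(self, data: np.ndarray) -> np.ndarray:
            return data * self.std + self.mean
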
diff --git a/baselines/Triformer/run.sh b/baselines/Triformer/run.sh
deleted file mode 100644
index cda838f5..00000000
--- a/baselines/Triformer/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-# # !/bin/bash
-python experiments/train.py -c baselines/Triformer/ETTh1.py --gpus '0'
-python experiments/train.py -c baselines/Triformer/ETTh2.py --gpus '0'
-python experiments/train.py -c baselines/Triformer/ETTm1.py --gpus '0'
-python experiments/train.py -c baselines/Triformer/ETTm2.py --gpus '0'
-python experiments/train.py -c baselines/Triformer/Electricity.py --gpus '0'
-python experiments/train.py -c baselines/Triformer/ExchangeRate.py --gpus '0'
-python experiments/train.py -c baselines/Triformer/Weather.py --gpus '0'
-python experiments/train.py -c baselines/Triformer/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/Triformer/PEMS08.py --gpus '0'
diff --git a/baselines/WaveNet/ETTh1.py b/baselines/WaveNet/ETTh1.py
deleted file mode 100644
index 13ad3579..00000000
--- a/baselines/WaveNet/ETTh1.py
+++ /dev/null
@@ -1,107 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-
-from .arch import WaveNet
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "WaveNet model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh1"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "WaveNet"
-CFG.MODEL.ARCH = WaveNet
-CFG.MODEL.PARAM = {
- "in_dim": 1,
- "out_dim": CFG.DATASET_OUTPUT_LEN,
- "residual_channels": 16,
- "dilation_channels": 16,
- "skip_channels": 64,
- "end_channels": 128,
- "kernel_size": 12,
- "blocks": 6,
- "layers": 3
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 20
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/WaveNet/ETTh2.py b/baselines/WaveNet/ETTh2.py
deleted file mode 100644
index b4149854..00000000
--- a/baselines/WaveNet/ETTh2.py
+++ /dev/null
@@ -1,107 +0,0 @@
-import os
-import sys
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../.."))
-import torch
-from easydict import EasyDict
-from basicts.runners import SimpleTimeSeriesForecastingRunner
-from basicts.data import TimeSeriesForecastingDataset
-from basicts.losses import masked_mae
-
-from .arch import WaveNet
-
-CFG = EasyDict()
-
-# ================= general ================= #
-CFG.DESCRIPTION = "WaveNet model configuration"
-CFG.RUNNER = SimpleTimeSeriesForecastingRunner
-CFG.DATASET_CLS = TimeSeriesForecastingDataset
-CFG.DATASET_NAME = "ETTh2"
-CFG.DATASET_TYPE = "Electricity Transformer Temperature"
-CFG.DATASET_INPUT_LEN = 336
-CFG.DATASET_OUTPUT_LEN = 336
-CFG.GPU_NUM = 1
-
-# ================= environment ================= #
-CFG.ENV = EasyDict()
-CFG.ENV.SEED = 1
-CFG.ENV.CUDNN = EasyDict()
-CFG.ENV.CUDNN.ENABLED = True
-
-# ================= model ================= #
-CFG.MODEL = EasyDict()
-CFG.MODEL.NAME = "WaveNet"
-CFG.MODEL.ARCH = WaveNet
-CFG.MODEL.PARAM = {
- "in_dim": 1,
- "out_dim": CFG.DATASET_OUTPUT_LEN,
- "residual_channels": 16,
- "dilation_channels": 16,
- "skip_channels": 64,
- "end_channels": 128,
- "kernel_size": 12,
- "blocks": 6,
- "layers": 3
-}
-CFG.MODEL.FORWARD_FEATURES = [0]
-CFG.MODEL.TARGET_FEATURES = [0]
-
-# ================= optim ================= #
-CFG.TRAIN = EasyDict()
-CFG.TRAIN.LOSS = masked_mae
-CFG.TRAIN.OPTIM = EasyDict()
-CFG.TRAIN.OPTIM.TYPE = "Adam"
-CFG.TRAIN.OPTIM.PARAM = {
- "lr": 0.002,
- "weight_decay": 0.0001,
-}
-
-# ================= train ================= #
-CFG.TRAIN.NUM_EPOCHS = 20
-CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
- "checkpoints",
- "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
-)
-# train data
-CFG.TRAIN.DATA = EasyDict()
-# read data
-CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TRAIN.DATA.BATCH_SIZE = 64
-CFG.TRAIN.DATA.PREFETCH = False
-CFG.TRAIN.DATA.SHUFFLE = True
-CFG.TRAIN.DATA.NUM_WORKERS = 2
-CFG.TRAIN.DATA.PIN_MEMORY = False
-
-# ================= validate ================= #
-CFG.VAL = EasyDict()
-CFG.VAL.INTERVAL = 1
-# validating data
-CFG.VAL.DATA = EasyDict()
-# read data
-CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.VAL.DATA.BATCH_SIZE = 64
-CFG.VAL.DATA.PREFETCH = False
-CFG.VAL.DATA.SHUFFLE = False
-CFG.VAL.DATA.NUM_WORKERS = 2
-CFG.VAL.DATA.PIN_MEMORY = False
-
-# ================= test ================= #
-CFG.TEST = EasyDict()
-CFG.TEST.INTERVAL = 1
-# test data
-CFG.TEST.DATA = EasyDict()
-# read data
-CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
-# dataloader args, optional
-CFG.TEST.DATA.BATCH_SIZE = 64
-CFG.TEST.DATA.PREFETCH = False
-CFG.TEST.DATA.SHUFFLE = False
-CFG.TEST.DATA.NUM_WORKERS = 2
-CFG.TEST.DATA.PIN_MEMORY = False
-
-# ================= evaluate ================= #
-CFG.EVAL = EasyDict()
-CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
diff --git a/baselines/WaveNet/METR-LA.py b/baselines/WaveNet/METR-LA.py
new file mode 100644
index 00000000..a72e5030
--- /dev/null
+++ b/baselines/WaveNet/METR-LA.py
@@ -0,0 +1,142 @@
+import os
+import sys
+import torch
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
+
+from .arch import WaveNet
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'METR-LA' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = WaveNet
+MODEL_PARAM = {
+ "in_dim": 1,
+ "out_dim": OUTPUT_LEN,
+ "residual_channels": 16,
+ "dilation_channels": 16,
+ "skip_channels": 64,
+ "end_channels": 128,
+ "kernel_size": 12,
+ "blocks": 6,
+ "layers": 3
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MSE': masked_mse
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+ "lr": 0.002,
+ "weight_decay": 0.0001,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ "milestones": [1, 50],
+ "gamma": 0.5
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+ "max_norm": 5.0
+}
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
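
CFG.EVAL.HORIZONS above limits reporting to the 3-, 6-, and 12-step-ahead predictions, the conventional horizons for METR-LA. Conceptually, horizon-sliced evaluation just indexes the prediction-length axis before applying a metric; a sketch assuming the usual [B, L, N, C] tensor layout (this is not the runner's actual code):

    from basicts.metrics import masked_mae

    def horizon_mae(prediction, target, horizons, null_val):
        # prediction/target: [B, L, N, C]; horizon h is the h-th future step (1-indexed).
        return {h: masked_mae(prediction[:, h - 1, ...], target[:, h - 1, ...], null_val)
                for h in horizons}
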
diff --git a/basicts/__init__.py b/basicts/__init__.py
index db1ee888..e136d21c 100644
--- a/basicts/__init__.py
+++ b/basicts/__init__.py
@@ -1,6 +1,6 @@
-from .launcher import launch_training, launch_runner
+from .launcher import launch_training, launch_evaluation
from .runners import BaseRunner
-__version__ = "0.3.12"
+__version__ = '0.4.0'
-__all__ = ["__version__", "launch_training", "launch_runner", "BaseRunner"]
+__all__ = ['__version__', 'launch_training', 'launch_evaluation', 'BaseRunner']
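
With launch_runner replaced by launch_evaluation, the two public entry points are used as sketched below. The checkpoint path is hypothetical: it follows the CKPT_SAVE_DIR layout of the new configs and the {model}_best_val_{metric}.pt naming used by evaluation_func later in this patch:

    from basicts import launch_training, launch_evaluation

    # Train from a config file path (a prepared config dict also works).
    launch_training(cfg='baselines/Triformer/Weather.py', gpus='0')

    # Evaluate a trained model; ckpt_path below is illustrative only.
    launch_evaluation(cfg='baselines/Triformer/Weather.py',
                      ckpt_path='checkpoints/Triformer/Weather_100_336_336/Triformer_best_val_MAE.pt',
                      gpus='0')
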
diff --git a/basicts/archs/example_arch.py b/basicts/archs/example_arch.py
deleted file mode 100644
index 6f1fd90e..00000000
--- a/basicts/archs/example_arch.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import torch
-from torch import nn
-
-class MultiLayerPerceptron(nn.Module):
- """Two fully connected layer."""
-
- def __init__(self, history_seq_len: int, prediction_seq_len: int, hidden_dim: int):
- super().__init__()
- self.fc1 = nn.Linear(history_seq_len, hidden_dim)
- self.fc2 = nn.Linear(hidden_dim, prediction_seq_len)
- self.act = nn.ReLU()
-
- def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_seen: int, epoch: int, train: bool, **kwargs) -> torch.Tensor:
- """Feedforward function of MLP.
-
- Args:
- history_data (torch.Tensor): inputs with shape [B, L, N, C].
-
- Returns:
- torch.Tensor: outputs with shape [B, L, N, C]
- """
-
- history_data = history_data[..., 0].transpose(1, 2) # B, N, L
- prediction = self.fc2(self.act(self.fc1(history_data))).transpose(1, 2) # B, L, N
- return prediction.unsqueeze(-1) # B, L, N, C
diff --git a/basicts/data/__init__.py b/basicts/data/__init__.py
index b0b48d8c..60a24036 100644
--- a/basicts/data/__init__.py
+++ b/basicts/data/__init__.py
@@ -1,12 +1,4 @@
-import os
+from .base_dataset import BaseDataset
+from .simple_tsf_dataset import TimeSeriesForecastingDataset
-from ..utils.misc import scan_modules
-from .registry import SCALER_REGISTRY
-from .dataset_zoo.simple_tsf_dataset import TimeSeriesForecastingDataset
-from .dataset_zoo.m4_dataset import M4ForecastingDataset
-
-__all__ = ["SCALER_REGISTRY", "TimeSeriesForecastingDataset", "M4ForecastingDataset"]
-
-# fix bugs on Windows systems and on jupyter
-project_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-scan_modules(project_dir, __file__, ["__init__.py", "registry.py"], ["dataset_zoo/", ".ipynb_checkpoints/"])
+__all__ = ['BaseDataset', 'TimeSeriesForecastingDataset']
diff --git a/basicts/data/base_dataset.py b/basicts/data/base_dataset.py
new file mode 100644
index 00000000..ffb27d9c
--- /dev/null
+++ b/basicts/data/base_dataset.py
@@ -0,0 +1,100 @@
+from typing import List
+from dataclasses import dataclass
+import numpy as np
+from torch.utils.data import Dataset
+
+
+@dataclass
+class BaseDataset(Dataset):
+ """
+ An abstract base class for creating datasets for time series forecasting in PyTorch.
+
+ This class provides a structured template for defining custom datasets by specifying methods
+ to load data and descriptions, and to access individual samples. It is designed to be subclassed
+ with specific implementations for different types of time series data.
+
+ Attributes:
+ dataset_name (str): The name of the dataset, used to identify the dataset uniquely.
+ train_val_test_ratio (List[float]): Ratios for splitting the dataset into training, validation,
+ and testing sets respectively. The values in the list should sum to 1.0.
+ mode (str): Operational mode of the dataset. Valid values are "train", "valid", or "test".
+ input_len (int): The length of the input sequence, i.e., the number of historical data points used.
+ output_len (int): The length of the output sequence, i.e., the number of future data points predicted.
+ overlap (bool): Flag to indicate whether the splits between training, validation, and testing can overlap.
+ Defaults to True but can be set to False to enforce non-overlapping data in different sets.
+ """
+
+ dataset_name: str
+ train_val_test_ratio: List[float]
+ mode: str
+ input_len: int
+ output_len: int
+ overlap: bool = True
+
+ def _load_description(self) -> dict:
+ """
+ Abstract method to load a dataset's description from a file or source.
+
+ This method should be implemented by subclasses to load and return the dataset's metadata,
+ such as its shape, range, or other relevant properties, typically from a JSON or similar file.
+
+ Returns:
+ dict: A dictionary containing the dataset's metadata.
+
+ Raises:
+ NotImplementedError: If the method has not been implemented by a subclass.
+ """
+
+ raise NotImplementedError("Subclasses must implement this method.")
+
+ def _load_data(self) -> np.ndarray:
+ """
+ Abstract method to load the dataset and organize it based on the specified mode.
+
+ This method should be implemented by subclasses to load actual time series data into an array,
+ handling any necessary preprocessing and partitioning according to the specified `mode`.
+
+ Returns:
+ np.ndarray: The loaded and appropriately split dataset array.
+
+ Raises:
+ NotImplementedError: If the method has not been implemented by a subclass.
+ """
+
+ raise NotImplementedError("Subclasses must implement this method.")
+
+ def __len__(self) -> int:
+ """
+ Abstract method to get the total number of samples available in the dataset.
+
+ This method should be implemented by subclasses to calculate and return the total number of valid
+ samples available for training, validation, or testing based on the configuration and dataset size.
+
+ Returns:
+ int: The total number of samples.
+
+ Raises:
+ NotImplementedError: If the method has not been implemented by a subclass.
+ """
+
+ raise NotImplementedError("Subclasses must implement this method.")
+
+ def __getitem__(self, idx: int) -> dict:
+ """
+ Abstract method to retrieve a single sample from the dataset.
+
+ This method should be implemented by subclasses to access and return a specific sample from the dataset,
+ given an index. It should handle the slicing of input and output sequences according to the defined
+ `input_len` and `output_len`.
+
+ Args:
+ idx (int): The index of the sample to retrieve.
+
+ Returns:
+ dict: A dictionary containing the input sequence ('inputs') and output sequence ('target').
+
+ Raises:
+ NotImplementedError: If the method has not been implemented by a subclass.
+ """
+
+ raise NotImplementedError("Subclasses must implement this method.")
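
To make the contract concrete, here is a toy subclass that fills in all four abstract hooks on synthetic data. It mirrors the __init__ pattern of TimeSeriesForecastingDataset (added later in this patch) and is illustrative only:

    import numpy as np
    from basicts.data import BaseDataset

    class ToyDataset(BaseDataset):
        def __init__(self, dataset_name, train_val_test_ratio, mode, input_len, output_len, overlap=True):
            super().__init__(dataset_name, train_val_test_ratio, mode, input_len, output_len, overlap)
            self.description = self._load_description()
            self.data = self._load_data()

        def _load_description(self) -> dict:
            return {'shape': [200, 1, 1]}  # L x N x C, hard-coded for the toy case

        def _load_data(self) -> np.ndarray:
            # A real subclass would read from disk and slice by self.mode here.
            return np.arange(200, dtype='float32').reshape(200, 1, 1)

        def __len__(self) -> int:
            return len(self.data) - self.input_len - self.output_len + 1

        def __getitem__(self, idx: int) -> dict:
            inputs = self.data[idx:idx + self.input_len]
            target = self.data[idx + self.input_len:idx + self.input_len + self.output_len]
            return {'inputs': inputs, 'target': target}
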
diff --git a/basicts/data/dataset_zoo/m4_dataset.py b/basicts/data/dataset_zoo/m4_dataset.py
deleted file mode 100644
index ba534efd..00000000
--- a/basicts/data/dataset_zoo/m4_dataset.py
+++ /dev/null
@@ -1,84 +0,0 @@
-import os
-import random
-
-import torch
-from torch.utils.data import Dataset
-
-from ...utils import load_pkl
-
-
-class M4ForecastingDataset(Dataset):
- """
- BasicTS tries its best to follow the commonly-used processing approaches of M4 dataset, while also providing more flexible interfaces.
- M4 dataset differs from general MTS datasets in the following aspects:
- - M4 dataset is a univariate time series dataset, which does not sample in a synchronized manner.
- In the state-of-the-art M4 prediction solutions, NBeats [1], the authors first sample ids of the time series and then randomly sample the time series data for each time series.
- - Padding and masking are used to make training more flexible and robust.
- - There is no normalization in M4 dataset.
- - There is no validation dataset in M4 dataset.
- - The test data is the last sample of each time series.
- - The future sequence length is fixed for different subsets.
-
- Reference:
- [1] N-BEATS: Neural basis expansion analysis for interpretable time series forecasting
- [2] https://github.com/ServiceNow/N-BEATS/blob/master/common/sampler.py
- """
-
- def __init__(self, data_file_path: str, index_file_path: str, mask_file_path: str, mode: str) -> None:
- super().__init__()
- assert mode in ["train", "test"], "error mode"
- self._check_if_file_exists(data_file_path, index_file_path, mask_file_path)
- # read raw data (normalized)
- self.data = load_pkl(data_file_path)[mode] # padded data: List[List]
- self.mask = load_pkl(mask_file_path)[mode] # padded mask: List[List]
- # read index
- self.index = load_pkl(index_file_path)[mode] # train/test index of each time series: List[List]
-
- def _check_if_file_exists(self, data_file_path: str, index_file_path: str, mask_file_path: str):
- """Check if data file and index file exist.
-
- Args:
- data_file_path (str): data file path
- index_file_path (str): index file path
-
- Raises:
- FileNotFoundError: no data file
- FileNotFoundError: no index file
- """
-
- if not os.path.isfile(data_file_path):
- raise FileNotFoundError("BasicTS can not find data file {0}".format(data_file_path))
- if not os.path.isfile(index_file_path):
- raise FileNotFoundError("BasicTS can not find index file {0}".format(index_file_path))
- if not os.path.isfile(mask_file_path):
- raise FileNotFoundError("BasicTS can not find mask file {0}".format(mask_file_path))
-
- def __getitem__(self, ts_id: int) -> tuple:
- """Get a sample.
-
- Args:
- ts_id (int): the iteration index, i.e., the time series id (not the self.index).
-
- Returns:
- tuple: future_data, history_data, future_mask, history_mask, where the shape of data is L x C and mask is L.
- """
-
- ts_idxs = list(self.index[ts_id])
- # random select a time series sample
- idx = ts_idxs[random.randint(0, len(ts_idxs)-1)]
-
- history_data = torch.Tensor(self.data[ts_id][idx[0]:idx[1]]).unsqueeze(1).float()
- future_data = torch.Tensor(self.data[ts_id][idx[1]:idx[2]]).unsqueeze(1).float()
- history_mask = torch.Tensor(self.mask[ts_id][idx[0]:idx[1]]).unsqueeze(1).float()
- future_mask = torch.Tensor(self.mask[ts_id][idx[1]:idx[2]]).unsqueeze(1).float()
-
- return future_data, history_data, future_mask, history_mask
-
- def __len__(self):
- """Dataset length (=number of time series)
-
- Returns:
- int: dataset length
- """
-
- return len(self.data)
diff --git a/basicts/data/dataset_zoo/simple_tsf_dataset.py b/basicts/data/dataset_zoo/simple_tsf_dataset.py
deleted file mode 100644
index 2fa38d7a..00000000
--- a/basicts/data/dataset_zoo/simple_tsf_dataset.py
+++ /dev/null
@@ -1,73 +0,0 @@
-import os
-
-import torch
-from torch.utils.data import Dataset
-
-from ...utils import load_pkl
-
-
-class TimeSeriesForecastingDataset(Dataset):
- """Time series forecasting dataset."""
-
- def __init__(self, data_file_path: str, index_file_path: str, mode: str) -> None:
- super().__init__()
- assert mode in ["train", "valid", "test"], "error mode"
- self._check_if_file_exists(data_file_path, index_file_path)
- # read raw data (normalized)
- data = load_pkl(data_file_path)
- processed_data = data["processed_data"]
- self.data = torch.from_numpy(processed_data).float()
- # read index
- self.index = load_pkl(index_file_path)[mode]
-
- def _check_if_file_exists(self, data_file_path: str, index_file_path: str):
- """Check if data file and index file exist.
-
- Args:
- data_file_path (str): data file path
- index_file_path (str): index file path
-
- Raises:
- FileNotFoundError: no data file
- FileNotFoundError: no index file
- """
-
- if not os.path.isfile(data_file_path):
- raise FileNotFoundError("BasicTS can not find data file {0}".format(data_file_path))
- if not os.path.isfile(index_file_path):
- raise FileNotFoundError("BasicTS can not find index file {0}".format(index_file_path))
-
- def __getitem__(self, index: int) -> tuple:
- """Get a sample.
-
- Args:
- index (int): the iteration index (not the self.index)
-
- Returns:
- tuple: (future_data, history_data), where the shape of each is L x N x C.
- """
-
- idx = list(self.index[index])
- if isinstance(idx[0], int):
- # continuous index
- history_data = self.data[idx[0]:idx[1]]
- future_data = self.data[idx[1]:idx[2]]
- else:
- # discontinuous index or custom index
- # NOTE: current time $t$ should not included in the index[0]
- history_index = idx[0] # list
- assert idx[1] not in history_index, "current time t should not included in the idx[0]"
- history_index.append(idx[1])
- history_data = self.data[history_index]
- future_data = self.data[idx[1], idx[2]]
-
- return future_data, history_data
-
- def __len__(self):
- """Dataset length
-
- Returns:
- int: dataset length
- """
-
- return len(self.index)
diff --git a/basicts/data/registry.py b/basicts/data/registry.py
deleted file mode 100644
index 826969df..00000000
--- a/basicts/data/registry.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from easytorch.utils.registry import Registry
-
-SCALER_REGISTRY = Registry("Scaler")
diff --git a/basicts/data/simple_tsf_dataset.py b/basicts/data/simple_tsf_dataset.py
new file mode 100644
index 00000000..231696eb
--- /dev/null
+++ b/basicts/data/simple_tsf_dataset.py
@@ -0,0 +1,124 @@
+import json
+from typing import List
+
+import numpy as np
+
+from .base_dataset import BaseDataset
+
+
+class TimeSeriesForecastingDataset(BaseDataset):
+ """
+ A dataset class for time series forecasting problems, handling the loading, parsing, and partitioning
+ of time series data into training, validation, and testing sets based on provided ratios.
+
+ This class supports configurations where sequences may or may not overlap, accommodating scenarios
+ where time series data is drawn from continuous periods or distinct episodes, affecting how
+ the data is split into batches for model training or evaluation.
+
+ Attributes:
+ data_file_path (str): Path to the file containing the time series data.
+ description_file_path (str): Path to the JSON file containing the description of the dataset.
+ data (np.ndarray): The loaded time series data array, split according to the specified mode.
+ description (dict): Metadata about the dataset, such as shape and other properties.
+ """
+
+ def __init__(self, dataset_name: str, train_val_test_ratio: List[float], mode: str, input_len: int, output_len: int, overlap: bool = True) -> None:
+ """
+ Initializes the TimeSeriesForecastingDataset by setting up paths, loading data, and
+ preparing it according to the specified configurations.
+
+ Args:
+ dataset_name (str): The name of the dataset.
+ train_val_test_ratio (List[float]): Ratios for splitting the dataset into train, validation, and test sets.
+ Each value should be a float between 0 and 1, and together the values should sum to 1.
+ mode (str): The operation mode of the dataset. Valid values are 'train', 'valid', or 'test'.
+ input_len (int): The length of the input sequence (number of historical points).
+ output_len (int): The length of the output sequence (number of future points to predict).
+ overlap (bool): Flag to determine if training/validation/test splits should overlap.
+ Defaults to True. Set to False for strictly non-overlapping periods.
+
+ Raises:
+ AssertionError: If `mode` is not one of ['train', 'valid', 'test'].
+ """
+ assert mode in ['train', 'valid', 'test'], f"Invalid mode: {mode}. Must be one of ['train', 'valid', 'test']."
+ super().__init__(dataset_name, train_val_test_ratio, mode, input_len, output_len, overlap)
+
+ self.data_file_path = f'datasets/{dataset_name}/data.dat'
+ self.description_file_path = f'datasets/{dataset_name}/desc.json'
+ self.description = self._load_description()
+ self.data = self._load_data()
+
+ def _load_description(self) -> dict:
+ """
+ Loads the description of the dataset from a JSON file.
+
+ Returns:
+ dict: A dictionary containing metadata about the dataset, such as its shape and other properties.
+
+ Raises:
+ FileNotFoundError: If the description file is not found.
+ json.JSONDecodeError: If there is an error decoding the JSON data.
+ """
+
+ try:
+ with open(self.description_file_path, 'r') as f:
+ return json.load(f)
+ except FileNotFoundError as e:
+ raise FileNotFoundError(f'Description file not found: {self.description_file_path}') from e
+ except json.JSONDecodeError as e:
+ raise ValueError(f'Error decoding JSON file: {self.description_file_path}') from e
+
+ def _load_data(self) -> np.ndarray:
+ """
+ Loads the time series data from a file and splits it according to the selected mode.
+
+ Returns:
+ np.ndarray: The data array for the specified mode (train, validation, or test).
+
+ Raises:
+ ValueError: If there is an issue with loading the data file or if the data shape is not as expected.
+ """
+
+ try:
+ data = np.memmap(self.data_file_path, dtype='float32', mode='r', shape=tuple(self.description['shape']))
+ except (FileNotFoundError, ValueError) as e:
+ raise ValueError(f'Error loading data file: {self.data_file_path}') from e
+
+ total_len = len(data)
+ train_len = int(total_len * self.train_val_test_ratio[0])
+ valid_len = int(total_len * self.train_val_test_ratio[1])
+
+ if self.mode == 'train':
+ offset = self.output_len if self.overlap else 0
+ return data[:train_len + offset].copy()
+ elif self.mode == 'valid':
+ offset_left = self.input_len - 1 if self.overlap else 0
+ offset_right = self.output_len if self.overlap else 0
+ return data[train_len - offset_left : train_len + valid_len + offset_right].copy()
+ else: # self.mode == 'test'
+ offset = self.input_len - 1 if self.overlap else 0
+ return data[train_len + valid_len - offset:].copy()
+
+ def __getitem__(self, index: int) -> dict:
+ """
+ Retrieves a sample from the dataset at the specified index, considering both the input and output lengths.
+
+ Args:
+ index (int): The index of the desired sample in the dataset.
+
+ Returns:
+ dict: A dictionary containing 'inputs' and 'target', where both are slices of the dataset corresponding to
+ the historical input data and future prediction data, respectively.
+ """
+ history_data = self.data[index:index + self.input_len]
+ future_data = self.data[index + self.input_len:index + self.input_len + self.output_len]
+ return {'inputs': history_data, 'target': future_data}
+
+ def __len__(self) -> int:
+ """
+ Calculates the total number of samples available in the dataset, adjusted for the lengths of input and output sequences.
+
+ Returns:
+ int: The number of valid samples that can be drawn from the dataset, based on the configurations of input and output lengths.
+ """
+ return len(self.data) - self.input_len - self.output_len + 1
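
A usage sketch, with the split arithmetic spelled out on small numbers. It assumes datasets/PEMS04/data.dat and datasets/PEMS04/desc.json already exist in the new binary format:

    from basicts.data import TimeSeriesForecastingDataset

    train_set = TimeSeriesForecastingDataset(
        dataset_name='PEMS04', train_val_test_ratio=[0.6, 0.2, 0.2],
        mode='train', input_len=12, output_len=12)
    sample = train_set[0]  # {'inputs': (12, N, C), 'target': (12, N, C)}

    # _load_data offsets with overlap=True, total_len=100, ratios [0.6, 0.2, 0.2]:
    #   train_len = 60, valid_len = 20
    #   train: data[:60 + 12]        -> rows 0..71 (reaches into valid-period targets)
    #   valid: data[60-11 : 80+12]   -> rows 49..91
    #   test : data[80-11 :]         -> rows 69..99
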
diff --git a/basicts/data/transform.py b/basicts/data/transform.py
deleted file mode 100644
index 1f7d9655..00000000
--- a/basicts/data/transform.py
+++ /dev/null
@@ -1,127 +0,0 @@
-import pickle
-
-import torch
-import numpy as np
-
-from .registry import SCALER_REGISTRY
-
-
-@SCALER_REGISTRY.register()
-def standard_transform(data: np.array, output_dir: str, train_index: list, history_seq_len: int, future_seq_len: int, norm_each_channel: int = False) -> np.array:
- """Standard normalization.
-
- Args:
- data (np.array): raw time series data.
- output_dir (str): output dir path.
- train_index (list): train index.
- history_seq_len (int): historical sequence length.
- future_seq_len (int): future sequence length.
- norm_each_channel (bool): whether to normalization each channel.
-
- Returns:
- np.array: normalized raw time series data.
- """
- if_rescale = not norm_each_channel # if evaluate on rescaled data. see `basicts.runner.base_tsf_runner.BaseTimeSeriesForecastingRunner.build_train_dataset` for details.
- # data: L, N, C, C=1
- data_train = data[:train_index[-1][1], ...]
- if norm_each_channel:
- mean, std = data_train.mean(axis=0, keepdims=True), data_train.std(axis=0, keepdims=True)
- else:
- mean, std = data_train[..., 0].mean(), data_train[..., 0].std()
-
- print("mean (training data):", mean)
- print("std (training data):", std)
- scaler = {}
- scaler["func"] = re_standard_transform.__name__
- scaler["args"] = {"mean": mean, "std": std}
- # label to identify the scaler for different settings.
- with open(output_dir + "/scaler_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(scaler, f)
-
- def normalize(x):
- return (x - mean) / std
-
- data_norm = normalize(data)
- return data_norm
-
-
-@SCALER_REGISTRY.register()
-def re_standard_transform(data: torch.Tensor, **kwargs) -> torch.Tensor:
- """Standard re-transformation.
-
- Args:
- data (torch.Tensor): input data.
-
- Returns:
- torch.Tensor: re-scaled data.
- """
-
- mean, std = kwargs["mean"], kwargs["std"]
- if isinstance(mean, np.ndarray):
- mean = torch.from_numpy(mean).type_as(data).to(data.device).unsqueeze(0)
- std = torch.from_numpy(std).type_as(data).to(data.device).unsqueeze(0)
- data = data * std
- data = data + mean
- return data
-
-
-@SCALER_REGISTRY.register()
-def min_max_transform(data: np.array, output_dir: str, train_index: list, history_seq_len: int, future_seq_len: int) -> np.array:
- """Min-max normalization.
-
- Args:
- data (np.array): raw time series data.
- output_dir (str): output dir path.
- train_index (list): train index.
- history_seq_len (int): historical sequence length.
- future_seq_len (int): future sequence length.
-
- Returns:
- np.array: normalized raw time series data.
- """
-
- # L, N, C, C=1
- data_train = data[:train_index[-1][1], ...]
-
- min_value = data_train.min(axis=(0, 1), keepdims=False)[0]
- max_value = data_train.max(axis=(0, 1), keepdims=False)[0]
-
- print("min: (training data)", min_value)
- print("max: (training data)", max_value)
- scaler = {}
- scaler["func"] = re_min_max_transform.__name__
- scaler["args"] = {"min_value": min_value, "max_value": max_value}
- # label to identify the scaler for different settings.
- # To be fair, only one transformation can be implemented per dataset.
- # TODO: Therefore we (for now) do not distinguish between the data produced by the different transformation methods.
- with open(output_dir + "/scaler_in_{0}_out_{1}.pkl".format(history_seq_len, future_seq_len), "wb") as f:
- pickle.dump(scaler, f)
-
- def normalize(x):
- # ref:
- # https://github.com/guoshnBJTU/ASTGNN/blob/f0f8c2f42f76cc3a03ea26f233de5961c79c9037/lib/utils.py#L17
- x = 1. * (x - min_value) / (max_value - min_value)
- x = 2. * x - 1.
- return x
-
- data_norm = normalize(data)
- return data_norm
-
-
-@SCALER_REGISTRY.register()
-def re_min_max_transform(data: torch.Tensor, **kwargs) -> torch.Tensor:
- """Standard re-min-max transform.
-
- Args:
- data (torch.Tensor): input data.
-
- Returns:
- torch.Tensor: re-scaled data.
- """
-
- min_value, max_value = kwargs["min_value"], kwargs["max_value"]
- # ref:
- # https://github.com/guoshnBJTU/ASTGNN/blob/f0f8c2f42f76cc3a03ea26f233de5961c79c9037/lib/utils.py#L23
- data = (data + 1.) / 2.
- data = 1. * data * (max_value - min_value) + min_value
- return data
diff --git a/basicts/launcher.py b/basicts/launcher.py
index b2149818..7b61237c 100644
--- a/basicts/launcher.py
+++ b/basicts/launcher.py
@@ -1,32 +1,118 @@
-from packaging import version
-from typing import Callable, Dict, Union, Tuple
-
+import os
+from typing import Dict, Union, Optional
+import traceback
import easytorch
+from easytorch.utils import get_logger, set_visible_devices
+from easytorch.config import init_cfg
+from easytorch.device import set_device_type
+
+def evaluation_func(cfg: Dict,
+ ckpt_path: str = None,
+ batch_size: Optional[int] = None,
+ strict: bool = True) -> None:
+ """
+ Starts the evaluation process.
+
+ This function performs the following steps:
+ 1. Initializes the runner specified in the configuration (`cfg`).
+ 2. Sets up logging for the evaluation process.
+ 3. Loads the model checkpoint.
+ 4. Executes the test pipeline using the initialized runner.
+
+ Args:
+ cfg (Dict): EasyTorch configuration dictionary.
+ ckpt_path (str): Path to the model checkpoint. If not provided, the best model checkpoint is loaded automatically.
+ batch_size (Optional[int]): Batch size for evaluation. If not specified,
+ it must be defined in the config. Defaults to None.
+ strict (bool): Enforces that the checkpoint keys match the model. Defaults to True.
+
+ Raises:
+ Exception: Catches any exception, logs the traceback, and re-raises it.
+ """
-def launch_runner(cfg: Union[Dict, str], fn: Callable, args: Tuple = (), device_type: str = "gpu", devices: str = None):
- easytorch_version = easytorch.__version__
- if version.parse(easytorch_version) >= version.parse("1.3"):
- easytorch.launch_runner(cfg=cfg, fn=fn, args=args, device_type=device_type, devices=devices)
- else:
- easytorch.launch_runner(cfg=cfg, fn=fn, args=args, gpus=devices)
+ # initialize the runner
+ logger = get_logger('easytorch-launcher')
+ logger.info(f"Initializing runner '{cfg['RUNNER']}'")
+ runner = cfg['RUNNER'](cfg)
-def launch_training(cfg: Union[Dict, str], gpus: str = None, node_rank: int = 0):
- """Extended easytorch launch_training.
+ # initialize the logger for the runner
+ runner.init_logger(logger_name='easytorch-evaluation', log_file_name='evaluation_log')
+
+ try:
+ # set batch size if provided
+ if batch_size is not None:
+ cfg.TEST.DATA.BATCH_SIZE = batch_size
+ else:
+ assert 'BATCH_SIZE' in cfg.TEST.DATA, 'Batch size must be specified either in the config or as an argument.'
+
+ # load the model checkpoint
+ if ckpt_path is None or not os.path.exists(ckpt_path):
+ ckpt_path_auto = os.path.join(runner.ckpt_save_dir, '{}_best_val_{}.pt'.format(runner.model_name, runner.target_metrics.replace('/', '_')))
+ logger.info(f'Checkpoint file not specified or not found at {ckpt_path}. Loading the best model checkpoint `{ckpt_path_auto}` automatically.')
+ if not os.path.exists(ckpt_path_auto):
+ raise FileNotFoundError(f'Checkpoint file not found at {ckpt_path_auto}')
+ runner.load_model(ckpt_path=ckpt_path_auto, strict=strict)
+ else:
+ logger.info(f'Loading model checkpoint from {ckpt_path}')
+ runner.load_model(ckpt_path=ckpt_path, strict=strict)
+
+ # start the evaluation pipeline
+ runner.test_pipeline(cfg=cfg, save_metrics=True, save_results=True)
+
+ except BaseException as e:
+ # log the exception and re-raise it
+ runner.logger.error(traceback.format_exc())
+ raise e
+
+def launch_evaluation(cfg: Union[Dict, str],
+ ckpt_path: str,
+ device_type: str = 'gpu',
+ gpus: Optional[str] = None,
+ batch_size: Optional[int] = None) -> None:
+ """
+ Launches the evaluation process using EasyTorch.
Args:
- cfg (Union[Dict, str]): Easytorch config.
- gpus (str): set ``CUDA_VISIBLE_DEVICES`` environment variable.
- node_rank (int): Rank of the current node.
+ cfg (Union[Dict, str]): EasyTorch configuration as a dictionary or a path to a config file.
+ ckpt_path (str): Path to the model checkpoint.
+ device_type (str, optional): Device type to use ('cpu', 'gpu', or 'mlu'). Defaults to 'gpu'.
+ gpus (Optional[str]): GPU device IDs to use. Defaults to None (use all available GPUs).
+ batch_size (Optional[int]): Batch size for evaluation. Defaults to None (use value from config).
+
+ Raises:
+ AssertionError: If the batch size is not specified in either the config or as an argument.
+ """
+
+ logger = get_logger('easytorch-launcher')
+ logger.info('Launching EasyTorch evaluation.')
+
+ # initialize the configuration
+ cfg = init_cfg(cfg, save=True)
+
+ # set the device type (CPU, GPU, or MLU)
+ set_device_type(device_type)
+
+ # set the visible GPUs if the device type is not CPU
+ if device_type != 'cpu':
+ set_visible_devices(gpus)
+
+ # run the evaluation process
+ evaluation_func(cfg, ckpt_path, batch_size)
+
+def launch_training(cfg: Union[Dict, str],
+ gpus: Optional[str] = None,
+ node_rank: int = 0) -> None:
+ """
+ Launches the training process using EasyTorch.
+
+ Args:
+ cfg (Union[Dict, str]): EasyTorch configuration as a dictionary or a path to a config file.
+ gpus (Optional[str]): GPU device IDs to use. Defaults to None (use all available GPUs).
+ node_rank (int, optional): Rank of the current node in distributed training. Defaults to 0.
"""
- # pre-processing of some possible future features, such as:
- # registering model, runners.
- # config checking
+ # placeholder for potential pre-processing steps (e.g., model registration, config validation)
pass
- # launch training based on easytorch
- easytorch_version = easytorch.__version__
- if version.parse(easytorch_version) >= version.parse("1.3"):
- easytorch.launch_training(cfg=cfg, devices=gpus, node_rank=node_rank)
- else:
- easytorch.launch_training(cfg=cfg, gpus=gpus, node_rank=node_rank)
+
+ # launch the training process
+ easytorch.launch_training(cfg=cfg, devices=gpus, node_rank=node_rank)
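
The new launchers are thin wrappers around EasyTorch. A minimal usage sketch, assuming both functions are re-exported from the `basicts` package as the training and evaluation scripts suggest; the config and checkpoint paths below are placeholders:

    from basicts import launch_training, launch_evaluation

    # train on GPUs 0 and 1 from a config file
    launch_training(cfg='examples/regular_config.py', gpus='0,1')

    # evaluate a trained model; if ckpt_path does not exist, the best
    # validation checkpoint under ckpt_save_dir is loaded automatically
    launch_evaluation(cfg='examples/regular_config.py',
                      ckpt_path='checkpoints/MLP_best_val_MAE.pt',
                      device_type='gpu', gpus='0', batch_size=64)
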
diff --git a/basicts/losses/__init__.py b/basicts/losses/__init__.py
deleted file mode 100644
index eb9fdea0..00000000
--- a/basicts/losses/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .losses import l1_loss, l2_loss, masked_mae, masked_mape, masked_rmse, masked_mse
-
-__all__ = ["l1_loss", "l2_loss", "masked_mae", "masked_mape", "masked_rmse", "masked_mse"]
diff --git a/basicts/losses/losses.py b/basicts/losses/losses.py
deleted file mode 100644
index 6757bc24..00000000
--- a/basicts/losses/losses.py
+++ /dev/null
@@ -1,118 +0,0 @@
-from typing import Optional
-
-import numpy as np
-import torch
-import torch.nn.functional as F
-
-
-def l1_loss(prediction: torch.Tensor, target: torch._tensor, size_average: Optional[bool] = None, reduce: Optional[bool] = None, reduction: str = "mean") -> torch.Tensor:
- """unmasked mae."""
-
- return F.l1_loss(prediction, target, size_average=size_average, reduce=reduce, reduction=reduction)
-
-
-def l2_loss(prediction: torch.Tensor, target: torch.Tensor, size_average: Optional[bool] = None, reduce: Optional[bool] = None, reduction: str = "mean") -> torch.Tensor:
- """unmasked mse"""
-
- return F.mse_loss(prediction, target, size_average=size_average, reduce=reduce, reduction=reduction)
-
-
-def masked_mae(prediction: torch.Tensor, target: torch.Tensor, null_val: float = np.nan) -> torch.Tensor:
- """Masked mean absolute error.
-
- Args:
- prediction (torch.Tensor): predicted values
- target (torch.Tensor): labels
- null_val (float, optional): null value. Defaults to np.nan.
-
- Returns:
- torch.Tensor: masked mean absolute error
- """
-
- if np.isnan(null_val):
- mask = ~torch.isnan(target)
- else:
- eps = 5e-5
- mask = ~torch.isclose(target, torch.tensor(null_val).expand_as(target).to(target.device), atol=eps, rtol=0.)
- mask = mask.float()
- mask /= torch.mean((mask))
- mask = torch.nan_to_num(mask)
- loss = torch.abs(prediction-target)
- loss = loss * mask
- loss = torch.nan_to_num(loss)
- return torch.mean(loss)
-
-
-def masked_mse(prediction: torch.Tensor, target: torch.Tensor, null_val: float = np.nan) -> torch.Tensor:
- """Masked mean squared error.
-
- Args:
- prediction (torch.Tensor): predicted values
- target (torch.Tensor): labels
- null_val (float, optional): null value. Defaults to np.nan.
-
- Returns:
- torch.Tensor: masked mean squared error
- """
-
- if np.isnan(null_val):
- mask = ~torch.isnan(target)
- else:
- eps = 5e-5
- mask = ~torch.isclose(target, torch.tensor(null_val).expand_as(target).to(target.device), atol=eps, rtol=0.)
- mask = mask.float()
- mask /= torch.mean((mask))
- mask = torch.nan_to_num(mask)
- loss = (prediction-target)**2
- loss = loss * mask
- loss = torch.nan_to_num(loss)
- return torch.mean(loss)
-
-
-def masked_rmse(prediction: torch.Tensor, target: torch.Tensor, null_val: float = np.nan) -> torch.Tensor:
- """root mean squared error.
-
- Args:
- prediction (torch.Tensor): predicted values
- target (torch.Tensor): labels
- null_val (float, optional): null value . Defaults to np.nan.
-
- Returns:
- torch.Tensor: root mean squared error
- """
-
- return torch.sqrt(masked_mse(prediction=prediction, target=target, null_val=null_val))
-
-
-def masked_mape(prediction: torch.Tensor, target: torch.Tensor, null_val: float = 0.0) -> torch.Tensor:
- """Masked mean absolute percentage error.
-
- Args:
- prediction (torch.Tensor): predicted values
- target (torch.Tensor): labels
- null_val (float, optional): null value.
- In the mape metric, null_val is set to 0.0 and by all default.
- We keep this parameter for consistency, but we do not allow it to be changed.
-
- Returns:
- torch.Tensor: masked mean absolute percentage error
- """
- assert null_val == 0.0, "In the mape metric, null_val is set to 0.0 and by all default. \
- This parameter is kept for consistency, but it is not allowed to be changed."
-
- # delete small values to avoid abnormal results
- target = torch.where(torch.abs(target) < 1e-4, torch.zeros_like(target), target)
-
- # nan mask
- nan_mask = ~torch.isnan(target)
- # zero mask
- eps = 5e-5
- zero_mask = ~torch.isclose(target, torch.tensor(null_val).expand_as(target).to(target.device), atol=eps, rtol=0.)
-
- mask = (nan_mask & zero_mask).float()
- mask /= torch.mean((mask))
- mask = torch.nan_to_num(mask)
- loss = torch.abs(torch.abs(prediction-target)/target)
- loss = loss * mask
- loss = torch.nan_to_num(loss)
- return torch.mean(loss)
diff --git a/basicts/metrics/__init__.py b/basicts/metrics/__init__.py
index c3839017..eaa7da28 100644
--- a/basicts/metrics/__init__.py
+++ b/basicts/metrics/__init__.py
@@ -1,2 +1,22 @@
-from ..losses import *
+from .mae import masked_mae
+from .mse import masked_mse
+from .rmse import masked_rmse
+from .mape import masked_mape
from .wape import masked_wape
+
+ALL_METRICS = {
+ 'MAE': masked_mae,
+ 'MSE': masked_mse,
+ 'RMSE': masked_rmse,
+ 'MAPE': masked_mape,
+ 'WAPE': masked_wape
+}
+
+__all__ = [
+ 'masked_mae',
+ 'masked_mse',
+ 'masked_rmse',
+ 'masked_mape',
+ 'masked_wape',
+ 'ALL_METRICS'
+]
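
`ALL_METRICS` lets configs and tooling resolve a metric function from its string key. A small sketch:

    from basicts.metrics import ALL_METRICS, masked_mae

    metric_fn = ALL_METRICS['MAE']
    assert metric_fn is masked_mae  # config keys map directly onto metric functions
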
diff --git a/basicts/metrics/mae.py b/basicts/metrics/mae.py
new file mode 100644
index 00000000..d9163b86
--- /dev/null
+++ b/basicts/metrics/mae.py
@@ -0,0 +1,38 @@
+import torch
+import numpy as np
+
+def masked_mae(prediction: torch.Tensor, target: torch.Tensor, null_val: float = np.nan) -> torch.Tensor:
+ """
+ Calculate the Masked Mean Absolute Error (MAE) between the predicted and target values,
+ while ignoring the entries in the target tensor that match the specified null value.
+
+ This function is particularly useful for scenarios where the dataset contains missing or irrelevant
+ values (denoted by `null_val`) that should not contribute to the loss calculation. It effectively
+ masks these values to ensure they do not skew the error metrics.
+
+ Args:
+ prediction (torch.Tensor): The predicted values as a tensor.
+ target (torch.Tensor): The ground truth values as a tensor with the same shape as `prediction`.
+ null_val (float, optional): The value considered as null or missing in the `target` tensor.
+ Default is `np.nan`. The function will mask all `NaN` values in the target.
+
+ Returns:
+ torch.Tensor: A scalar tensor representing the masked mean absolute error.
+
+ """
+
+ if np.isnan(null_val):
+ mask = ~torch.isnan(target)
+ else:
+ eps = 5e-5
+ mask = ~torch.isclose(target, torch.tensor(null_val).expand_as(target).to(target.device), atol=eps, rtol=0.0)
+
+ mask = mask.float()
+ mask /= torch.mean(mask) # Normalize mask to avoid bias in the loss due to the number of valid entries
+ mask = torch.nan_to_num(mask) # Replace any NaNs in the mask with zero
+
+ loss = torch.abs(prediction - target)
+ loss = loss * mask # Apply the mask to the loss
+ loss = torch.nan_to_num(loss) # Replace any NaNs in the loss with zero
+
+ return torch.mean(loss)
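
To make the mask renormalization concrete, a worked toy example: with one NaN among four targets, `mask / mask.mean()` gives each valid entry weight 4/3, so the mean over all four positions equals the plain MAE over the three valid ones.

    import torch
    import numpy as np
    from basicts.metrics import masked_mae

    prediction = torch.tensor([1.0, 2.0, 3.0, 4.0])
    target = torch.tensor([1.5, 2.0, float('nan'), 4.0])

    # (0.5 + 0.0 + 0.0) / 3 = 0.1667; the NaN entry contributes nothing
    print(masked_mae(prediction, target, null_val=np.nan))  # tensor(0.1667)
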
diff --git a/basicts/metrics/mape.py b/basicts/metrics/mape.py
new file mode 100644
index 00000000..5a9ce6ce
--- /dev/null
+++ b/basicts/metrics/mape.py
@@ -0,0 +1,53 @@
+import torch
+import numpy as np
+
+def masked_mape(prediction: torch.Tensor, target: torch.Tensor, null_val: float = np.nan) -> torch.Tensor:
+ """
+ Calculate the Masked Mean Absolute Percentage Error (MAPE) between predicted and target values,
+ ignoring entries that are either zero or match the specified null value in the target tensor.
+
+ This function is particularly useful for time series or regression tasks where the target values may
+ contain zeros or missing values, which could otherwise distort the error calculation. The function
+ applies a mask to ensure these entries do not affect the resulting MAPE.
+
+ Args:
+ prediction (torch.Tensor): The predicted values as a tensor.
+ target (torch.Tensor): The ground truth values as a tensor with the same shape as `prediction`.
+ null_val (float, optional): The value considered as null or missing in the `target` tensor.
+ Defaults to `np.nan`. The function will mask all `NaN` values in the target.
+
+ Returns:
+ torch.Tensor: A scalar tensor representing the masked mean absolute percentage error.
+
+ Details:
+ - The function creates two masks:
+ 1. `zero_mask`: This mask excludes entries in the `target` tensor that are close to zero,
+ since division by zero or near-zero values would result in extremely large or undefined errors.
+ 2. `null_mask`: This mask excludes entries in the `target` tensor that match the specified `null_val`.
+ If `null_val` is `np.nan`, the mask will exclude `NaN` values using `torch.isnan`.
+
+ - The final mask is the intersection of `zero_mask` and `null_mask`, ensuring that only valid, non-zero,
+ and non-null values contribute to the MAPE calculation.
+ """
+
+ # mask to exclude zero values in the target
+ zero_mask = ~torch.isclose(target, torch.tensor(0.0).to(target.device), atol=5e-5)
+
+ # mask to exclude null values in the target
+ if np.isnan(null_val):
+ null_mask = ~torch.isnan(target)
+ else:
+ eps = 5e-5
+ null_mask = ~torch.isclose(target, torch.tensor(null_val).to(target.device), atol=eps)
+
+ # combine zero and null masks
+ mask = (zero_mask & null_mask).float()
+
+ mask /= torch.mean(mask)
+ mask = torch.nan_to_num(mask)
+
+ loss = torch.abs((prediction - target) / target)
+ loss *= mask
+ loss = torch.nan_to_num(loss)
+
+ return torch.mean(loss)
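
A toy example of the combined mask: both the zero target and the NaN target are excluded, so only the first entry contributes.

    import torch
    from basicts.metrics import masked_mape

    prediction = torch.tensor([110.0, 50.0, 3.0])
    target = torch.tensor([100.0, 0.0, float('nan')])

    # only |110 - 100| / 100 = 0.1 survives the mask
    print(masked_mape(prediction, target))  # tensor(0.1000)
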
diff --git a/basicts/metrics/mse.py b/basicts/metrics/mse.py
new file mode 100644
index 00000000..dde0fdad
--- /dev/null
+++ b/basicts/metrics/mse.py
@@ -0,0 +1,38 @@
+import torch
+import numpy as np
+
+def masked_mse(prediction: torch.Tensor, target: torch.Tensor, null_val: float = np.nan) -> torch.Tensor:
+ """
+ Calculate the Masked Mean Squared Error (MSE) between predicted and target values,
+ while ignoring the entries in the target tensor that match the specified null value.
+
+ This function is useful for scenarios where the dataset contains missing or irrelevant values
+ (denoted by `null_val`) that should not contribute to the loss calculation. The function applies
+ a mask to these values, ensuring they do not affect the error metric.
+
+ Args:
+ prediction (torch.Tensor): The predicted values as a tensor.
+ target (torch.Tensor): The ground truth values as a tensor with the same shape as `prediction`.
+ null_val (float, optional): The value considered as null or missing in the `target` tensor.
+ Defaults to `np.nan`. The function will mask all `NaN` values in the target.
+
+ Returns:
+ torch.Tensor: A scalar tensor representing the masked mean squared error.
+
+ """
+
+ if np.isnan(null_val):
+ mask = ~torch.isnan(target)
+ else:
+ eps = 5e-5
+ mask = ~torch.isclose(target, torch.tensor(null_val).to(target.device), atol=eps)
+
+ mask = mask.float()
+ mask /= torch.mean(mask) # Normalize mask to maintain unbiased MSE calculation
+ mask = torch.nan_to_num(mask) # Replace any NaNs in the mask with zero
+
+ loss = (prediction - target) ** 2 # Compute squared error
+ loss *= mask # Apply mask to the loss
+ loss = torch.nan_to_num(loss) # Replace any NaNs in the loss with zero
+
+ return torch.mean(loss) # Return the mean of the masked loss
diff --git a/basicts/metrics/rmse.py b/basicts/metrics/rmse.py
new file mode 100644
index 00000000..06c62fb3
--- /dev/null
+++ b/basicts/metrics/rmse.py
@@ -0,0 +1,25 @@
+import torch
+import numpy as np
+
+from .mse import masked_mse
+
+def masked_rmse(prediction: torch.Tensor, target: torch.Tensor, null_val: float = np.nan) -> torch.Tensor:
+ """
+ Calculate the Masked Root Mean Squared Error (RMSE) between predicted and target values,
+ ignoring entries in the target tensor that match the specified null value.
+
+ This function is useful for evaluating model performance on datasets where some target values
+ may be missing or irrelevant (denoted by `null_val`). The RMSE provides a measure of the average
+ magnitude of errors, accounting only for the valid, non-null entries.
+
+ Args:
+ prediction (torch.Tensor): The predicted values as a tensor.
+ target (torch.Tensor): The ground truth values as a tensor with the same shape as `prediction`.
+ null_val (float, optional): The value considered as null or missing in the `target` tensor.
+ Defaults to `np.nan`. The function will ignore all `NaN` values in the target.
+
+ Returns:
+ torch.Tensor: A scalar tensor representing the masked root mean squared error.
+ """
+
+ return torch.sqrt(masked_mse(prediction=prediction, target=target, null_val=null_val))
diff --git a/basicts/metrics/wape.py b/basicts/metrics/wape.py
index a423b1bc..ddaf4b8e 100644
--- a/basicts/metrics/wape.py
+++ b/basicts/metrics/wape.py
@@ -1,29 +1,36 @@
import torch
import numpy as np
-
def masked_wape(prediction: torch.Tensor, target: torch.Tensor, null_val: float = np.nan) -> torch.Tensor:
- """Masked weighted absolute percentage error (WAPE)
+ """
+ Calculate the Masked Weighted Absolute Percentage Error (WAPE) between predicted and target values,
+ ignoring entries in the target tensor that match the specified null value.
+
+ WAPE is a useful metric for measuring the average error relative to the magnitude of the target values,
+ making it particularly suitable for comparing errors across datasets or time series with different scales.
Args:
- prediction (torch.Tensor): predicted values
- target (torch.Tensor): labels
- null_val (float, optional): null value. Defaults to np.nan.
+ prediction (torch.Tensor): The predicted values as a tensor.
+ target (torch.Tensor): The ground truth values as a tensor with the same shape as `prediction`.
+ null_val (float, optional): The value considered as null or missing in the `target` tensor.
+ Defaults to `np.nan`. The function will mask all `NaN` values in the target.
Returns:
- torch.Tensor: masked mean absolute error
+ torch.Tensor: A scalar tensor representing the masked weighted absolute percentage error.
"""
if np.isnan(null_val):
mask = ~torch.isnan(target)
else:
eps = 5e-5
- mask = ~torch.isclose(target, torch.tensor(null_val).expand_as(target).to(target.device), atol=eps, rtol=0.)
+ mask = ~torch.isclose(target, torch.tensor(null_val).to(target.device), atol=eps)
+
mask = mask.float()
prediction, target = prediction * mask, target * mask
-
+
prediction = torch.nan_to_num(prediction)
target = torch.nan_to_num(target)
- loss = torch.sum(torch.abs(prediction-target)) / (torch.sum(torch.abs(target)) + 5e-5)
- return torch.mean(loss)
+ loss = torch.sum(torch.abs(prediction - target)) / (torch.sum(torch.abs(target)) + 5e-5)
+
+ return loss
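
A toy example: the NaN target is zeroed out by the mask, so both the numerator and the denominator are computed over the valid entries only.

    import torch
    from basicts.metrics import masked_wape

    prediction = torch.tensor([12.0, 8.0, 100.0])
    target = torch.tensor([10.0, 10.0, float('nan')])

    # (|12 - 10| + |8 - 10|) / (|10| + |10|) = 4 / 20 = 0.2
    print(masked_wape(prediction, target))  # tensor(0.2000)
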
diff --git a/basicts/runners/__init__.py b/basicts/runners/__init__.py
index 66d0c0ef..3badb7b1 100644
--- a/basicts/runners/__init__.py
+++ b/basicts/runners/__init__.py
@@ -2,8 +2,6 @@
from .base_tsf_runner import BaseTimeSeriesForecastingRunner
from .runner_zoo.simple_tsf_runner import SimpleTimeSeriesForecastingRunner
from .runner_zoo.no_bp_runner import NoBPRunner
-from .runner_zoo.m4_tsf_runner import M4ForecastingRunner
-__all__ = ["BaseRunner", "BaseTimeSeriesForecastingRunner",
- "SimpleTimeSeriesForecastingRunner", "NoBPRunner",
- "M4ForecastingRunner"]
+__all__ = ['BaseRunner', 'BaseTimeSeriesForecastingRunner',
+ 'SimpleTimeSeriesForecastingRunner', 'NoBPRunner']
diff --git a/basicts/runners/base_m4_runner.py b/basicts/runners/base_m4_runner.py
deleted file mode 100644
index 1723247e..00000000
--- a/basicts/runners/base_m4_runner.py
+++ /dev/null
@@ -1,335 +0,0 @@
-import math
-import inspect
-import functools
-from typing import Tuple, Union, Dict
-
-import torch
-import numpy as np
-from easydict import EasyDict
-from easytorch.utils.dist import master_only
-
-from .base_runner import BaseRunner
-from ..data import SCALER_REGISTRY
-
-
-class BaseM4Runner(BaseRunner):
- """
- Runner for M4 dataset.
- - There is no validation set.
- - On training end, we inference on the test set and save the prediction results.
- - No metrics (but the loss). Since the evaluation is not done in this runner, thus no metrics are needed.
- """
-
- def __init__(self, cfg: dict):
- super().__init__(cfg)
- self.dataset_name = cfg["DATASET_NAME"]
- assert "M4" in self.dataset_name, "M4Runner only supports M4 dataset."
- # different datasets have different null_values, e.g., 0.0 or np.nan.
- self.null_val = cfg.get("NULL_VAL", np.nan) # consist with metric functions
- self.dataset_type = cfg.get("DATASET_TYPE", " ")
- self.if_rescale = None # no normalization in M4 dataset, so no need to rescale
-
- # setup graph
- self.need_setup_graph = cfg["MODEL"].get("SETUP_GRAPH", False)
-
- # define loss
- self.loss = cfg["TRAIN"]["LOSS"]
- # define metric
- self.metrics = cfg.get("METRICS", {"loss": self.loss})
- # curriculum learning for output. Note that this is different from the CL in Seq2Seq archs.
- self.cl_param = cfg["TRAIN"].get("CL", None)
- if self.cl_param is not None:
- self.warm_up_epochs = cfg["TRAIN"].CL.get("WARM_EPOCHS", 0)
- self.cl_epochs = cfg["TRAIN"].CL.get("CL_EPOCHS")
- self.prediction_length = cfg["TRAIN"].CL.get("PREDICTION_LENGTH")
- self.cl_step_size = cfg["TRAIN"].CL.get("STEP_SIZE", 1)
- # evaluation
- self.if_evaluate_on_gpu = cfg.get("EVAL", EasyDict()).get("USE_GPU", True) # evaluate on gpu or cpu (gpu is faster but may cause OOM)
- self.evaluation_horizons = [_ - 1 for _ in cfg.get("EVAL", EasyDict()).get("HORIZONS", range(1, 13))]
- assert len(self.evaluation_horizons) == 0 or min(self.evaluation_horizons) >= 0, "The horizon should start counting from 1."
- self.save_path = cfg.get("EVAL", EasyDict()).get("SAVE_PATH") # save path for inference results, should not be None
-
- def build_train_dataset(self, cfg: dict):
- """Build train dataset
-
- Args:
- cfg (dict): config
-
- Returns:
- train dataset (Dataset)
- """
- data_file_path = "{0}/data_in_{1}_out_{2}_rescale_{3}.pkl".format(
- cfg["TRAIN"]["DATA"]["DIR"],
- cfg["DATASET_INPUT_LEN"],
- cfg["DATASET_OUTPUT_LEN"],
- cfg.get("RESCALE", None))
- index_file_path = "{0}/index_in_{1}_out_{2}_rescale_{3}.pkl".format(
- cfg["TRAIN"]["DATA"]["DIR"],
- cfg["DATASET_INPUT_LEN"],
- cfg["DATASET_OUTPUT_LEN"],
- cfg.get("RESCALE", None))
- mask_file_path = "{0}/mask_in_{1}_out_{2}_rescale_{3}.pkl".format(
- cfg["TRAIN"]["DATA"]["DIR"],
- cfg["DATASET_INPUT_LEN"],
- cfg["DATASET_OUTPUT_LEN"],
- cfg.get("RESCALE", None))
-
- # build dataset args
- dataset_args = cfg.get("DATASET_ARGS", {})
- # three necessary arguments, data file path, corresponding index file path, and mode (train, valid, or test)
- dataset_args["data_file_path"] = data_file_path
- dataset_args["index_file_path"] = index_file_path
- dataset_args["mask_file_path"] = mask_file_path
- dataset_args["mode"] = "train"
-
- dataset = cfg["DATASET_CLS"](**dataset_args)
- print("train len: {0}".format(len(dataset)))
-
- batch_size = cfg["TRAIN"]["DATA"]["BATCH_SIZE"]
- self.iter_per_epoch = math.ceil(len(dataset) / batch_size)
-
- return dataset
-
- @staticmethod
- def build_test_dataset(cfg: dict):
- """Build val dataset
-
- Args:
- cfg (dict): config
-
- Returns:
- train dataset (Dataset)
- """
- data_file_path = "{0}/data_in_{1}_out_{2}_rescale_{3}.pkl".format(
- cfg["TEST"]["DATA"]["DIR"],
- cfg["DATASET_INPUT_LEN"],
- cfg["DATASET_OUTPUT_LEN"],
- cfg.get("RESCALE", None))
- index_file_path = "{0}/index_in_{1}_out_{2}_rescale_{3}.pkl".format(
- cfg["TEST"]["DATA"]["DIR"],
- cfg["DATASET_INPUT_LEN"],
- cfg["DATASET_OUTPUT_LEN"],
- cfg.get("RESCALE", None))
- mask_file_path = "{0}/mask_in_{1}_out_{2}_rescale_{3}.pkl".format(
- cfg["TEST"]["DATA"]["DIR"],
- cfg["DATASET_INPUT_LEN"],
- cfg["DATASET_OUTPUT_LEN"],
- cfg.get("RESCALE", None))
-
- # build dataset args
- dataset_args = cfg.get("DATASET_ARGS", {})
- # three necessary arguments, data file path, corresponding index file path, and mode (train, valid, or test)
- dataset_args["data_file_path"] = data_file_path
- dataset_args["index_file_path"] = index_file_path
- dataset_args["mask_file_path"] = mask_file_path
- dataset_args["mode"] = "test"
-
- dataset = cfg["DATASET_CLS"](**dataset_args)
-
- return dataset
-
- def forward(self, data: tuple, epoch: int = None, iter_num: int = None, train: bool = True, **kwargs) -> tuple:
- """Feed forward process for train, val, and test. Note that the outputs are NOT re-scaled.
-
- Args:
- data (tuple): data (future data, history ata).
- epoch (int, optional): epoch number. Defaults to None.
- iter_num (int, optional): iteration number. Defaults to None.
- train (bool, optional): if in the training process. Defaults to True.
-
- Returns:
- tuple: (prediction, real_value)
- """
- raise NotImplementedError()
-
- def setup_graph(self, cfg: dict, train: bool):
- """Setup all parameters and the computation graph.
- Implementation of many works (e.g., DCRNN, GTS) acts like TensorFlow, which creates parameters in the first feedforward process.
-
- Args:
- cfg (dict): config
- train (bool): training or inferencing
- """
-
- dataloader = self.build_test_data_loader(cfg=cfg) if not train else self.build_train_data_loader(cfg=cfg)
- data = next(enumerate(dataloader))[1] # get the first batch
- self.forward(data=data, epoch=1, iter_num=0, train=train)
-
- def count_parameters(self):
- """Count the number of parameters in the model."""
-
- num_parameters = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
- self.logger.info("Number of parameters: {0}".format(num_parameters))
-
- def init_training(self, cfg: dict):
- """Initialize training.
-
- Including loss, training meters, etc.
-
- Args:
- cfg (dict): config
- """
-
- # setup graph
- if self.need_setup_graph:
- self.setup_graph(cfg=cfg, train=True)
- self.need_setup_graph = False
- # init training
- super().init_training(cfg)
- # count parameters
- self.count_parameters()
- for key, _ in self.metrics.items():
- self.register_epoch_meter("train_"+key, "train", "{:.6f}")
-
- def init_test(self, cfg: dict):
- """Initialize test.
-
- Including test meters, etc.
-
- Args:
- cfg (dict): config
- """
-
- if self.need_setup_graph:
- self.setup_graph(cfg=cfg, train=False)
- self.need_setup_graph = False
- super().init_test(cfg)
- for key, _ in self.metrics.items():
- self.register_epoch_meter("test_"+key, "test", "{:.6f}")
-
- def curriculum_learning(self, epoch: int = None) -> int:
- """Calculate task level in curriculum learning.
-
- Args:
- epoch (int, optional): current epoch if in training process, else None. Defaults to None.
-
- Returns:
- int: task level
- """
-
- if epoch is None:
- return self.prediction_length
- epoch -= 1
- # generate curriculum length
- if epoch < self.warm_up_epochs:
- # still warm up
- cl_length = self.prediction_length
- else:
- _ = ((epoch - self.warm_up_epochs) // self.cl_epochs + 1) * self.cl_step_size
- cl_length = min(_, self.prediction_length)
- return cl_length
-
- def metric_forward(self, metric_func, args) -> torch.Tensor:
- """Computing metrics.
-
- Args:
- metric_func (function, functools.partial): metric function.
- args (Dict): arguments for metrics computation.
-
- Returns:
- torch.Tensor: metric value.
- """
- covariate_names = inspect.signature(metric_func).parameters.keys()
- args = {k: v for k, v in args.items() if k in covariate_names}
-
- if isinstance(metric_func, functools.partial):
- # support partial function
- # users can define their partial function in the config file
- # e.g., functools.partial(masked_mase, freq="4", null_val=np.nan)
- if "null_val" in covariate_names and "null_val" not in metric_func.keywords: # if null_val is required but not provided
- args["null_val"] = self.null_val
- metric_item = metric_func(**args)
- elif callable(metric_func):
- # is a function
- # filter out keys that are not in function arguments
- metric_item = metric_func(**args, null_val=self.null_val)
- else:
- raise TypeError("Unknown metric type: {0}".format(type(metric_func)))
- return metric_item
-
- def train_iters(self, epoch: int, iter_index: int, data: Union[torch.Tensor, Tuple]) -> torch.Tensor:
- """Training details.
-
- Args:
- data (Union[torch.Tensor, Tuple]): Data provided by DataLoader
- epoch (int): current epoch.
- iter_index (int): current iter.
-
- Returns:
- loss (torch.Tensor)
- """
-
- iter_num = (epoch-1) * self.iter_per_epoch + iter_index
- forward_return = self.forward(data=data, epoch=epoch, iter_num=iter_num, train=True)
- # re-scale data
- forward_return = self.rescale_data(forward_return)
- # loss
- if self.cl_param:
- cl_length = self.curriculum_learning(epoch=epoch)
- forward_return["prediction"] = forward_return["prediction"][:, :cl_length, :, :]
- forward_return["target"] = forward_return["target"][:, :cl_length, :, :]
- loss = self.metric_forward(self.loss, forward_return)
- # metrics
- for metric_name, metric_func in self.metrics.items():
- metric_item = self.metric_forward(metric_func, forward_return)
- self.update_epoch_meter("train_"+metric_name, metric_item.item())
- return loss
-
- def save_prediction(self, returns_all):
- """Evaluate the model on test data.
-
- Args:
- returns_all (Dict): must contain keys: inputs, prediction, target
- """
- prediction = returns_all["prediction"].detach().cpu().numpy()
- loss = self.metric_forward(self.loss, returns_all)
- self.update_epoch_meter("test_loss", loss.item())
- # save prediction as self.save_path/self.dataset_name.npy
- np.save("{0}/{1}.npy".format(self.save_path, self.dataset_name), prediction)
-
- @torch.no_grad()
- @master_only
- def test(self):
- """Evaluate the model.
-
- Args:
- train_epoch (int, optional): current epoch if in training process.
- """
-
- # TODO: fix OOM: especially when inputs, targets, and predictions are saved at the same time.
- # test loop
- prediction =[]
- target = []
- inputs = []
- for _, data in enumerate(self.test_data_loader):
- forward_return = self.forward(data, epoch=None, iter_num=None, train=False)
- if not self.if_evaluate_on_gpu:
- forward_return["prediction"] = forward_return["prediction"].detach().cpu()
- forward_return["target"] = forward_return["target"].detach().cpu()
- forward_return["inputs"] = forward_return["inputs"].detach().cpu()
- prediction.append(forward_return["prediction"])
- target.append(forward_return["target"])
- inputs.append(forward_return["inputs"])
- prediction = torch.cat(prediction, dim=0)
- target = torch.cat(target, dim=0)
- inputs = torch.cat(inputs, dim=0)
- # re-scale data
- returns_all = self.rescale_data({"prediction": prediction, "target": target, "inputs": inputs})
- # evaluate
- self.save_prediction(returns_all)
-
- def rescale_data(self, input_data: Dict) -> Dict:
- """Rescale data.
-
- Args:
- data (Dict): Dict of data to be re-scaled.
-
- Returns:
- Dict: Dict re-scaled data.
- """
-
- if self.if_rescale:
- input_data["prediction"] = SCALER_REGISTRY.get(self.scaler["func"])(input_data["prediction"], **self.scaler["args"])
- input_data["target"] = SCALER_REGISTRY.get(self.scaler["func"])(input_data["target"], **self.scaler["args"])
- input_data["inputs"] = SCALER_REGISTRY.get(self.scaler["func"])(input_data["inputs"], **self.scaler["args"])
- return input_data
diff --git a/basicts/runners/base_runner.py b/basicts/runners/base_runner.py
index f827f9e4..c478e4b7 100644
--- a/basicts/runners/base_runner.py
+++ b/basicts/runners/base_runner.py
@@ -1,10 +1,12 @@
+import os
import time
-from typing import Dict
+from typing import Dict, Optional
-import setproctitle
import torch
+import setproctitle
from torch import nn
from torch.utils.data import DataLoader
+
from easytorch import Runner
from easytorch.utils import master_only
from easytorch.core.data_loader import build_data_loader
@@ -12,130 +14,148 @@
class BaseRunner(Runner):
"""
- An expanded easytorch runner for benchmarking time series models.
- - Support test loader and test process.
+ An extended EasyTorch Runner for benchmarking time series models.
+
+ This class provides support for a test data loader and a test process in addition to the standard
+ training and validation processes.
"""
- def __init__(self, cfg: dict):
- """Init
+ def __init__(self, cfg: Dict) -> None:
+ """
+ Initialize the BaseRunner.
Args:
- cfg (dict): all in one configurations
+ cfg (Dict): Configuration dictionary containing all relevant settings.
"""
super().__init__(cfg)
- # validate every `val_interval` epoch
- self.val_interval = cfg["VAL"].get("INTERVAL", 1) if hasattr(cfg, "VAL") else None
- # test every `test_interval` epoch
- self.test_interval = cfg["TEST"].get("INTERVAL", 1) if hasattr(cfg, "TEST") else None
+ # validate every `val_interval` epochs if configured
+ self.val_interval = cfg.get('VAL', {}).get('INTERVAL', 1)
+ # test every `test_interval` epochs if configured
+ self.test_interval = cfg.get('TEST', {}).get('INTERVAL', 1)
- # declare data loader
+ # declare data loaders
self.train_data_loader = None
self.val_data_loader = None
self.test_data_loader = None
- # fit higher easy-torch version
- if not hasattr(self,"to_running_device"):
+ # ensure compatibility with higher versions of EasyTorch
+ if not hasattr(self, 'to_running_device'):
from easytorch.device import to_device
self.to_running_device = to_device
- # set proctitle
- proctitle_name = "{0}({1})".format(cfg["MODEL"].get(
- "NAME", " "), cfg.get("DATASET_NAME", " "))
- setproctitle.setproctitle("{0}@BasicTS".format(proctitle_name))
+ # set process title
+ proctitle_name = f"{cfg['MODEL'].get('NAME')}({cfg.get('DATASET', {}).get('NAME', 'Unknown Dataset')})"
+ setproctitle.setproctitle(f'{proctitle_name}@BasicTS')
@staticmethod
def define_model(cfg: Dict) -> nn.Module:
- return cfg["MODEL"]["ARCH"](**cfg.MODEL.PARAM)
+ """
+ Define the model architecture based on the configuration.
- def init_training(self, cfg: dict):
- """Initialize training and support test dataloader.
+ Args:
+ cfg (Dict): Configuration dictionary containing model settings.
+
+ Returns:
+ nn.Module: The model architecture.
+ """
+
+ return cfg['MODEL']['ARCH'](**cfg['MODEL']['PARAM'])
+
+ def init_training(self, cfg: Dict) -> None:
+ """
+ Initialize training, including support for the test data loader.
Args:
- cfg (dict): config
+ cfg (Dict): Configuration dictionary.
"""
super().init_training(cfg)
- # init test
- if hasattr(cfg, "TEST"):
+ if hasattr(cfg, 'TEST'):
self.init_test(cfg)
@master_only
- def init_test(self, cfg: dict):
- """Initialize test.
+ def init_test(self, cfg: Dict) -> None:
+ """
+ Initialize the test data loader and related settings.
Args:
- cfg (dict): config
+ cfg (Dict): Configuration dictionary.
"""
- self.test_interval = cfg["TEST"].get("INTERVAL", 1)
+ self.test_interval = cfg['TEST'].get('INTERVAL', 1)
self.test_data_loader = self.build_test_data_loader(cfg)
- self.register_epoch_meter("test_time", "test", "{:.2f} (s)", plt=False)
+ self.register_epoch_meter('test_time', 'test', '{:.2f} (s)', plt=False)
- def build_test_data_loader(self, cfg: dict) -> DataLoader:
- """Build val dataset and dataloader.
- Build dataset by calling ```self.build_train_dataset```,
- build dataloader by calling ```build_data_loader```.
+ def build_test_data_loader(self, cfg: Dict) -> DataLoader:
+ """
+ Build the test data loader.
Args:
- cfg (dict): config
+ cfg (Dict): Configuration dictionary.
Returns:
- val data loader (DataLoader)
+ DataLoader: The test data loader.
"""
dataset = self.build_test_dataset(cfg)
- return build_data_loader(dataset, cfg["TEST"]["DATA"])
+ return build_data_loader(dataset, cfg['TEST']['DATA'])
@staticmethod
- def build_test_dataset(cfg: dict):
- """It can be implemented to a build dataset for test.
+ def build_test_dataset(cfg: Dict):
+ """
+ Build the test dataset.
Args:
- cfg (dict): config
+ cfg (Dict): Configuration dictionary.
Returns:
- val dataset (Dataset)
+ Dataset: The test dataset.
+
+ Raises:
+ NotImplementedError: Must be implemented in a subclass.
"""
- raise NotImplementedError()
+ raise NotImplementedError('build_test_dataset method must be implemented.')
- # support test process
- def on_epoch_end(self, epoch: int):
- """Callback at the end of an epoch.
+ def on_epoch_end(self, epoch: int) -> None:
+ """
+ Callback at the end of each epoch to handle validation and testing.
Args:
- epoch (int): current epoch.
+ epoch (int): The current epoch number.
"""
- # print train meters
- self.print_epoch_meters("train")
- # tensorboard plt meters
- self.plt_epoch_meters("train", epoch)
- # validate
+ # print training meters
+ self.print_epoch_meters('train')
+ # plot training meters to TensorBoard
+ self.plt_epoch_meters('train', epoch)
+ # perform validation if configured
if self.val_data_loader is not None and epoch % self.val_interval == 0:
self.validate(train_epoch=epoch)
- # test
+ # perform testing if configured
if self.test_data_loader is not None and epoch % self.test_interval == 0:
self.test_pipeline(train_epoch=epoch)
- # save model
+ # save the model checkpoint
self.save_model(epoch)
- # reset meters
+ # reset epoch meters
self.reset_epoch_meters()
@torch.no_grad()
@master_only
- def test_pipeline(self, cfg: dict = None, train_epoch: int = None):
- """The whole test process.
+ def test_pipeline(self, cfg: Optional[Dict] = None, train_epoch: Optional[int] = None, save_metrics: bool = False, save_results: bool = False) -> None:
+ """
+ The complete test process.
Args:
- cfg (dict, optional): config
- train_epoch (int, optional): current epoch if in training process.
+ cfg (Dict, optional): Configuration dictionary. Defaults to None.
+ train_epoch (int, optional): Current epoch during training. Defaults to None.
+ save_metrics (bool, optional): Save the test metrics. Defaults to False.
+ save_results (bool, optional): Save the test results. Defaults to False.
"""
- # init test if not in training process
- if train_epoch is None:
+ if train_epoch is None and cfg is not None:
self.init_test(cfg)
self.on_test_start()
@@ -143,38 +163,47 @@ def test_pipeline(self, cfg: dict = None, train_epoch: int = None):
test_start_time = time.time()
self.model.eval()
- # test
- self.test()
+ # execute the test process
+ self.test(train_epoch=train_epoch, save_results=save_results, save_metrics=save_metrics)
test_end_time = time.time()
- self.update_epoch_meter("test_time", test_end_time - test_start_time)
- # print test meters
- self.print_epoch_meters("test")
+ self.update_epoch_meter('test_time', test_end_time - test_start_time)
+
+ self.print_epoch_meters('test')
if train_epoch is not None:
- # tensorboard plt meters
- self.plt_epoch_meters("test", train_epoch // self.test_interval)
+ self.plt_epoch_meters('test', train_epoch // self.test_interval)
+
+ # logging here for intuitiveness
+ if save_results:
+ self.logger.info(f'Test results saved to {os.path.join(self.ckpt_save_dir, "test_results.npz")}.')
+ if save_metrics:
+ self.logger.info(f'Test metrics saved to {os.path.join(self.ckpt_save_dir, "test_metrics.json")}.')
self.on_test_end()
@master_only
- def on_test_start(self):
- """Callback at the start of testing.
- """
+ def on_test_start(self) -> None:
+ """Callback at the start of testing."""
pass
@master_only
- def on_test_end(self):
- """Callback at the end of testing.
- """
+ def on_test_end(self) -> None:
+ """Callback at the end of testing."""
pass
- def test(self, train_epoch: int = None):
- """It can be implemented to define testing details.
+ def test(self, train_epoch: Optional[int] = None, save_metrics: bool = False, save_results: bool = False) -> None:
+ """
+ Define the details of the testing process.
Args:
- train_epoch (int, optional): current epoch if in training process.
+ train_epoch (int, optional): Current epoch during training. Defaults to None.
+ save_metrics (bool, optional): Save the test metrics. Defaults to False.
+ save_results (bool, optional): Save the test results. Defaults to False.
+
+ Raises:
+ NotImplementedError: Must be implemented in a subclass.
"""
- raise NotImplementedError()
+ raise NotImplementedError('test method must be implemented.')
diff --git a/basicts/runners/base_tsf_runner.py b/basicts/runners/base_tsf_runner.py
index d2b4ad93..93d5cceb 100644
--- a/basicts/runners/base_tsf_runner.py
+++ b/basicts/runners/base_tsf_runner.py
@@ -1,267 +1,339 @@
+import os
+import json
import math
+import time
import inspect
import functools
from typing import Tuple, Union, Optional, Dict
import torch
import numpy as np
+from tqdm import tqdm
from easydict import EasyDict
-from easytorch.utils.dist import master_only
+from easytorch.core.checkpoint import save_ckpt
+from easytorch.utils.data_prefetcher import DevicePrefetcher
+from easytorch.utils import TimePredictor, get_local_rank, is_master, master_only
+from torch.nn.parallel import DistributedDataParallel as DDP
from .base_runner import BaseRunner
-from ..data import SCALER_REGISTRY
-from ..utils import load_pkl
from ..metrics import masked_mae, masked_mape, masked_rmse, masked_wape, masked_mse
class BaseTimeSeriesForecastingRunner(BaseRunner):
"""
- Runner for multivariate time series forecasting datasets.
+ Runner for multivariate time series forecasting tasks.
+
Features:
- - Evaluate at pre-defined horizons (1~12 as default) and overall.
- - Metrics: MAE, RMSE, MAPE. Allow customization. The best model is the one with the smallest mae at validation.
- - Support setup_graph for the models acting like tensorflow.
- - Loss: MAE (masked_mae) as default. Allow customization.
- - Support curriculum learning.
+ - Supports evaluation at pre-defined horizons (optional) and overall performance assessment.
+ - Metrics: MAE, RMSE, MAPE, WAPE, and MSE. Customizable. The best model is selected based on the smallest MAE on the validation set.
+ - Supports `setup_graph` for models that operate similarly to TensorFlow.
+ - Default loss function is MAE (masked_mae), but it can be customized.
+ - Supports curriculum learning.
- Users only need to implement the `forward` function.
+
+ Customization:
+ - Model:
+ - Args:
+ - history_data (torch.Tensor): Historical data with shape [B, L, N, C],
+ where B is the batch size, L is the sequence length, N is the number of nodes,
+ and C is the number of features.
+ - future_data (torch.Tensor or None): Future data with shape [B, L, N, C].
+ Can be None if there is no future data available.
+ - batch_seen (int): The number of batches seen so far.
+ - epoch (int): The current epoch number.
+ - train (bool): Indicates whether the model is in training mode.
+ - Return:
+ - Dict or torch.Tensor:
+ - If returning a Dict, it must contain the 'prediction' key. Other keys are optional and will be passed to the loss and metric functions.
+ - If returning a torch.Tensor, it should represent the model's predictions, with shape [B, L, N, C].
+
+ - Loss & Metrics (optional):
+ - Args:
+ - prediction (torch.Tensor): Model's predictions, with shape [B, L, N, C].
+ - target (torch.Tensor): Ground truth data, with shape [B, L, N, C].
+ - null_val (float): The value representing missing data in the dataset.
+ - Other args (optional): Additional arguments will be matched with keys in the model's return dictionary, if applicable.
+ - Return:
+ - torch.Tensor: The computed loss or metric value.
+
+ - Dataset (optional):
+ - Return: The returned data will be passed to the `forward` function as the `data` argument.
"""
def __init__(self, cfg: Dict):
super().__init__(cfg)
- self.dataset_name = cfg["DATASET_NAME"]
- # different datasets have different null_values, e.g., 0.0 or np.nan.
- self.null_val = cfg.get("NULL_VAL", np.nan) # consist with metric functions
- self.dataset_type = cfg.get("DATASET_TYPE", " ")
- self.if_rescale = cfg.get("RESCALE", True) # if rescale data when calculating loss or metrics, default as True
-
- # setup graph
- self.need_setup_graph = cfg["MODEL"].get("SETUP_GRAPH", False)
-
- # read scaler for re-normalization
- self.scaler = load_pkl("{0}/scaler_in_{1}_out_{2}_rescale_{3}.pkl".format(
- cfg["TRAIN"]["DATA"]["DIR"],
- cfg["DATASET_INPUT_LEN"],
- cfg["DATASET_OUTPUT_LEN"],
- cfg.get("RESCALE", True)))
- # define loss
- self.loss = cfg["TRAIN"]["LOSS"]
- # define metric
- self.metrics = cfg.get("METRICS", {"MAE": masked_mae, "RMSE": masked_rmse, "MAPE": masked_mape, "WAPE": masked_wape, "MSE": masked_mse})
- # TODO: use loss as the metric
- self.target_metrics = cfg.get("TARGET_METRICS", "MAE")
- # curriculum learning for output. Note that this is different from the CL in Seq2Seq archs.
- self.cl_param = cfg["TRAIN"].get("CL", None)
+
+ # setup graph flag
+ self.need_setup_graph = cfg['MODEL'].get('SETUP_GRAPH', False)
+
+ # initialize scaler
+ self.scaler = self.build_scaler(cfg)
+
+ # define loss function
+ self.loss = cfg['TRAIN']['LOSS']
+
+ # define metrics
+ self.metrics = cfg.get('METRICS', {}).get('FUNCS', {
+ 'MAE': masked_mae,
+ 'RMSE': masked_rmse,
+ 'MAPE': masked_mape,
+ 'WAPE': masked_wape,
+ 'MSE': masked_mse
+ })
+ self.target_metrics = cfg.get('METRICS', {}).get('TARGET', 'MAE')
+ self.metrics_best = cfg.get('METRICS', {}).get('BEST', 'min')
+ assert self.target_metrics in self.metrics, f'Target metric {self.target_metrics} not found in metrics.'
+ assert self.metrics_best in ['min', 'max'], f'Invalid best metric {self.metrics_best}.'
+ # handle null values in datasets, e.g., 0.0 or np.nan.
+ self.null_val = cfg.get('METRICS', {}).get('NULL_VAL', np.nan)
+
+ # support early stopping
+ # NOTE: If training was stopped early and the same configuration is rerun,
+ # it resumes from the last saved checkpoint, so users can continue
+ # training seamlessly after an interruption.
+ self.early_stopping_patience = cfg.get('TRAIN', {}).get('EARLY_STOPPING_PATIENCE', None)
+ self.current_patience = self.early_stopping_patience
+ assert self.early_stopping_patience is None or self.early_stopping_patience > 0, 'Early stopping patience must be a positive integer.'
+
+ # curriculum learning setup
+ self.cl_param = cfg['TRAIN'].get('CL', None)
if self.cl_param is not None:
- self.warm_up_epochs = cfg["TRAIN"].CL.get("WARM_EPOCHS", 0)
- self.cl_epochs = cfg["TRAIN"].CL.get("CL_EPOCHS")
- self.prediction_length = cfg["TRAIN"].CL.get("PREDICTION_LENGTH")
- self.cl_step_size = cfg["TRAIN"].CL.get("STEP_SIZE", 1)
- # evaluation
- self.if_evaluate_on_gpu = cfg.get("EVAL", EasyDict()).get("USE_GPU", True) # evaluate on gpu or cpu (gpu is faster but may cause OOM)
- self.evaluation_horizons = [_ - 1 for _ in cfg.get("EVAL", EasyDict()).get("HORIZONS", [])]
- assert len(self.evaluation_horizons) == 0 or min(self.evaluation_horizons) >= 0, "The horizon should start counting from 1."
+ self.warm_up_epochs = cfg['TRAIN'].CL.get('WARM_EPOCHS', 0)
+ self.cl_epochs = cfg['TRAIN'].CL.get('CL_EPOCHS')
+ self.prediction_length = cfg['TRAIN'].CL.get('PREDICTION_LENGTH')
+ self.cl_step_size = cfg['TRAIN'].CL.get('STEP_SIZE', 1)
+
+ # evaluation settings
+ self.if_evaluate_on_gpu = cfg.get('EVAL', EasyDict()).get('USE_GPU', True)
+ self.evaluation_horizons = [_ - 1 for _ in cfg.get('EVAL', EasyDict()).get('HORIZONS', [])]
+ assert len(self.evaluation_horizons) == 0 or min(self.evaluation_horizons) >= 0, 'The horizon should start counting from 1.'
+
+ def build_scaler(self, cfg: Dict):
+ """Build scaler.
+
+ Args:
+ cfg (Dict): Configuration.
+
+ Returns:
+ Scaler instance or None if no scaler is declared.
+ """
+
+ if 'SCALER' in cfg:
+ return cfg['SCALER']['TYPE'](**cfg['SCALER']['PARAM'])
+ return None
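
A sketch of the corresponding configuration fragment; `ZScoreScaler` and its parameter names are assumptions for illustration and must match whatever scaler classes the package actually ships:

    from easydict import EasyDict
    from basicts.scaler import ZScoreScaler  # hypothetical import path

    CFG = EasyDict()
    CFG.SCALER = EasyDict()
    CFG.SCALER.TYPE = ZScoreScaler
    CFG.SCALER.PARAM = EasyDict(dataset_name='PEMS08',
                                train_ratio=0.6,
                                norm_each_channel=True)
    # build_scaler(CFG) then returns ZScoreScaler(**CFG.SCALER.PARAM)
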
def setup_graph(self, cfg: Dict, train: bool):
"""Setup all parameters and the computation graph.
- Implementation of many works (e.g., DCRNN, GTS) acts like TensorFlow, which creates parameters in the first feedforward process.
+
+ Some models (e.g., DCRNN, GTS) require creating parameters during the first forward pass, similar to TensorFlow.
Args:
- cfg (Dict): config
- train (bool): training or inferencing
+ cfg (Dict): Configuration.
+ train (bool): Whether the setup is for training or inference.
"""
dataloader = self.build_test_data_loader(cfg=cfg) if not train else self.build_train_data_loader(cfg=cfg)
- data = next(enumerate(dataloader))[1] # get the first batch
+ data = next(iter(dataloader)) # get the first batch
self.forward(data=data, epoch=1, iter_num=0, train=train)
def count_parameters(self):
"""Count the number of parameters in the model."""
num_parameters = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
- self.logger.info("Number of parameters: {0}".format(num_parameters))
+ self.logger.info(f'Number of parameters: {num_parameters}')
def init_training(self, cfg: Dict):
- """Initialize training.
-
- Including loss, training meters, etc.
+ """Initialize training components, including loss, meters, etc.
Args:
- cfg (Dict): config
+ cfg (Dict): Configuration.
"""
- # setup graph
if self.need_setup_graph:
self.setup_graph(cfg=cfg, train=True)
self.need_setup_graph = False
- # init training
+
super().init_training(cfg)
- # count parameters
self.count_parameters()
- for key, _ in self.metrics.items():
- self.register_epoch_meter("train_"+key, "train", "{:.6f}")
- def init_validation(self, cfg: Dict):
- """Initialize validation.
+ for key in self.metrics:
+ self.register_epoch_meter(f'train_{key}', 'train', '{:.4f}')
- Including validation meters, etc.
+ def init_validation(self, cfg: Dict):
+ """Initialize validation components, including meters.
Args:
- cfg (Dict): config
+ cfg (Dict): Configuration.
"""
super().init_validation(cfg)
- for key, _ in self.metrics.items():
- self.register_epoch_meter("val_"+key, "val", "{:.6f}")
+ for key in self.metrics:
+ self.register_epoch_meter(f'val_{key}', 'val', '{:.4f}')
def init_test(self, cfg: Dict):
- """Initialize test.
-
- Including test meters, etc.
+ """Initialize test components, including meters.
Args:
- cfg (Dict): config
+ cfg (Dict): Configuration.
"""
if self.need_setup_graph:
self.setup_graph(cfg=cfg, train=False)
self.need_setup_graph = False
+
super().init_test(cfg)
- for key, _ in self.metrics.items():
- self.register_epoch_meter("test_"+key, "test", "{:.6f}")
+ for key in self.metrics:
+ self.register_epoch_meter(f'test_{key}', 'test', '{:.4f}')
def build_train_dataset(self, cfg: Dict):
- """Build train dataset
-
- There are two types of preprocessing methods in BasicTS,
- 1. Normalize across the WHOLE dataset.
- 2. Normalize on EACH channel (i.e., calculate the mean and std of each channel).
-
- The reason why there are two different preprocessing methods is that each channel of the dataset may have a different value range.
- 1. Normalizing the WHOLE data set will preserve the relative size relationship between channels.
- Larger channels usually produce larger loss values, so more attention will be paid to these channels when optimizing the model.
- Therefore, this approach will achieve better performance when we evaluate on the rescaled dataset.
- For example, when evaluating rescaled data for two channels with values in the range [0, 1], [9000, 10000], the prediction on channel [0,1] is trivial.
- 2. Normalizing each channel will eliminate the gap in value range between channels.
- For example, a channel with a value in the range [0, 1] may be as important as a channel with a value in the range [9000, 10000].
- In this case we need to normalize each channel and evaluate without rescaling.
-
- There is no absolute good or bad distinction between the above two situations,
- and the decision needs to be made based on actual requirements or academic research habits.
- For example, the first approach is often adopted in the field of Spatial-Temporal Forecasting (STF).
- The second approach is often adopted in the field of Long-term Time Series Forecasting (LTSF).
-
- To avoid confusion for users and facilitate them to obtain results comparable to existing studies, we
- automatically select data based on the cfg.get("RESCALE") flag (default to True).
- if_rescale == True: use the data that is normalized across the WHOLE dataset
- if_rescale == False: use the data that is normalized on EACH channel
+ """Build the training dataset.
Args:
- cfg (Dict): config
+ cfg (Dict): Configuration.
Returns:
- train dataset (Dataset)
+ Dataset: The constructed training dataset.
"""
- data_file_path = "{0}/data_in_{1}_out_{2}_rescale_{3}.pkl".format(
- cfg["TRAIN"]["DATA"]["DIR"],
- cfg["DATASET_INPUT_LEN"],
- cfg["DATASET_OUTPUT_LEN"],
- cfg.get("RESCALE", True))
- index_file_path = "{0}/index_in_{1}_out_{2}_rescale_{3}.pkl".format(
- cfg["TRAIN"]["DATA"]["DIR"],
- cfg["DATASET_INPUT_LEN"],
- cfg["DATASET_OUTPUT_LEN"],
- cfg.get("RESCALE", True))
-
- # build dataset args
- dataset_args = cfg.get("DATASET_ARGS", {})
- # three necessary arguments, data file path, corresponding index file path, and mode (train, valid, or test)
- dataset_args["data_file_path"] = data_file_path
- dataset_args["index_file_path"] = index_file_path
- dataset_args["mode"] = "train"
-
- dataset = cfg["DATASET_CLS"](**dataset_args)
- print("train len: {0}".format(len(dataset)))
-
- batch_size = cfg["TRAIN"]["DATA"]["BATCH_SIZE"]
- self.iter_per_epoch = math.ceil(len(dataset) / batch_size)
+
+ if 'DATASET' not in cfg:
+ # TODO: support building different datasets for training, validation, and test. (not tested)
+ dataset = cfg['TRAIN']['DATA']['DATASET']['TYPE'](**cfg['TRAIN']['DATA']['DATASET']['PARAM'])
+ self.logger.info(f'Train dataset length: {len(dataset)}')
+ batch_size = cfg['TRAIN']['DATA']['BATCH_SIZE']
+ self.iter_per_epoch = math.ceil(len(dataset) / batch_size)
+ else:
+ dataset = cfg['DATASET']['TYPE'](mode='train', **cfg['DATASET']['PARAM'])
+ self.logger.info(f'Train dataset length: {len(dataset)}')
+ batch_size = cfg['TRAIN']['DATA']['BATCH_SIZE']
+ self.iter_per_epoch = math.ceil(len(dataset) / batch_size)
return dataset
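
For instance, the unified `DATASET` form might be declared as follows; `TimeSeriesForecastingDataset` and its parameter names are assumptions based on the surrounding code, not verified identifiers:

    from easydict import EasyDict

    CFG = EasyDict()
    CFG.DATASET = EasyDict()
    CFG.DATASET.TYPE = TimeSeriesForecastingDataset  # hypothetical dataset class
    CFG.DATASET.PARAM = EasyDict(dataset_name='PEMS08',
                                 train_val_test_ratio=[0.6, 0.2, 0.2],
                                 input_len=12,
                                 output_len=12)
    # build_train_dataset then calls CFG.DATASET.TYPE(mode='train', **CFG.DATASET.PARAM)
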
@staticmethod
def build_val_dataset(cfg: Dict):
- """Build val dataset
+ """Build the validation dataset.
Args:
- cfg (Dict): config
+ cfg (Dict): Configuration.
Returns:
- validation dataset (Dataset)
+ Dataset: The constructed validation dataset.
"""
- # see build_train_dataset for details
- data_file_path = "{0}/data_in_{1}_out_{2}_rescale_{3}.pkl".format(
- cfg["VAL"]["DATA"]["DIR"],
- cfg["DATASET_INPUT_LEN"],
- cfg["DATASET_OUTPUT_LEN"],
- cfg.get("RESCALE", True))
- index_file_path = "{0}/index_in_{1}_out_{2}_rescale_{3}.pkl".format(
- cfg["VAL"]["DATA"]["DIR"],
- cfg["DATASET_INPUT_LEN"],
- cfg["DATASET_OUTPUT_LEN"],
- cfg.get("RESCALE", True))
-
- # build dataset args
- dataset_args = cfg.get("DATASET_ARGS", {})
- # three necessary arguments, data file path, corresponding index file path, and mode (train, valid, or test)
- dataset_args["data_file_path"] = data_file_path
- dataset_args["index_file_path"] = index_file_path
- dataset_args["mode"] = "valid"
-
- dataset = cfg["DATASET_CLS"](**dataset_args)
- print("val len: {0}".format(len(dataset)))
+
+ if 'DATASET' not in cfg:
+ # TODO: support building different datasets for training, validation, and test. (not tested)
+ dataset = cfg['VAL']['DATA']['DATASET']['TYPE'](**cfg['VAL']['DATA']['DATASET']['PARAM'])
+ print(f'Validation dataset length: {len(dataset)}')
+ else:
+ dataset = cfg['DATASET']['TYPE'](mode='valid', **cfg['DATASET']['PARAM'])
+ print(f'Validation dataset length: {len(dataset)}')
return dataset
@staticmethod
def build_test_dataset(cfg: Dict):
- """Build val dataset
+ """Build the test dataset.
Args:
- cfg (Dict): config
+ cfg (Dict): Configuration.
Returns:
- train dataset (Dataset)
+ Dataset: The constructed test dataset.
"""
- data_file_path = "{0}/data_in_{1}_out_{2}_rescale_{3}.pkl".format(
- cfg["TEST"]["DATA"]["DIR"],
- cfg["DATASET_INPUT_LEN"],
- cfg["DATASET_OUTPUT_LEN"],
- cfg.get("RESCALE", True))
- index_file_path = "{0}/index_in_{1}_out_{2}_rescale_{3}.pkl".format(
- cfg["TEST"]["DATA"]["DIR"],
- cfg["DATASET_INPUT_LEN"],
- cfg["DATASET_OUTPUT_LEN"],
- cfg.get("RESCALE", True))
-
- # build dataset args
- dataset_args = cfg.get("DATASET_ARGS", {})
- # three necessary arguments, data file path, corresponding index file path, and mode (train, valid, or test)
- dataset_args["data_file_path"] = data_file_path
- dataset_args["index_file_path"] = index_file_path
- dataset_args["mode"] = "test"
-
- dataset = cfg["DATASET_CLS"](**dataset_args)
- print("test len: {0}".format(len(dataset)))
+
+ if 'DATASET' not in cfg:
+ # TODO: support building different datasets for training, validation, and test. (not tested)
+ dataset = cfg['TEST']['DATA']['DATASET']['TYPE'](**cfg['TEST']['DATA']['DATASET']['PARAM'])
+ print(f'Test dataset length: {len(dataset)}')
+ else:
+ dataset = cfg['DATASET']['TYPE'](mode='test', **cfg['DATASET']['PARAM'])
+ print(f'Test dataset length: {len(dataset)}')
return dataset
+ def train(self, cfg: Dict):
+ """Train model.
+
+ Train process:
+ [init_training]
+ for each train epoch:
+ [on_epoch_start]
+ for each train iter:
+ [train_iters]
+ [on_epoch_end] ------> validate every `val_interval` epochs:
+ [on_validating_start]
+ for each val iter:
+ val iter
+ [on_validating_end]
+ [on_training_end]
+
+ Args:
+ cfg (Dict): config
+ """
+
+ self.init_training(cfg)
+
+ # train time predictor
+ train_time_predictor = TimePredictor(self.start_epoch, self.num_epochs)
+
+ # training loop
+ epoch_index = 0
+ for epoch_index in range(self.start_epoch, self.num_epochs):
+ # early stopping
+ if self.early_stopping_patience is not None and self.current_patience <= 0:
+ self.logger.info('Early stopping.')
+ break
+
+ epoch = epoch_index + 1
+ self.on_epoch_start(epoch)
+ epoch_start_time = time.time()
+ # start training
+ self.model.train()
+
+ # tqdm process bar
+ if cfg.get('TRAIN.DATA.DEVICE_PREFETCH', False):
+ data_loader = DevicePrefetcher(self.train_data_loader)
+ else:
+ data_loader = self.train_data_loader
+ data_loader = tqdm(data_loader) if get_local_rank() == 0 else data_loader
+
+ # data loop
+ for iter_index, data in enumerate(data_loader):
+ loss = self.train_iters(epoch, iter_index, data)
+ if loss is not None:
+ self.backward(loss)
+ # update lr_scheduler
+ if self.scheduler is not None:
+ self.scheduler.step()
+
+ epoch_end_time = time.time()
+ # epoch time
+ self.update_epoch_meter('train_time', epoch_end_time - epoch_start_time)
+ self.on_epoch_end(epoch)
+
+ expected_end_time = train_time_predictor.get_expected_end_time(epoch)
+
+ # estimate training finish time
+ if epoch < self.num_epochs:
+ self.logger.info('The estimated training finish time is {}'.format(
+ time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(expected_end_time))))
+
+ # log training finish time
+ self.logger.info('The training finished at {}'.format(
+ time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
+ ))
+
+ self.on_training_end(cfg=cfg, train_epoch=epoch_index + 1)
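
For orientation, a minimal sketch of the training options this loop reads, in the EasyDict config style used by BasicTS. `TRAIN.DATA.BATCH_SIZE` and `TRAIN.DATA.DEVICE_PREFETCH` appear in this diff; `NUM_EPOCHS` and `EARLY_STOPPING_PATIENCE` are assumed key names for the values behind `self.num_epochs` and `self.early_stopping_patience`.

```python
# Sketch only, not part of the patch. NUM_EPOCHS and EARLY_STOPPING_PATIENCE
# are assumed key names; BATCH_SIZE and DEVICE_PREFETCH appear in this diff.
from easydict import EasyDict

CFG = EasyDict()
CFG.TRAIN = EasyDict()
CFG.TRAIN.NUM_EPOCHS = 100               # upper bound of the epoch loop
CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10   # None disables early stopping
CFG.TRAIN.DATA = EasyDict()
CFG.TRAIN.DATA.BATCH_SIZE = 64
CFG.TRAIN.DATA.DEVICE_PREFETCH = False   # wrap the loader in DevicePrefetcher if True
```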
+
def curriculum_learning(self, epoch: int = None) -> int:
- """Calculate task level in curriculum learning.
+ """Calculate task level for curriculum learning.
Args:
- epoch (int, optional): current epoch if in training process, else None. Defaults to None.
+ epoch (int, optional): Current epoch if in training process; None otherwise. Defaults to None.
Returns:
- int: task level
+ int: Task level for the current epoch.
"""
if epoch is None:
@@ -269,186 +341,282 @@ def curriculum_learning(self, epoch: int = None) -> int:
epoch -= 1
# generate curriculum length
if epoch < self.warm_up_epochs:
- # still warm up
+ # still in warm-up phase
cl_length = self.prediction_length
else:
- _ = ((epoch - self.warm_up_epochs) // self.cl_epochs + 1) * self.cl_step_size
- cl_length = min(_, self.prediction_length)
+ progress = ((epoch - self.warm_up_epochs) // self.cl_epochs + 1) * self.cl_step_size
+ cl_length = min(progress, self.prediction_length)
return cl_length
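
To make the schedule concrete, a standalone sketch of how `cl_length` grows (parameter values are illustrative; the runner additionally shifts `epoch` by one before this computation):

```python
# Standalone sketch of the curriculum schedule above; parameters are illustrative.
def cl_length(epoch, warm_up_epochs=0, cl_epochs=3, cl_step_size=2, prediction_length=12):
    if epoch < warm_up_epochs:
        return prediction_length  # full horizon during warm-up
    progress = ((epoch - warm_up_epochs) // cl_epochs + 1) * cl_step_size
    return min(progress, prediction_length)

# The supervised horizon widens by cl_step_size every cl_epochs epochs:
print([cl_length(e) for e in range(0, 18, 3)])  # [2, 4, 6, 8, 10, 12]
```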
- def forward(self, data: tuple, epoch: int = None, iter_num: int = None, train: bool = True, **kwargs) -> tuple:
- """Feed forward process for train, val, and test. Note that the outputs are NOT re-scaled.
+ def forward(self, data: Dict, epoch: int = None, iter_num: int = None, train: bool = True, **kwargs) -> Dict:
+ """
+ Performs the forward pass for training, validation, and testing.
+ Note: The outputs are not re-scaled.
Args:
- data (tuple): data (future data, history data). [B, L, N, C] for each of them
- epoch (int, optional): epoch number. Defaults to None.
- iter_num (int, optional): iteration number. Defaults to None.
- train (bool, optional): if in the training process. Defaults to True.
+ data (Dict): A dictionary containing 'target' (future data) and 'inputs' (history data) (normalized by self.scaler).
+ epoch (int, optional): Current epoch number. Defaults to None.
+ iter_num (int, optional): Current iteration number. Defaults to None.
+ train (bool, optional): Indicates whether the forward pass is for training. Defaults to True.
Returns:
- Dict: must contain keys: inputs, prediction, target
+ Dict: A dictionary containing the keys:
+ - 'inputs': Selected input features.
+ - 'prediction': Model predictions.
+ - 'target': Selected target features.
+
+ Raises:
+ AssertionError: If the shape of the model output does not match [B, L, N].
"""
raise NotImplementedError()
- def metric_forward(self, metric_func, args) -> torch.Tensor:
- """Computing metrics.
+ def metric_forward(self, metric_func, args: Dict) -> torch.Tensor:
+ """Compute metrics using the given metric function.
Args:
- metric_func (function, functools.partial): metric function.
- args (Dict): arguments for metrics computation.
+ metric_func (function or functools.partial): Metric function.
+ args (Dict): Arguments for metrics computation.
Returns:
- torch.Tensor: metric value.
+ torch.Tensor: Computed metric value.
"""
+
covariate_names = inspect.signature(metric_func).parameters.keys()
args = {k: v for k, v in args.items() if k in covariate_names}
if isinstance(metric_func, functools.partial):
- # support partial function
- # users can define their partial function in the config file
- # e.g., functools.partial(masked_mase, freq="4", null_val=np.nan)
- if "null_val" in metric_func.keywords: # null_val is provided
- # assert self.null_val is None, "Null_val is provided in metric function. The CFG.NULL_VAL should not be set."
- pass # error when using multiple metrics, some of which require null_val and some do not
- elif "null_val" in covariate_names: # null_val is required but not provided
- args["null_val"] = self.null_val
+ if 'null_val' not in metric_func.keywords and 'null_val' in covariate_names: # null_val is required but not provided
+ args['null_val'] = self.null_val
metric_item = metric_func(**args)
elif callable(metric_func):
- # is a function
- # filter out keys that are not in function arguments
- if "null_val" in covariate_names: # null_val is required
- args["null_val"] = self.null_val
+ if 'null_val' in covariate_names: # null_val is required
+ args['null_val'] = self.null_val
metric_item = metric_func(**args)
else:
- raise TypeError("Unknown metric type: {0}".format(type(metric_func)))
+ raise TypeError(f'Unknown metric type: {type(metric_func)}')
return metric_item
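
A standalone sketch of the argument filtering and `null_val` injection above; `masked_mae` is a stand-in for the functions in `basicts.metrics`:

```python
import functools
import inspect
import torch

def masked_mae(prediction, target, null_val=float('nan')):  # stand-in metric
    mask = ~torch.isnan(target) if null_val != null_val else target != null_val
    return (prediction - target)[mask].abs().mean()

metric = functools.partial(masked_mae, null_val=0.0)  # null_val pinned, e.g. in a config
names = inspect.signature(metric).parameters.keys()   # prediction, target, null_val
forward_return = {'prediction': torch.rand(4, 12, 7),
                  'target': torch.rand(4, 12, 7),
                  'inputs': torch.rand(4, 12, 7)}     # 'inputs' is filtered out below
args = {k: v for k, v in forward_return.items() if k in names}
print(metric(**args))
```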
- def rescale_data(self, input_data: Dict) -> Dict:
- """Rescale data.
+ def preprocessing(self, input_data: Dict) -> Dict:
+ """Preprocess data.
+
+ Args:
+ input_data (Dict): Dictionary containing data to be processed.
+
+ Returns:
+ Dict: Processed data.
+ """
+
+ if self.scaler is not None:
+ input_data['target'] = self.scaler.transform(input_data['target'])
+ input_data['inputs'] = self.scaler.transform(input_data['inputs'])
+ # TODO: add more preprocessing steps as needed.
+ return input_data
+
+ def postprocessing(self, input_data: Dict) -> Dict:
+ """Postprocess data.
Args:
- data (Dict): Dict of data to be re-scaled.
+ input_data (Dict): Dictionary containing data to be processed.
Returns:
- Dict: Dict re-scaled data.
+ Dict: Processed data.
"""
- if self.if_rescale:
- input_data["prediction"] = SCALER_REGISTRY.get(self.scaler["func"])(input_data["prediction"], **self.scaler["args"])
- input_data["target"] = SCALER_REGISTRY.get(self.scaler["func"])(input_data["target"], **self.scaler["args"])
- input_data["inputs"] = SCALER_REGISTRY.get(self.scaler["func"])(input_data["inputs"], **self.scaler["args"])
+ if self.scaler is not None and self.scaler.rescale:
+ input_data['prediction'] = self.scaler.inverse_transform(input_data['prediction'])
+ input_data['target'] = self.scaler.inverse_transform(input_data['target'])
+ input_data['inputs'] = self.scaler.inverse_transform(input_data['inputs'])
+ # TODO: add more postprocessing steps as needed.
return input_data
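
Taken together, `preprocessing` and `postprocessing` bracket the forward pass so that losses and metrics are computed in the original data scale. A self-contained sketch of that flow; `_ToyScaler` is hypothetical and only mimics the `transform`/`inverse_transform`/`rescale` surface introduced in this patch:

```python
import torch

class _ToyScaler:  # hypothetical stand-in for a BaseScaler subclass
    rescale = True
    def __init__(self, mean, std): self.mean, self.std = mean, std
    def transform(self, x): return (x - self.mean) / self.std
    def inverse_transform(self, x): return x * self.std + self.mean

scaler = _ToyScaler(mean=50.0, std=10.0)
batch = {'inputs': torch.full((2, 12, 3), 60.0), 'target': torch.full((2, 12, 3), 40.0)}
batch = {k: scaler.transform(v) for k, v in batch.items()}      # preprocessing
prediction = batch['inputs'][:, -1:, :].repeat(1, 12, 1)        # stand-in for the model
out = {'prediction': prediction, **batch}
out = {k: scaler.inverse_transform(v) for k, v in out.items()}  # postprocessing
print(out['prediction'][0, 0, 0].item())                        # 60.0, original scale
```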
def train_iters(self, epoch: int, iter_index: int, data: Union[torch.Tensor, Tuple]) -> torch.Tensor:
- """Training details.
+ """Training iteration process.
Args:
- data (Union[torch.Tensor, Tuple]): Data provided by DataLoader
- epoch (int): current epoch.
- iter_index (int): current iter.
+ epoch (int): Current epoch.
+ iter_index (int): Current iteration index.
+ data (Union[torch.Tensor, Tuple]): Data provided by DataLoader.
Returns:
- loss (torch.Tensor)
+ torch.Tensor: Loss value.
"""
- iter_num = (epoch-1) * self.iter_per_epoch + iter_index
+ iter_num = (epoch - 1) * self.iter_per_epoch + iter_index
+ data = self.preprocessing(data)
forward_return = self.forward(data=data, epoch=epoch, iter_num=iter_num, train=True)
- # re-scale data
- forward_return = self.rescale_data(forward_return)
- # loss
+ forward_return = self.postprocessing(forward_return)
+
if self.cl_param:
cl_length = self.curriculum_learning(epoch=epoch)
- forward_return["prediction"] = forward_return["prediction"][:, :cl_length, :, :]
- forward_return["target"] = forward_return["target"][:, :cl_length, :, :]
+ forward_return['prediction'] = forward_return['prediction'][:, :cl_length, :, :]
+ forward_return['target'] = forward_return['target'][:, :cl_length, :, :]
loss = self.metric_forward(self.loss, forward_return)
- # metrics
+
for metric_name, metric_func in self.metrics.items():
metric_item = self.metric_forward(metric_func, forward_return)
- self.update_epoch_meter("train_"+metric_name, metric_item.item())
+ self.update_epoch_meter(f'train_{metric_name}', metric_item.item())
return loss
def val_iters(self, iter_index: int, data: Union[torch.Tensor, Tuple]):
- """Validation details.
+ """Validation iteration process.
Args:
- iter_index (int): current iter.
- data (Union[torch.Tensor, Tuple]): Data provided by DataLoader
+ iter_index (int): Current iteration index.
+ data (Union[torch.Tensor, Tuple]): Data provided by DataLoader.
"""
+ data = self.preprocessing(data)
forward_return = self.forward(data=data, epoch=None, iter_num=iter_index, train=False)
- # re-scale data
- forward_return = self.rescale_data(forward_return)
- # metrics
+ forward_return = self.postprocessing(forward_return)
+
for metric_name, metric_func in self.metrics.items():
metric_item = self.metric_forward(metric_func, forward_return)
- self.update_epoch_meter("val_"+metric_name, metric_item.item())
+ self.update_epoch_meter(f'val_{metric_name}', metric_item.item())
- def evaluate(self, returns_all):
- """Evaluate the model on test data.
+ def compute_evaluation_metrics(self, returns_all: Dict):
+ """Compute metrics for evaluating model performance during the test process.
Args:
- returns_all (Dict): must contain keys: inputs, prediction, target
+ returns_all (Dict): Must contain keys: inputs, prediction, target.
+
+ Returns:
+ Dict: Evaluation metrics for each horizon and overall.
"""
- # test performance of different horizon
+ metrics_results = {}
for i in self.evaluation_horizons:
- # For horizon i, only calculate the metrics **at that time** slice here.
- pred = returns_all["prediction"][:, i, :, :]
- real = returns_all["target"][:, i, :, :]
- # metrics
- metric_repr = ""
+ pred = returns_all['prediction'][:, i, :, :]
+ real = returns_all['target'][:, i, :, :]
+
+ metrics_results[f'horizon_{i + 1}'] = {}
+ metric_repr = ''
for metric_name, metric_func in self.metrics.items():
- if metric_name.lower() == "mase": continue # MASE needs to be calculated after all horizons
- metric_item = self.metric_forward(metric_func, {"prediction": pred, "target": real})
- metric_repr += ", Test {0}: {1:.6f}".format(metric_name, metric_item.item())
- log = "Evaluate best model on test data for horizon {:d}" + metric_repr
- log = log.format(i+1)
- self.logger.info(log)
- # test performance overall
+ if metric_name.lower() == 'mase':
+ continue # MASE needs to be calculated after all horizons
+ metric_item = self.metric_forward(metric_func, {'prediction': pred, 'target': real})
+ metric_repr += f', Test {metric_name}: {metric_item.item():.4f}'
+ metrics_results[f'horizon_{i + 1}'][metric_name] = metric_item.item()
+ self.logger.info(f'Evaluate best model on test data for horizon {i + 1}{metric_repr}')
+
+ metrics_results['overall'] = {}
for metric_name, metric_func in self.metrics.items():
metric_item = self.metric_forward(metric_func, returns_all)
- self.update_epoch_meter("test_"+metric_name, metric_item.item())
+ self.update_epoch_meter(f'test_{metric_name}', metric_item.item())
+ metrics_results['overall'][metric_name] = metric_item.item()
+
+ return metrics_results
@torch.no_grad()
@master_only
- def test(self):
- """Evaluate the model.
-
+ def test(self, train_epoch: Optional[int] = None, save_metrics: bool = False, save_results: bool = False) -> Dict:
+ """Test process.
+
Args:
- train_epoch (int, optional): current epoch if in training process.
+ train_epoch (Optional[int]): Current epoch if in training process.
+ save_metrics (bool): Save the test metrics. Defaults to False.
+ save_results (bool): Save the test results. Defaults to False.
+
+ Returns:
+ Dict: Dictionary containing 'inputs', 'prediction', and 'target' for the full test set.
"""
- # TODO: fix OOM: especially when inputs, targets, and predictions are saved at the same time.
- # test loop
- prediction =[]
- target = []
- inputs = []
- for _, data in enumerate(self.test_data_loader):
+ prediction, target, inputs = [], [], []
+
+ for data in tqdm(self.test_data_loader):
+ data = self.preprocessing(data)
forward_return = self.forward(data, epoch=None, iter_num=None, train=False)
+ forward_return = self.postprocessing(forward_return)
+
if not self.if_evaluate_on_gpu:
- forward_return["prediction"] = forward_return["prediction"].detach().cpu()
- forward_return["target"] = forward_return["target"].detach().cpu()
- forward_return["inputs"] = forward_return["inputs"].detach().cpu()
- prediction.append(forward_return["prediction"])
- target.append(forward_return["target"])
- inputs.append(forward_return["inputs"])
+ forward_return['prediction'] = forward_return['prediction'].detach().cpu()
+ forward_return['target'] = forward_return['target'].detach().cpu()
+ forward_return['inputs'] = forward_return['inputs'].detach().cpu()
+
+ prediction.append(forward_return['prediction'])
+ target.append(forward_return['target'])
+ inputs.append(forward_return['inputs'])
+
prediction = torch.cat(prediction, dim=0)
target = torch.cat(target, dim=0)
inputs = torch.cat(inputs, dim=0)
- # re-scale data
- returns_all = self.rescale_data({"prediction": prediction, "target": target, "inputs": inputs})
- # evaluate
- self.evaluate(returns_all)
+
+ returns_all = {'prediction': prediction, 'target': target, 'inputs': inputs}
+ metrics_results = self.compute_evaluation_metrics(returns_all)
+
+ # save
+ if save_results:
+ # save returns_all to self.ckpt_save_dir/test_results.npz
+ test_results = {k: v.cpu().numpy() for k, v in returns_all.items()}
+ np.savez(os.path.join(self.ckpt_save_dir, 'test_results.npz'), **test_results)
+
+ if save_metrics:
+ # save metrics_results to self.ckpt_save_dir/test_metrics.json
+ with open(os.path.join(self.ckpt_save_dir, 'test_metrics.json'), 'w') as f:
+ json.dump(metrics_results, f, indent=4)
+
return returns_all
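
For reference, a sketch of reading back the artifacts written when `save_results`/`save_metrics` are enabled; the directory and the `horizon_12` key are illustrative:

```python
import json
import os
import numpy as np

ckpt_save_dir = 'checkpoints/MyModel/MyDataset'  # illustrative path
results = np.load(os.path.join(ckpt_save_dir, 'test_results.npz'))
print(results['prediction'].shape, results['target'].shape, results['inputs'].shape)

with open(os.path.join(ckpt_save_dir, 'test_metrics.json')) as f:
    metrics = json.load(f)
print(metrics['overall'])     # aggregate metrics over all horizons
print(metrics['horizon_12'])  # per-horizon metrics (if horizon 12 was evaluated)
```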
@master_only
def on_validating_end(self, train_epoch: Optional[int]):
- """Callback at the end of validating.
+ """Callback at the end of the validation process.
Args:
- train_epoch (Optional[int]): current epoch if in training process.
+ train_epoch (Optional[int]): Current epoch if in training process.
"""
-
+ greater_best = self.metrics_best != 'min'
if train_epoch is not None:
- self.save_best_model(train_epoch, "val_" + self.target_metrics, greater_best=False)
+ self.save_best_model(train_epoch, 'val_' + self.target_metrics, greater_best=greater_best)
+
+ @master_only
+ def save_best_model(self, epoch: int, metric_name: str, greater_best: bool = True):
+ """Save the best model while training.
+
+ Examples:
+ >>> def on_validating_end(self, train_epoch: Optional[int]):
+ >>> if train_epoch is not None:
+ >>> self.save_best_model(train_epoch, 'val/loss', greater_best=False)
+
+ Args:
+ epoch (int): current epoch.
+ metric_name (str): metric name used to measure the model, must be registered in `epoch_meter`.
+ greater_best (bool, optional): `True` means a greater value is better (e.g., `acc`);
+ `False` means a lower value is better (e.g., `loss`). Defaults to True.
+ """
+
+ metric = self.meter_pool.get_avg(metric_name)
+ best_metric = self.best_metrics.get(metric_name)
+ if best_metric is None or (metric > best_metric if greater_best else metric < best_metric):
+ self.best_metrics[metric_name] = metric
+ model = self.model.module if isinstance(self.model, DDP) else self.model
+ ckpt_dict = {
+ 'epoch': epoch,
+ 'model_state_dict': model.state_dict(),
+ 'optim_state_dict': self.optim.state_dict(),
+ 'best_metrics': self.best_metrics
+ }
+ ckpt_path = os.path.join(
+ self.ckpt_save_dir,
+ '{}_best_{}.pt'.format(self.model_name, metric_name.replace('/', '_'))
+ )
+ save_ckpt(ckpt_dict, ckpt_path, self.logger)
+ self.current_patience = self.early_stopping_patience # reset patience
+ else:
+ if self.early_stopping_patience is not None:
+ self.current_patience -= 1
+
+ def on_training_end(self, cfg: Dict, train_epoch: Optional[int] = None):
+ """Callback at the end of the training process.
+
+ Args:
+ cfg (Dict): Configuration.
+ train_epoch (Optional[int]): End epoch if in training process.
+ """
+
+ if is_master():
+ # close tensorboard writer
+ self.tensorboard_writer.close()
+
+ if hasattr(cfg, 'TEST'):
+ # evaluate the best model on the test set
+ best_model_path = os.path.join(
+ self.ckpt_save_dir,
+ '{}_best_val_{}.pt'.format(self.model_name, self.target_metrics.replace('/', '_'))
+ )
+ self.logger.info('Evaluating the best model on the test set.')
+ self.load_model(ckpt_path=best_model_path, strict=True)
+ self.test_pipeline(cfg=cfg, train_epoch=train_epoch, save_metrics=True, save_results=True)
diff --git a/basicts/runners/runner_zoo/m4_tsf_runner.py b/basicts/runners/runner_zoo/m4_tsf_runner.py
deleted file mode 100644
index 5bc07e31..00000000
--- a/basicts/runners/runner_zoo/m4_tsf_runner.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import torch
-
-from ..base_m4_runner import BaseM4Runner
-
-
-class M4ForecastingRunner(BaseM4Runner):
- """Simple Runner: select forward features and target features. This runner can cover most cases."""
-
- def __init__(self, cfg: dict):
- super().__init__(cfg)
- self.forward_features = cfg["MODEL"].get("FORWARD_FEATURES", None)
- self.target_features = cfg["MODEL"].get("TARGET_FEATURES", None)
-
- def select_input_features(self, data: torch.Tensor) -> torch.Tensor:
- """Select input features.
-
- Args:
- data (torch.Tensor): input history data, shape [B, L, N, C]
-
- Returns:
- torch.Tensor: reshaped data
- """
-
- # select feature using self.forward_features
- if self.forward_features is not None:
- data = data[:, :, :, self.forward_features]
- return data
-
- def select_target_features(self, data: torch.Tensor) -> torch.Tensor:
- """Select target feature.
-
- Args:
- data (torch.Tensor): prediction of the model with arbitrary shape.
-
- Returns:
- torch.Tensor: reshaped data with shape [B, L, N, C]
- """
-
- # select feature using self.target_features
- data = data[:, :, :, self.target_features]
- return data
-
- def forward(self, data: tuple, epoch: int = None, iter_num: int = None, train: bool = True, **kwargs) -> tuple:
- """Feed forward process for train, val, and test. Note that the outputs are NOT re-scaled.
-
- Args:
- data (tuple): (future_data, history_data, future_mask, history_mask).
- epoch (int, optional): epoch number. Defaults to None.
- iter_num (int, optional): iteration number. Defaults to None.
- train (bool, optional): if in the training process. Defaults to True.
-
- Returns:
- tuple: (prediction, real_value)
- """
-
- # preprocess
- future_data, history_data, future_mask, history_mask = data
- history_data = self.to_running_device(history_data) # B, L, 1, C
- future_data = self.to_running_device(future_data) # B, L, 1, C
- history_mask = self.to_running_device(history_mask) # B, L, 1
- future_mask = self.to_running_device(future_mask) # B, L, 1
-
- batch_size, length, num_nodes, _ = future_data.shape
-
- history_data = self.select_input_features(history_data)
- if train:
- future_data_4_dec = self.select_input_features(future_data)
- else:
- future_data_4_dec = self.select_input_features(future_data)
- # only use the temporal features
- future_data_4_dec[..., 0] = torch.empty_like(future_data_4_dec[..., 0])
-
- # model forward
- model_return = self.model(history_data=history_data, future_data=future_data_4_dec, history_mask=history_mask, future_mask=future_mask, batch_seen=iter_num, epoch=epoch, train=train)
- if isinstance(model_return, torch.Tensor): model_return = {"prediction": model_return * future_mask.unsqueeze(-1)}
- if "inputs" not in model_return: model_return["inputs"] = self.select_target_features(history_data)
- if "target" not in model_return: model_return["target"] = self.select_target_features(future_data * future_mask.unsqueeze(-1))
-
- return model_return
diff --git a/basicts/runners/runner_zoo/simple_tsf_runner.py b/basicts/runners/runner_zoo/simple_tsf_runner.py
index 4965ceaa..7281e150 100644
--- a/basicts/runners/runner_zoo/simple_tsf_runner.py
+++ b/basicts/runners/runner_zoo/simple_tsf_runner.py
@@ -1,79 +1,100 @@
-import torch
+from typing import Dict
+import torch
from ..base_tsf_runner import BaseTimeSeriesForecastingRunner
-
class SimpleTimeSeriesForecastingRunner(BaseTimeSeriesForecastingRunner):
- """Simple Runner: select forward features and target features. This runner can cover most cases."""
+ """
+ A Simple Runner for Time Series Forecasting:
+ Selects forward and target features. This runner is designed to handle most cases.
+
+ Args:
+ cfg (Dict): Configuration dictionary.
+ """
+
+ def __init__(self, cfg: Dict):
- def __init__(self, cfg: dict):
super().__init__(cfg)
- self.forward_features = cfg["MODEL"].get("FORWARD_FEATURES", None)
- self.target_features = cfg["MODEL"].get("TARGET_FEATURES", None)
+ self.forward_features = cfg['MODEL'].get('FORWARD_FEATURES', None)
+ self.target_features = cfg['MODEL'].get('TARGET_FEATURES', None)
def select_input_features(self, data: torch.Tensor) -> torch.Tensor:
- """Select input features.
+ """
+ Selects input features based on the forward features specified in the configuration.
Args:
- data (torch.Tensor): input history data, shape [B, L, N, C]
+ data (torch.Tensor): Input history data with shape [B, L, N, C].
Returns:
- torch.Tensor: reshaped data
+ torch.Tensor: Data with selected features.
"""
- # select feature using self.forward_features
if self.forward_features is not None:
data = data[:, :, :, self.forward_features]
return data
def select_target_features(self, data: torch.Tensor) -> torch.Tensor:
- """Select target feature.
+ """
+ Selects target features based on the target features specified in the configuration.
Args:
- data (torch.Tensor): prediction of the model with arbitrary shape.
+ data (torch.Tensor): Model prediction data with arbitrary shape.
Returns:
- torch.Tensor: reshaped data with shape [B, L, N, C]
+ torch.Tensor: Data with selected target features and shape [B, L, N, C].
"""
- # select feature using self.target_features
data = data[:, :, :, self.target_features]
return data
- def forward(self, data: tuple, epoch: int = None, iter_num: int = None, train: bool = True, **kwargs) -> tuple:
- """Feed forward process for train, val, and test. Note that the outputs are NOT re-scaled.
+ def forward(self, data: Dict, epoch: int = None, iter_num: int = None, train: bool = True, **kwargs) -> Dict:
+ """
+ Performs the forward pass for training, validation, and testing.
Args:
- data (tuple): data (future data, history ata).
- epoch (int, optional): epoch number. Defaults to None.
- iter_num (int, optional): iteration number. Defaults to None.
- train (bool, optional): if in the training process. Defaults to True.
+ data (Dict): A dictionary containing 'target' (future data) and 'inputs' (history data) (normalized by self.scaler).
+ epoch (int, optional): Current epoch number. Defaults to None.
+ iter_num (int, optional): Current iteration number. Defaults to None.
+ train (bool, optional): Indicates whether the forward pass is for training. Defaults to True.
Returns:
- dict: keys that must be included: inputs, prediction, target
+ Dict: A dictionary containing the keys:
+ - 'inputs': Selected input features.
+ - 'prediction': Model predictions.
+ - 'target': Selected target features.
+
+ Raises:
+ AssertionError: If the shape of the model output does not match [B, L, N].
"""
- # preprocess
- future_data, history_data = data
- history_data = self.to_running_device(history_data) # B, L, N, C
- future_data = self.to_running_device(future_data) # B, L, N, C
+ # Preprocess input data
+ future_data, history_data = data['target'], data['inputs']
+ history_data = self.to_running_device(history_data) # Shape: [B, L, N, C]
+ future_data = self.to_running_device(future_data) # Shape: [B, L, N, C]
batch_size, length, num_nodes, _ = future_data.shape
+ # Select input features
history_data = self.select_input_features(history_data)
- if train:
- future_data_4_dec = self.select_input_features(future_data)
- else:
- future_data_4_dec = self.select_input_features(future_data)
- # only use the temporal features
+ future_data_4_dec = self.select_input_features(future_data)
+
+ if not train:
+ # For non-training phases, use only temporal features
future_data_4_dec[..., 0] = torch.empty_like(future_data_4_dec[..., 0])
- # model forward
- model_return = self.model(history_data=history_data, future_data=future_data_4_dec, batch_seen=iter_num, epoch=epoch, train=train)
+ # Forward pass through the model
+ model_return = self.model(history_data=history_data, future_data=future_data_4_dec,
+ batch_seen=iter_num, epoch=epoch, train=train)
+
+ # Parse model return
+ if isinstance(model_return, torch.Tensor):
+ model_return = {'prediction': model_return}
+ if 'inputs' not in model_return:
+ model_return['inputs'] = self.select_target_features(history_data)
+ if 'target' not in model_return:
+ model_return['target'] = self.select_target_features(future_data)
+
+ # Ensure the output shape is correct
+ assert list(model_return['prediction'].shape)[:3] == [batch_size, length, num_nodes], \
+ "The shape of the output is incorrect. Ensure it matches [B, L, N, C]."
- # parse model return
- if isinstance(model_return, torch.Tensor): model_return = {"prediction": model_return}
- if "inputs" not in model_return: model_return["inputs"] = self.select_target_features(history_data)
- if "target" not in model_return: model_return["target"] = self.select_target_features(future_data)
- assert list(model_return["prediction"].shape)[:3] == [batch_size, length, num_nodes], \
- "error shape of the output, edit the forward function to reshape it to [B, L, N, C]"
return model_return
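
A sketch of the configuration this runner consumes for feature selection, in the EasyDict style used by BasicTS configs; the channel layout is illustrative:

```python
from easydict import EasyDict

CFG = EasyDict()
CFG.MODEL = EasyDict()
# With channels [value, time-of-day, day-of-week], feed all three to the
# model but supervise only the value channel:
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]  # selected in select_input_features
CFG.MODEL.TARGET_FEATURES = [0]         # selected in select_target_features
```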
diff --git a/basicts/scaler/__init__.py b/basicts/scaler/__init__.py
new file mode 100644
index 00000000..fd8b961a
--- /dev/null
+++ b/basicts/scaler/__init__.py
@@ -0,0 +1,9 @@
+from .base_scaler import BaseScaler
+from .z_score_scaler import ZScoreScaler
+from .min_max_scaler import MinMaxScaler
+
+__all__ = [
+ 'BaseScaler',
+ 'ZScoreScaler',
+ 'MinMaxScaler'
+]
diff --git a/basicts/scaler/base_scaler.py b/basicts/scaler/base_scaler.py
new file mode 100644
index 00000000..9ea9ccfc
--- /dev/null
+++ b/basicts/scaler/base_scaler.py
@@ -0,0 +1,47 @@
+from dataclasses import dataclass
+
+import torch
+
+
+@dataclass
+class BaseScaler:
+ """
+ BaseScaler is an abstract class for data scaling and normalization methods.
+
+ Attributes:
+ dataset_name (str): The name of the dataset, used to load the data.
+ train_ratio (float): Ratio of the data to be used for training, for fitting the scaler.
+ norm_each_channel (bool): Flag indicating whether to normalize each channel separately.
+ rescale (bool): Flag indicating whether to apply rescaling.
+ """
+
+ dataset_name: str
+ train_ratio: float
+ norm_each_channel: bool
+ rescale: bool
+
+ def transform(self, input_data: torch.Tensor) -> torch.Tensor:
+ """
+ Apply the scaling transformation to the input data.
+
+ Args:
+ input_data (torch.Tensor): Input data to be transformed.
+
+ Returns:
+ torch.Tensor: Scaled data.
+ """
+
+ raise NotImplementedError("Subclasses should implement this method.")
+
+ def inverse_transform(self, input_data: torch.Tensor) -> torch.Tensor:
+ """
+ Apply the inverse scaling transformation to the input data.
+
+ Args:
+ input_data (torch.Tensor): Input data to be transformed back.
+
+ Returns:
+ torch.Tensor: Original scale data.
+ """
+
+ raise NotImplementedError("Subclasses should implement this method.")
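
A minimal sketch of subclassing `BaseScaler`; `IdentityScaler` is hypothetical and only illustrates the required surface:

```python
import torch
from basicts.scaler import BaseScaler

class IdentityScaler(BaseScaler):  # hypothetical no-op scaler
    def transform(self, input_data: torch.Tensor) -> torch.Tensor:
        return input_data
    def inverse_transform(self, input_data: torch.Tensor) -> torch.Tensor:
        return input_data

# BaseScaler is a dataclass, so its fields double as the constructor:
scaler = IdentityScaler(dataset_name='PEMS04', train_ratio=0.6,
                        norm_each_channel=True, rescale=False)
```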
diff --git a/basicts/scaler/min_max_scaler.py b/basicts/scaler/min_max_scaler.py
new file mode 100644
index 00000000..b060ce08
--- /dev/null
+++ b/basicts/scaler/min_max_scaler.py
@@ -0,0 +1,94 @@
+import json
+
+import torch
+import numpy as np
+
+from .base_scaler import BaseScaler
+
+
+class MinMaxScaler(BaseScaler):
+ """
+ MinMaxScaler performs min-max normalization on the dataset, scaling the data to a specified range
+ (typically [0, 1] or [-1, 1]).
+
+ Attributes:
+ min (np.ndarray): The minimum values of the training data used for normalization.
+ If `norm_each_channel` is True, this is an array of minimum values, one for each channel. Otherwise, it's a single scalar.
+ max (np.ndarray): The maximum values of the training data used for normalization.
+ If `norm_each_channel` is True, this is an array of maximum values, one for each channel. Otherwise, it's a single scalar.
+ target_channel (int): The specific channel (feature) to which normalization is applied.
+ By default, it is set to 0, indicating the first channel.
+ """
+
+ def __init__(self, dataset_name: str, train_ratio: float, norm_each_channel: bool = True, rescale: bool = True):
+ """
+ Initialize the MinMaxScaler by loading the dataset and fitting the scaler to the training data.
+
+ The scaler computes the minimum and maximum values from the training data, which are then used
+ to normalize the data during the `transform` operation.
+
+ Args:
+ dataset_name (str): The name of the dataset used to load the data.
+ train_ratio (float): The ratio of the dataset to be used for training. The scaler is fitted on this portion of the data.
+ norm_each_channel (bool): Flag indicating whether to normalize each channel separately.
+ If True, the min and max values are computed for each channel independently. Defaults to True.
+ rescale (bool): Flag indicating whether to apply rescaling after normalization.
+ This flag is included for consistency with the base class but is typically True in min-max scaling.
+ """
+
+ super().__init__(dataset_name, train_ratio, norm_each_channel, rescale)
+ self.target_channel = 0 # assuming normalization on the first channel
+
+ # load dataset description and data
+ description_file_path = f'datasets/{dataset_name}/desc.json'
+ with open(description_file_path, 'r') as f:
+ description = json.load(f)
+ data_file_path = f'datasets/{dataset_name}/data.dat'
+ data = np.memmap(data_file_path, dtype='float32', mode='r', shape=tuple(description['shape']))
+
+ # split data into training set based on the train_ratio
+ train_size = int(len(data) * train_ratio)
+ train_data = data[:train_size, :, self.target_channel].copy()
+
+ # compute minimum and maximum values for normalization
+ if norm_each_channel:
+ self.min = np.min(train_data, axis=0, keepdims=True)
+ self.max = np.max(train_data, axis=0, keepdims=True)
+ else:
+ self.min = np.min(train_data)
+ self.max = np.max(train_data)
+
+ def transform(self, input_data: torch.Tensor) -> torch.Tensor:
+ """
+ Apply min-max normalization to the input data.
+
+ This method normalizes the input data using the minimum and maximum values computed from the training data.
+ The normalization is applied only to the specified `target_channel`.
+
+ Args:
+ input_data (torch.Tensor): The input data to be normalized.
+
+ Returns:
+ torch.Tensor: The normalized data with the same shape as the input.
+ """
+
+ input_data[..., self.target_channel] = (input_data[..., self.target_channel] - self.min) / (self.max - self.min)
+ return input_data
+
+ def inverse_transform(self, input_data: torch.Tensor) -> torch.Tensor:
+ """
+ Reverse the min-max normalization to recover the original data scale.
+
+ This method transforms the normalized data back to its original scale using the minimum and maximum
+ values computed from the training data. This is useful for interpreting model outputs or for further analysis
+ in the original data scale.
+
+ Args:
+ input_data (torch.Tensor): The normalized data to be transformed back.
+
+ Returns:
+ torch.Tensor: The data transformed back to its original scale.
+ """
+
+ input_data[..., self.target_channel] = input_data[..., self.target_channel] * (self.max - self.min) + self.min
+ return input_data
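
Usage sketch, assuming `datasets/PEMS04/desc.json` and `datasets/PEMS04/data.dat` exist on disk. Note that `transform` modifies its argument in place, hence the `clone()`:

```python
import torch
from basicts.scaler import MinMaxScaler

scaler = MinMaxScaler(dataset_name='PEMS04', train_ratio=0.6,
                      norm_each_channel=True, rescale=True)
x = torch.rand(16, 12, 307, 3)        # [B, L, N, C]; channel 0 is normalized
x_norm = scaler.transform(x.clone())  # x' = (x - min) / (max - min)
x_back = scaler.inverse_transform(x_norm)
```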
diff --git a/basicts/scaler/z_score_scaler.py b/basicts/scaler/z_score_scaler.py
new file mode 100644
index 00000000..bdb58782
--- /dev/null
+++ b/basicts/scaler/z_score_scaler.py
@@ -0,0 +1,102 @@
+import json
+
+import torch
+import numpy as np
+
+from .base_scaler import BaseScaler
+
+
+class ZScoreScaler(BaseScaler):
+ """
+ ZScoreScaler performs Z-score normalization on the dataset, transforming the data to have a mean of zero
+ and a standard deviation of one. This is commonly used in preprocessing to normalize data, ensuring that
+ each feature contributes equally to the model.
+
+ Attributes:
+ mean (np.ndarray): The mean of the training data used for normalization.
+ If `norm_each_channel` is True, this is an array of means, one for each channel. Otherwise, it's a single scalar.
+ std (np.ndarray): The standard deviation of the training data used for normalization.
+ If `norm_each_channel` is True, this is an array of standard deviations, one for each channel. Otherwise, it's a single scalar.
+ target_channel (int): The specific channel (feature) to which normalization is applied.
+ By default, it is set to 0, indicating the first channel.
+ """
+
+ def __init__(self, dataset_name: str, train_ratio: float, norm_each_channel: bool, rescale: bool):
+ """
+ Initialize the ZScoreScaler by loading the dataset and fitting the scaler to the training data.
+
+ The scaler computes the mean and standard deviation from the training data, which is then used to
+ normalize the data during the `transform` operation.
+
+ Args:
+ dataset_name (str): The name of the dataset used to load the data.
+ train_ratio (float): The ratio of the dataset to be used for training. The scaler is fitted on this portion of the data.
+ norm_each_channel (bool): Flag indicating whether to normalize each channel separately.
+ If True, the mean and standard deviation are computed for each channel independently.
+ rescale (bool): Flag indicating whether to apply rescaling after normalization. This flag is included for consistency with
+ the base class but is not directly used in Z-score normalization.
+ """
+
+ super().__init__(dataset_name, train_ratio, norm_each_channel, rescale)
+ self.target_channel = 0 # assuming normalization on the first channel
+
+ # load dataset description and data
+ description_file_path = f'datasets/{dataset_name}/desc.json'
+ with open(description_file_path, 'r') as f:
+ description = json.load(f)
+ data_file_path = f'datasets/{dataset_name}/data.dat'
+ data = np.memmap(data_file_path, dtype='float32', mode='r', shape=tuple(description['shape']))
+
+ # split data into training set based on the train_ratio
+ train_size = int(len(data) * train_ratio)
+ train_data = data[:train_size, :, self.target_channel].copy()
+
+ # compute mean and standard deviation
+ if norm_each_channel:
+ self.mean = np.mean(train_data, axis=0, keepdims=True)
+ self.std = np.std(train_data, axis=0, keepdims=True)
+ self.std[self.std == 0] = 1.0 # prevent division by zero by setting std to 1 where it's 0
+ else:
+ self.mean = np.mean(train_data)
+ self.std = np.std(train_data)
+ if self.std == 0:
+ self.std = 1.0 # prevent division by zero by setting std to 1 where it's 0
+
+ def transform(self, input_data: torch.Tensor) -> torch.Tensor:
+ """
+ Apply Z-score normalization to the input data.
+
+ This method normalizes the input data using the mean and standard deviation computed from the training data.
+ The normalization is applied only to the specified `target_channel`.
+
+ Args:
+ input_data (torch.Tensor): The input data to be normalized.
+
+ Returns:
+ torch.Tensor: The normalized data with the same shape as the input.
+ """
+
+ input_data[..., self.target_channel] = (input_data[..., self.target_channel] - self.mean) / self.std
+ return input_data
+
+ def inverse_transform(self, input_data: torch.Tensor) -> torch.Tensor:
+ """
+ Reverse the Z-score normalization to recover the original data scale.
+
+ This method transforms the normalized data back to its original scale using the mean and standard deviation
+ computed from the training data. This is useful for interpreting model outputs or for further analysis in the original data scale.
+
+ Args:
+ input_data (torch.Tensor): The normalized data to be transformed back.
+
+ Returns:
+ torch.Tensor: The data transformed back to its original scale.
+ """
+
+ if isinstance(self.mean, np.ndarray):
+ self.mean = torch.tensor(self.mean, device=input_data.device)
+ self.std = torch.tensor(self.std, device=input_data.device)
+ # Clone the input data to prevent in-place modification (which is not allowed in PyTorch)
+ input_data = input_data.clone()
+ input_data[..., self.target_channel] = input_data[..., self.target_channel] * self.std + self.mean
+ return input_data
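
Round-trip sketch under the same on-disk assumptions as above; `transform` mutates its input, so a clone is passed:

```python
import torch
from basicts.scaler import ZScoreScaler

scaler = ZScoreScaler(dataset_name='PEMS04', train_ratio=0.6,
                      norm_each_channel=True, rescale=True)
x = torch.rand(16, 12, 307, 3)
x_norm = scaler.transform(x.clone())       # x' = (x - mean) / std
x_back = scaler.inverse_transform(x_norm)  # recovers channel 0 up to float error
assert torch.allclose(x_back[..., 0], x[..., 0], atol=1e-5)
```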
diff --git a/basicts/utils/__init__.py b/basicts/utils/__init__.py
index fd82be27..fda27ef6 100644
--- a/basicts/utils/__init__.py
+++ b/basicts/utils/__init__.py
@@ -1,10 +1,11 @@
-from .serialization import load_adj, load_pkl, dump_pkl, load_node2vec_emb
-from .misc import clock, check_nan_inf, remove_nan_inf
-from .misc import partial_func as partial
-from .m4 import m4_summary
from .xformer import data_transformation_4_xformer
+from .serialization import load_adj, load_pkl, dump_pkl, \
+ load_dataset_data, get_regular_settings, load_dataset_desc
+from .misc import clock, check_nan_inf, remove_nan_inf, \
+ partial_func as partial
-__all__ = ["load_adj", "load_pkl", "dump_pkl",
- "load_node2vec_emb", "clock", "check_nan_inf",
- "remove_nan_inf", "data_transformation_4_xformer",
- "partial", "m4_summary"]
+__all__ = ['load_adj', 'load_pkl', 'dump_pkl',
+ 'clock', 'check_nan_inf',
+ 'remove_nan_inf', 'data_transformation_4_xformer',
+ 'partial', 'get_regular_settings',
+ 'load_dataset_data', 'load_dataset_desc']
diff --git a/basicts/utils/adjacent_matrix_norm.py b/basicts/utils/adjacent_matrix_norm.py
index 417d4ac6..36d13218 100644
--- a/basicts/utils/adjacent_matrix_norm.py
+++ b/basicts/utils/adjacent_matrix_norm.py
@@ -4,99 +4,103 @@
def calculate_symmetric_normalized_laplacian(adj: np.ndarray) -> np.matrix:
- """Calculate yymmetric normalized laplacian.
- Assuming unnormalized laplacian matrix is `L = D - A`,
- then symmetric normalized laplacian matrix is:
- `L^{Sym} = D^-1/2 L D^-1/2 = D^-1/2 (D-A) D^-1/2 = I - D^-1/2 A D^-1/2`
- For node `i` and `j` where `i!=j`, L^{sym}_{ij} <=0.
+ """
+ Calculate the symmetric normalized Laplacian.
+
+ The symmetric normalized Laplacian matrix is given by:
+ L^{Sym} = I - D^{-1/2} A D^{-1/2}, where L is the unnormalized Laplacian,
+ D is the degree matrix, and A is the adjacency matrix.
Args:
- adj (np.ndarray): Adjacent matrix A
+ adj (np.ndarray): Adjacency matrix A.
Returns:
- np.matrix: Symmetric normalized laplacian L^{Sym}
+ np.matrix: Symmetric normalized Laplacian L^{Sym}.
"""
adj = sp.coo_matrix(adj)
- degree = np.array(adj.sum(1))
- # diagonals of D^{-1/2}
- degree_inv_sqrt = np.power(degree, -0.5).flatten()
- degree_inv_sqrt[np.isinf(degree_inv_sqrt)] = 0.
- matrix_degree_inv_sqrt = sp.diags(degree_inv_sqrt) # D^{-1/2}
- symmetric_normalized_laplacian = sp.eye(
- adj.shape[0]) - matrix_degree_inv_sqrt.dot(adj).dot(matrix_degree_inv_sqrt).tocoo()
- return symmetric_normalized_laplacian
+ degree = np.array(adj.sum(1)).flatten()
+ degree_inv_sqrt = np.power(degree, -0.5)
+ degree_inv_sqrt[np.isinf(degree_inv_sqrt)] = 0.0
+ matrix_degree_inv_sqrt = sp.diags(degree_inv_sqrt)
+ laplacian = sp.eye(adj.shape[0]) - matrix_degree_inv_sqrt.dot(adj).dot(matrix_degree_inv_sqrt).tocoo()
+ return laplacian
def calculate_scaled_laplacian(adj: np.ndarray, lambda_max: int = 2, undirected: bool = True) -> np.matrix:
- """Re-scaled the eigenvalue to [-1, 1] by scaled the normalized laplacian matrix for chebyshev pol.
- According to `2017 ICLR GCN`, the lambda max is set to 2, and the graph is set to undirected.
- Note that rescale the laplacian matrix is equal to rescale the eigenvalue matrix.
- `L_{scaled} = (2 / lambda_max * L) - I`
+ """
+ Scale the normalized Laplacian for use in Chebyshev polynomials.
+
+ Rescale the Laplacian matrix such that its eigenvalues are within the range [-1, 1].
Args:
- adj (np.ndarray): Adjacent matrix A
- lambda_max (int, optional): Defaults to 2.
- undirected (bool, optional): Defaults to True.
+ adj (np.ndarray): Adjacency matrix A.
+ lambda_max (int, optional): Maximum eigenvalue, defaults to 2.
+ undirected (bool, optional): If True, treats the graph as undirected, defaults to True.
Returns:
- np.matrix: The rescaled laplacian matrix.
+ np.matrix: Scaled Laplacian matrix.
"""
if undirected:
- adj = np.maximum.reduce([adj, adj.T])
- laplacian_matrix = calculate_symmetric_normalized_laplacian(adj)
- if lambda_max is None: # manually cal the max lambda
- lambda_max, _ = linalg.eigsh(laplacian_matrix, 1, which='LM')
+ adj = np.maximum(adj, adj.T)
+
+ laplacian = calculate_symmetric_normalized_laplacian(adj)
+
+ if lambda_max is None:
+ lambda_max, _ = linalg.eigsh(laplacian, 1, which='LM')
lambda_max = lambda_max[0]
- laplacian_matrix = sp.csr_matrix(laplacian_matrix)
- num_nodes, _ = laplacian_matrix.shape
- identity_matrix = sp.identity(
- num_nodes, format='csr', dtype=laplacian_matrix.dtype)
- laplacian_res = (2 / lambda_max * laplacian_matrix) - identity_matrix
- return laplacian_res
+ laplacian = sp.csr_matrix(laplacian)
+ identity = sp.identity(laplacian.shape[0], format='csr', dtype=laplacian.dtype)
+
+ scaled_laplacian = (2 / lambda_max) * laplacian - identity
+ return scaled_laplacian
def calculate_symmetric_message_passing_adj(adj: np.ndarray) -> np.matrix:
- """Calculate the renormalized message passing adj in `GCN`.
- A = A + I
- return D^{-1/2} A D^{-1/2}
+ """
+ Calculate the renormalized message-passing adjacency matrix as proposed in GCN.
+
+ The message-passing adjacency matrix is defined as A' = D^{-1/2} (A + I) D^{-1/2}.
Args:
- adj (np.ndarray): Adjacent matrix A
+ adj (np.ndarray): Adjacency matrix A.
Returns:
- np.matrix: Renormalized message passing adj in `GCN`.
+ np.matrix: Renormalized message-passing adjacency matrix.
"""
- # add self loop
- adj = adj + np.diag(np.ones(adj.shape[0], dtype=np.float32))
- # print("calculating the renormalized message passing adj, please ensure that self-loop has added to adj.")
+ adj = adj + np.eye(adj.shape[0], dtype=np.float32)
adj = sp.coo_matrix(adj)
- row_sum = np.array(adj.sum(1))
- d_inv_sqrt = np.power(row_sum, -0.5).flatten()
- d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
+
+ row_sum = np.array(adj.sum(1)).flatten()
+ d_inv_sqrt = np.power(row_sum, -0.5)
+ d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.0
+
d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
- mp_adj = d_mat_inv_sqrt.dot(adj).transpose().dot(
- d_mat_inv_sqrt).astype(np.float32)
- return mp_adj
+ mp_adj = d_mat_inv_sqrt.dot(adj).transpose().dot(d_mat_inv_sqrt).astype(np.float32)
+ return mp_adj
def calculate_transition_matrix(adj: np.ndarray) -> np.matrix:
- """Calculate the transition matrix `P` proposed in DCRNN and Graph WaveNet.
- P = D^{-1}A = A/rowsum(A)
+ """
+ Calculate the transition matrix as proposed in DCRNN and Graph WaveNet.
+
+ The transition matrix is defined as P = D^{-1} A, where D is the degree matrix.
Args:
- adj (np.ndarray): Adjacent matrix A
+ adj (np.ndarray): Adjacency matrix A.
Returns:
- np.matrix: Transition matrix P
+ np.matrix: Transition matrix P.
"""
adj = sp.coo_matrix(adj)
row_sum = np.array(adj.sum(1)).flatten()
- d_inv = np.power(row_sum, -1).flatten()
- d_inv[np.isinf(d_inv)] = 0.
+ d_inv = np.power(row_sum, -1)
+ d_inv[np.isinf(d_inv)] = 0.0
+
d_mat = sp.diags(d_inv)
prob_matrix = d_mat.dot(adj).astype(np.float32).todense()
+
return prob_matrix
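
A standalone sketch applying all four normalizations to a tiny path graph, to make the formulas concrete:

```python
import numpy as np
from basicts.utils.adjacent_matrix_norm import (
    calculate_scaled_laplacian,
    calculate_symmetric_message_passing_adj,
    calculate_symmetric_normalized_laplacian,
    calculate_transition_matrix,
)

A = np.array([[0, 1, 0],
              [1, 0, 1],
              [0, 1, 0]], dtype=np.float32)  # 3-node path graph

print(calculate_symmetric_normalized_laplacian(A).toarray())  # I - D^{-1/2} A D^{-1/2}
print(calculate_scaled_laplacian(A).toarray())                # eigenvalues in [-1, 1]
print(calculate_symmetric_message_passing_adj(A).toarray())   # D^{-1/2} (A + I) D^{-1/2}
print(calculate_transition_matrix(A))                         # P = D^{-1} A, row-stochastic
```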
diff --git a/basicts/utils/logging.py b/basicts/utils/logging.py
deleted file mode 100644
index 58e6157a..00000000
--- a/basicts/utils/logging.py
+++ /dev/null
@@ -1,15 +0,0 @@
-import logging
-from easytorch.utils.logging import logger_initialized
-
-
-def clear_loggers():
- for logger_name in logger_initialized:
- # logging.getLogger(logger_name).handlers.clear()
- logger = logging.getLogger(logger_name)
- # disable the logger
- # logger.disabled = True
- # remove handlers
- for handler in logger.handlers:
- handler.close()
- logger.handlers.clear()
- logger_initialized.clear()
diff --git a/basicts/utils/m4.py b/basicts/utils/m4.py
deleted file mode 100644
index 9644968f..00000000
--- a/basicts/utils/m4.py
+++ /dev/null
@@ -1,221 +0,0 @@
-# This source code is provided for the purposes of scientific reproducibility
-# under the following limited license from Element AI Inc. The code is an
-# implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis
-# expansion analysis for interpretable time series forecasting,
-# https://arxiv.org/abs/1905.10437). The copyright to the source code is
-# licensed under the Creative Commons - Attribution-NonCommercial 4.0
-# International license (CC BY-NC 4.0):
-# https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether
-# for the benefit of third parties or internally in production) requires an
-# explicit license. The subject-matter of the N-BEATS model and associated
-# materials are the property of Element AI Inc. and may be subject to patent
-# protection. No license to patents is granted hereunder (whether express or
-# implied). Copyright © 2020 Element AI Inc. All rights reserved.
-
-# Modified from https://github.com/ServiceNow/N-BEATS
-
-"""
-M4 Summary
-"""
-import os
-from glob import glob
-from dataclasses import dataclass
-from collections import OrderedDict
-
-import numpy as np
-import pandas as pd
-
-Forecast = np.ndarray
-Target = np.ndarray
-
-
-@dataclass()
-class M4Dataset:
- ids: np.ndarray
- groups: np.ndarray
- frequencies: np.ndarray
- horizons: np.ndarray
- values: np.ndarray
-
- @staticmethod
- def load(info_file_path: str = None, data: np.array = None) -> "M4Dataset":
- """
- Load cached dataset.
-
- :param training: Load training part if training is True, test part otherwise.
- """
- m4_info = pd.read_csv(info_file_path)
- ids = m4_info.M4id.values
- groups = m4_info.SP.values
- frequencies = m4_info.Frequency.values
- horizons = m4_info.Horizon.values
- values = data
- return M4Dataset(ids=ids, groups=groups, frequencies=frequencies, horizons=horizons, values=values)
-
-def mase(forecast: Forecast, insample: np.ndarray, outsample: Target, frequency: int) -> np.ndarray:
- """
- MASE loss as defined in "Scaled Errors" https://robjhyndman.com/papers/mase.pdf
-
- :param forecast: Forecast values. Shape: batch, time_o
- :param insample: Insample values. Shape: batch, time_i
- :param outsample: Target values. Shape: batch, time_o
- :param frequency: Frequency value
- :return: Same shape array with error calculated for each time step
- """
- return np.mean(np.abs(forecast - outsample)) / np.mean(np.abs(insample[:-frequency] - insample[frequency:]))
-
-
-def smape_2(forecast: Forecast, target: Target) -> np.ndarray:
- """
- sMAPE loss as defined in https://robjhyndman.com/hyndsight/smape/ (Makridakis 1993)
-
- :param forecast: Forecast values. Shape: batch, time
- :param target: Target values. Shape: batch, time
- :return: Same shape array with sMAPE calculated for each time step of each timeseries.
- """
- denom = np.abs(target) + np.abs(forecast)
- # divide by 1.0 instead of 0.0, in case when denom is zero the enumerator will be 0.0 anyway.
- denom[denom == 0.0] = 1.0
- return 200 * np.abs(forecast - target) / denom
-
-
-def group_values(values: np.ndarray, groups: np.ndarray, group_name: str) -> np.ndarray:
- """
- Filter values array by group indices and clean it from NaNs.
-
- :param values: Values to filter.
- :param groups: Timeseries groups.
- :param group_name: Group name to filter by.
- :return: Filtered and cleaned timeseries.
- """
- return np.array([v[~np.isnan(v)] for v in values[groups == group_name]], dtype=object)
-
-
-class M4Summary:
- def __init__(self, info_file_path, train_values, test_values, naive_forecast_file_path):
- self.training_set = M4Dataset.load(info_file_path, train_values)
- self.test_set = M4Dataset.load(info_file_path, test_values)
- self.naive_forecast_file_path = naive_forecast_file_path
-
- def evaluate(self, forecast: np.ndarray):
- """
- Evaluate forecasts using M4 test dataset.
-
- :param forecast: Forecasts. Shape: timeseries, time.
- :return: sMAPE and OWA grouped by seasonal patterns.
- """
- forecast = np.array([v[~np.isnan(v)] for v in forecast], dtype=object)
-
- grouped_smapes = {group_name:
- np.mean(smape_2(forecast=group_values(values=forecast,
- groups=self.test_set.groups,
- group_name=group_name),
- target=group_values(values=self.test_set.values,
- groups=self.test_set.groups,
- group_name=group_name)))
- for group_name in np.unique(self.test_set.groups)}
- grouped_smapes = self.summarize_groups(grouped_smapes)
-
- grouped_owa = OrderedDict()
-
- naive2_forecasts = pd.read_csv(self.naive_forecast_file_path).values[:, 1:].astype(np.float32)
- naive2_forecasts = np.array([v[~np.isnan(v)] for v in naive2_forecasts], dtype=object)
-
- model_mases = {}
- naive2_smapes = {}
- naive2_mases = {}
- for group_name in np.unique(self.test_set.groups):
- model_forecast = group_values(forecast, self.test_set.groups, group_name)
- naive2_forecast = group_values(naive2_forecasts, self.test_set.groups, group_name)
-
- target = group_values(self.test_set.values, self.test_set.groups, group_name)
- # all timeseries within group have same frequency
- frequency = self.training_set.frequencies[self.test_set.groups == group_name][0]
- insample = group_values(self.training_set.values, self.test_set.groups, group_name)
-
- model_mases[group_name] = np.mean([mase(forecast=model_forecast[i],
- insample=insample[i],
- outsample=target[i],
- frequency=frequency) for i in range(len(model_forecast))])
- naive2_mases[group_name] = np.mean([mase(forecast=naive2_forecast[i],
- insample=insample[i],
- outsample=target[i],
- frequency=frequency) for i in range(len(model_forecast))])
-
- naive2_smapes[group_name] = np.mean(smape_2(naive2_forecast, target))
- grouped_model_mases = self.summarize_groups(model_mases)
- grouped_naive2_smapes = self.summarize_groups(naive2_smapes)
- grouped_naive2_mases = self.summarize_groups(naive2_mases)
- for k in grouped_model_mases.keys():
- grouped_owa[k] = (grouped_model_mases[k] / grouped_naive2_mases[k] +
- grouped_smapes[k] / grouped_naive2_smapes[k]) / 2
- def round_all(d):
- return dict(map(lambda kv: (kv[0], np.round(kv[1], 3)), d.items()))
- return round_all(grouped_smapes), round_all(grouped_model_mases), round_all(grouped_owa)
-
- def summarize_groups(self, scores):
- """
- Re-group scores respecting M4 rules.
- :param scores: Scores per group.
- :return: Grouped scores.
- """
- scores_summary = OrderedDict()
-
- def group_count(group_name):
- return len(np.where(self.test_set.groups == group_name)[0])
-
- weighted_score = {}
- for g in ["Yearly", "Quarterly", "Monthly"]:
- weighted_score[g] = scores[g] * group_count(g)
- scores_summary[g] = scores[g]
-
- others_score = 0
- others_count = 0
- for g in ["Weekly", "Daily", "Hourly"]:
- others_score += scores[g] * group_count(g)
- others_count += group_count(g)
- weighted_score["Others"] = others_score
- scores_summary["Others"] = others_score / others_count
-
- average = np.sum(list(weighted_score.values())) / len(self.test_set.groups)
- scores_summary["Average"] = average
-
- return scores_summary
-
-
-def m4_summary(save_dir, project_dir):
- """Summary evaluation for M4 dataset.
-
- Args:
- save_dir (str): Directory where prediction results are saved. All "{save_dir}/M4_{seasonal pattern}.npy" should exist.
- Seasonal patterns = ["Yearly", "Quarterly", "Monthly", "Weekly", "Daily", "Hourly"]
- project_dir (str): Project directory. The M4 raw data should be in "{project_dir}/datasets/raw_data/M4".
- """
- seasonal_patterns = ["Yearly", "Quarterly", "Monthly", "Weekly", "Daily", "Hourly"] # the order cannot be changed
- data_dir = project_dir + "/datasets/raw_data/M4"
- info_file_path = data_dir + "/M4-info.csv"
-
- m4_info = pd.read_csv(info_file_path)
- m4_ids = m4_info.M4id.values
- def build_cache(files: str) -> None:
- timeseries_dict = OrderedDict(list(zip(m4_ids, [[]] * len(m4_ids))))
- for train_csv in glob(os.path.join(data_dir, files)):
- dataset = pd.read_csv(train_csv)
- dataset.set_index(dataset.columns[0], inplace=True)
- for m4id, row in dataset.iterrows():
- values = row.values
- timeseries_dict[m4id] = values[~np.isnan(values)]
- return np.array(list(timeseries_dict.values()), dtype=object)
-
- print("Building cache for M4 dataset...")
- # read prediction and ground truth
- prediction = []
- for seasonal_pattern in seasonal_patterns:
- prediction.extend(np.load(save_dir + "/M4_{0}.npy".format(seasonal_pattern)))
- prediction = np.array(prediction, dtype=object)
- train_values = build_cache("*-train.csv")
- test_values = build_cache("*-test.csv")
- print("Summarizing M4 dataset...")
- summary = M4Summary(info_file_path, train_values, test_values, data_dir + "/submission-Naive2.csv")
- results = pd.DataFrame(summary.evaluate(prediction), index=["SMAPE", "MASE", "OWA"])
- return results
diff --git a/basicts/utils/misc.py b/basicts/utils/misc.py
index 4bec6919..dc74959a 100644
--- a/basicts/utils/misc.py
+++ b/basicts/utils/misc.py
@@ -1,94 +1,66 @@
-import os
import time
-import importlib
-from typing import List
from functools import partial
import torch
-from easytorch.utils.misc import scan_dir
-def scan_modules(work_dir: str, file_dir: str, exclude_files: List[str] = None, exclude_dirs: List[str] = None):
- """
- overwrite easytorch.utils.scan_modeuls: automatically scan and import modules for registry, and exclude some files and dirs.
+class partial_func(partial):
"""
- module_dir = os.path.dirname(os.path.abspath(file_dir))
- import_prefix = module_dir[module_dir.find(work_dir) + len(work_dir) + 1:].replace("/", ".").replace("\\", ".")
-
- if exclude_files is None:
- exclude_files = []
- if exclude_dirs is None:
- exclude_dirs = []
-
- # get all file names, and remove the files in exclude_files
- model_file_names = [
- v[:v.find(".py")].replace("/", ".").replace("\\", ".") \
- for v in scan_dir(module_dir, suffix="py", recursive=True) if v not in exclude_files
- ]
-
- # remove the files in exclude_dirs. TODO: use os.path to check
- for exclude_dir in exclude_dirs:
- exclude_dir = exclude_dir.replace("/", ".").replace("\\", ".")
- model_file_names = [file_name for file_name in model_file_names if exclude_dir not in file_name]
+ Custom partial function class that provides a cleaner string representation.
- # import all modules
- return [importlib.import_module(f"{import_prefix}.{file_name}") for file_name in model_file_names]
-
-
-class partial_func(partial):
- """partial class.
- __str__ in functools.partial contains the address of the function, which changes randomly and will disrupt easytorch's md5 calculation.
+ This prevents the address of the function from being included, which can cause issues with hashing.
"""
def __str__(self):
- return "partial({}, {})".format(self.func.__name__, self.keywords)
+ return f"partial({self.func.__name__}, {self.keywords})"
def clock(func):
- """clock decorator"""
- def clocked(*args, **kw):
- """decorator for clock"""
+ """
+ Decorator to measure the execution time of a function.
+
+ This decorator prints the time taken for a function to execute.
+ """
+
+ def clocked(*args, **kwargs):
t0 = time.perf_counter()
- result = func(*args, **kw)
+ result = func(*args, **kwargs)
elapsed = time.perf_counter() - t0
- name = func.__name__
- print("%s: %0.8fs..." % (name, elapsed))
+ print(f"{func.__name__}: {elapsed:.8f}s")
return result
return clocked
-
def check_nan_inf(tensor: torch.Tensor, raise_ex: bool = True) -> tuple:
- """check nan and in in tensor
+ """
+ Check for NaN or Inf values in a tensor.
Args:
- tensor (torch.Tensor): Tensor
- raise_ex (bool, optional): If raise exceptions. Defaults to True.
+ tensor (torch.Tensor): Input tensor to check.
+ raise_ex (bool, optional): Whether to raise an exception if NaN or Inf values are found. Defaults to True.
Raises:
- Exception: If raise_ex is True and there are nans or infs in tensor, then raise Exception.
+ ValueError: If raise_ex is True and NaN or Inf values are found.
Returns:
- dict: {'nan': bool, 'inf': bool}
- bool: if exist nan or if
+ tuple: A dictionary indicating presence of NaN and Inf values, and a boolean indicating whether either is present.
"""
- # nan
nan = torch.any(torch.isnan(tensor))
- # inf
inf = torch.any(torch.isinf(tensor))
- # raise
+
if raise_ex and (nan or inf):
- raise Exception({"nan": nan, "inf": inf})
- return {"nan": nan, "inf": inf}, nan or inf
+ raise ValueError({"nan": nan, "inf": inf})
+ return {"nan": nan, "inf": inf}, nan or inf
-def remove_nan_inf(tensor: torch.Tensor):
- """remove nan and inf in tensor
+def remove_nan_inf(tensor: torch.Tensor) -> torch.Tensor:
+ """
+ Remove NaN and Inf values from a tensor by replacing them with zeros.
Args:
- tensor (torch.Tensor): input tensor
+ tensor (torch.Tensor): Input tensor.
Returns:
- torch.Tensor: output tensor
+ torch.Tensor: Tensor with NaN and Inf values replaced by zeros.
"""
tensor = torch.where(torch.isnan(tensor), torch.zeros_like(tensor), tensor)
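For reference, a minimal sketch of how the revised helpers in `basicts/utils/misc.py` behave (assuming `basicts` is importable from the project root; printed values are illustrative):

```python
import torch
from basicts.utils.misc import partial_func, check_nan_inf, remove_nan_inf

x = torch.tensor([1.0, float('nan'), float('inf')])

# Non-raising check: returns ({'nan': ..., 'inf': ...}, nan_or_inf_present).
flags, found = check_nan_inf(x, raise_ex=False)
print(flags['nan'].item(), flags['inf'].item(), found.item())  # True True True

# NaN/Inf entries are replaced by zeros (per the docstring).
print(remove_nan_inf(x))  # tensor([1., 0., 0.])

# Unlike functools.partial, partial_func's string form carries no memory
# address, so config hashes derived from it stay stable across runs.
f = partial_func(torch.clamp, min=0.0)
print(str(f))  # partial(clamp, {'min': 0.0})
```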
diff --git a/basicts/utils/serialization.py b/basicts/utils/serialization.py
index 60c015c8..1c57a916 100644
--- a/basicts/utils/serialization.py
+++ b/basicts/utils/serialization.py
@@ -1,103 +1,129 @@
+import json
import pickle
-
-import torch
import numpy as np
+from .adjacent_matrix_norm import (
+ calculate_scaled_laplacian,
+ calculate_symmetric_normalized_laplacian,
+ calculate_symmetric_message_passing_adj,
+ calculate_transition_matrix
+)
+
+
+def get_regular_settings(dataset_name: str) -> dict:
+ """
+ Get the regular settings for a dataset.
+
+ Args:
+ dataset_name (str): Name of the dataset.
+
+ Returns:
+ dict: Regular settings for the dataset.
+ """
+
+ # read json file: datasets/dataset_name/desc.json
+ desc = load_dataset_desc(dataset_name)
+ regular_settings = desc['regular_settings']
+ return regular_settings
+
+def load_dataset_desc(dataset_name: str) -> dict:
+    """
+    Get the description of a dataset.
+
+    Args:
+        dataset_name (str): Name of the dataset.
+
+    Returns:
+        dict: Description of the dataset.
+    """
-from .adjacent_matrix_norm import calculate_scaled_laplacian, calculate_symmetric_normalized_laplacian, calculate_symmetric_message_passing_adj, calculate_transition_matrix
+ # read json file: datasets/dataset_name/desc.json
+ with open(f'datasets/{dataset_name}/desc.json', 'r') as f:
+ desc = json.load(f)
+ return desc
+def load_dataset_data(dataset_name: str) -> np.ndarray:
+ """
+    Load a dataset's data from its .dat file (memmap) via numpy.
+
+    Args:
+        dataset_name (str): Name of the dataset.
+
+ Returns:
+ np.ndarray: Loaded data.
+ """
+
+ shape = load_dataset_desc(dataset_name)['shape']
+ dat_file_path = f'datasets/{dataset_name}/data.dat'
+ data = np.memmap(dat_file_path, mode='r', dtype=np.float32, shape=tuple(shape)).copy()
+ return data
def load_pkl(pickle_file: str) -> object:
- """Load pickle data.
+ """
+ Load data from a pickle file.
Args:
- pickle_file (str): file path
+ pickle_file (str): Path to the pickle file.
Returns:
- object: loaded objected
+ object: Loaded object from the pickle file.
"""
try:
- with open(pickle_file, "rb") as f:
+ with open(pickle_file, 'rb') as f:
pickle_data = pickle.load(f)
except UnicodeDecodeError:
- with open(pickle_file, "rb") as f:
- pickle_data = pickle.load(f, encoding="latin1")
+ with open(pickle_file, 'rb') as f:
+ pickle_data = pickle.load(f, encoding='latin1')
except Exception as e:
- print("Unable to load data ", pickle_file, ":", e)
+ print(f'Unable to load data from {pickle_file}: {e}')
raise
return pickle_data
-
def dump_pkl(obj: object, file_path: str):
- """Dumplicate pickle data.
+ """
+ Save an object to a pickle file.
Args:
- obj (object): object
- file_path (str): file path
+ obj (object): Object to save.
+ file_path (str): Path to the file.
"""
- with open(file_path, "wb") as f:
+ with open(file_path, 'wb') as f:
pickle.dump(obj, f)
-
def load_adj(file_path: str, adj_type: str):
- """load adjacency matrix.
+ """
+ Load and preprocess an adjacency matrix.
Args:
- file_path (str): file path
- adj_type (str): adjacency matrix type
+ file_path (str): Path to the file containing the adjacency matrix.
+ adj_type (str): Type of adjacency matrix preprocessing.
Returns:
- list of numpy.matrix: list of preproceesed adjacency matrices
- np.ndarray: raw adjacency matrix
+ list: List of processed adjacency matrices.
+ np.ndarray: Raw adjacency matrix.
"""
try:
- # METR and PEMS_BAY
_, _, adj_mx = load_pkl(file_path)
except ValueError:
- # PEMS04
adj_mx = load_pkl(file_path)
- if adj_type == "scalap":
+
+ if adj_type == 'scalap':
adj = [calculate_scaled_laplacian(adj_mx).astype(np.float32).todense()]
- elif adj_type == "normlap":
- adj = [calculate_symmetric_normalized_laplacian(
- adj_mx).astype(np.float32).todense()]
- elif adj_type == "symnadj":
- adj = [calculate_symmetric_message_passing_adj(
- adj_mx).astype(np.float32).todense()]
- elif adj_type == "transition":
+ elif adj_type == 'normlap':
+ adj = [calculate_symmetric_normalized_laplacian(adj_mx).astype(np.float32).todense()]
+ elif adj_type == 'symnadj':
+ adj = [calculate_symmetric_message_passing_adj(adj_mx).astype(np.float32).todense()]
+ elif adj_type == 'transition':
adj = [calculate_transition_matrix(adj_mx).T]
- elif adj_type == "doubletransition":
+ elif adj_type == 'doubletransition':
adj = [calculate_transition_matrix(adj_mx).T, calculate_transition_matrix(adj_mx.T).T]
- elif adj_type == "identity":
+ elif adj_type == 'identity':
adj = [np.diag(np.ones(adj_mx.shape[0])).astype(np.float32)]
- elif adj_type == "original":
+ elif adj_type == 'original':
adj = [adj_mx]
else:
- error = 0
- assert error, "adj type not defined"
- return adj, adj_mx
+ raise ValueError('Undefined adjacency matrix type.')
-
-def load_node2vec_emb(file_path: str) -> torch.Tensor:
- """load node2vec embedding
-
- Args:
- file_path (str): file path
-
- Returns:
- torch.Tensor: node2vec embedding
- """
-
- # spatial embedding
- with open(file_path, mode="r") as f:
- lines = f.readlines()
- temp = lines[0].split(" ")
- num_vertex, dims = int(temp[0]), int(temp[1])
- spatial_embeddings = torch.zeros((num_vertex, dims), dtype=torch.float32)
- for line in lines[1:]:
- temp = line.split(" ")
- index = int(temp[0])
- spatial_embeddings[index] = torch.Tensor([float(ch) for ch in temp[1:]])
- return spatial_embeddings
+ return adj, adj_mx
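As a usage sketch for the rewritten `load_adj` (the pickle path below is hypothetical; `dump_pkl` writes a bare array, which exercises the `ValueError` fallback branch):

```python
import numpy as np
from basicts.utils.serialization import dump_pkl, load_adj

# A 4-node path graph; a bare (4, 4) array cannot be unpacked into three
# values, so load_adj falls through to reading it as a single object.
adj = np.array([[0, 1, 0, 0],
                [1, 0, 1, 0],
                [0, 1, 0, 1],
                [0, 0, 1, 0]], dtype=np.float32)
dump_pkl(adj, '/tmp/adj_mx.pkl')  # hypothetical path

# 'doubletransition' returns forward and backward transition matrices.
adjs, raw = load_adj('/tmp/adj_mx.pkl', 'doubletransition')
print(len(adjs), raw.shape)  # 2 (4, 4)
```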
diff --git a/datasets/README.md b/datasets/README.md
index dd4601be..cefa2d7d 100644
--- a/datasets/README.md
+++ b/datasets/README.md
@@ -150,7 +150,6 @@ Visibility, DryBulbFarenheit, DryBulbCelsius, WetBulbFarenheit, DewPointFarenhei
**Description**: PEMS0X is a series of traffic flow datasets, including PEMS03, PEMS04, PEMS07, and PEMS08. X represents the code of the district where the data is collected. The traffic information is recorded every 5 minutes. Similar to METR-LA and PEMS-BAY, PEMS0X also includes a sensor graph to indicate dependencies between sensors. The details of the computation of the adjacency matrix can be found in the [ASTGCN](https://ojs.aaai.org/index.php/AAAI/article/view/3881/3759).
-
**Period**:
- PEMS03: 2018/09/01 -> 2018/11/30
@@ -186,10 +185,8 @@ Visibility, DryBulbFarenheit, DryBulbCelsius, WetBulbFarenheit, DewPointFarenhei
**Description**: LargeST is a series of large-scale traffic flow datasets, including CA, GLA, GBA, and SD. Similar to METR-LA and PEMS-BAY, LargeST also includes a sensor graph to indicate dependencies between sensors. Moreover, LargeST includes meta data for each node. Following the original paper, we use the data from 2019. The details of LargeST can be found in [LargeST: A Benchmark Dataset for Large-Scale Traffic Forecasting](https://arxiv.org/pdf/2306.08259.pdf).
-
**Period**: 2019
-
**Number of Time Steps**:
- CA: 35040
@@ -210,4 +207,41 @@ Visibility, DryBulbFarenheit, DryBulbCelsius, WetBulbFarenheit, DewPointFarenhei
**Typical Settings**:
-- Spatial temporal forecasting.
\ No newline at end of file
+- Spatial temporal forecasting.
+
+### 9. Illness
+
+**Source**: [Autoformer: Decomposition Transformers with Auto-Correlation for Long-Term Series Forecasting, NeurIPS 2021](https://github.com/thuml/Autoformer). [Data Link](https://github.com/thuml/Autoformer)
+
+**Description**: Illness includes weekly recorded influenza-like illness (ILI) patient data from the Centers for Disease Control and Prevention of the United States between 2002 and 2021, describing the ratio of patients seen with ILI to the total number of patients.
+
+**Period**: 2002-01-01 -> 2020-06-30
+
+**Number of Time Steps**: 966
+
+**Dataset Split**: 7:1:2
+
+**Variates**: % WEIGHTED ILI, %UNWEIGHTED ILI, AGE 0-4, AGE 5-24, ILITOTAL, NUM. OF PROVIDERS, OT
+
+**Typical Settings**:
+
+- Long time series forecasting.
+
+
+### 10. Traffic
+
+**Source**: [Autoformer: Decomposition Transformers with Auto-Correlation for Long-Term Series Forecasting, NeurIPS 2021](https://github.com/thuml/Autoformer). [Data Link](https://github.com/thuml/Autoformer)
+
+**Description**: Traffic is a collection of hourly data from the California Department of Transportation, describing road occupancy rates measured by different sensors on San Francisco Bay Area freeways.
+
+**Period**: 2016-07-01 02:00:00 -> 2018-07-02 01:00:00
+
+**Number of Time Steps**: 17544
+
+**Dataset Split**: 7:1:2
+
+**Variates**: Data measured by 862 sensors.
+
+**Typical Settings**:
+
+- Long time series forecasting.
diff --git a/examples/arch.py b/examples/arch.py
new file mode 100644
index 00000000..e9af348a
--- /dev/null
+++ b/examples/arch.py
@@ -0,0 +1,52 @@
+import torch
+from torch import nn
+
+class MultiLayerPerceptron(nn.Module):
+ """
+ A simple Multi-Layer Perceptron (MLP) model with two fully connected layers.
+
+ This model is designed to take historical time series data as input and produce future predictions.
+ It consists of two linear layers with a ReLU activation in between.
+
+ Attributes:
+ fc1 (nn.Linear): The first fully connected layer, which maps the input history sequence to a hidden dimension.
+ fc2 (nn.Linear): The second fully connected layer, which maps the hidden dimension to the prediction sequence.
+ act (nn.ReLU): The ReLU activation function applied between the two layers.
+ """
+
+ def __init__(self, history_seq_len: int, prediction_seq_len: int, hidden_dim: int):
+ """
+ Initialize the MultiLayerPerceptron model.
+
+ Args:
+ history_seq_len (int): The length of the input history sequence.
+ prediction_seq_len (int): The length of the output prediction sequence.
+ hidden_dim (int): The number of units in the hidden layer.
+ """
+ super().__init__()
+ self.fc1 = nn.Linear(history_seq_len, hidden_dim)
+ self.fc2 = nn.Linear(hidden_dim, prediction_seq_len)
+ self.act = nn.ReLU()
+
+ def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_seen: int, epoch: int, train: bool) -> torch.Tensor:
+ """
+ Perform a forward pass through the network.
+
+ Args:
+ history_data (torch.Tensor): A tensor containing historical data, typically of shape `[B, L, N, C]`.
+ future_data (torch.Tensor): A tensor containing future data, typically of shape `[B, L, N, C]`.
+ batch_seen (int): The number of batches seen so far during training.
+ epoch (int): The current epoch number.
+ train (bool): Flag indicating whether the model is in training mode.
+
+ Returns:
+ torch.Tensor: The output prediction tensor, typically of shape `[B, L, N, C]`.
+ """
+
+ history_data = history_data[..., 0].transpose(1, 2) # [B, L, N, C] -> [B, N, L]
+
+ # [B, N, L] --h=act(fc1(x))--> [B, N, D] --fc2(h)--> [B, N, L] -> [B, L, N]
+ prediction = self.fc2(self.act(self.fc1(history_data))).transpose(1, 2)
+
+ # [B, L, N] -> [B, L, N, 1]
+ return prediction.unsqueeze(-1)
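A quick shape check for the example model above (dummy tensors only; the `future_data`, `batch_seen`, `epoch`, and `train` arguments are required by the runner interface but unused by this MLP):

```python
import torch
from examples.arch import MultiLayerPerceptron

model = MultiLayerPerceptron(history_seq_len=12, prediction_seq_len=12, hidden_dim=64)

history = torch.randn(2, 12, 4, 3)  # [B, L, N, C]: 2 samples, 12 steps, 4 nodes, 3 features
future = torch.randn(2, 12, 4, 1)   # unused by this model

out = model(history, future, batch_seen=0, epoch=0, train=True)
print(out.shape)  # torch.Size([2, 12, 4, 1])
```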
diff --git a/examples/complete_config.py b/examples/complete_config.py
new file mode 100644
index 00000000..71df2742
--- /dev/null
+++ b/examples/complete_config.py
@@ -0,0 +1,213 @@
+############################## Import Dependencies ##############################
+
+import os
+import sys
+from easydict import EasyDict
+
+# TODO: Remove this when basicts can be installed via pip
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+# Import metrics & loss functions
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+# Import dataset class
+from basicts.data import TimeSeriesForecastingDataset
+# Import runner class
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+# Import scaler class
+from basicts.scaler import ZScoreScaler
+# Import model architecture
+from .arch import MultiLayerPerceptron as MLP
+
+from basicts.utils import get_regular_settings
+
+############################## Hot Parameters ##############################
+
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+
+# Model architecture and parameters
+MODEL_ARCH = MLP
+MODEL_PARAM = {
+ 'history_seq_len': INPUT_LEN,
+ 'prediction_seq_len': OUTPUT_LEN,
+ 'hidden_dim': 64
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+
+CFG = EasyDict()
+
+# General settings
+CFG.DESCRIPTION = 'An Example Config' # Description of this config, not used by BasicTS itself, but useful for remembering the purpose of this config
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner # Runner class
+
+############################## Environment Configuration ##############################
+
+CFG.ENV = EasyDict() # Environment settings. Default: None
+
+# GPU and random seed settings
+CFG.ENV.TF32 = False # Whether to use TensorFloat-32 in GPU. Default: False. See https://pytorch.org/docs/stable/notes/cuda.html#tf32-on-ampere.
+CFG.ENV.SEED = 42 # Random seed. Default: 42
+CFG.ENV.DETERMINISTIC = False # Whether to set the random seed to get deterministic results. Default: True
+CFG.ENV.CUDNN = EasyDict()
+CFG.ENV.CUDNN.ENABLED = True # Whether to enable cuDNN. Default: False
+CFG.ENV.CUDNN.BENCHMARK = True # Whether to enable cuDNN benchmark. Default: False
+CFG.ENV.CUDNN.DETERMINISTIC = False # Whether to set cuDNN to deterministic mode. Default: True
+
+############################## Dataset Configuration ##############################
+
+CFG.DATASET = EasyDict() # Dataset settings. Default: None. If not specified, get the training, validation, and test datasets from CFG.[TRAIN, VAL, TEST].DATA.DATASET.
+
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME # Name of the dataset, used for saving checkpoints and setting the process title.
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset # Dataset class used in training, validation, and test.
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+}) # Parameters for the dataset class
+
+############################## Scaler Configuration ##############################
+
+CFG.SCALER = EasyDict() # Scaler settings. Default: None. If not specified, the data will not be normalized, i.e., the data will be used directly for training, validation, and test.
+
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+}) # Parameters for the scaler class
+
+############################## Model Configuration ##############################
+
+CFG.MODEL = EasyDict() # Model settings, must be specified.
+
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__ # Model name, must be specified, used for saving checkpoints and setting the process title.
+CFG.MODEL.ARCH = MODEL_ARCH # Model architecture, must be specified.
+CFG.MODEL.PARAM = MODEL_PARAM # Model parameters
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] # Features used as input. The size of input data `history_data` is usually [B, L, N, C]; this parameter specifies the indices selected from the last dimension, i.e., history_data[:, :, :, CFG.MODEL.FORWARD_FEATURES].
+CFG.MODEL.TARGET_FEATURES = [0] # Features used as output. The size of target data `future_data` is usually [B, L, N, C]; this parameter specifies the indices selected from the last dimension, i.e., future_data[:, :, :, CFG.MODEL.TARGET_FEATURES].
+CFG.MODEL.SETUP_GRAPH = False # Whether to set up the computation graph. Default: False. Implementation of many works (e.g., DCRNN, GTS) acts like TensorFlow, which creates parameters in the first feedforward process.
+CFG.MODEL.DDP_FIND_UNUSED_PARAMETERS = False # Controls the `find_unused_parameters` parameter of `torch.nn.parallel.DistributedDataParallel`. In distributed computing, if there are unused parameters in the forward process, PyTorch usually raises a RuntimeError. In such cases, this parameter should be set to True.
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict() # Metrics settings. Default: None. If not specified, the default metrics will be used.
+
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ }) # Metrics functions, default: MAE, MSE, RMSE, MAPE, WAPE
+CFG.METRICS.TARGET = 'MAE' # Target metric, used for saving best checkpoints.
+CFG.METRICS.BEST = 'min' # Direction of the target metric, used for saving best checkpoints. 'min' or 'max'. Default: 'min'. If 'max', the larger the metric, the better.
+CFG.METRICS.NULL_VAL = NULL_VAL # Null value for the metric. Default: np.nan
+
+############################## Training Configuration ##############################
+
+CFG.TRAIN = EasyDict() # Training settings, must be specified for training.
+
+# Training parameters
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+) # Directory to save checkpoints. Default: 'checkpoints/{MODEL_NAME}/{DATASET_NAME}_{NUM_EPOCHS}_{INPUT_LEN}_{OUTPUT_LEN}'
+CFG.TRAIN.CKPT_SAVE_STRATEGY = None # Checkpoint save strategy. Should be None, an int, a list, or a tuple. None: keep only the latest checkpoint (remove the previous one every epoch). Default: None. Int: save a checkpoint every `CFG.TRAIN.CKPT_SAVE_STRATEGY` epochs. List or tuple: save a checkpoint when the epoch is in `CFG.TRAIN.CKPT_SAVE_STRATEGY`; remove the last checkpoint when the last epoch is not in it.
+CFG.TRAIN.FINETUNE_FROM = None # Checkpoint path for fine-tuning. Default: None. If not specified, the model will be trained from scratch.
+CFG.TRAIN.STRICT_LOAD = True # Whether to strictly load the checkpoint. Default: True.
+
+# Loss function
+CFG.TRAIN.LOSS = masked_mae # Loss function, must be specified for training.
+
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict() # Optimizer settings, must be specified for training.
+CFG.TRAIN.OPTIM.TYPE = 'Adam' # Optimizer type, must be specified for training.
+CFG.TRAIN.OPTIM.PARAM = {
+ 'lr': 0.002,
+ 'weight_decay': 0.0001,
+ } # Optimizer parameters
+
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict() # Learning rate scheduler settings. Default: None. If not specified, the learning rate will not be adjusted during training.
+CFG.TRAIN.LR_SCHEDULER.TYPE = 'MultiStepLR' # Learning rate scheduler type.
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+ 'milestones': [1, 50, 80],
+ 'gamma': 0.5
+ } # Learning rate scheduler parameters
+
+# Early stopping
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = None # Early stopping patience. Default: None. If not specified, early stopping will not be used.
+
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = None # Gradient clipping parameters. Default: None. If not specified, gradient clipping will not be used.
+
+# Curriculum learning settings
+CFG.TRAIN.CL = EasyDict() # Curriculum learning settings. Default: None. If not specified, curriculum learning will not be used.
+CFG.TRAIN.CL.CL_EPOCHS = 1 # Number of epochs for each curriculum learning stage, must be specified if CFG.TRAIN.CL is specified.
+CFG.TRAIN.CL.WARM_EPOCHS = 0 # Number of warm-up epochs. Default: 0
+CFG.TRAIN.CL.PREDICTION_LENGTH = OUTPUT_LEN # Total prediction length, must be specified if CFG.TRAIN.CL is specified.
+CFG.TRAIN.CL.STEP_SIZE = 1 # Step size for the curriculum learning. Default: 1. The current prediction length will be increased by CFG.TRAIN.CL.STEP_SIZE in each stage.
+
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict() # Training dataloader settings, must be specified for training.
+CFG.TRAIN.DATA.PREFETCH = False # Whether to use dataloader with prefetch. See https://github.com/justheuristic/prefetch_generator. Default: False.
+CFG.TRAIN.DATA.BATCH_SIZE = 64 # Batch size for training. Default: 1
+CFG.TRAIN.DATA.SHUFFLE = True # Whether to shuffle the training data. Default: False
+CFG.TRAIN.DATA.COLLATE_FN = None # Collate function for the training dataloader. Default: None
+CFG.TRAIN.DATA.NUM_WORKERS = 0 # Number of workers for the training dataloader. Default: 0
+CFG.TRAIN.DATA.PIN_MEMORY = False # Whether to pin memory for the training dataloader. Default: False
+
+############################## Validation Configuration ##############################
+
+CFG.VAL = EasyDict()
+
+# Validation parameters
+CFG.VAL.INTERVAL = 1 # Conduct validation every `CFG.VAL.INTERVAL` epochs. Default: 1
+CFG.VAL.DATA = EasyDict() # See CFG.TRAIN.DATA
+CFG.VAL.DATA.PREFETCH = False
+CFG.VAL.DATA.BATCH_SIZE = 64
+CFG.VAL.DATA.SHUFFLE = False
+CFG.VAL.DATA.COLLATE_FN = None
+CFG.VAL.DATA.NUM_WORKERS = 0
+CFG.VAL.DATA.PIN_MEMORY = False
+
+############################## Test Configuration ##############################
+
+CFG.TEST = EasyDict()
+
+# Test parameters
+CFG.TEST.INTERVAL = 1 # Conduct test every `CFG.TEST.INTERVAL` epochs. Default: 1
+CFG.TEST.DATA = EasyDict() # See CFG.TRAIN.DATA
+CFG.TEST.DATA.PREFETCH = False
+CFG.TEST.DATA.BATCH_SIZE = 64
+CFG.TEST.DATA.SHUFFLE = False
+CFG.TEST.DATA.COLLATE_FN = None
+CFG.TEST.DATA.NUM_WORKERS = 0
+CFG.TEST.DATA.PIN_MEMORY = False
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
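Once such a config exists, training can be launched programmatically (a minimal sketch; the `launch_training` signature follows `experiments/train.py` in this patch):

```python
import basicts

# Train the MLP example on PEMS08 using the config above;
# gpus='0' makes the first GPU visible.
basicts.launch_training('examples/complete_config.py', '0', node_rank=0)
```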
diff --git a/examples/regular_config.py b/examples/regular_config.py
new file mode 100644
index 00000000..6557544e
--- /dev/null
+++ b/examples/regular_config.py
@@ -0,0 +1,116 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+from .arch import MultiLayerPerceptron as MLP
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08' # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = MLP
+MODEL_PARAM = {
+ 'history_seq_len': INPUT_LEN,
+ 'prediction_seq_len': OUTPUT_LEN,
+ 'hidden_dim': 64
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+ 'input_len': INPUT_LEN,
+ 'output_len': OUTPUT_LEN,
+ # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+ 'dataset_name': DATA_NAME,
+ 'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+ 'norm_each_channel': NORM_EACH_CHANNEL,
+ 'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+ 'MAE': masked_mae,
+ 'MAPE': masked_mape,
+ 'RMSE': masked_rmse,
+ })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+ 'checkpoints',
+ MODEL_ARCH.__name__,
+ '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {
+ 'lr': 0.002,
+ 'weight_decay': 0.0001,
+ }
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
diff --git a/experiments/evaluate.py b/experiments/evaluate.py
new file mode 100644
index 00000000..612155c6
--- /dev/null
+++ b/experiments/evaluate.py
@@ -0,0 +1,23 @@
+import os
+import sys
+sys.path.append(os.path.abspath(__file__ + '/../..'))
+from argparse import ArgumentParser
+
+import basicts
+
+def parse_args():
+ parser = ArgumentParser(description="Evaluate time series forecasting model in BasicTS framework!")
+ parser.add_argument("-cfg", "--config", default="examples/complete_config.py", help="training config")
+ parser.add_argument("-ckpt", "--checkpoint", default="")
+ parser.add_argument("-g", "--gpus", default="0")
+ parser.add_argument("-d", "--device_type", default="gpu")
+ parser.add_argument("-b", "--batch_size", default=None)
+
+ return parser.parse_args()
+
+if __name__ == '__main__':
+
+ args = parse_args()
+
+ basicts.launch_evaluation(cfg=args.config, ckpt_path=args.checkpoint, device_type=args.device_type, gpus=args.gpus, batch_size=args.batch_size)
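The same entry point can be called directly from Python (a sketch; the checkpoint path is hypothetical and depends on `CFG.TRAIN.CKPT_SAVE_DIR` of the training run):

```python
import basicts

basicts.launch_evaluation(
    cfg='examples/complete_config.py',
    ckpt_path='checkpoints/MultiLayerPerceptron/PEMS08_100_12_12/MLP_best_val_MAE.pt',  # hypothetical
    device_type='gpu',
    gpus='0',
    batch_size=None,  # None keeps the batch size from the config
)
```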
diff --git a/experiments/inference.py b/experiments/inference.py
deleted file mode 100644
index 2ee8a2ab..00000000
--- a/experiments/inference.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import os
-import sys
-import time
-sys.path.append(os.path.abspath(__file__ + '/../..'))
-from argparse import ArgumentParser
-
-from basicts import launch_runner, BaseRunner
-
-
-def inference(cfg: dict, runner: BaseRunner, ckpt: str = None, batch_size: int = 1):
- # init logger
- runner.init_logger(logger_name='easytorch-inference', log_file_name='validate_result')
- # init model
- cfg.TEST.DATA.BATCH_SIZE = batch_size
- runner.model.eval()
- runner.setup_graph(cfg=cfg, train=False)
- # load model checkpoint
- runner.load_model(ckpt_path=ckpt)
- # inference & speed
- t0 = time.perf_counter()
- runner.test_pipline(cfg)
- elapsed = time.perf_counter() - t0
-
- print('##############################')
- runner.logger.info('%s: %0.8fs' % ('Speed', elapsed))
- runner.logger.info('# Param: {0}'.format(sum(p.numel() for p in runner.model.parameters() if p.requires_grad)))
-
-if __name__ == '__main__':
- MODEL_NAME = 'AGCRN'
- DATASET_NAME = 'PEMS03'
- BATCH_SIZE = 32
- GPUS = '2'
-
- parser = ArgumentParser(description='Welcome to EasyTorch!')
- parser.add_argument('-m', '--model', default=MODEL_NAME, help='model name')
- parser.add_argument('-d', '--dataset', default=DATASET_NAME, help='dataset name')
- parser.add_argument('-g', '--gpus', default=GPUS, help='visible gpus')
- parser.add_argument('-b', '--batch_size', default=BATCH_SIZE, type=int, help='batch size')
- args = parser.parse_args()
-
- cfg_path = 'baselines/{0}/{1}.py'.format(args.model, args.dataset)
- ckpt_path = 'ckpt/{0}/{1}/{0}_best_val_MAE.pt'.format(args.model, args.dataset)
-
- launch_runner(cfg_path, inference, (ckpt_path, args.batch_size), devices=args.gpus)
diff --git a/experiments/run_m4.py b/experiments/run_m4.py
deleted file mode 100644
index 5d238a79..00000000
--- a/experiments/run_m4.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# 1. Given a method (e.g., MLP), specify whether to save results and whether to keep prediction results.
-# 2. Check whether a config file generator exists, e.g., baselines/MLP/M4_base.py.
-# 3. Loop to obtain each CFG, train, and save the results.
-# 4. Compute the M4 summary.
-# 5. Save the results and delete the prediction files.
-
-import os
-import sys
-import importlib
-from argparse import ArgumentParser
-# TODO: remove it when basicts can be installed by pip
-project_dir = os.path.abspath(__file__ + "/../..")
-sys.path.append(project_dir)
-import torch
-from basicts import launch_training
-from basicts.utils import m4_summary
-from easytorch.utils.logging import logger_initialized
-from basicts.utils.logging import clear_loggers
-
-torch.set_num_threads(3) # aviod high cpu avg usage
-
-
-def parse_args():
- parser = ArgumentParser(description="Run time series forecasting model in BasicTS framework!")
- # parser.add_argument("-c", "--config", default="baselines/STID_M4/M4.py", help="training config template")
- parser.add_argument("-c", "--config", default="baselines/MLP/M4.py", help="training config template")
- parser.add_argument("-g", "--gpus", default="3", help="visible gpus")
- parser.add_argument("--save_evaluation", default=True, help="if save evaluation results")
- parser.add_argument("--save_prediction", default=False, help="if save prediction results")
- return parser.parse_args()
-
-if __name__ == "__main__":
- args = parse_args()
- cfg_generator_file = args.config[:-3].replace("/", ".")
-
- # training
- get_cfg = importlib.import_module(cfg_generator_file, package=project_dir).get_cfg
- seasonal_patterns = ["Yearly", "Quarterly", "Monthly", "Weekly", "Daily", "Hourly"]
- for seasonal_pattern in seasonal_patterns:
- cfg = get_cfg(seasonal_pattern)
- launch_training(cfg, args.gpus)
- clear_loggers()
-
- # evaluating
- save_dir = os.path.abspath(args.config + "/..")
- result = m4_summary(save_dir, project_dir) # pd.DataFrame
-
- # save results
- if not args.save_prediction: os.system("rm -rf {0}/M4_*.npy".format(save_dir))
- if args.save_evaluation: result.to_csv("{0}/M4_summary.csv".format(save_dir), index=False)
- else: os.system("rm {0}/M4_summary.csv".format(save_dir))
-
- # print results
- print(result)
diff --git a/experiments/train.py b/experiments/train.py
index 0ef9f399..1380f038 100644
--- a/experiments/train.py
+++ b/experiments/train.py
@@ -8,18 +8,22 @@
# TODO: remove it when basicts can be installed by pip
sys.path.append(os.path.abspath(__file__ + "/../.."))
import torch
-from basicts import launch_training
+import basicts
-torch.set_num_threads(3) # aviod high cpu avg usage
+torch.set_num_threads(4) # avoid high CPU average usage
def parse_args():
parser = ArgumentParser(description="Run time series forecasting model in BasicTS framework!")
- parser.add_argument("-c", "--cfg", default="baselines/STID/METR-LA.py", help="training config")
+ # parser.add_argument("-c", "--cfg", default="baselines/STID/METR-LA.py", help="training config")
+ parser.add_argument("-c", "--cfg", default="baselines/STEP/STEP_METR-LA2.py", help="training config")
+ # parser.add_argument("-c", "--cfg", default="baselines/DGCRN/PEMS-BAY.py", help="training config")
+ # parser.add_argument("-c", "--cfg", default="baselines/DGCRN/example.py", help="training config")
+ # parser.add_argument("-c", "--cfg", default="examples/complete_config.py", help="training config")
parser.add_argument("-g", "--gpus", default="0", help="visible gpus")
return parser.parse_args()
if __name__ == "__main__":
args = parse_args()
- launch_training(args.cfg, args.gpus)
+ basicts.launch_training(args.cfg, args.gpus, node_rank=0)
diff --git a/requirements.txt b/requirements.txt
index 4ad5ae8b..e5c85a27 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,10 @@
-easy-torch==1.3.2
-easydict==1.10
-pandas==1.3.5
-packaging==23.1
+easy-torch
+easydict
+packaging
+setproctitle
+pandas
+scikit-learn
+tables
+sympy
setuptools==59.5.0
-scipy==1.7.3
-tables==3.7.0
-sympy==1.10.1
-setproctitle==1.3.2
-scikit-learn==1.0.2
+numpy==1.24.4
diff --git a/scripts/data_preparation/BeijingAirQuality/generate_training_data.py b/scripts/data_preparation/BeijingAirQuality/generate_training_data.py
index a74119af..016cc964 100644
--- a/scripts/data_preparation/BeijingAirQuality/generate_training_data.py
+++ b/scripts/data_preparation/BeijingAirQuality/generate_training_data.py
@@ -1,158 +1,103 @@
import os
-import sys
-import pickle
-import argparse
+import json
import numpy as np
import pandas as pd
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from basicts.data.transform import standard_transform
-
-
-def generate_data(args: argparse.Namespace):
- """Preprocess and generate train/valid/test datasets.
-
- Args:
- args (argparse): configurations of preprocessing
- """
-
- target_channel = args.target_channel
- future_seq_len = args.future_seq_len
- history_seq_len = args.history_seq_len
- add_time_of_day = args.tod
- add_day_of_week = args.dow
- output_dir = args.output_dir
- train_ratio = args.train_ratio
- valid_ratio = args.valid_ratio
- data_file_path = args.data_file_path
- steps_per_day = args.steps_per_day
- norm_each_channel = args.norm_each_channel
- if_rescale = not norm_each_channel # if evaluate on rescaled data. see `basicts.runner.base_tsf_runner.BaseTimeSeriesForecastingRunner.build_train_dataset` for details.
-
- # read data
+# Hyperparameters
+dataset_name = 'BeijingAirQuality'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.xlsx'
+graph_file_path = None
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target channel(s)
+add_time_of_day = True # Add time of day as a feature
+add_day_of_week = True # Add day of the week as a feature
+steps_per_day = 24 # Number of time steps per day
+frequency = 1440 // steps_per_day
+domain = 'Beijing air quality'
+feature_description = [domain, 'time of day', 'day of week']
+regular_settings = {
+ 'INPUT_LEN': 336,
+ 'OUTPUT_LEN': 336,
+ 'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+ 'NORM_EACH_CHANNEL': True,
+ 'RESCALE': False,
+ 'METRICS': ['MAE', 'MSE'],
+ 'NULL_VAL': np.nan
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
df = pd.read_excel(data_file_path)
data = df.values
colums = df.columns
data = np.expand_dims(df.values, axis=-1)
data = data[..., target_channel]
- print("raw time series shape: {0}".format(data.shape))
- print("columns: {0}".format(colums))
+ print(f'Raw time series shape: {data.shape}')
+    print(f'Columns: {colums}')
+ return data
- # split data
- l, n, f = data.shape
- num_samples = l - (history_seq_len + future_seq_len) + 1
- train_num = round(num_samples * train_ratio)
- valid_num = round(num_samples * valid_ratio)
- test_num = num_samples - train_num - valid_num
- print("number of training samples:{0}".format(train_num))
- print("number of validation samples:{0}".format(valid_num))
- print("number of test samples:{0}".format(test_num))
+def add_temporal_features(data):
+ '''Add time of day and day of week as features to the data.'''
+ l, n, _ = data.shape
+ feature_list = [data]
- index_list = []
- for t in range(history_seq_len, num_samples + history_seq_len):
- index = (t-history_seq_len, t, t+future_seq_len)
- index_list.append(index)
-
- train_index = index_list[:train_num]
- valid_index = index_list[train_num: train_num + valid_num]
- test_index = index_list[train_num +
- valid_num: train_num + valid_num + test_num]
-
- # normalize data
- scaler = standard_transform
- data_norm = scaler(data, output_dir, train_index, history_seq_len, future_seq_len, norm_each_channel=norm_each_channel)
-
- # add temporal feature
- feature_list = [data_norm]
if add_time_of_day:
- # numerical time_of_day
- tod = [i % steps_per_day /
- steps_per_day for i in range(data_norm.shape[0])]
- tod = np.array(tod)
- tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(tod_tiled)
+ time_of_day = np.array([i % steps_per_day / steps_per_day for i in range(l)])
+ time_of_day_tiled = np.tile(time_of_day, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(time_of_day_tiled)
if add_day_of_week:
- # numerical day_of_week
- dow = [(i // steps_per_day) % 7 / 7 for i in range(data_norm.shape[0])]
- dow = np.array(dow)
- dow_tiled = np.tile(dow, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(dow_tiled)
-
- processed_data = np.concatenate(feature_list, axis=-1)
-
- # save data
- index = {}
- index["train"] = train_index
- index["valid"] = valid_index
- index["test"] = test_index
- with open(output_dir + "/index_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(index, f)
-
- data = {}
- data["processed_data"] = processed_data
- with open(output_dir + "/data_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(data, f)
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 96
- FUTURE_SEQ_LEN = 336
-
- TRAIN_RATIO = 0.6
- VALID_RATIO = 0.2
- TARGET_CHANNEL = [0] # target channel(s)
- STEPS_PER_DAY = 24 # every 1 hour
-
- DATASET_NAME = "BeijingAirQuality" # sampling frequency: every 1 hour
- TOD = True # if add time_of_day feature
- DOW = True # if add day_of_week feature
- DOM = True # if add day_of_month feature
- DOY = True # if add day_of_year feature
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.xlsx".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--steps_per_day", type=int,
- default=STEPS_PER_DAY, help="Sequence Length.")
- parser.add_argument("--tod", type=bool, default=TOD,
- help="Add feature time_of_day.")
- parser.add_argument("--dow", type=bool, default=DOW,
- help="Add feature day_of_week.")
- parser.add_argument("--dom", type=bool, default=DOM,
- help="Add feature day_of_week.")
- parser.add_argument("--doy", type=bool, default=DOY,
- help="Add feature day_of_week.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
+ day_of_week = np.array([(i // steps_per_day) % 7 / 7 for i in range(l)])
+ day_of_week_tiled = np.tile(day_of_week, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_week_tiled)
+
+ data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
+ return data_with_features
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data = load_and_preprocess_data()
+
+ # Add temporal features
+ data_with_features = add_temporal_features(data)
+
+ # Save processed data
+ save_data(data_with_features)
+
+ # Save dataset description
+ save_description(data_with_features)
+
+if __name__ == '__main__':
+ main()
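The `data.dat`/`desc.json` pair written above is consumed as a memmap whose shape comes from the description file, mirroring `load_dataset_data` in `basicts/utils/serialization.py` (a sketch; run after the script above):

```python
import json
import numpy as np

with open('datasets/BeijingAirQuality/desc.json') as f:
    desc = json.load(f)

# Shape is (L, N, C): raw channel plus time-of-day and day-of-week features.
data = np.memmap('datasets/BeijingAirQuality/data.dat', mode='r',
                 dtype=np.float32, shape=tuple(desc['shape']))
print(data.shape)
```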
diff --git a/scripts/data_preparation/CA/generate_training_data.py b/scripts/data_preparation/CA/generate_training_data.py
index bdd33a10..d621c9fb 100644
--- a/scripts/data_preparation/CA/generate_training_data.py
+++ b/scripts/data_preparation/CA/generate_training_data.py
@@ -1,179 +1,136 @@
import os
-import sys
-import shutil
+import json
import pickle
-import argparse
+import shutil
import numpy as np
import pandas as pd
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from basicts.data.transform import standard_transform
-
-# Dataset Description:
-# LargeST: A Benchmark Dataset for Large-Scale Traffic Forecasting.
-
-
-def generate_data(args: argparse.Namespace):
- """Preprocess and generate train/valid/test datasets.
-
- Args:
- args (argparse): configurations of preprocessing
- """
-
- target_channel = args.target_channel
- future_seq_len = args.future_seq_len
- history_seq_len = args.history_seq_len
- add_time_of_day = args.tod
- add_day_of_week = args.dow
- add_day_of_month = args.dom
- add_day_of_year = args.doy
- output_dir = args.output_dir
- train_ratio = args.train_ratio
- valid_ratio = args.valid_ratio
- data_file_path = args.data_file_path
- graph_file_path = args.graph_file_path
- norm_each_channel = args.norm_each_channel
- if_rescale = not norm_each_channel # if evaluate on rescaled data. see `basicts.runner.base_tsf_runner.BaseTimeSeriesForecastingRunner.build_train_dataset` for details.
-
- # read data
+# Hyperparameters
+dataset_name = 'CA'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.h5'
+graph_file_path = f'datasets/raw_data/{dataset_name}/adj_{dataset_name}.npy'
+meta_file_path = f'datasets/raw_data/{dataset_name}/meta_{dataset_name}.csv'
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target traffic flow channel
+add_time_of_day = True # Add time of day as a feature
+add_day_of_week = True # Add day of the week as a feature
+add_day_of_month = False # Add day of the month as a feature
+add_day_of_year = False # Add day of the year as a feature
+steps_per_day = 96 # Number of time steps per day
+frequency = 1440 // steps_per_day
+domain = 'traffic flow'
+feature_description = [domain, 'time of day', 'day of week']
+regular_settings = {
+ 'INPUT_LEN': 12,
+ 'OUTPUT_LEN': 12,
+ 'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+ 'NORM_EACH_CHANNEL': False,
+ 'RESCALE': True,
+ 'METRICS': ['MAE', 'RMSE', 'MAPE'],
+ 'NULL_VAL': 0.0
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
df = pd.read_hdf(data_file_path)
data = np.expand_dims(df.values, axis=-1)
-
data = data[..., target_channel]
- print("raw time series shape: {0}".format(data.shape))
-
- # split data
- l, n, f = data.shape
- num_samples = l - (history_seq_len + future_seq_len) + 1
- train_num = round(num_samples * train_ratio)
- valid_num = round(num_samples * valid_ratio)
- test_num = num_samples - train_num - valid_num
- print("number of training samples:{0}".format(train_num))
- print("number of validation samples:{0}".format(valid_num))
- print("number of test samples:{0}".format(test_num))
-
- index_list = []
- for t in range(history_seq_len, num_samples + history_seq_len):
- index = (t-history_seq_len, t, t+future_seq_len)
- index_list.append(index)
-
- train_index = index_list[:train_num]
- valid_index = index_list[train_num: train_num + valid_num]
- test_index = index_list[train_num +
- valid_num: train_num + valid_num + test_num]
-
- # normalize data
- scaler = standard_transform
- data_norm = scaler(data, output_dir, train_index, history_seq_len, future_seq_len, norm_each_channel=norm_each_channel)
-
- # add temporal feature
- feature_list = [data_norm]
+ print(f'Raw time series shape: {data.shape}')
+ return data, df
+
+def add_temporal_features(data, df):
+    '''Add temporal features (time of day, day of week; optionally day of month and day of year) to the data.'''
+ _, n, _ = data.shape
+ feature_list = [data]
+
if add_time_of_day:
- # numerical time_of_day
- tod = (
- df.index.values - df.index.values.astype("datetime64[D]")) / np.timedelta64(1, "D")
- tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(tod_tiled)
+ time_of_day = (df.index.values - df.index.values.astype('datetime64[D]')) / np.timedelta64(1, 'D')
+ time_of_day_tiled = np.tile(time_of_day, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(time_of_day_tiled)
if add_day_of_week:
- # numerical day_of_week
- dow = df.index.dayofweek / 7
- dow_tiled = np.tile(dow, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(dow_tiled)
+ day_of_week = df.index.dayofweek / 7
+ day_of_week_tiled = np.tile(day_of_week, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_week_tiled)
if add_day_of_month:
# numerical day_of_month
- dom = (df.index.day - 1 ) / 31 # df.index.day starts from 1. We need to minus 1 to make it start from 0.
- dom_tiled = np.tile(dom, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(dom_tiled)
+        day_of_month = (df.index.day - 1) / 31 # df.index.day starts from 1; subtract 1 to make it start from 0.
+ day_of_month_tiled = np.tile(day_of_month, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_month_tiled)
if add_day_of_year:
# numerical day_of_year
- doy = (df.index.dayofyear - 1) / 366 # df.index.month starts from 1. We need to minus 1 to make it start from 0.
- doy_tiled = np.tile(doy, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(doy_tiled)
-
- processed_data = np.concatenate(feature_list, axis=-1)
-
- # save data
- index = {}
- index["train"] = train_index
- index["valid"] = valid_index
- index["test"] = test_index
- with open(output_dir + "/index_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(index, f)
-
- data = {}
- data["processed_data"] = processed_data
- with open(output_dir + "/data_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(data, f)
- # copy adj
+        day_of_year = (df.index.dayofyear - 1) / 366 # df.index.dayofyear starts from 1; subtract 1 to make it start from 0.
+ day_of_year_tiled = np.tile(day_of_year, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_year_tiled)
+
+ data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
+ return data_with_features
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_graph():
+ '''Save the adjacency matrix to the output directory.'''
+ output_graph_path = os.path.join(output_dir, 'adj_mx.pkl')
adj_mx = np.load(graph_file_path)
- with open(output_dir + "/adj_mx.pkl", "wb") as f:
+    with open(output_graph_path, 'wb') as f:
pickle.dump(adj_mx, f)
- # copy adj meta data
- shutil.copyfile(graph_file_path, output_dir + "/adj_meta.csv")
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 12
- FUTURE_SEQ_LEN = 12
-
- TRAIN_RATIO = 0.6
- VALID_RATIO = 0.2
- TARGET_CHANNEL = [0] # target channel(s)
-
- DATASET_NAME = "CA"
- TOD = True # if add time_of_day feature
- DOW = True # if add day_of_week feature
- DOM = True # if add day_of_month feature
- DOY = True # if add day_of_year feature
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.h5".format(DATASET_NAME)
- GRAPH_FILE_PATH = "datasets/raw_data/{0}/adj_{0}.npy".format(DATASET_NAME)
- GRAPH_METE_PATH = "datasets/raw_data/{0}/meta_{0}.csv".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--graph_file_path", type=str,
- default=GRAPH_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--tod", type=bool, default=TOD,
- help="Add feature time_of_day.")
- parser.add_argument("--dow", type=bool, default=DOW,
- help="Add feature day_of_week.")
- parser.add_argument("--dom", type=bool, default=DOM,
- help="Add feature day_of_week.")
- parser.add_argument("--doy", type=bool, default=DOY,
- help="Add feature day_of_week.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
+ print(f'Adjacency matrix saved to {output_graph_path}')
+
+def save_meta_data():
+ '''Save the meta data to the output directory'''
+ output_meta_data_path = os.path.join(output_dir, 'meta.csv')
+ shutil.copyfile(meta_file_path, output_meta_data_path)
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data, df = load_and_preprocess_data()
+
+ # Add temporal features
+ data_with_features = add_temporal_features(data, df)
+
+ # Save processed data
+ save_data(data_with_features)
+
+ # Copy and save adjacency matrix
+ save_graph()
+
+ # Copy and save meta data
+ save_meta_data()
+
+ # Save dataset description
+ save_description(data_with_features)
+
+if __name__ == '__main__':
+ main()
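The `np.tile(...).transpose(...)` idiom used by `add_temporal_features` broadcasts a length-L temporal signal to every node; a small self-contained check of the shapes:

```python
import numpy as np

l, n = 5, 3
tod = np.arange(l) / l                     # (L,)
tiled = np.tile(tod, [1, n, 1])            # (1, N, L): tile prepends singleton dims
feature = tiled.transpose((2, 1, 0))       # (L, N, 1): one feature column per node
print(feature.shape)                       # (5, 3, 1)
print(np.allclose(feature[:, 0, 0], tod))  # True: identical series at every node
```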
diff --git a/scripts/data_preparation/ETTh1/generate_training_data.py b/scripts/data_preparation/ETTh1/generate_training_data.py
index 889b3aad..8991825a 100644
--- a/scripts/data_preparation/ETTh1/generate_training_data.py
+++ b/scripts/data_preparation/ETTh1/generate_training_data.py
@@ -1,81 +1,53 @@
import os
-import sys
-import pickle
-import argparse
+import json
import numpy as np
import pandas as pd
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from basicts.data.transform import standard_transform
-
-
-def generate_data(args: argparse.Namespace):
- """Preprocess and generate train/valid/test datasets.
-
- Args:
- args (argparse): configurations of preprocessing
- """
-
- target_channel = args.target_channel
- future_seq_len = args.future_seq_len
- history_seq_len = args.history_seq_len
- add_time_of_day = args.tod
- add_day_of_week = args.dow
- add_day_of_month = args.dom
- add_day_of_year = args.doy
- output_dir = args.output_dir
- train_ratio = args.train_ratio
- valid_ratio = args.valid_ratio
- data_file_path = args.data_file_path
- steps_per_day = args.steps_per_day
- norm_each_channel = args.norm_each_channel
- if_rescale = not norm_each_channel # if evaluate on rescaled data. see `basicts.runner.base_tsf_runner.BaseTimeSeriesForecastingRunner.build_train_dataset` for details.
-
- # read data
+# Hyperparameters
+dataset_name = 'ETTh1'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.csv'
+graph_file_path = None
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target channel(s)
+add_time_of_day = True # Add time of day as a feature
+add_day_of_week = True # Add day of the week as a feature
+add_day_of_month = True # Add day of the month as a feature
+add_day_of_year = True # Add day of the year as a feature
+steps_per_day = 24 # Number of time steps per day
+frequency = 1440 // steps_per_day
+domain = 'electricity transformer temperature'
+feature_description = [domain, 'time of day', 'day of week', 'day of month', 'day of year']
+regular_settings = {
+ 'INPUT_LEN': 336,
+ 'OUTPUT_LEN': 336,
+ 'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+ 'NORM_EACH_CHANNEL': True,
+ 'RESCALE': False,
+ 'METRICS': ['MAE', 'MSE'],
+ 'NULL_VAL': np.nan
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
df = pd.read_csv(data_file_path)
- # Following many previous works (e.g., Informer, Autoformer), we use the first 20 months of data, i.e., the first 14400 rows.
df = df.iloc[:20*30*24]
- df_index = pd.to_datetime(df["date"].values, format="%Y-%m-%d %H:%M").to_numpy()
+ df_index = pd.to_datetime(df['date'].values, format='%Y-%m-%d %H:%M').to_numpy()
df = df[df.columns[1:]]
df.index = df_index
-
data = np.expand_dims(df.values, axis=-1)
-
data = data[..., target_channel]
- print("raw time series shape: {0}".format(data.shape))
-
- # split data
- l, n, f = data.shape
- num_samples = l - (history_seq_len + future_seq_len) + 1
- train_num = round(num_samples * train_ratio)
- valid_num = round(num_samples * valid_ratio)
- test_num = num_samples - train_num - valid_num
- print("number of training samples:{0}".format(train_num))
- print("number of validation samples:{0}".format(valid_num))
- print("number of test samples:{0}".format(test_num))
-
- index_list = []
- for t in range(history_seq_len, num_samples + history_seq_len):
- index = (t-history_seq_len, t, t+future_seq_len)
- index_list.append(index)
-
- train_index = index_list[:train_num]
- valid_index = index_list[train_num: train_num + valid_num]
- test_index = index_list[train_num +
- valid_num: train_num + valid_num + test_num]
-
- # normalize data
- scaler = standard_transform
- # Following related works (e.g. informer and autoformer), we normalize each channel separately.
- data_norm = scaler(data, output_dir, train_index, history_seq_len, future_seq_len, norm_each_channel=norm_each_channel)
-
- # add temporal feature
- feature_list = [data_norm]
+ print(f'Raw time series shape: {data.shape}')
+ return data, df
+
+def add_temporal_features(data, df):
+ '''Add the enabled temporal features (time of day, day of week, day of month, day of year) to the data.'''
+ l, n, _ = data.shape
+ feature_list = [data]
+
if add_time_of_day:
# numerical time_of_day
- tod = [i % steps_per_day / steps_per_day for i in range(data_norm.shape[0])]
+ tod = [i % steps_per_day / steps_per_day for i in range(l)]
tod = np.array(tod)
tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(tod_tiled)
@@ -98,78 +70,52 @@ def generate_data(args: argparse.Namespace):
doy_tiled = np.tile(doy, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(doy_tiled)
- processed_data = np.concatenate(feature_list, axis=-1)
-
- # save data
- index = {}
- index["train"] = train_index
- index["valid"] = valid_index
- index["test"] = test_index
- with open(output_dir + "/index_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(index, f)
-
- data = {}
- data["processed_data"] = processed_data
- with open(output_dir + "/data_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(data, f)
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 96
- FUTURE_SEQ_LEN = 336
-
- TRAIN_RATIO = 0.6
- VALID_RATIO = 0.2
- TARGET_CHANNEL = [0] # target channel(s)
- STEPS_PER_DAY = 24 # every 1 hour
-
- DATASET_NAME = "ETTh1" # sampling frequency: every 1 hour
- TOD = True # if add time_of_day feature
- DOW = True # if add day_of_week feature
- DOM = True # if add day_of_month feature
- DOY = True # if add day_of_year feature
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.csv".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--steps_per_day", type=int,
- default=STEPS_PER_DAY, help="Sequence Length.")
- parser.add_argument("--tod", type=bool, default=TOD,
- help="Add feature time_of_day.")
- parser.add_argument("--dow", type=bool, default=DOW,
- help="Add feature day_of_week.")
- parser.add_argument("--dom", type=bool, default=DOM,
- help="Add feature day_of_week.")
- parser.add_argument("--doy", type=bool, default=DOY,
- help="Add feature day_of_week.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
+ data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
+ return data_with_features
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings,
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data, df = load_and_preprocess_data()
+
+ # Add temporal features
+ data_with_features = add_temporal_features(data, df)
+
+ # Save processed data
+ save_data(data_with_features)
+
+ # Save dataset description
+ save_description(data_with_features)
+
+if __name__ == '__main__':
+ main()
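
The `save_data`/`save_description` pair above fixes the on-disk contract for every dataset: a raw float32 memmap plus a JSON sidecar recording its shape. A minimal read-back sketch, assuming only the file names and keys written by the script (`data.dat`, `desc.json`, `shape`; the directory is whatever `output_dir` resolved to):

    import json
    import os

    import numpy as np

    dataset_dir = 'datasets/ETTh1'  # the script's output_dir
    with open(os.path.join(dataset_dir, 'desc.json')) as f:
        desc = json.load(f)
    shape = tuple(desc['shape'])  # JSON stores the shape as a list; memmap needs a tuple
    data = np.memmap(os.path.join(dataset_dir, 'data.dat'),
                     dtype='float32', mode='r', shape=shape)
    print(data.shape, desc['feature_description'])
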
diff --git a/scripts/data_preparation/ETTh2/generate_training_data.py b/scripts/data_preparation/ETTh2/generate_training_data.py
index 320fac22..0034997a 100644
--- a/scripts/data_preparation/ETTh2/generate_training_data.py
+++ b/scripts/data_preparation/ETTh2/generate_training_data.py
@@ -1,68 +1,121 @@
import os
-import sys
-import argparse
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from scripts.data_preparation.ETTh1.generate_training_data import generate_data
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 336
- FUTURE_SEQ_LEN = 336
-
- TRAIN_RATIO = 0.6
- VALID_RATIO = 0.2
- TARGET_CHANNEL = [0] # target channel(s)
- STEPS_PER_DAY = 24 # every 1 hour
-
- DATASET_NAME = "ETTh2" # sampling frequency: every 1 hour
- TOD = True # if add time_of_day feature
- DOW = True # if add day_of_week feature
- DOM = True # if add day_of_month feature
- DOY = True # if add day_of_year feature
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.csv".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--steps_per_day", type=int,
- default=STEPS_PER_DAY, help="Sequence Length.")
- parser.add_argument("--tod", type=bool, default=TOD,
- help="Add feature time_of_day.")
- parser.add_argument("--dow", type=bool, default=DOW,
- help="Add feature day_of_week.")
- parser.add_argument("--dom", type=bool, default=DOM,
- help="Add feature day_of_week.")
- parser.add_argument("--doy", type=bool, default=DOY,
- help="Add feature day_of_week.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
+import json
+
+import numpy as np
+import pandas as pd
+
+# Hyperparameters
+dataset_name = 'ETTh2'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.csv'
+graph_file_path = None
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target channel(s)
+add_time_of_day = True # Add time of day as a feature
+add_day_of_week = True # Add day of the week as a feature
+add_day_of_month = True # Add day of the month as a feature
+add_day_of_year = True # Add day of the year as a feature
+steps_per_day = 24 # Number of time steps per day
+frequency = 1440 // steps_per_day
+domain = 'electricity transformer temperature'
+feature_description = [domain, 'time of day', 'day of week', 'day of month', 'day of year']
+regular_settings = {
+ 'INPUT_LEN': 336,
+ 'OUTPUT_LEN': 336,
+ 'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+ 'NORM_EACH_CHANNEL': True,
+ 'RESCALE': False,
+ 'METRICS': ['MAE', 'MSE'],
+ 'NULL_VAL': np.nan
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
+ df = pd.read_csv(data_file_path)
+ df = df.iloc[:20*30*24] # keep the first 20 months of hourly data, following Informer/Autoformer
+ df_index = pd.to_datetime(df['date'].values, format='%Y-%m-%d %H:%M').to_numpy()
+ df = df[df.columns[1:]]
+ df.index = df_index
+ data = np.expand_dims(df.values, axis=-1)
+ data = data[..., target_channel]
+ print(f'Raw time series shape: {data.shape}')
+ return data, df
+
+def add_temporal_features(data, df):
+ '''Add the enabled temporal features (time of day, day of week, day of month, day of year) to the data.'''
+ l, n, _ = data.shape
+ feature_list = [data]
+
+ if add_time_of_day:
+ # numerical time_of_day
+ tod = [i % steps_per_day / steps_per_day for i in range(l)]
+ tod = np.array(tod)
+ tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(tod_tiled)
+
+ if add_day_of_week:
+ # numerical day_of_week
+ dow = df.index.dayofweek / 7
+ dow_tiled = np.tile(dow, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(dow_tiled)
+
+ if add_day_of_month:
+ # numerical day_of_month
+ dom = (df.index.day - 1) / 31 # df.index.day starts from 1; subtract 1 so it starts from 0.
+ dom_tiled = np.tile(dom, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(dom_tiled)
+
+ if add_day_of_year:
+ # numerical day_of_year
+ doy = (df.index.dayofyear - 1) / 366 # df.index.dayofyear starts from 1; subtract 1 so it starts from 0.
+ doy_tiled = np.tile(doy, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(doy_tiled)
+
+ data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
+ return data_with_features
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data, df = load_and_preprocess_data()
+
+ # Add temporal features
+ data_with_features = add_temporal_features(data, df)
+
+ # Save processed data
+ save_data(data_with_features)
+
+ # Save dataset description
+ save_description(data_with_features)
+
+if __name__ == '__main__':
+ main()
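
All four temporal encodings above map into [0, 1). A toy check on a synthetic hourly index, illustrative only (the dates are made up):

    import numpy as np
    import pandas as pd

    idx = pd.date_range('2016-07-01', periods=48, freq='h')
    steps_per_day = 24
    tod = np.arange(len(idx)) % steps_per_day / steps_per_day  # position within the day
    dow = idx.dayofweek / 7                                    # Monday=0 -> 0.0
    dom = (idx.day - 1) / 31                                   # day 1 -> 0.0
    doy = (idx.dayofyear - 1) / 366                            # Jan 1 -> 0.0
    print(tod[:3], dow[:3], dom[:3], doy[:3])
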
diff --git a/scripts/data_preparation/ETTm1/generate_training_data.py b/scripts/data_preparation/ETTm1/generate_training_data.py
index 7bc766e0..73c6d11b 100644
--- a/scripts/data_preparation/ETTm1/generate_training_data.py
+++ b/scripts/data_preparation/ETTm1/generate_training_data.py
@@ -1,81 +1,53 @@
import os
-import sys
-import pickle
-import argparse
+import json
import numpy as np
import pandas as pd
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from basicts.data.transform import standard_transform
-
-
-def generate_data(args: argparse.Namespace):
- """Preprocess and generate train/valid/test datasets.
-
- Args:
- args (argparse): configurations of preprocessing
- """
-
- target_channel = args.target_channel
- future_seq_len = args.future_seq_len
- history_seq_len = args.history_seq_len
- add_time_of_day = args.tod
- add_day_of_week = args.dow
- add_day_of_month = args.dom
- add_day_of_year = args.doy
- output_dir = args.output_dir
- train_ratio = args.train_ratio
- valid_ratio = args.valid_ratio
- data_file_path = args.data_file_path
- steps_per_day = args.steps_per_day
- norm_each_channel = args.norm_each_channel
- if_rescale = not norm_each_channel # if evaluate on rescaled data. see `basicts.runner.base_tsf_runner.BaseTimeSeriesForecastingRunner.build_train_dataset` for details.
-
- # read data
+# Hyperparameters
+dataset_name = 'ETTm1'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.csv'
+graph_file_path = None
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target channel(s)
+add_time_of_day = True # Add time of day as a feature
+add_day_of_week = True # Add day of the week as a feature
+add_day_of_month = True # Add day of the month as a feature
+add_day_of_year = True # Add day of the year as a feature
+steps_per_day = 96 # Number of time steps per day
+frequency = 1440 // steps_per_day
+domain = 'electricity transformer temperature'
+feature_description = [domain, 'time of day', 'day of week', 'day of month', 'day of year']
+regular_settings = {
+ 'INPUT_LEN': 336,
+ 'OUTPUT_LEN': 336,
+ 'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+ 'NORM_EACH_CHANNEL': True,
+ 'RESCALE': False,
+ 'METRICS': ['MAE', 'MSE'],
+ 'NULL_VAL': np.nan
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
df = pd.read_csv(data_file_path)
- # Following many previous works (e.g., Informer, Autoformer), we use the first 20 months of data, i.e., the first 14400 rows.
df = df.iloc[:20*30*24*4]
- df_index = pd.to_datetime(df["date"].values, format="%Y-%m-%d %H:%M").to_numpy()
+ df_index = pd.to_datetime(df['date'].values, format='%Y-%m-%d %H:%M').to_numpy()
df = df[df.columns[1:]]
df.index = df_index
-
data = np.expand_dims(df.values, axis=-1)
-
data = data[..., target_channel]
- print("raw time series shape: {0}".format(data.shape))
-
- # split data
- l, n, f = data.shape
- num_samples = l - (history_seq_len + future_seq_len) + 1
- train_num = round(num_samples * train_ratio)
- valid_num = round(num_samples * valid_ratio)
- test_num = num_samples - train_num - valid_num
- print("number of training samples:{0}".format(train_num))
- print("number of validation samples:{0}".format(valid_num))
- print("number of test samples:{0}".format(test_num))
-
- index_list = []
- for t in range(history_seq_len, num_samples + history_seq_len):
- index = (t-history_seq_len, t, t+future_seq_len)
- index_list.append(index)
-
- train_index = index_list[:train_num]
- valid_index = index_list[train_num: train_num + valid_num]
- test_index = index_list[train_num +
- valid_num: train_num + valid_num + test_num]
-
- # normalize data
- scaler = standard_transform
- # Following related works (e.g. informer and autoformer), we normalize each channel separately.
- data_norm = scaler(data, output_dir, train_index, history_seq_len, future_seq_len, norm_each_channel=norm_each_channel)
-
- # add temporal feature
- feature_list = [data_norm]
+ print(f'Raw time series shape: {data.shape}')
+ return data, df
+
+def add_temporal_features(data, df):
+ '''Add the enabled temporal features (time of day, day of week, day of month, day of year) to the data.'''
+ l, n, _ = data.shape
+ feature_list = [data]
+
if add_time_of_day:
# numerical time_of_day
- tod = [i % steps_per_day / steps_per_day for i in range(data_norm.shape[0])]
+ tod = [i % steps_per_day / steps_per_day for i in range(l)]
tod = np.array(tod)
tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(tod_tiled)
@@ -98,77 +70,52 @@ def generate_data(args: argparse.Namespace):
doy_tiled = np.tile(doy, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(doy_tiled)
- processed_data = np.concatenate(feature_list, axis=-1)
-
- # save data
- index = {}
- index["train"] = train_index
- index["valid"] = valid_index
- index["test"] = test_index
- with open(output_dir + "/index_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(index, f)
-
- data = {}
- data["processed_data"] = processed_data
- with open(output_dir + "/data_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(data, f)
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 96
- FUTURE_SEQ_LEN = 336
-
- TRAIN_RATIO = 0.6
- VALID_RATIO = 0.2
- TARGET_CHANNEL = [0] # target channel(s)
- STEPS_PER_DAY = 24 * 4 # every 15 minutes
-
- DATASET_NAME = "ETTm1" # sampling frequency: every 1 hour
- TOD = True # if add time_of_day feature
- DOW = True # if add day_of_week feature
- DOM = True # if add day_of_month feature
- DOY = True # if add day_of_year feature
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.csv".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--steps_per_day", type=int,
- default=STEPS_PER_DAY, help="Sequence Length.")
- parser.add_argument("--tod", type=bool, default=TOD,
- help="Add feature time_of_day.")
- parser.add_argument("--dow", type=bool, default=DOW,
- help="Add feature day_of_week.")
- parser.add_argument("--dom", type=bool, default=DOM,
- help="Add feature day_of_week.")
- parser.add_argument("--doy", type=bool, default=DOY,
- help="Add feature day_of_week.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
+ data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
+ return data_with_features
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data, df = load_and_preprocess_data()
+
+ # Add temporal features
+ data_with_features = add_temporal_features(data, df)
+
+ # Save processed data
+ save_data(data_with_features)
+
+ # Save dataset description
+ save_description(data_with_features)
+
+if __name__ == '__main__':
+ main()
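
For the 15-minute ETTm datasets the constants above are related by simple arithmetic; a quick sanity check (pure arithmetic, no assumptions beyond the values in the script):

    steps_per_day = 96                 # one sample every 15 minutes
    frequency = 1440 // steps_per_day  # minutes between consecutive steps
    assert frequency == 15
    rows = 20 * 30 * 24 * 4            # first 20 months at 96 steps per day
    assert rows == 57600
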
diff --git a/scripts/data_preparation/ETTm2/generate_training_data.py b/scripts/data_preparation/ETTm2/generate_training_data.py
index 46c2e65a..e78905a2 100644
--- a/scripts/data_preparation/ETTm2/generate_training_data.py
+++ b/scripts/data_preparation/ETTm2/generate_training_data.py
@@ -1,68 +1,121 @@
import os
-import sys
-import argparse
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from scripts.data_preparation.ETTm1.generate_training_data import generate_data
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 1680
- FUTURE_SEQ_LEN = 336
-
- TRAIN_RATIO = 0.6
- VALID_RATIO = 0.2
- TARGET_CHANNEL = [0] # target channel(s)
- STEPS_PER_DAY = 24 * 4 # every 15 minutes
-
- DATASET_NAME = "ETTm2" # sampling frequency: every 1 hour
- TOD = True # if add time_of_day feature
- DOW = True # if add day_of_week feature
- DOM = True # if add day_of_month feature
- DOY = True # if add day_of_year feature
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.csv".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--steps_per_day", type=int,
- default=STEPS_PER_DAY, help="Sequence Length.")
- parser.add_argument("--tod", type=bool, default=TOD,
- help="Add feature time_of_day.")
- parser.add_argument("--dow", type=bool, default=DOW,
- help="Add feature day_of_week.")
- parser.add_argument("--dom", type=bool, default=DOM,
- help="Add feature day_of_week.")
- parser.add_argument("--doy", type=bool, default=DOY,
- help="Add feature day_of_week.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
\ No newline at end of file
+import json
+
+import numpy as np
+import pandas as pd
+
+# Hyperparameters
+dataset_name = 'ETTm2'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.csv'
+graph_file_path = None
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target channel(s)
+add_time_of_day = True # Add time of day as a feature
+add_day_of_week = True # Add day of the week as a feature
+add_day_of_month = True # Add day of the month as a feature
+add_day_of_year = True # Add day of the year as a feature
+steps_per_day = 96 # Number of time steps per day
+frequency = 1440 // steps_per_day
+domain = 'electricity transformer temperature'
+feature_description = [domain, 'time of day', 'day of week', 'day of month', 'day of year']
+regular_settings = {
+ 'INPUT_LEN': 336,
+ 'OUTPUT_LEN': 336,
+ 'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+ 'NORM_EACH_CHANNEL': True,
+ 'RESCALE': False,
+ 'METRICS': ['MAE', 'MSE'],
+ 'NULL_VAL': np.nan
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
+ df = pd.read_csv(data_file_path)
+ df = df.iloc[:20*30*24*4] # keep the first 20 months of 15-minute data, following Informer/Autoformer
+ df_index = pd.to_datetime(df['date'].values, format='%Y-%m-%d %H:%M').to_numpy()
+ df = df[df.columns[1:]]
+ df.index = df_index
+ data = np.expand_dims(df.values, axis=-1)
+ data = data[..., target_channel]
+ print(f'Raw time series shape: {data.shape}')
+ return data, df
+
+def add_temporal_features(data, df):
+ '''Add the enabled temporal features (time of day, day of week, day of month, day of year) to the data.'''
+ l, n, _ = data.shape
+ feature_list = [data]
+
+ if add_time_of_day:
+ # numerical time_of_day
+ tod = [i % steps_per_day / steps_per_day for i in range(l)]
+ tod = np.array(tod)
+ tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(tod_tiled)
+
+ if add_day_of_week:
+ # numerical day_of_week
+ dow = df.index.dayofweek / 7
+ dow_tiled = np.tile(dow, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(dow_tiled)
+
+ if add_day_of_month:
+ # numerical day_of_month
+ dom = (df.index.day - 1) / 31 # df.index.day starts from 1; subtract 1 so it starts from 0.
+ dom_tiled = np.tile(dom, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(dom_tiled)
+
+ if add_day_of_year:
+ # numerical day_of_year
+ doy = (df.index.dayofyear - 1) / 366 # df.index.dayofyear starts from 1; subtract 1 so it starts from 0.
+ doy_tiled = np.tile(doy, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(doy_tiled)
+
+ data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
+ return data_with_features
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data, df = load_and_preprocess_data()
+
+ # Add temporal features
+ data_with_features = add_temporal_features(data, df)
+
+ # Save processed data
+ save_data(data_with_features)
+
+ # Save dataset description
+ save_description(data_with_features)
+
+if __name__ == '__main__':
+ main()
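
Each feature builder relies on the same tiling idiom to lift a length-L vector into the L x N x 1 layout expected by `np.concatenate`. A minimal demonstration with made-up sizes:

    import numpy as np

    l, n = 5, 3
    vec = np.arange(l) / l                               # one value per time step, shape (l,)
    feat = np.tile(vec, [1, n, 1]).transpose((2, 1, 0))  # (l,) -> (1, n, l) -> (l, n, 1)
    assert feat.shape == (l, n, 1)
    assert np.allclose(feat[:, 0, 0], vec)               # every node carries the same series
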
diff --git a/scripts/data_preparation/Electricity/generate_training_data.py b/scripts/data_preparation/Electricity/generate_training_data.py
index c1920cb5..464498f5 100644
--- a/scripts/data_preparation/Electricity/generate_training_data.py
+++ b/scripts/data_preparation/Electricity/generate_training_data.py
@@ -1,79 +1,52 @@
import os
-import sys
-import pickle
-import argparse
+import json
import numpy as np
import pandas as pd
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from basicts.data.transform import standard_transform
-
-
-def generate_data(args: argparse.Namespace):
- """Preprocess and generate train/valid/test datasets.
-
- Args:
- args (argparse): configurations of preprocessing
- """
-
- target_channel = args.target_channel
- future_seq_len = args.future_seq_len
- history_seq_len = args.history_seq_len
- add_time_of_day = args.tod
- add_day_of_week = args.dow
- add_day_of_month = args.dom
- add_day_of_year = args.doy
- output_dir = args.output_dir
- train_ratio = args.train_ratio
- valid_ratio = args.valid_ratio
- data_file_path = args.data_file_path
- steps_per_day = args.steps_per_day
- norm_each_channel = args.norm_each_channel
- if_rescale = not norm_each_channel # if evaluate on rescaled data. see `basicts.runner.base_tsf_runner.BaseTimeSeriesForecastingRunner.build_train_dataset` for details.
-
- # read data
+# Hyperparameters
+dataset_name = 'Electricity'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.csv'
+graph_file_path = None
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target channel(s)
+add_time_of_day = True # Add time of day as a feature
+add_day_of_week = True # Add day of the week as a feature
+add_day_of_month = True # Add day of the month as a feature
+add_day_of_year = True # Add day of the year as a feature
+steps_per_day = 24 # Number of time steps per day
+frequency = 1440 // steps_per_day
+domain = 'electricity consumption'
+feature_description = [domain, 'time of day', 'day of week', 'day of month', 'day of year']
+regular_settings = {
+ 'INPUT_LEN': 336,
+ 'OUTPUT_LEN': 336,
+ 'TRAIN_VAL_TEST_RATIO': [0.7, 0.1, 0.2],
+ 'NORM_EACH_CHANNEL': True,
+ 'RESCALE': False,
+ 'METRICS': ['MAE', 'MSE'],
+ 'NULL_VAL': np.nan
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
df = pd.read_csv(data_file_path)
- df_index = pd.to_datetime(df["date"].values, format="%Y-%m-%d %H:%M").to_numpy()
+ df_index = pd.to_datetime(df['date'].values, format='%Y-%m-%d %H:%M').to_numpy()
df = df[df.columns[1:]]
df.index = df_index
-
data = np.expand_dims(df.values, axis=-1)
-
data = data[..., target_channel]
- print("raw time series shape: {0}".format(data.shape))
-
- # split data
- l, n, f = data.shape
- num_samples = l - (history_seq_len + future_seq_len) + 1
- train_num = round(num_samples * train_ratio)
- valid_num = round(num_samples * valid_ratio)
- test_num = num_samples - train_num - valid_num
- print("number of training samples:{0}".format(train_num))
- print("number of validation samples:{0}".format(valid_num))
- print("number of test samples:{0}".format(test_num))
-
- index_list = []
- for t in range(history_seq_len, num_samples + history_seq_len):
- index = (t-history_seq_len, t, t+future_seq_len)
- index_list.append(index)
-
- train_index = index_list[:train_num]
- valid_index = index_list[train_num: train_num + valid_num]
- test_index = index_list[train_num +
- valid_num: train_num + valid_num + test_num]
-
- # normalize data
- scaler = standard_transform
- # Following related works (e.g. informer and autoformer), we normalize each channel separately.
- data_norm = scaler(data, output_dir, train_index, history_seq_len, future_seq_len, norm_each_channel=norm_each_channel)
-
- # add temporal feature
- feature_list = [data_norm]
+ print(f'Raw time series shape: {data.shape}')
+ return data, df
+
+def add_temporal_features(data, df):
+ '''Add the enabled temporal features (time of day, day of week, day of month, day of year) to the data.'''
+ l, n, _ = data.shape
+ feature_list = [data]
+
if add_time_of_day:
# numerical time_of_day
- tod = [i % steps_per_day / steps_per_day for i in range(data_norm.shape[0])]
+ tod = [i % steps_per_day / steps_per_day for i in range(l)]
tod = np.array(tod)
tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(tod_tiled)
@@ -86,7 +59,7 @@ def generate_data(args: argparse.Namespace):
if add_day_of_month:
# numerical day_of_month
- dom = (df.index.day - 1 ) / 31 # df.index.day starts from 1. We need to minus 1 to make it start from 0.
+ dom = (df.index.day - 1) / 31 # df.index.day starts from 1; subtract 1 so it starts from 0.
dom_tiled = np.tile(dom, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(dom_tiled)
@@ -96,78 +69,52 @@ def generate_data(args: argparse.Namespace):
doy_tiled = np.tile(doy, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(doy_tiled)
- processed_data = np.concatenate(feature_list, axis=-1)
-
- # save data
- index = {}
- index["train"] = train_index
- index["valid"] = valid_index
- index["test"] = test_index
- with open(output_dir + "/index_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(index, f)
-
- data = {}
- data["processed_data"] = processed_data
- with open(output_dir + "/data_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(data, f)
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 168
- FUTURE_SEQ_LEN = 96
-
- TRAIN_RATIO = 0.7
- VALID_RATIO = 0.1
- TARGET_CHANNEL = [0] # target channel(s)
- STEPS_PER_DAY = 24 # every 1 hour
-
- DATASET_NAME = "Electricity" # sampling frequency: every 1 hour
- TOD = True # if add time_of_day feature
- DOW = True # if add day_of_week feature
- DOM = True # if add day_of_month feature
- DOY = True # if add day_of_year feature
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.csv".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--steps_per_day", type=int,
- default=STEPS_PER_DAY, help="Sequence Length.")
- parser.add_argument("--tod", type=bool, default=TOD,
- help="Add feature time_of_day.")
- parser.add_argument("--dow", type=bool, default=DOW,
- help="Add feature day_of_week.")
- parser.add_argument("--dom", type=bool, default=DOM,
- help="Add feature day_of_week.")
- parser.add_argument("--doy", type=bool, default=DOY,
- help="Add feature day_of_week.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
+ data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
+ return data_with_features
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data, df = load_and_preprocess_data()
+
+ # Add temporal features
+ data_with_features = add_temporal_features(data, df)
+
+ # Save processed data
+ save_data(data_with_features)
+
+ # Save dataset description
+ save_description(data_with_features)
+
+if __name__ == '__main__':
+ main()
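
`TRAIN_VAL_TEST_RATIO` is metadata here; the split itself happens downstream. A sketch of how a chronological split could consume it (not necessarily how the BasicTS runner slices internally; `num_time_steps` is a placeholder):

    import numpy as np

    num_time_steps = 26304                 # placeholder; read the real value from desc.json
    ratios = [0.7, 0.1, 0.2]               # TRAIN_VAL_TEST_RATIO above
    train_end = round(num_time_steps * ratios[0])
    valid_end = train_end + round(num_time_steps * ratios[1])
    train, valid, test = np.split(np.arange(num_time_steps), [train_end, valid_end])
    print(len(train), len(valid), len(test))
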
diff --git a/scripts/data_preparation/ExchangeRate/generate_training_data.py b/scripts/data_preparation/ExchangeRate/generate_training_data.py
index a24ca9ad..2cf9ccf5 100644
--- a/scripts/data_preparation/ExchangeRate/generate_training_data.py
+++ b/scripts/data_preparation/ExchangeRate/generate_training_data.py
@@ -1,79 +1,52 @@
import os
-import sys
-import pickle
-import argparse
+import json
import numpy as np
import pandas as pd
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from basicts.data.transform import standard_transform
-
-
-def generate_data(args: argparse.Namespace):
- """Preprocess and generate train/valid/test datasets.
-
- Args:
- args (argparse): configurations of preprocessing
- """
-
- target_channel = args.target_channel
- future_seq_len = args.future_seq_len
- history_seq_len = args.history_seq_len
- add_time_of_day = args.tod
- add_day_of_week = args.dow
- add_day_of_month = args.dom
- add_day_of_year = args.doy
- output_dir = args.output_dir
- train_ratio = args.train_ratio
- valid_ratio = args.valid_ratio
- data_file_path = args.data_file_path
- steps_per_day = args.steps_per_day
- norm_each_channel = args.norm_each_channel
- if_rescale = not norm_each_channel # if evaluate on rescaled data. see `basicts.runner.base_tsf_runner.BaseTimeSeriesForecastingRunner.build_train_dataset` for details.
-
- # read data
+# Hyperparameters
+dataset_name = 'ExchangeRate'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.csv'
+graph_file_path = None
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target channel(s)
+add_time_of_day = True # Add time of day as a feature
+add_day_of_week = True # Add day of the week as a feature
+add_day_of_month = True # Add day of the month as a feature
+add_day_of_year = True # Add day of the year as a feature
+steps_per_day = 1 # Number of time steps per day
+frequency = 1440 // steps_per_day
+domain = 'exchange rate'
+feature_description = [domain, 'time of day', 'day of week', 'day of month', 'day of year']
+regular_settings = {
+ 'INPUT_LEN': 336,
+ 'OUTPUT_LEN': 336,
+ 'TRAIN_VAL_TEST_RATIO': [0.7, 0.1, 0.2],
+ 'NORM_EACH_CHANNEL': True,
+ 'RESCALE': False,
+ 'METRICS': ['MAE', 'MSE'],
+ 'NULL_VAL': np.nan
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
df = pd.read_csv(data_file_path)
- df_index = pd.to_datetime(df["date"].values, format="%Y-%m-%d %H:%M").to_numpy()
+ df_index = pd.to_datetime(df['date'].values, format='%Y-%m-%d %H:%M').to_numpy()
df = df[df.columns[1:]]
df.index = df_index
-
data = np.expand_dims(df.values, axis=-1)
-
data = data[..., target_channel]
- print("raw time series shape: {0}".format(data.shape))
-
- # split data
- l, n, f = data.shape
- num_samples = l - (history_seq_len + future_seq_len) + 1
- train_num = round(num_samples * train_ratio)
- valid_num = round(num_samples * valid_ratio)
- test_num = num_samples - train_num - valid_num
- print("number of training samples:{0}".format(train_num))
- print("number of validation samples:{0}".format(valid_num))
- print("number of test samples:{0}".format(test_num))
-
- index_list = []
- for t in range(history_seq_len, num_samples + history_seq_len):
- index = (t-history_seq_len, t, t+future_seq_len)
- index_list.append(index)
-
- train_index = index_list[:train_num]
- valid_index = index_list[train_num: train_num + valid_num]
- test_index = index_list[train_num +
- valid_num: train_num + valid_num + test_num]
-
- # normalize data
- scaler = standard_transform
- # Following related works (e.g. informer and autoformer), we normalize each channel separately.
- data_norm = scaler(data, output_dir, train_index, history_seq_len, future_seq_len, norm_each_channel=norm_each_channel)
-
- # add temporal feature
- feature_list = [data_norm]
+ print(f'Raw time series shape: {data.shape}')
+ return data, df
+
+def add_temporal_features(data, df):
+ '''Add the enabled temporal features (time of day, day of week, day of month, day of year) to the data.'''
+ l, n, _ = data.shape
+ feature_list = [data]
+
if add_time_of_day:
# numerical time_of_day
- tod = [i % steps_per_day / steps_per_day for i in range(data_norm.shape[0])]
+ tod = [i % steps_per_day / steps_per_day for i in range(l)]
tod = np.array(tod)
tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(tod_tiled)
@@ -96,78 +69,52 @@ def generate_data(args: argparse.Namespace):
doy_tiled = np.tile(doy, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(doy_tiled)
- processed_data = np.concatenate(feature_list, axis=-1)
-
- # save data
- index = {}
- index["train"] = train_index
- index["valid"] = valid_index
- index["test"] = test_index
- with open(output_dir + "/index_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(index, f)
-
- data = {}
- data["processed_data"] = processed_data
- with open(output_dir + "/data_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(data, f)
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 96
- FUTURE_SEQ_LEN = 336
-
- TRAIN_RATIO = 0.7
- VALID_RATIO = 0.1
- TARGET_CHANNEL = [0] # target channel(s)
- STEPS_PER_DAY = 1 # every 24 hour
-
- DATASET_NAME = "ExchangeRate" # sampling frequency: every 1 hour
- TOD = True # if add time_of_day feature
- DOW = True # if add day_of_week feature
- DOM = True # if add day_of_month feature
- DOY = True # if add day_of_year feature
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.csv".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--steps_per_day", type=int,
- default=STEPS_PER_DAY, help="Sequence Length.")
- parser.add_argument("--tod", type=bool, default=TOD,
- help="Add feature time_of_day.")
- parser.add_argument("--dow", type=bool, default=DOW,
- help="Add feature day_of_week.")
- parser.add_argument("--dom", type=bool, default=DOM,
- help="Add feature day_of_week.")
- parser.add_argument("--doy", type=bool, default=DOY,
- help="Add feature day_of_week.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
\ No newline at end of file
+ data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
+ return data_with_features
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data, df = load_and_preprocess_data()
+
+ # Add temporal features
+ data_with_features = add_temporal_features(data, df)
+
+ # Save processed data
+ save_data(data_with_features)
+
+ # Save dataset description
+ save_description(data_with_features)
+
+if __name__ == '__main__':
+ main()
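
Note that with `steps_per_day = 1` the time-of-day feature degenerates to a constant zero, since `i % 1 == 0` for every step; it is harmless but carries no information for daily data:

    steps_per_day = 1  # ExchangeRate is daily: one step per day
    tod = [i % steps_per_day / steps_per_day for i in range(5)]
    print(tod)         # [0.0, 0.0, 0.0, 0.0, 0.0]
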
diff --git a/scripts/data_preparation/GBA/generate_training_data.py b/scripts/data_preparation/GBA/generate_training_data.py
index d52c2ba6..0b8e83a7 100644
--- a/scripts/data_preparation/GBA/generate_training_data.py
+++ b/scripts/data_preparation/GBA/generate_training_data.py
@@ -1,73 +1,136 @@
import os
-import sys
-import argparse
-
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from scripts.data_preparation.CA.generate_training_data import generate_data
-
-# Dataset Description:
-# LargeST: A Benchmark Dataset for Large-Scale Traffic Forecasting.
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 12
- FUTURE_SEQ_LEN = 12
-
- TRAIN_RATIO = 0.6
- VALID_RATIO = 0.2
- TARGET_CHANNEL = [0] # target channel(s)
-
- DATASET_NAME = "GBA"
- TOD = True # if add time_of_day feature
- DOW = True # if add day_of_week feature
- DOM = True # if add day_of_month feature
- DOY = True # if add day_of_year feature
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.h5".format(DATASET_NAME)
- GRAPH_FILE_PATH = "datasets/raw_data/{0}/adj_{0}.npy".format(DATASET_NAME)
- GRAPH_METE_PATH = "datasets/raw_data/{0}/meta_{0}.csv".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--graph_file_path", type=str,
- default=GRAPH_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--tod", type=bool, default=TOD,
- help="Add feature time_of_day.")
- parser.add_argument("--dow", type=bool, default=DOW,
- help="Add feature day_of_week.")
- parser.add_argument("--dom", type=bool, default=DOM,
- help="Add feature day_of_week.")
- parser.add_argument("--doy", type=bool, default=DOY,
- help="Add feature day_of_week.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
+import json
+import pickle
+import shutil
+
+import numpy as np
+import pandas as pd
+
+# Hyperparameters
+dataset_name = 'GBA'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.h5'
+graph_file_path = f'datasets/raw_data/{dataset_name}/adj_{dataset_name}.npy'
+meta_file_path = f'datasets/raw_data/{dataset_name}/meta_{dataset_name}.csv'
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target traffic flow channel
+add_time_of_day = True # Add time of day as a feature
+add_day_of_week = True # Add day of the week as a feature
+add_day_of_month = False # Add day of the month as a feature
+add_day_of_year = False # Add day of the year as a feature
+steps_per_day = 96 # Number of time steps per day
+frequency = 1440 // steps_per_day
+domain = 'traffic flow'
+feature_description = [domain, 'time of day', 'day of week']
+regular_settings = {
+ 'INPUT_LEN': 12,
+ 'OUTPUT_LEN': 12,
+ 'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+ 'NORM_EACH_CHANNEL': False,
+ 'RESCALE': True,
+ 'METRICS': ['MAE', 'RMSE', 'MAPE'],
+ 'NULL_VAL': 0.0
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
+ df = pd.read_hdf(data_file_path)
+ data = np.expand_dims(df.values, axis=-1)
+ data = data[..., target_channel]
+ print(f'Raw time series shape: {data.shape}')
+ return data, df
+
+def add_temporal_features(data, df):
+ '''Add the enabled temporal features (time of day, day of week, day of month, day of year) to the data.'''
+ _, n, _ = data.shape
+ feature_list = [data]
+
+ if add_time_of_day:
+ time_of_day = (df.index.values - df.index.values.astype('datetime64[D]')) / np.timedelta64(1, 'D')
+ time_of_day_tiled = np.tile(time_of_day, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(time_of_day_tiled)
+
+ if add_day_of_week:
+ day_of_week = df.index.dayofweek / 7
+ day_of_week_tiled = np.tile(day_of_week, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_week_tiled)
+
+ if add_day_of_month:
+ # numerical day_of_month
+ day_of_month = (df.index.day - 1) / 31 # df.index.day starts from 1; subtract 1 so it starts from 0.
+ day_of_month_tiled = np.tile(day_of_month, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_month_tiled)
+
+ if add_day_of_year:
+ # numerical day_of_year
+ day_of_year = (df.index.dayofyear - 1) / 366 # df.index.dayofyear starts from 1; subtract 1 so it starts from 0.
+ day_of_year_tiled = np.tile(day_of_year, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_year_tiled)
+
+ data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
+ return data_with_features
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_graph():
+ '''Save the adjacency matrix to the output directory.'''
+ output_graph_path = os.path.join(output_dir, 'adj_mx.pkl')
+ adj_mx = np.load(graph_file_path)
+ with open(output_graph_path, 'wb') as f:
+ pickle.dump(adj_mx, f)
+ print(f'Adjacency matrix saved to {output_graph_path}')
+
+def save_meta_data():
+ '''Save the metadata to the output directory.'''
+ output_meta_data_path = os.path.join(output_dir, 'meta.csv')
+ shutil.copyfile(meta_file_path, output_meta_data_path)
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data, df = load_and_preprocess_data()
+
+ # Add temporal features
+ data_with_features = add_temporal_features(data, df)
+
+ # Save processed data
+ save_data(data_with_features)
+
+ # Copy and save adjacency matrix
+ save_graph()
+
+ # Copy and save metadata
+ save_meta_data()
+
+ # Save dataset description
+ save_description(data_with_features)
+
+if __name__ == '__main__':
+ main()
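
Unlike the ETT scripts, the LargeST scripts derive time of day from the timestamps themselves: truncating `datetime64[ns]` to `datetime64[D]` and dividing the remainder by one day yields the fraction of the day elapsed. A small check on a synthetic 15-minute index:

    import numpy as np
    import pandas as pd

    idx = pd.date_range('2019-01-01', periods=4, freq='15min')
    ts = idx.values                                           # datetime64[ns]
    frac = (ts - ts.astype('datetime64[D]')) / np.timedelta64(1, 'D')
    print(frac)                                               # [0.0, 0.01041667, 0.02083333, 0.03125]
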
diff --git a/scripts/data_preparation/GLA/generate_training_data.py b/scripts/data_preparation/GLA/generate_training_data.py
index 9128316c..3cdb21d1 100644
--- a/scripts/data_preparation/GLA/generate_training_data.py
+++ b/scripts/data_preparation/GLA/generate_training_data.py
@@ -1,73 +1,136 @@
import os
-import sys
-import argparse
-
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from scripts.data_preparation.CA.generate_training_data import generate_data
-
-# Dataset Description:
-# LargeST: A Benchmark Dataset for Large-Scale Traffic Forecasting.
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 12
- FUTURE_SEQ_LEN = 12
-
- TRAIN_RATIO = 0.6
- VALID_RATIO = 0.2
- TARGET_CHANNEL = [0] # target channel(s)
-
- DATASET_NAME = "GLA"
- TOD = True # if add time_of_day feature
- DOW = True # if add day_of_week feature
- DOM = True # if add day_of_month feature
- DOY = True # if add day_of_year feature
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.h5".format(DATASET_NAME)
- GRAPH_FILE_PATH = "datasets/raw_data/{0}/adj_{0}.npy".format(DATASET_NAME)
- GRAPH_METE_PATH = "datasets/raw_data/{0}/meta_{0}.csv".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--graph_file_path", type=str,
- default=GRAPH_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--tod", type=bool, default=TOD,
- help="Add feature time_of_day.")
- parser.add_argument("--dow", type=bool, default=DOW,
- help="Add feature day_of_week.")
- parser.add_argument("--dom", type=bool, default=DOM,
- help="Add feature day_of_week.")
- parser.add_argument("--doy", type=bool, default=DOY,
- help="Add feature day_of_week.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
+import json
+import pickle
+import shutil
+
+import numpy as np
+import pandas as pd
+
+# Hyperparameters
+dataset_name = 'GLA'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.h5'
+graph_file_path = f'datasets/raw_data/{dataset_name}/adj_{dataset_name}.npy'
+meta_file_path = f'datasets/raw_data/{dataset_name}/meta_{dataset_name}.csv'
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target traffic flow channel
+add_time_of_day = True # Add time of day as a feature
+add_day_of_week = True # Add day of the week as a feature
+add_day_of_month = False # Add day of the month as a feature
+add_day_of_year = False # Add day of the year as a feature
+steps_per_day = 96 # Number of time steps per day
+frequency = 1440 // steps_per_day
+domain = 'traffic flow'
+feature_description = [domain, 'time of day', 'day of week']
+regular_settings = {
+ 'INPUT_LEN': 12,
+ 'OUTPUT_LEN': 12,
+ 'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+ 'NORM_EACH_CHANNEL': False,
+ 'RESCALE': True,
+ 'METRICS': ['MAE', 'RMSE', 'MAPE'],
+ 'NULL_VAL': 0.0
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
+ df = pd.read_hdf(data_file_path)
+ data = np.expand_dims(df.values, axis=-1)
+ data = data[..., target_channel]
+ print(f'Raw time series shape: {data.shape}')
+ return data, df
+
+def add_temporal_features(data, df):
+ '''Add time of day and day of week as features to the data.'''
+ _, n, _ = data.shape
+ feature_list = [data]
+
+ if add_time_of_day:
+ time_of_day = (df.index.values - df.index.values.astype('datetime64[D]')) / np.timedelta64(1, 'D')
+ time_of_day_tiled = np.tile(time_of_day, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(time_of_day_tiled)
+
+ if add_day_of_week:
+ day_of_week = df.index.dayofweek / 7
+ day_of_week_tiled = np.tile(day_of_week, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_week_tiled)
+
+ if add_day_of_month:
+ # numerical day_of_month
+        day_of_month = (df.index.day - 1) / 31  # df.index.day starts from 1; subtract 1 so it starts from 0.
+ day_of_month_tiled = np.tile(day_of_month, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_month_tiled)
+
+ if add_day_of_year:
+ # numerical day_of_year
+        day_of_year = (df.index.dayofyear - 1) / 366  # df.index.dayofyear starts from 1; subtract 1 so it starts from 0.
+ day_of_year_tiled = np.tile(day_of_year, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_year_tiled)
+
+ data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
+ return data_with_features
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_graph():
+    '''Save the adjacency matrix to the output directory.'''
+    output_graph_path = os.path.join(output_dir, 'adj_mx.pkl')
+    adj_mx = np.load(graph_file_path)
+    with open(output_graph_path, 'wb') as f:
+        pickle.dump(adj_mx, f)
+    print(f'Adjacency matrix saved to {output_graph_path}')
+
+def save_meta_data():
+    '''Save the meta data to the output directory.'''
+    output_meta_data_path = os.path.join(output_dir, 'meta.csv')
+    shutil.copyfile(meta_file_path, output_meta_data_path)
+    print(f'Meta data saved to {output_meta_data_path}')
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data, df = load_and_preprocess_data()
+
+ # Add temporal features
+ data_with_features = add_temporal_features(data, df)
+
+ # Save processed data
+ save_data(data_with_features)
+
+ # Copy and save adjacency matrix
+ save_graph()
+
+ # Copy and save meta data
+ save_meta_data()
+
+ # Save dataset description
+ save_description(data_with_features)
+
+if __name__ == '__main__':
+ main()
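
Note: the artifacts written above (data.dat plus desc.json) can be read back as in
the following sketch. This is illustrative only; the dataset path is assumed from
this script's defaults.

import json
import os

import numpy as np

dataset_dir = 'datasets/GLA'  # assumed default output_dir from the script above

# desc.json records the memmap shape, so the flat float32 file can be reopened safely.
with open(os.path.join(dataset_dir, 'desc.json')) as f:
    desc = json.load(f)

data = np.memmap(os.path.join(dataset_dir, 'data.dat'), dtype='float32',
                 mode='r', shape=tuple(desc['shape']))  # L x N x C
print(data.shape, desc['feature_description'])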
diff --git a/scripts/data_preparation/Gaussian/generate_training_data.py b/scripts/data_preparation/Gaussian/generate_training_data.py
index c5174306..946cb6a5 100644
--- a/scripts/data_preparation/Gaussian/generate_training_data.py
+++ b/scripts/data_preparation/Gaussian/generate_training_data.py
@@ -1,122 +1,74 @@
import os
-import sys
-import shutil
-import pickle
-import argparse
+import json
import numpy as np
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from basicts.data.transform import standard_transform
-
-def generate_data(args: argparse.Namespace):
- """Preprocess and generate train/valid/test datasets.
-
- Args:
- args (argparse): configurations of preprocessing
- """
-
- target_channel = args.target_channel
- future_seq_len = args.future_seq_len
- history_seq_len = args.history_seq_len
- output_dir = args.output_dir
- train_ratio = args.train_ratio
- valid_ratio = args.valid_ratio
- data_file_path = args.data_file_path
- norm_each_channel = args.norm_each_channel
- if_rescale = not norm_each_channel # if evaluate on rescaled data. see `basicts.runner.base_tsf_runner.BaseTimeSeriesForecastingRunner.build_train_dataset` for details.
-
- # read data
+# Hyperparameters
+dataset_name = 'Gaussian'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.npy'
+graph_file_path = None
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target channel
+frequency = None
+domain = 'simulated Gaussian data'
+feature_description = [domain]
+regular_settings = {
+ 'INPUT_LEN': 336,
+ 'OUTPUT_LEN': 336,
+ 'TRAIN_VAL_TEST_RATIO': [0.7, 0.1, 0.2],
+ 'NORM_EACH_CHANNEL': False,
+ 'RESCALE': True,
+ 'NULL_VAL': np.nan
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
data = np.load(data_file_path)
data = data[..., target_channel]
- print("raw time series shape: {0}".format(data.shape))
-
- # split data
- l, n, f = data.shape
- num_samples = l - (history_seq_len + future_seq_len) + 1
- train_num = round(num_samples * train_ratio)
- valid_num = round(num_samples * valid_ratio)
- test_num = num_samples - train_num - valid_num
- print("number of training samples:{0}".format(train_num))
- print("number of validation samples:{0}".format(valid_num))
- print("number of test samples:{0}".format(test_num))
-
- index_list = []
- for t in range(history_seq_len, num_samples + history_seq_len):
- index = (t-history_seq_len, t, t+future_seq_len)
- index_list.append(index)
-
- train_index = index_list[:train_num]
- valid_index = index_list[train_num: train_num + valid_num]
- test_index = index_list[train_num +
- valid_num: train_num + valid_num + test_num]
-
- # normalize data
- scaler = standard_transform
- data_norm = scaler(data, output_dir, train_index, history_seq_len, future_seq_len, norm_each_channel=norm_each_channel)
-
- # add temporal feature
- feature_list = [data_norm]
-
- processed_data = np.concatenate(feature_list, axis=-1)
-
- # save data
- index = {}
- index["train"] = train_index
- index["valid"] = valid_index
- index["test"] = test_index
- with open(output_dir + "/index_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(index, f)
-
- data = {}
- data["processed_data"] = processed_data
- with open(output_dir + "/data_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(data, f)
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 96
- FUTURE_SEQ_LEN = 96
-
- TRAIN_RATIO = 0.6
- VALID_RATIO = 0.2
- TARGET_CHANNEL = [0] # target channel(s)
-
- DATASET_NAME = "Gaussian"
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.npy".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
+ print(f'Raw time series shape: {data.shape}')
+ return data
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+        'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data = load_and_preprocess_data()
+
+ # Save processed data
+ save_data(data)
+
+ # Save dataset description
+ save_description(data)
+
+if __name__ == '__main__':
+ main()
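
Note: train/val/test splitting now happens at load time from regular_settings
instead of inside this script. Under the windowing convention of the removed
code, the sample counts work out as in this illustrative sketch (assuming the
10000-step series produced by simulate_data.py):

# Illustrative only: sliding-window sample counts under the removed convention.
L = 10000                                  # Gaussian series length
input_len, output_len = 336, 336           # regular_settings INPUT_LEN / OUTPUT_LEN
train_ratio, valid_ratio = 0.7, 0.1

num_samples = L - (input_len + output_len) + 1   # one window per start index
train_num = round(num_samples * train_ratio)
valid_num = round(num_samples * valid_ratio)
test_num = num_samples - train_num - valid_num
print(num_samples, train_num, valid_num, test_num)  # 9329 6530 933 1866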
diff --git a/scripts/data_preparation/Gaussian/simulate_data.py b/scripts/data_preparation/Gaussian/simulate_data.py
index 23996644..a8a2cafc 100644
--- a/scripts/data_preparation/Gaussian/simulate_data.py
+++ b/scripts/data_preparation/Gaussian/simulate_data.py
@@ -1,24 +1,24 @@
import os
-import sys
+
+import torch
import numpy as np
-import matplotlib.pyplot as plt
-PROJECT_DIR = os.path.abspath(__file__ + "/../../../..")
-os.chdir(PROJECT_DIR)
+PROJECT_DIR = os.path.abspath(__file__ + '/../../../..')
+os.chdir(PROJECT_DIR)
-def generate_gaussian_noise_sequence(duration):
- time_points = np.arange(0, duration, 1)
- gaussion_noise_sequence = np.random.normal(0, 1, duration)
- return time_points, gaussion_noise_sequence
# hyper parameterts
duration = 10000 # time series length
+def generate_gaussian_noise_sequence():
+ x = np.arange(0, duration, 1)
+ y = np.random.normal(0, 1, duration)
+ return x, y
+
# generate gaussian sequence
-time_points, gaussian_noise_sequence = generate_gaussian_noise_sequence(duration)
+time_points, gaussian_noise_sequence = generate_gaussian_noise_sequence()
# save pulse sequence
-import torch
data = torch.Tensor(gaussian_noise_sequence).unsqueeze(-1).unsqueeze(-1).numpy()
# mkdir datasets/raw_data/Gaussian
if not os.path.exists('datasets/raw_data/Gaussian'):
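
Note: the torch round-trip above only appends two trailing axes; a pure-numpy
equivalent is shown below as an illustrative sketch.

import numpy as np

y = np.random.normal(0, 1, 10000)
# Same result as torch.Tensor(y).unsqueeze(-1).unsqueeze(-1).numpy()
data = y.reshape(-1, 1, 1).astype(np.float32)    # L x N x C
assert data.shape == (10000, 1, 1)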
diff --git a/scripts/data_preparation/Illness/generate_training_data.py b/scripts/data_preparation/Illness/generate_training_data.py
index 925003ac..47feb121 100644
--- a/scripts/data_preparation/Illness/generate_training_data.py
+++ b/scripts/data_preparation/Illness/generate_training_data.py
@@ -1,81 +1,53 @@
import os
-import sys
-import pickle
-import argparse
+import json
import numpy as np
import pandas as pd
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from basicts.data.transform import standard_transform
-
-
-def generate_data(args: argparse.Namespace):
- """Preprocess and generate train/valid/test datasets.
-
- Args:
- args (argparse): configurations of preprocessing
- """
-
- target_channel = args.target_channel
- future_seq_len = args.future_seq_len
- history_seq_len = args.history_seq_len
- add_time_of_day = args.tod
- add_day_of_week = args.dow
- add_day_of_month = args.dom
- add_day_of_year = args.doy
- output_dir = args.output_dir
- train_ratio = args.train_ratio
- valid_ratio = args.valid_ratio
- data_file_path = args.data_file_path
- steps_per_day = args.steps_per_day
- norm_each_channel = args.norm_each_channel
- if_rescale = not norm_each_channel # if evaluate on rescaled data. see `basicts.runner.base_tsf_runner.BaseTimeSeriesForecastingRunner.build_train_dataset` for details.
-
- # read data
+# Hyperparameters
+dataset_name = 'Illness'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.csv'
+graph_file_path = None
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target channel
+add_time_of_day = True # Add time of day as a feature
+add_day_of_week = True # Add day of the week as a feature
+add_day_of_month = True # Add day of the month as a feature
+add_day_of_year = True # Add day of the year as a feature
+steps_per_day = 1 / 7 # Weekly sampling: one step every 7 days
+frequency = int(1440 // steps_per_day) # 10080 minutes, i.e. one week
+domain = 'illness data'
+feature_description = [domain, 'time of day', 'day of week', 'day of month', 'day of year']
+regular_settings = {
+ 'INPUT_LEN': 96,
+ 'OUTPUT_LEN': 48,
+ 'TRAIN_VAL_TEST_RATIO': [0.7, 0.1, 0.2],
+ 'NORM_EACH_CHANNEL': True,
+ 'RESCALE': False,
+ 'METRICS': ['MAE', 'MSE'],
+ 'NULL_VAL': np.nan
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
df = pd.read_csv(data_file_path)
- df_index = pd.to_datetime(df["date"].values, format="%Y-%m-%d %H:%M").to_numpy()
+ df_index = pd.to_datetime(df['date'].values, format='%Y-%m-%d %H:%M').to_numpy()
df = df[df.columns[1:]]
df.index = df_index
-
data = np.expand_dims(df.values, axis=-1)
-
data = data[..., target_channel]
- print("raw time series shape: {0}".format(data.shape))
-
- # split data
- l, n, f = data.shape
- num_samples = l - (history_seq_len + future_seq_len) + 1
- train_num = round(num_samples * train_ratio)
- valid_num = round(num_samples * valid_ratio)
- test_num = num_samples - train_num - valid_num
- print("number of training samples:{0}".format(train_num))
- print("number of validation samples:{0}".format(valid_num))
- print("number of test samples:{0}".format(test_num))
-
- index_list = []
- for t in range(history_seq_len, num_samples + history_seq_len):
- index = (t-history_seq_len, t, t+future_seq_len)
- index_list.append(index)
-
- train_index = index_list[:train_num]
- valid_index = index_list[train_num: train_num + valid_num]
- test_index = index_list[train_num +
- valid_num: train_num + valid_num + test_num]
-
- # normalize data
- scaler = standard_transform
- # Following related works (e.g. informer and autoformer), we normalize each channel separately.
- data_norm = scaler(data, output_dir, train_index, history_seq_len, future_seq_len, norm_each_channel=norm_each_channel)
-
- # add temporal feature
- feature_list = [data_norm]
+ print(f'Raw time series shape: {data.shape}')
+ return data, df
+
+def add_temporal_features(data, df):
+    '''Add time of day, day of week, day of month, and day of year as features to the data.'''
+ _, n, _ = data.shape
+ feature_list = [data]
+
if add_time_of_day:
# numerical time_of_day
-
tod = (
- df.index.values - df.index.values.astype("datetime64[D]")) / np.timedelta64(1, "D")
+ df.index.values - df.index.values.astype('datetime64[D]')) / np.timedelta64(1, 'D')
tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(tod_tiled)
@@ -87,7 +59,7 @@ def generate_data(args: argparse.Namespace):
if add_day_of_month:
# numerical day_of_month
- dom = (df.index.day - 1 ) / 31 # df.index.day starts from 1. We need to minus 1 to make it start from 0.
+        dom = (df.index.day - 1) / 31  # df.index.day starts from 1; subtract 1 so it starts from 0.
dom_tiled = np.tile(dom, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(dom_tiled)
@@ -97,78 +69,52 @@ def generate_data(args: argparse.Namespace):
doy_tiled = np.tile(doy, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(doy_tiled)
- processed_data = np.concatenate(feature_list, axis=-1)
-
- # save data
- index = {}
- index["train"] = train_index
- index["valid"] = valid_index
- index["test"] = test_index
- with open(output_dir + "/index_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(index, f)
-
- data = {}
- data["processed_data"] = processed_data
- with open(output_dir + "/data_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(data, f)
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 168
- FUTURE_SEQ_LEN = 96
-
- TRAIN_RATIO = 0.7
- VALID_RATIO = 0.1
- TARGET_CHANNEL = [0] # target channel(s)
- STEPS_PER_DAY = 1 # every 1 hour
-
- DATASET_NAME = "Illness" # sampling frequency: every 1 hour
- TOD = True # if add time_of_day feature
- DOW = True # if add day_of_week feature
- DOM = True # if add day_of_month feature
- DOY = True # if add day_of_year feature
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.csv".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--steps_per_day", type=int,
- default=STEPS_PER_DAY, help="Sequence Length.")
- parser.add_argument("--tod", type=bool, default=TOD,
- help="Add feature time_of_day.")
- parser.add_argument("--dow", type=bool, default=DOW,
- help="Add feature day_of_week.")
- parser.add_argument("--dom", type=bool, default=DOM,
- help="Add feature day_of_week.")
- parser.add_argument("--doy", type=bool, default=DOY,
- help="Add feature day_of_week.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
+ data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
+ return data_with_features
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data, df = load_and_preprocess_data()
+
+ # Add temporal features
+ data_with_features = add_temporal_features(data, df)
+
+ # Save processed data
+ save_data(data_with_features)
+
+ # Save dataset description
+ save_description(data_with_features)
+
+if __name__ == '__main__':
+ main()
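
Note: every script in this patch builds temporal features with the same
tile-and-transpose idiom. The following illustrative sketch shows the shape it
produces for a toy stamp vector.

import numpy as np

L, n = 5, 3
tod = np.linspace(0, 0.8, L)                       # stand-in for time-of-day fractions
tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0))
assert tod_tiled.shape == (L, n, 1)                # one copy of the stamps per node
assert np.allclose(tod_tiled[:, 0, 0], tod)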
diff --git a/scripts/data_preparation/M4/generate_training_data.py b/scripts/data_preparation/M4/generate_training_data.py
deleted file mode 100644
index 4a0b840f..00000000
--- a/scripts/data_preparation/M4/generate_training_data.py
+++ /dev/null
@@ -1,107 +0,0 @@
-import os
-import pickle
-import pandas as pd
-import numpy as np
-from tqdm import tqdm
-
-project_dir = os.path.abspath(__file__ + "/../../../..")
-data_dir = project_dir + "/datasets/raw_data/M4/"
-
-def generate_subset(seasonal_pattern):
- output_dir = project_dir + "/datasets/M4_{0}/".format(seasonal_pattern)
- processed_data_train = [] # final data
- processed_mask_train = []
- processed_data_test = []
- processed_mask_test = []
- train_index = []
- test_index = []
- scaler = []
- future_seq_len = {"Yearly": 6, "Quarterly": 8, "Monthly": 18, "Weekly": 13, "Daily": 14, "Hourly": 48}[seasonal_pattern]
- lookback = 2
- history_seq_len = future_seq_len * lookback
- index_lower_bound = int({"Yearly": 1.5, "Quarterly": 1.5, "Monthly": 1.5, "Weekly": 10, "Daily": 10, "Hourly": 10}[seasonal_pattern] * future_seq_len) # generate index from len(train_data) - index_lower_bound
-
- # read data
- train_data = pd.read_csv(data_dir + seasonal_pattern + "-train.csv")
- test_data = pd.read_csv(data_dir + seasonal_pattern + "-test.csv")
- meta_info = pd.read_csv(data_dir + "M4-info.csv")
-
- def process_one_series(ts_id, ts_train, ts_info):
- """generate data and index for one series.
-
- Args:
- ts_id (int): time series id in each subset.
- ts_train (pd.Series): time series data.
- info (pd.Series): time series info.
- """
- ts_train = ts_train.tolist()
- mask_train = [1] * len(ts_train)
-
- # generate padded data
- low = max(1, len(ts_train) - index_lower_bound)
- if low - history_seq_len < 0: left_padding = [0] * (history_seq_len - low)
- else: left_padding = []
- right_padding = [0] * future_seq_len
- ts_train_padded = left_padding + ts_train + right_padding
- # generate mask
- mask_train_padded = [0] * len(left_padding) + mask_train + [0] * len(right_padding)
- # df = generate_dataframe(start_time_padded, seasonal_pattern, ts_train_padded)
- df = pd.DataFrame(data={'Values': ts_train_padded})
- df["IDs"] = ts_id
- # generate data
- new_data = df.values.tolist() # first dimension: values, second dimension: IDs
- # generate index
- index_list = []
- for t in range(low + len(left_padding), len(ts_train_padded) - future_seq_len):
- index_list.append([t - history_seq_len, t, t + future_seq_len])
- # generate scaler
- scaler = None
- return new_data, mask_train_padded, index_list, scaler
-
- # generate training data
- for ts_id in tqdm(range(train_data.shape[0])):
- ts_name = train_data.iloc[ts_id, :]["V1"]
- ts_info = meta_info[meta_info["M4id"] == ts_name].iloc[0, :]
- ts_train = train_data.iloc[ts_id, :].drop("V1").dropna()
-
- ts_data, ts_mask, ts_index, ts_scaler = process_one_series(ts_id, ts_train, ts_info)
- processed_data_train.append(ts_data)
- processed_mask_train.append(ts_mask)
- train_index.append(ts_index)
- scaler.append(ts_scaler)
-
- for ts_id in tqdm(range(test_data.shape[0])):
- ts_name = test_data.iloc[ts_id, :]["V1"]
- ts_info = meta_info[meta_info["M4id"] == ts_name].iloc[0, :]
- ts_test = test_data.iloc[ts_id, :].drop("V1").dropna()
- ts_train_last_sample_index = train_index[ts_id][-1]
- ts_train_last_sample_history = processed_data_train[ts_id][ts_train_last_sample_index[0]+1:ts_train_last_sample_index[1]+1] # last history sample
- ts_train_last_sample_future = processed_data_train[ts_id][ts_train_last_sample_index[1]+1:ts_train_last_sample_index[2]+1] # last future sample, should be all zeros
- ts_train_last_sample_mask = processed_mask_train[ts_id][ts_train_last_sample_index[0]+1:ts_train_last_sample_index[1]+1] # last history sample mask. there might be some mask in the history data when the history_seq_len is large.
- assert sum([_[0] for _ in ts_train_last_sample_future]) == 0
- ts_train_last_sample_future = np.array(ts_train_last_sample_future)
- ts_train_last_sample_future[:, 0] = ts_test.tolist()
- ts_data = ts_train_last_sample_history + ts_train_last_sample_future.tolist()
- processed_data_test.append(ts_data)
- processed_mask_test.append(ts_train_last_sample_mask + [1] * len(ts_test))
- test_index.append([[0, len(ts_train_last_sample_history), len(ts_train_last_sample_history) + len(ts_test)]])
- assert ts_test.shape[0] == future_seq_len, "test data length should be equal to future_seq_len"
-
- # create output dir if not exists
- if not os.path.exists(output_dir):
- os.makedirs(output_dir)
- ## save data
- with open(output_dir + "/data_in_{0}_out_{1}_rescale_None.pkl".format(history_seq_len, future_seq_len), "wb") as f:
- pickle.dump({"train": processed_data_train, "test": processed_data_test}, f)
- ## save mask
- with open(output_dir + "/mask_in_{0}_out_{1}_rescale_None.pkl".format(history_seq_len, future_seq_len), "wb") as f:
- pickle.dump({"train": processed_mask_train, "test": processed_mask_test}, f)
- ## save index
- with open(output_dir + "/index_in_{0}_out_{1}_rescale_None.pkl".format(history_seq_len, future_seq_len), "wb") as f:
- pickle.dump({"train": train_index, "test": test_index}, f)
-
-if __name__ == "__main__":
- seasonal_patterns = ["Yearly", "Quarterly", "Monthly", "Weekly", "Daily", "Hourly"]
- for seasonal_pattern in seasonal_patterns:
- print(seasonal_pattern)
- generate_subset(seasonal_pattern)
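
Note: for context on the deletion above, the M4 generator padded each series on
both sides and tracked validity with a mask. A miniature illustrative sketch
(index_lower_bound omitted for brevity):

history_seq_len, future_seq_len = 4, 2
ts_train = [10, 11, 12]                       # series shorter than history_seq_len
mask_train = [1] * len(ts_train)

low = max(1, len(ts_train))
left_padding = [0] * (history_seq_len - low) if low - history_seq_len < 0 else []
right_padding = [0] * future_seq_len
ts_train_padded = left_padding + ts_train + right_padding
mask_train_padded = [0] * len(left_padding) + mask_train + [0] * len(right_padding)
print(ts_train_padded)    # [0, 10, 11, 12, 0, 0]
print(mask_train_padded)  # [0, 1, 1, 1, 0, 0]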
diff --git a/scripts/data_preparation/METR-LA/generate_training_data.py b/scripts/data_preparation/METR-LA/generate_training_data.py
index a51ef0c3..b5d13e0e 100644
--- a/scripts/data_preparation/METR-LA/generate_training_data.py
+++ b/scripts/data_preparation/METR-LA/generate_training_data.py
@@ -1,171 +1,124 @@
import os
-import sys
+import json
import shutil
-import pickle
-import argparse
import numpy as np
import pandas as pd
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from basicts.data.transform import standard_transform
-
-
-def generate_data(args: argparse.Namespace):
- """Preprocess and generate train/valid/test datasets.
-
- Args:
- args (argparse): configurations of preprocessing
- """
-
- target_channel = args.target_channel
- future_seq_len = args.future_seq_len
- history_seq_len = args.history_seq_len
- add_time_of_day = args.tod
- add_day_of_week = args.dow
- add_day_of_month = args.dom
- add_day_of_year = args.doy
- output_dir = args.output_dir
- train_ratio = args.train_ratio
- valid_ratio = args.valid_ratio
- data_file_path = args.data_file_path
- graph_file_path = args.graph_file_path
- norm_each_channel = args.norm_each_channel
- if_rescale = not norm_each_channel # if evaluate on rescaled data. see `basicts.runner.base_tsf_runner.BaseTimeSeriesForecastingRunner.build_train_dataset` for details.
-
- # read data
+# Hyperparameters
+dataset_name = 'METR-LA'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.h5'
+graph_file_path = f'datasets/raw_data/{dataset_name}/adj_{dataset_name}.pkl'
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target traffic flow channel
+add_time_of_day = True # Add time of day as a feature
+add_day_of_week = True # Add day of the week as a feature
+add_day_of_month = False # Add day of the month as a feature
+add_day_of_year = False # Add day of the year as a feature
+steps_per_day = 288 # Number of time steps per day
+frequency = 1440 // steps_per_day
+domain = 'traffic speed'
+feature_description = [domain, 'time of day', 'day of week']
+regular_settings = {
+ 'INPUT_LEN': 12,
+ 'OUTPUT_LEN': 12,
+ 'TRAIN_VAL_TEST_RATIO': [0.7, 0.1, 0.2],
+ 'NORM_EACH_CHANNEL': False,
+ 'RESCALE': True,
+ 'METRICS': ['MAE', 'RMSE', 'MAPE'],
+ 'NULL_VAL': 0.0
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
df = pd.read_hdf(data_file_path)
data = np.expand_dims(df.values, axis=-1)
-
data = data[..., target_channel]
- print("raw time series shape: {0}".format(data.shape))
-
- # split data
- l, n, f = data.shape
- num_samples = l - (history_seq_len + future_seq_len) + 1
- train_num = round(num_samples * train_ratio)
- valid_num = round(num_samples * valid_ratio)
- test_num = num_samples - train_num - valid_num
- print("number of training samples:{0}".format(train_num))
- print("number of validation samples:{0}".format(valid_num))
- print("number of test samples:{0}".format(test_num))
-
- index_list = []
- for t in range(history_seq_len, num_samples + history_seq_len):
- index = (t-history_seq_len, t, t+future_seq_len)
- index_list.append(index)
-
- train_index = index_list[:train_num]
- valid_index = index_list[train_num: train_num + valid_num]
- test_index = index_list[train_num +
- valid_num: train_num + valid_num + test_num]
-
- # normalize data
- scaler = standard_transform
- data_norm = scaler(data, output_dir, train_index, history_seq_len, future_seq_len, norm_each_channel=norm_each_channel)
-
- # add temporal feature
- feature_list = [data_norm]
+ print(f'Raw time series shape: {data.shape}')
+ return data, df
+
+def add_temporal_features(data, df):
+ '''Add time of day and day of week as features to the data.'''
+ _, n, _ = data.shape
+ feature_list = [data]
+
if add_time_of_day:
- # numerical time_of_day
- tod = (
- df.index.values - df.index.values.astype("datetime64[D]")) / np.timedelta64(1, "D")
- tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(tod_tiled)
+ time_of_day = (df.index.values - df.index.values.astype('datetime64[D]')) / np.timedelta64(1, 'D')
+ time_of_day_tiled = np.tile(time_of_day, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(time_of_day_tiled)
if add_day_of_week:
- # numerical day_of_week
- dow = df.index.dayofweek / 7
- dow_tiled = np.tile(dow, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(dow_tiled)
+ day_of_week = df.index.dayofweek / 7
+ day_of_week_tiled = np.tile(day_of_week, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_week_tiled)
if add_day_of_month:
# numerical day_of_month
- dom = (df.index.day - 1 ) / 31 # df.index.day starts from 1. We need to minus 1 to make it start from 0.
- dom_tiled = np.tile(dom, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(dom_tiled)
+        day_of_month = (df.index.day - 1) / 31  # df.index.day starts from 1; subtract 1 so it starts from 0.
+ day_of_month_tiled = np.tile(day_of_month, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_month_tiled)
if add_day_of_year:
# numerical day_of_year
- doy = (df.index.dayofyear - 1) / 366 # df.index.month starts from 1. We need to minus 1 to make it start from 0.
- doy_tiled = np.tile(doy, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(doy_tiled)
-
- processed_data = np.concatenate(feature_list, axis=-1)
-
- # save data
- index = {}
- index["train"] = train_index
- index["valid"] = valid_index
- index["test"] = test_index
- with open(output_dir + "/index_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(index, f)
-
- data = {}
- data["processed_data"] = processed_data
- with open(output_dir + "/data_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(data, f)
- # copy adj
- shutil.copyfile(graph_file_path, output_dir + "/adj_mx.pkl")
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 12
- FUTURE_SEQ_LEN = 12
-
- TRAIN_RATIO = 0.7
- VALID_RATIO = 0.1
- TARGET_CHANNEL = [0] # target channel(s)
-
- DATASET_NAME = "METR-LA"
- TOD = True # if add time_of_day feature
- DOW = True # if add day_of_week feature
- DOM = True # if add day_of_month feature
- DOY = True # if add day_of_year feature
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.h5".format(DATASET_NAME)
- GRAPH_FILE_PATH = "datasets/raw_data/{0}/adj_{0}.pkl".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--graph_file_path", type=str,
- default=GRAPH_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--tod", type=bool, default=TOD,
- help="Add feature time_of_day.")
- parser.add_argument("--dow", type=bool, default=DOW,
- help="Add feature day_of_week.")
- parser.add_argument("--dom", type=bool, default=DOM,
- help="Add feature day_of_week.")
- parser.add_argument("--doy", type=bool, default=DOY,
- help="Add feature day_of_week.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
+        day_of_year = (df.index.dayofyear - 1) / 366  # df.index.dayofyear starts from 1; subtract 1 so it starts from 0.
+ day_of_year_tiled = np.tile(day_of_year, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_year_tiled)
+
+ data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
+ return data_with_features
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_graph():
+ '''Save the adjacency matrix to the output directory.'''
+ output_graph_path = os.path.join(output_dir, 'adj_mx.pkl')
+ shutil.copyfile(graph_file_path, output_graph_path)
+ print(f'Adjacency matrix saved to {output_graph_path}')
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data, df = load_and_preprocess_data()
+
+ # Add temporal features
+ data_with_features = add_temporal_features(data, df)
+
+ # Save processed data
+ save_data(data_with_features)
+
+ # Copy and save adjacency matrix
+ save_graph()
+
+ # Save dataset description
+ save_description(data_with_features)
+
+if __name__ == '__main__':
+ main()
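
Note: a sketch of reading the copied adjacency file. For METR-LA the upstream
pickle is conventionally a (sensor_ids, sensor_id_to_ind, adj_mx) triple; that
structure is an assumption here, so the code inspects before unpacking.

import pickle

with open('datasets/METR-LA/adj_mx.pkl', 'rb') as f:
    obj = pickle.load(f)
# Assumed triple layout; fall back to the raw object otherwise.
adj_mx = obj[2] if isinstance(obj, (list, tuple)) and len(obj) == 3 else obj
print(type(obj), getattr(adj_mx, 'shape', None))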
diff --git a/scripts/data_preparation/PEMS-BAY/generate_training_data.py b/scripts/data_preparation/PEMS-BAY/generate_training_data.py
index f97ebf93..83670af4 100644
--- a/scripts/data_preparation/PEMS-BAY/generate_training_data.py
+++ b/scripts/data_preparation/PEMS-BAY/generate_training_data.py
@@ -1,171 +1,124 @@
import os
-import sys
+import json
import shutil
-import pickle
-import argparse
import numpy as np
import pandas as pd
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from basicts.data.transform import standard_transform
-
-
-def generate_data(args: argparse.Namespace):
- """Preprocess and generate train/valid/test datasets.
-
- Args:
- args (argparse): configurations of preprocessing
- """
-
- target_channel = args.target_channel
- future_seq_len = args.future_seq_len
- history_seq_len = args.history_seq_len
- add_time_of_day = args.tod
- add_day_of_week = args.dow
- add_day_of_month = args.dom
- add_day_of_year = args.doy
- output_dir = args.output_dir
- train_ratio = args.train_ratio
- valid_ratio = args.valid_ratio
- data_file_path = args.data_file_path
- graph_file_path = args.graph_file_path
- norm_each_channel = args.norm_each_channel
- if_rescale = not norm_each_channel # if evaluate on rescaled data. see `basicts.runner.base_tsf_runner.BaseTimeSeriesForecastingRunner.build_train_dataset` for details.
-
- # read data
+# Hyperparameters
+dataset_name = 'PEMS-BAY'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.h5'
+graph_file_path = f'datasets/raw_data/{dataset_name}/adj_{dataset_name}.pkl'
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target traffic flow channel
+add_time_of_day = True # Add time of day as a feature
+add_day_of_week = True # Add day of the week as a feature
+add_day_of_month = False # Add day of the month as a feature
+add_day_of_year = False # Add day of the year as a feature
+steps_per_day = 288 # Number of time steps per day
+frequency = 1440 // steps_per_day
+domain = 'traffic speed'
+feature_description = [domain, 'time of day', 'day of week']
+regular_settings = {
+ 'INPUT_LEN': 12,
+ 'OUTPUT_LEN': 12,
+ 'TRAIN_VAL_TEST_RATIO': [0.7, 0.1, 0.2],
+ 'NORM_EACH_CHANNEL': False,
+ 'RESCALE': True,
+ 'METRICS': ['MAE', 'RMSE', 'MAPE'],
+ 'NULL_VAL': 0.0
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
df = pd.read_hdf(data_file_path)
data = np.expand_dims(df.values, axis=-1)
-
data = data[..., target_channel]
- print("raw time series shape: {0}".format(data.shape))
-
- # split data
- l, n, f = data.shape
- num_samples = l - (history_seq_len + future_seq_len) + 1
- train_num = round(num_samples * train_ratio)
- valid_num = round(num_samples * valid_ratio)
- test_num = num_samples - train_num - valid_num
- print("number of training samples:{0}".format(train_num))
- print("number of validation samples:{0}".format(valid_num))
- print("number of test samples:{0}".format(test_num))
-
- index_list = []
- for t in range(history_seq_len, num_samples + history_seq_len):
- index = (t-history_seq_len, t, t+future_seq_len)
- index_list.append(index)
-
- train_index = index_list[:train_num]
- valid_index = index_list[train_num: train_num + valid_num]
- test_index = index_list[train_num +
- valid_num: train_num + valid_num + test_num]
-
- # normalize data
- scaler = standard_transform
- data_norm = scaler(data, output_dir, train_index, history_seq_len, future_seq_len, norm_each_channel=norm_each_channel)
-
- # add temporal feature
- feature_list = [data_norm]
+ print(f'Raw time series shape: {data.shape}')
+ return data, df
+
+def add_temporal_features(data, df):
+ '''Add time of day and day of week as features to the data.'''
+ _, n, _ = data.shape
+ feature_list = [data]
+
if add_time_of_day:
- # numerical time_of_day
- tod = (
- df.index.values - df.index.values.astype("datetime64[D]")) / np.timedelta64(1, "D")
- tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(tod_tiled)
+ time_of_day = (df.index.values - df.index.values.astype('datetime64[D]')) / np.timedelta64(1, 'D')
+ time_of_day_tiled = np.tile(time_of_day, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(time_of_day_tiled)
if add_day_of_week:
- # numerical day_of_week
- dow = df.index.dayofweek / 7
- dow_tiled = np.tile(dow, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(dow_tiled)
+ day_of_week = df.index.dayofweek / 7
+ day_of_week_tiled = np.tile(day_of_week, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_week_tiled)
if add_day_of_month:
# numerical day_of_month
- dom = (df.index.day - 1 ) / 31 # df.index.day starts from 1. We need to minus 1 to make it start from 0.
- dom_tiled = np.tile(dom, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(dom_tiled)
+        day_of_month = (df.index.day - 1) / 31  # df.index.day starts from 1; subtract 1 so it starts from 0.
+ day_of_month_tiled = np.tile(day_of_month, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_month_tiled)
if add_day_of_year:
# numerical day_of_year
- doy = (df.index.dayofyear - 1) / 366 # df.index.month starts from 1. We need to minus 1 to make it start from 0.
- doy_tiled = np.tile(doy, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(doy_tiled)
-
- processed_data = np.concatenate(feature_list, axis=-1)
-
- # save data
- index = {}
- index["train"] = train_index
- index["valid"] = valid_index
- index["test"] = test_index
- with open(output_dir + "/index_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(index, f)
-
- data = {}
- data["processed_data"] = processed_data
- with open(output_dir + "/data_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(data, f)
- # copy adj
- shutil.copyfile(graph_file_path, output_dir + "/adj_mx.pkl")
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 12
- FUTURE_SEQ_LEN = 12
-
- TRAIN_RATIO = 0.7
- VALID_RATIO = 0.1
- TARGET_CHANNEL = [0] # target channel(s)
-
- DATASET_NAME = "PEMS-BAY"
- TOD = True # if add time_of_day feature
- DOW = True # if add day_of_week feature
- DOM = True # if add day_of_month feature
- DOY = True # if add day_of_year feature
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.h5".format(DATASET_NAME)
- GRAPH_FILE_PATH = "datasets/raw_data/{0}/adj_{0}.pkl".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--graph_file_path", type=str,
- default=GRAPH_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--tod", type=bool, default=TOD,
- help="Add feature time_of_day.")
- parser.add_argument("--dow", type=bool, default=DOW,
- help="Add feature day_of_week.")
- parser.add_argument("--dom", type=bool, default=DOM,
- help="Add feature day_of_week.")
- parser.add_argument("--doy", type=bool, default=DOY,
- help="Add feature day_of_week.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
+        day_of_year = (df.index.dayofyear - 1) / 366  # df.index.dayofyear starts from 1; subtract 1 so it starts from 0.
+ day_of_year_tiled = np.tile(day_of_year, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_year_tiled)
+
+ data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
+ return data_with_features
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_graph():
+ '''Save the adjacency matrix to the output directory.'''
+ output_graph_path = os.path.join(output_dir, 'adj_mx.pkl')
+ shutil.copyfile(graph_file_path, output_graph_path)
+ print(f'Adjacency matrix saved to {output_graph_path}')
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data, df = load_and_preprocess_data()
+
+ # Add temporal features
+ data_with_features = add_temporal_features(data, df)
+
+ # Save processed data
+ save_data(data_with_features)
+
+ # Copy and save adjacency matrix
+ save_graph()
+
+ # Save dataset description
+ save_description(data_with_features)
+
+if __name__ == '__main__':
+ main()
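
Note: the time-of-day expression used above maps timestamps to fractions of a
day in [0, 1). An illustrative sketch:

import numpy as np
import pandas as pd

idx = pd.date_range('2017-01-01', periods=4, freq='5min')
tod = (idx.values - idx.values.astype('datetime64[D]')) / np.timedelta64(1, 'D')
print(tod)  # [0.         0.00347222 0.00694444 0.01041667], i.e. 5 min = 1/288 day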
diff --git a/scripts/data_preparation/PEMS03/generate_training_data.py b/scripts/data_preparation/PEMS03/generate_training_data.py
index 11d468a3..bf212a5f 100644
--- a/scripts/data_preparation/PEMS03/generate_training_data.py
+++ b/scripts/data_preparation/PEMS03/generate_training_data.py
@@ -1,161 +1,114 @@
import os
-import sys
+import json
import shutil
-import pickle
-import argparse
import numpy as np
-from generate_adj_mx import generate_adj_pems03
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from basicts.data.transform import standard_transform
-
-
-def generate_data(args: argparse.Namespace):
- """Preprocess and generate train/valid/test datasets.
-
- Args:
- args (argparse): configurations of preprocessing
- """
-
- target_channel = args.target_channel
- future_seq_len = args.future_seq_len
- history_seq_len = args.history_seq_len
- add_time_of_day = args.tod
- add_day_of_week = args.dow
- output_dir = args.output_dir
- train_ratio = args.train_ratio
- valid_ratio = args.valid_ratio
- data_file_path = args.data_file_path
- graph_file_path = args.graph_file_path
- steps_per_day = args.steps_per_day
- norm_each_channel = args.norm_each_channel
- if_rescale = not norm_each_channel # if evaluate on rescaled data. see `basicts.runner.base_tsf_runner.BaseTimeSeriesForecastingRunner.build_train_dataset` for details.
-
- # read data
- data = np.load(data_file_path)["data"]
+from generate_adj_mx import generate_adj_pems03 as generate_adj
+
+# Hyperparameters
+dataset_name = 'PEMS03'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.npz'
+graph_file_path = f'datasets/raw_data/{dataset_name}/adj_{dataset_name}.pkl'
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target traffic flow channel
+add_time_of_day = True # Add time of day as a feature
+add_day_of_week = True # Add day of the week as a feature
+steps_per_day = 288 # Number of time steps per day
+frequency = 1440 // steps_per_day
+domain = 'traffic flow'
+feature_description = [domain, 'time of day', 'day of week']
+regular_settings = {
+ 'INPUT_LEN': 12,
+ 'OUTPUT_LEN': 12,
+ 'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+ 'NORM_EACH_CHANNEL': False,
+ 'RESCALE': True,
+ 'METRICS': ['MAE', 'RMSE', 'MAPE'],
+ 'NULL_VAL': 0.0
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
+ data = np.load(data_file_path)['data']
data = data[..., target_channel]
- print("raw time series shape: {0}".format(data.shape))
-
- # split data
- l, n, f = data.shape
- num_samples = l - (history_seq_len + future_seq_len) + 1
- train_num = round(num_samples * train_ratio)
- valid_num = round(num_samples * valid_ratio)
- test_num = num_samples - train_num - valid_num
- print("number of training samples:{0}".format(train_num))
- print("number of validation samples:{0}".format(valid_num))
- print("number of test samples:{0}".format(test_num))
-
- index_list = []
- for t in range(history_seq_len, num_samples + history_seq_len):
- index = (t-history_seq_len, t, t+future_seq_len)
- index_list.append(index)
-
- train_index = index_list[:train_num]
- valid_index = index_list[train_num: train_num + valid_num]
- test_index = index_list[train_num +
- valid_num: train_num + valid_num + test_num]
-
- # normalize data
- scaler = standard_transform
- data_norm = scaler(data, output_dir, train_index, history_seq_len, future_seq_len, norm_each_channel=norm_each_channel)
-
- # add temporal feature
- feature_list = [data_norm]
+ print(f'Raw time series shape: {data.shape}')
+ return data
+
+def add_temporal_features(data):
+ '''Add time of day and day of week as features to the data.'''
+ l, n, _ = data.shape
+ feature_list = [data]
+
if add_time_of_day:
- # numerical time_of_day
- tod = [i % steps_per_day /
- steps_per_day for i in range(data_norm.shape[0])]
- tod = np.array(tod)
- tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(tod_tiled)
+ time_of_day = np.array([i % steps_per_day / steps_per_day for i in range(l)])
+ time_of_day_tiled = np.tile(time_of_day, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(time_of_day_tiled)
if add_day_of_week:
- # numerical day_of_week
- dow = [(i // steps_per_day) % 7 / 7 for i in range(data_norm.shape[0])]
- dow = np.array(dow)
- dow_tiled = np.tile(dow, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(dow_tiled)
-
- processed_data = np.concatenate(feature_list, axis=-1)
-
- # save data
- index = {}
- index["train"] = train_index
- index["valid"] = valid_index
- index["test"] = test_index
- with open(output_dir + "/index_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(index, f)
-
- data = {}
- data["processed_data"] = processed_data
- with open(output_dir + "/data_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(data, f)
- # copy adj
- if os.path.exists(args.graph_file_path):
- # copy
- shutil.copyfile(args.graph_file_path, output_dir + "/adj_mx.pkl")
+ day_of_week = np.array([(i // steps_per_day) % 7 / 7 for i in range(l)])
+ day_of_week_tiled = np.tile(day_of_week, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_week_tiled)
+
+ data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
+ return data_with_features
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_graph():
+ '''Save the adjacency matrix to the output directory, generating it if necessary.'''
+ output_graph_path = os.path.join(output_dir, 'adj_mx.pkl')
+ if os.path.exists(graph_file_path):
+ shutil.copyfile(graph_file_path, output_graph_path)
else:
- # generate and copy
- generate_adj_pems03()
- shutil.copyfile(args.graph_file_path, output_dir + "/adj_mx.pkl")
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 12
- FUTURE_SEQ_LEN = 12
-
- TRAIN_RATIO = 0.6
- VALID_RATIO = 0.2
- TARGET_CHANNEL = [0] # target channel(s)
- STEPS_PER_DAY = 288
-
- DATASET_NAME = "PEMS03"
- TOD = True # if add time_of_day feature
- DOW = True # if add day_of_week feature
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.npz".format(DATASET_NAME)
- GRAPH_FILE_PATH = "datasets/raw_data/{0}/adj_{0}.pkl".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--graph_file_path", type=str,
- default=GRAPH_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--steps_per_day", type=int,
- default=STEPS_PER_DAY, help="Sequence Length.")
- parser.add_argument("--tod", type=bool, default=TOD,
- help="Add feature time_of_day.")
- parser.add_argument("--dow", type=bool, default=DOW,
- help="Add feature day_of_week.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
+ generate_adj()
+ shutil.copyfile(graph_file_path, output_graph_path)
+ print(f'Adjacency matrix saved to {output_graph_path}')
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data = load_and_preprocess_data()
+
+ # Add temporal features
+ data_with_features = add_temporal_features(data)
+
+ # Save processed data
+ save_data(data_with_features)
+
+ # Copy or generate and save adjacency matrix
+ save_graph()
+
+ # Save dataset description
+ save_description(data_with_features)
+
+if __name__ == '__main__':
+ main()
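
Note: the PEMS datasets carry no timestamps, so time features are derived from
the row index alone, as the sketch below illustrates.

import numpy as np

steps_per_day = 288
l = 2 * steps_per_day                               # two days of 5-minute steps
tod = np.array([i % steps_per_day / steps_per_day for i in range(l)])
dow = np.array([(i // steps_per_day) % 7 / 7 for i in range(l)])
assert tod[0] == 0.0 and tod[steps_per_day] == 0.0    # wraps at midnight
assert dow[0] == 0.0 and dow[steps_per_day] == 1 / 7  # advances one day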
diff --git a/scripts/data_preparation/PEMS04/generate_training_data.py b/scripts/data_preparation/PEMS04/generate_training_data.py
index bed3d946..ec3b9e3d 100644
--- a/scripts/data_preparation/PEMS04/generate_training_data.py
+++ b/scripts/data_preparation/PEMS04/generate_training_data.py
@@ -1,161 +1,114 @@
import os
-import sys
+import json
import shutil
-import pickle
-import argparse
import numpy as np
-from generate_adj_mx import generate_adj_pems04
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from basicts.data.transform import standard_transform
-
-
-def generate_data(args: argparse.Namespace):
- """Preprocess and generate train/valid/test datasets.
-
- Args:
- args (argparse): configurations of preprocessing
- """
-
- target_channel = args.target_channel
- future_seq_len = args.future_seq_len
- history_seq_len = args.history_seq_len
- add_time_of_day = args.tod
- add_day_of_week = args.dow
- output_dir = args.output_dir
- train_ratio = args.train_ratio
- valid_ratio = args.valid_ratio
- data_file_path = args.data_file_path
- graph_file_path = args.graph_file_path
- steps_per_day = args.steps_per_day
- norm_each_channel = args.norm_each_channel
- if_rescale = not norm_each_channel # if evaluate on rescaled data. see `basicts.runner.base_tsf_runner.BaseTimeSeriesForecastingRunner.build_train_dataset` for details.
-
- # read data
- data = np.load(data_file_path)["data"]
+from generate_adj_mx import generate_adj_pems04 as generate_adj
+
+# Hyperparameters
+dataset_name = 'PEMS04'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.npz'
+graph_file_path = f'datasets/raw_data/{dataset_name}/adj_{dataset_name}.pkl'
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target traffic flow channel
+add_time_of_day = True # Add time of day as a feature
+add_day_of_week = True # Add day of the week as a feature
+steps_per_day = 288 # Number of time steps per day
+frequency = 1440 // steps_per_day
+domain = 'traffic flow'
+feature_description = [domain, 'time of day', 'day of week']
+regular_settings = {
+ 'INPUT_LEN': 12,
+ 'OUTPUT_LEN': 12,
+ 'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+ 'NORM_EACH_CHANNEL': False,
+ 'RESCALE': True,
+ 'METRICS': ['MAE', 'RMSE', 'MAPE'],
+ 'NULL_VAL': 0.0
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
+ data = np.load(data_file_path)['data']
data = data[..., target_channel]
- print("raw time series shape: {0}".format(data.shape))
-
- # split data
- l, n, f = data.shape
- num_samples = l - (history_seq_len + future_seq_len) + 1
- train_num = round(num_samples * train_ratio)
- valid_num = round(num_samples * valid_ratio)
- test_num = num_samples - train_num - valid_num
- print("number of training samples:{0}".format(train_num))
- print("number of validation samples:{0}".format(valid_num))
- print("number of test samples:{0}".format(test_num))
-
- index_list = []
- for t in range(history_seq_len, num_samples + history_seq_len):
- index = (t-history_seq_len, t, t+future_seq_len)
- index_list.append(index)
-
- train_index = index_list[:train_num]
- valid_index = index_list[train_num: train_num + valid_num]
- test_index = index_list[train_num +
- valid_num: train_num + valid_num + test_num]
-
- # normalize data
- scaler = standard_transform
- data_norm = scaler(data, output_dir, train_index, history_seq_len, future_seq_len, norm_each_channel=norm_each_channel)
-
- # add temporal feature
- feature_list = [data_norm]
+ print(f'Raw time series shape: {data.shape}')
+ return data
+
+def add_temporal_features(data):
+ '''Add time of day and day of week as features to the data.'''
+ l, n, _ = data.shape
+ feature_list = [data]
+
if add_time_of_day:
- # numerical time_of_day
- tod = [i % steps_per_day /
- steps_per_day for i in range(data_norm.shape[0])]
- tod = np.array(tod)
- tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(tod_tiled)
+ time_of_day = np.array([i % steps_per_day / steps_per_day for i in range(l)])
+ time_of_day_tiled = np.tile(time_of_day, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(time_of_day_tiled)
if add_day_of_week:
- # numerical day_of_week
- dow = [(i // steps_per_day) % 7 / 7 for i in range(data_norm.shape[0])]
- dow = np.array(dow)
- dow_tiled = np.tile(dow, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(dow_tiled)
-
- processed_data = np.concatenate(feature_list, axis=-1)
-
- # save data
- index = {}
- index["train"] = train_index
- index["valid"] = valid_index
- index["test"] = test_index
- with open(output_dir + "/index_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(index, f)
-
- data = {}
- data["processed_data"] = processed_data
- with open(output_dir + "/data_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(data, f)
- # copy adj
- if os.path.exists(args.graph_file_path):
- # copy
- shutil.copyfile(args.graph_file_path, output_dir + "/adj_mx.pkl")
+ day_of_week = np.array([(i // steps_per_day) % 7 / 7 for i in range(l)])
+ day_of_week_tiled = np.tile(day_of_week, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_week_tiled)
+
+ data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
+ return data_with_features
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_graph():
+ '''Save the adjacency matrix to the output directory, generating it if necessary.'''
+ output_graph_path = os.path.join(output_dir, 'adj_mx.pkl')
+ if os.path.exists(graph_file_path):
+ shutil.copyfile(graph_file_path, output_graph_path)
else:
- # generate and copy
- generate_adj_pems04()
- shutil.copyfile(graph_file_path, output_dir + "/adj_mx.pkl")
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 12
- FUTURE_SEQ_LEN = 12
-
- TRAIN_RATIO = 0.6
- VALID_RATIO = 0.2
- TARGET_CHANNEL = [0] # target channel(s)
- STEPS_PER_DAY = 288
-
- DATASET_NAME = "PEMS04"
- TOD = True # if add time_of_day feature
- DOW = True # if add day_of_week feature
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.npz".format(DATASET_NAME)
- GRAPH_FILE_PATH = "datasets/raw_data/{0}/adj_{0}.pkl".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--graph_file_path", type=str,
- default=GRAPH_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--steps_per_day", type=int,
- default=STEPS_PER_DAY, help="Sequence Length.")
- parser.add_argument("--tod", type=bool, default=TOD,
- help="Add feature time_of_day.")
- parser.add_argument("--dow", type=bool, default=DOW,
- help="Add feature day_of_week.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
+ generate_adj()
+ shutil.copyfile(graph_file_path, output_graph_path)
+ print(f'Adjacency matrix saved to {output_graph_path}')
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data = load_and_preprocess_data()
+
+ # Add temporal features
+ data_with_features = add_temporal_features(data)
+
+ # Save processed data
+ save_data(data_with_features)
+
+ # Copy or generate and save adjacency matrix
+ save_graph()
+
+ # Save dataset description
+ save_description(data_with_features)
+
+if __name__ == '__main__':
+ main()
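
A quick illustration (not part of the patch) of the shape logic behind the np.tile(...).transpose((2, 1, 0)) calls in add_temporal_features above: a length-L 1-D temporal signal becomes an L x N x 1 feature block repeated across all N nodes. Sizes here are illustrative:

    import numpy as np

    L, N, steps_per_day = 6, 3, 4  # illustrative sizes
    tod = np.array([i % steps_per_day / steps_per_day for i in range(L)])  # shape (L,)
    tod_feat = np.tile(tod, [1, N, 1]).transpose((2, 1, 0))  # (1, N, L) -> (L, N, 1)
    assert tod_feat.shape == (L, N, 1)
    assert np.allclose(tod_feat[:, 0, 0], tod)  # every node carries the same signal
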
diff --git a/scripts/data_preparation/PEMS07/generate_training_data.py b/scripts/data_preparation/PEMS07/generate_training_data.py
index a5fa12c8..559c1797 100644
--- a/scripts/data_preparation/PEMS07/generate_training_data.py
+++ b/scripts/data_preparation/PEMS07/generate_training_data.py
@@ -1,161 +1,114 @@
import os
-import sys
+import json
import shutil
-import pickle
-import argparse
import numpy as np
-from generate_adj_mx import generate_adj_pems07
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from basicts.data.transform import standard_transform
-
-
-def generate_data(args: argparse.Namespace):
- """Preprocess and generate train/valid/test datasets.
-
- Args:
- args (argparse): configurations of preprocessing
- """
-
- target_channel = args.target_channel
- future_seq_len = args.future_seq_len
- history_seq_len = args.history_seq_len
- add_time_of_day = args.tod
- add_day_of_week = args.dow
- output_dir = args.output_dir
- train_ratio = args.train_ratio
- valid_ratio = args.valid_ratio
- data_file_path = args.data_file_path
- graph_file_path = args.graph_file_path
- steps_per_day = args.steps_per_day
- norm_each_channel = args.norm_each_channel
- if_rescale = not norm_each_channel # if evaluate on rescaled data. see `basicts.runner.base_tsf_runner.BaseTimeSeriesForecastingRunner.build_train_dataset` for details.
-
- # read data
- data = np.load(data_file_path)["data"]
+from generate_adj_mx import generate_adj_pems07 as generate_adj
+
+# Hyperparameters
+dataset_name = 'PEMS07'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.npz'
+graph_file_path = f'datasets/raw_data/{dataset_name}/adj_{dataset_name}.pkl'
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target traffic flow channel
+add_time_of_day = True # Add time of day as a feature
+add_day_of_week = True # Add day of the week as a feature
+steps_per_day = 288 # Number of time steps per day
+frequency = 1440 // steps_per_day
+domain = 'traffic flow'
+feature_description = [domain, 'time of day', 'day of week']
+regular_settings = {
+ 'INPUT_LEN': 12,
+ 'OUTPUT_LEN': 12,
+ 'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+ 'NORM_EACH_CHANNEL': False,
+ 'RESCALE': True,
+ 'METRICS': ['MAE', 'RMSE', 'MAPE'],
+ 'NULL_VAL': 0.0
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
+ data = np.load(data_file_path)['data']
data = data[..., target_channel]
- print("raw time series shape: {0}".format(data.shape))
-
- # split data
- l, n, f = data.shape
- num_samples = l - (history_seq_len + future_seq_len) + 1
- train_num = round(num_samples * train_ratio)
- valid_num = round(num_samples * valid_ratio)
- test_num = num_samples - train_num - valid_num
- print("number of training samples:{0}".format(train_num))
- print("number of validation samples:{0}".format(valid_num))
- print("number of test samples:{0}".format(test_num))
-
- index_list = []
- for t in range(history_seq_len, num_samples + history_seq_len):
- index = (t-history_seq_len, t, t+future_seq_len)
- index_list.append(index)
-
- train_index = index_list[:train_num]
- valid_index = index_list[train_num: train_num + valid_num]
- test_index = index_list[train_num +
- valid_num: train_num + valid_num + test_num]
-
- # normalize data
- scaler = standard_transform
- data_norm = scaler(data, output_dir, train_index, history_seq_len, future_seq_len, norm_each_channel=norm_each_channel)
-
- # add temporal feature
- feature_list = [data_norm]
+ print(f'Raw time series shape: {data.shape}')
+ return data
+
+def add_temporal_features(data):
+ '''Add time of day and day of week as features to the data.'''
+ l, n, _ = data.shape
+ feature_list = [data]
+
if add_time_of_day:
- # numerical time_of_day
- tod = [i % steps_per_day /
- steps_per_day for i in range(data_norm.shape[0])]
- tod = np.array(tod)
- tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(tod_tiled)
+ time_of_day = np.array([i % steps_per_day / steps_per_day for i in range(l)])
+ time_of_day_tiled = np.tile(time_of_day, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(time_of_day_tiled)
if add_day_of_week:
- # numerical day_of_week
- dow = [(i // steps_per_day) % 7 / 7 for i in range(data_norm.shape[0])]
- dow = np.array(dow)
- dow_tiled = np.tile(dow, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(dow_tiled)
-
- processed_data = np.concatenate(feature_list, axis=-1)
-
- # save data
- index = {}
- index["train"] = train_index
- index["valid"] = valid_index
- index["test"] = test_index
- with open(output_dir + "/index_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(index, f)
-
- data = {}
- data["processed_data"] = processed_data
- with open(output_dir + "/data_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(data, f)
- # copy adj
- if os.path.exists(args.graph_file_path):
- # copy
- shutil.copyfile(args.graph_file_path, output_dir + "/adj_mx.pkl")
+ day_of_week = np.array([(i // steps_per_day) % 7 / 7 for i in range(l)])
+ day_of_week_tiled = np.tile(day_of_week, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_week_tiled)
+
+ data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
+ return data_with_features
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_graph():
+ '''Save the adjacency matrix to the output directory, generating it if necessary.'''
+ output_graph_path = os.path.join(output_dir, 'adj_mx.pkl')
+ if os.path.exists(graph_file_path):
+ shutil.copyfile(graph_file_path, output_graph_path)
else:
- # generate and copy
- generate_adj_pems07()
- shutil.copyfile(args.graph_file_path, output_dir + "/adj_mx.pkl")
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 12
- FUTURE_SEQ_LEN = 12
-
- TRAIN_RATIO = 0.6
- VALID_RATIO = 0.2
- TARGET_CHANNEL = [0] # target channel(s)
- STEPS_PER_DAY = 288
-
- DATASET_NAME = "PEMS07"
- TOD = True # if add time_of_day feature
- DOW = True # if add day_of_week feature
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.npz".format(DATASET_NAME)
- GRAPH_FILE_PATH = "datasets/raw_data/{0}/adj_{0}.pkl".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--graph_file_path", type=str,
- default=GRAPH_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--steps_per_day", type=int,
- default=STEPS_PER_DAY, help="Sequence Length.")
- parser.add_argument("--tod", type=bool, default=TOD,
- help="Add feature time_of_day.")
- parser.add_argument("--dow", type=bool, default=DOW,
- help="Add feature day_of_week.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
+ generate_adj()
+ shutil.copyfile(graph_file_path, output_graph_path)
+ print(f'Adjacency matrix saved to {output_graph_path}')
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data = load_and_preprocess_data()
+
+ # Add temporal features
+ data_with_features = add_temporal_features(data)
+
+ # Save processed data
+ save_data(data_with_features)
+
+ # Copy or generate and save adjacency matrix
+ save_graph()
+
+ # Save dataset description
+ save_description(data_with_features)
+
+if __name__ == '__main__':
+ main()
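
The sliding-window indexing and the train/valid/test split were removed from these scripts; regular_settings above only records the intended TRAIN_VAL_TEST_RATIO. A hedged sketch of how downstream code might split the saved series under those ratios (the contiguous chronological-block convention is an assumption, not something this patch specifies):

    import numpy as np

    def split_by_ratio(data, ratios=(0.6, 0.2, 0.2)):  # hypothetical helper
        # Assumption: contiguous chronological blocks along axis 0 (time).
        L = data.shape[0]
        train_end = round(L * ratios[0])
        valid_end = train_end + round(L * ratios[1])
        return data[:train_end], data[train_end:valid_end], data[valid_end:]

    # train, valid, test = split_by_ratio(data, (0.6, 0.2, 0.2))
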
diff --git a/scripts/data_preparation/PEMS08/generate_training_data.py b/scripts/data_preparation/PEMS08/generate_training_data.py
index ca452959..28a1e6e8 100644
--- a/scripts/data_preparation/PEMS08/generate_training_data.py
+++ b/scripts/data_preparation/PEMS08/generate_training_data.py
@@ -1,161 +1,114 @@
import os
-import sys
+import json
import shutil
-import pickle
-import argparse
import numpy as np
-from generate_adj_mx import generate_adj_pems08
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from basicts.data.transform import standard_transform
-
-
-def generate_data(args: argparse.Namespace):
- """Preprocess and generate train/valid/test datasets.
-
- Args:
- args (argparse): configurations of preprocessing
- """
-
- target_channel = args.target_channel
- future_seq_len = args.future_seq_len
- history_seq_len = args.history_seq_len
- add_time_of_day = args.tod
- add_day_of_week = args.dow
- output_dir = args.output_dir
- train_ratio = args.train_ratio
- valid_ratio = args.valid_ratio
- data_file_path = args.data_file_path
- graph_file_path = args.graph_file_path
- steps_per_day = args.steps_per_day
- norm_each_channel = args.norm_each_channel
- if_rescale = not norm_each_channel # if evaluate on rescaled data. see `basicts.runner.base_tsf_runner.BaseTimeSeriesForecastingRunner.build_train_dataset` for details.
-
- # read data
- data = np.load(data_file_path)["data"]
+from generate_adj_mx import generate_adj_pems08 as generate_adj
+
+# Hyperparameters
+dataset_name = 'PEMS08'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.npz'
+graph_file_path = f'datasets/raw_data/{dataset_name}/adj_{dataset_name}.pkl'
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target traffic flow channel
+add_time_of_day = True # Add time of day as a feature
+add_day_of_week = True # Add day of the week as a feature
+steps_per_day = 288 # Number of time steps per day
+frequency = 1440 // steps_per_day
+domain = 'traffic flow'
+feature_description = [domain, 'time of day', 'day of week']
+regular_settings = {
+ 'INPUT_LEN': 12,
+ 'OUTPUT_LEN': 12,
+ 'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+ 'NORM_EACH_CHANNEL': False,
+ 'RESCALE': True,
+ 'METRICS': ['MAE', 'RMSE', 'MAPE'],
+ 'NULL_VAL': 0.0
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
+ data = np.load(data_file_path)['data']
data = data[..., target_channel]
- print("raw time series shape: {0}".format(data.shape))
-
- # split data
- l, n, f = data.shape
- num_samples = l - (history_seq_len + future_seq_len) + 1
- train_num = round(num_samples * train_ratio)
- valid_num = round(num_samples * valid_ratio)
- test_num = num_samples - train_num - valid_num
- print("number of training samples:{0}".format(train_num))
- print("number of validation samples:{0}".format(valid_num))
- print("number of test samples:{0}".format(test_num))
-
- index_list = []
- for t in range(history_seq_len, num_samples + history_seq_len):
- index = (t-history_seq_len, t, t+future_seq_len)
- index_list.append(index)
-
- train_index = index_list[:train_num]
- valid_index = index_list[train_num: train_num + valid_num]
- test_index = index_list[train_num +
- valid_num: train_num + valid_num + test_num]
-
- # normalize data
- scaler = standard_transform
- data_norm = scaler(data, output_dir, train_index, history_seq_len, future_seq_len, norm_each_channel=norm_each_channel)
-
- # add temporal feature
- feature_list = [data_norm]
+ print(f'Raw time series shape: {data.shape}')
+ return data
+
+def add_temporal_features(data):
+ '''Add time of day and day of week as features to the data.'''
+ l, n, _ = data.shape
+ feature_list = [data]
+
if add_time_of_day:
- # numerical time_of_day
- tod = [i % steps_per_day /
- steps_per_day for i in range(data_norm.shape[0])]
- tod = np.array(tod)
- tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(tod_tiled)
+ time_of_day = np.array([i % steps_per_day / steps_per_day for i in range(l)])
+ time_of_day_tiled = np.tile(time_of_day, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(time_of_day_tiled)
if add_day_of_week:
- # numerical day_of_week
- dow = [(i // steps_per_day) % 7 / 7 for i in range(data_norm.shape[0])]
- dow = np.array(dow)
- dow_tiled = np.tile(dow, [1, n, 1]).transpose((2, 1, 0))
- feature_list.append(dow_tiled)
-
- processed_data = np.concatenate(feature_list, axis=-1)
-
- # save data
- index = {}
- index["train"] = train_index
- index["valid"] = valid_index
- index["test"] = test_index
- with open(output_dir + "/index_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(index, f)
-
- data = {}
- data["processed_data"] = processed_data
- with open(output_dir + "/data_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(data, f)
- # copy adj
- if os.path.exists(args.graph_file_path):
- # copy
- shutil.copyfile(args.graph_file_path, output_dir + "/adj_mx.pkl")
+ day_of_week = np.array([(i // steps_per_day) % 7 / 7 for i in range(l)])
+ day_of_week_tiled = np.tile(day_of_week, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_week_tiled)
+
+ data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
+ return data_with_features
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_graph():
+ '''Save the adjacency matrix to the output directory, generating it if necessary.'''
+ output_graph_path = os.path.join(output_dir, 'adj_mx.pkl')
+ if os.path.exists(graph_file_path):
+ shutil.copyfile(graph_file_path, output_graph_path)
else:
- # generate and copy
- generate_adj_pems08()
- shutil.copyfile(graph_file_path, output_dir + "/adj_mx.pkl")
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 12
- FUTURE_SEQ_LEN = 12
-
- TRAIN_RATIO = 0.6
- VALID_RATIO = 0.2
- TARGET_CHANNEL = [0] # target channel(s)
- STEPS_PER_DAY = 288
-
- DATASET_NAME = "PEMS08"
- TOD = True # if add time_of_day feature
- DOW = True # if add day_of_week feature
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.npz".format(DATASET_NAME)
- GRAPH_FILE_PATH = "datasets/raw_data/{0}/adj_{0}.pkl".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--graph_file_path", type=str,
- default=GRAPH_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--steps_per_day", type=int,
- default=STEPS_PER_DAY, help="Sequence Length.")
- parser.add_argument("--tod", type=bool, default=TOD,
- help="Add feature time_of_day.")
- parser.add_argument("--dow", type=bool, default=DOW,
- help="Add feature day_of_week.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
+ generate_adj()
+ shutil.copyfile(graph_file_path, output_graph_path)
+ print(f'Adjacency matrix saved to {output_graph_path}')
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data = load_and_preprocess_data()
+
+ # Add temporal features
+ data_with_features = add_temporal_features(data)
+
+ # Save processed data
+ save_data(data_with_features)
+
+ # Copy or generate and save adjacency matrix
+ save_graph()
+
+ # Save dataset description
+ save_description(data_with_features)
+
+if __name__ == '__main__':
+ main()
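
regular_settings also records METRICS and NULL_VAL (0.0 for the PEMS flows above, np.nan for the LTSF-style datasets below). A minimal sketch of the null-masking idea those settings imply; the actual metric implementations live elsewhere in the library, so this is illustrative only:

    import numpy as np

    def masked_mae(pred, target, null_val=0.0):  # illustrative, not the library's API
        # Entries equal to null_val (or NaN when null_val is NaN) are treated as missing.
        mask = ~np.isnan(target) if np.isnan(null_val) else target != null_val
        return float(np.abs(pred[mask] - target[mask]).mean())

    # masked_mae(pred, target, null_val=0.0)     # PEMS-style settings
    # masked_mae(pred, target, null_val=np.nan)  # LTSF-style settings
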
diff --git a/scripts/data_preparation/Pulse/generate_training_data.py b/scripts/data_preparation/Pulse/generate_training_data.py
index f5ee733c..2e6ebe8f 100644
--- a/scripts/data_preparation/Pulse/generate_training_data.py
+++ b/scripts/data_preparation/Pulse/generate_training_data.py
@@ -1,122 +1,75 @@
import os
-import sys
-import shutil
-import pickle
-import argparse
+import json
import numpy as np
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from basicts.data.transform import standard_transform
-
-def generate_data(args: argparse.Namespace):
- """Preprocess and generate train/valid/test datasets.
-
- Args:
- args (argparse): configurations of preprocessing
- """
-
- target_channel = args.target_channel
- future_seq_len = args.future_seq_len
- history_seq_len = args.history_seq_len
- output_dir = args.output_dir
- train_ratio = args.train_ratio
- valid_ratio = args.valid_ratio
- data_file_path = args.data_file_path
- norm_each_channel = args.norm_each_channel
- if_rescale = not norm_each_channel # if evaluate on rescaled data. see `basicts.runner.base_tsf_runner.BaseTimeSeriesForecastingRunner.build_train_dataset` for details.
-
- # read data
+# Hyperparameters
+dataset_name = 'Pulse'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.npy'
+graph_file_path = None
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target channel
+frequency = None
+domain = 'simulated pulse data'
+feature_description = [domain]
+regular_settings = {
+ 'INPUT_LEN': 336,
+ 'OUTPUT_LEN': 336,
+ 'TRAIN_VAL_TEST_RATIO': [0.7, 0.1, 0.2],
+ 'NORM_EACH_CHANNEL': False,
+ 'RESCALE': True,
+ 'METRICS': ['MAE', 'RMSE', 'MAPE'],
+ 'NULL_VAL': np.nan
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
data = np.load(data_file_path)
data = data[..., target_channel]
- print("raw time series shape: {0}".format(data.shape))
-
- # split data
- l, n, f = data.shape
- num_samples = l - (history_seq_len + future_seq_len) + 1
- train_num = round(num_samples * train_ratio)
- valid_num = round(num_samples * valid_ratio)
- test_num = num_samples - train_num - valid_num
- print("number of training samples:{0}".format(train_num))
- print("number of validation samples:{0}".format(valid_num))
- print("number of test samples:{0}".format(test_num))
-
- index_list = []
- for t in range(history_seq_len, num_samples + history_seq_len):
- index = (t-history_seq_len, t, t+future_seq_len)
- index_list.append(index)
-
- train_index = index_list[:train_num]
- valid_index = index_list[train_num: train_num + valid_num]
- test_index = index_list[train_num +
- valid_num: train_num + valid_num + test_num]
-
- # normalize data
- scaler = standard_transform
- data_norm = scaler(data, output_dir, train_index, history_seq_len, future_seq_len, norm_each_channel=norm_each_channel)
-
- # add temporal feature
- feature_list = [data_norm]
-
- processed_data = np.concatenate(feature_list, axis=-1)
-
- # save data
- index = {}
- index["train"] = train_index
- index["valid"] = valid_index
- index["test"] = test_index
- with open(output_dir + "/index_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(index, f)
-
- data = {}
- data["processed_data"] = processed_data
- with open(output_dir + "/data_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(data, f)
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 720
- FUTURE_SEQ_LEN = 96
-
- TRAIN_RATIO = 0.6
- VALID_RATIO = 0.2
- TARGET_CHANNEL = [0] # target channel(s)
-
- DATASET_NAME = "Pulse"
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.npy".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
+ print(f'Raw time series shape: {data.shape}')
+ return data
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data = load_and_preprocess_data()
+
+ # Save processed data
+ save_data(data)
+
+ # Save dataset description
+ save_description(data)
+
+if __name__ == '__main__':
+ main()
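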
diff --git a/scripts/data_preparation/Pulse/simulate_data.py b/scripts/data_preparation/Pulse/simulate_data.py
index 29a6f918..272a252c 100644
--- a/scripts/data_preparation/Pulse/simulate_data.py
+++ b/scripts/data_preparation/Pulse/simulate_data.py
@@ -1,33 +1,33 @@
import os
-import sys
+
+import torch
import numpy as np
-import matplotlib.pyplot as plt
-PROJECT_DIR = os.path.abspath(__file__ + "/../../../..")
+
+PROJECT_DIR = os.path.abspath(__file__ + '/../../../..')
os.chdir(PROJECT_DIR)
-def generate_pulse_sequence(duration, min_interval, max_interval):
- time_points = np.arange(0, duration, 1)
- pulse_sequence = np.zeros_like(time_points)
+# Hyperparameters
+duration = 20000 # time series length
+min_interval = 30 # minimum interval between two pulses
+max_interval = 30 # maximum interval between two pulses
+
+def generate_pulse_sequence():
+ x = np.arange(0, duration, 1)
+ y = np.zeros_like(x)
current_time = 0
while current_time < duration:
pulse_interval = np.random.uniform(min_interval, max_interval)
pulse_width = 1
- pulse_sequence[int(current_time):int(current_time + pulse_width)] = 1
+ y[int(current_time):int(current_time + pulse_width)] = 1
current_time += pulse_interval + pulse_width
- return time_points, pulse_sequence
-
-# hyper parameterts
-duration = 20000 # time series length
-min_interval = 30 # minimum interval between two pulses
-max_interval = 30 # maximum interval between two pulses
+ return x, y
# generate pulse sequence
-time_points, pulse_sequence = generate_pulse_sequence(duration, min_interval, max_interval)
+time_points, pulse_sequence = generate_pulse_sequence()
# save pulse sequence
-import torch
data = torch.Tensor(pulse_sequence).unsqueeze(-1).unsqueeze(-1).numpy()
np.save('datasets/raw_data/Pulse/Pulse.npy', data)
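
Worth noting: with min_interval == max_interval == 30 as set above, np.random.uniform(30, 30) always returns 30, so the generated sequence is strictly periodic, with one pulse every pulse_interval + pulse_width = 31 steps. A quick check of that spacing:

    import numpy as np

    duration, interval, width = 200, 30, 1
    y = np.zeros(duration)
    t = 0.0
    while t < duration:
        y[int(t):int(t + width)] = 1
        t += interval + width
    onsets = np.flatnonzero(y)
    assert np.all(np.diff(onsets) == interval + width)  # fixed period of 31 steps
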
diff --git a/scripts/data_preparation/SD/generate_training_data.py b/scripts/data_preparation/SD/generate_training_data.py
index 88348389..01d08eae 100644
--- a/scripts/data_preparation/SD/generate_training_data.py
+++ b/scripts/data_preparation/SD/generate_training_data.py
@@ -1,72 +1,136 @@
import os
-import sys
-import argparse
-
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from scripts.data_preparation.CA.generate_training_data import generate_data
-
-# Dataset Description:
-# LargeST: A Benchmark Dataset for Large-Scale Traffic Forecasting.
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 12
- FUTURE_SEQ_LEN = 12
-
- TRAIN_RATIO = 0.6
- VALID_RATIO = 0.2
- TARGET_CHANNEL = [0] # target channel(s)
-
- DATASET_NAME = "SD"
- TOD = True # if add time_of_day feature
- DOW = True # if add day_of_week feature
- DOM = True # if add day_of_month feature
- DOY = True # if add day_of_year feature
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.h5".format(DATASET_NAME)
- GRAPH_FILE_PATH = "datasets/raw_data/{0}/adj_{0}.npy".format(DATASET_NAME)
- GRAPH_METE_PATH = "datasets/raw_data/{0}/meta_{0}.csv".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--graph_file_path", type=str,
- default=GRAPH_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--tod", type=bool, default=TOD,
- help="Add feature time_of_day.")
- parser.add_argument("--dow", type=bool, default=DOW,
- help="Add feature day_of_week.")
- parser.add_argument("--dom", type=bool, default=DOM,
- help="Add feature day_of_week.")
- parser.add_argument("--doy", type=bool, default=DOY,
- help="Add feature day_of_week.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
+import json
+import pickle
+import shutil
+
+import numpy as np
+import pandas as pd
+
+# Hyperparameters
+dataset_name = 'SD'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.h5'
+graph_file_path = f'datasets/raw_data/{dataset_name}/adj_{dataset_name}.npy'
+meta_file_path = f'datasets/raw_data/{dataset_name}/meta_{dataset_name}.csv'
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target traffic flow channel
+add_time_of_day = True # Add time of day as a feature
+add_day_of_week = True # Add day of the week as a feature
+add_day_of_month = False # Add day of the month as a feature
+add_day_of_year = False # Add day of the year as a feature
+steps_per_day = 96 # Number of time steps per day
+frequency = 1440 // steps_per_day
+domain = 'traffic flow'
+feature_description = [domain, 'time of day', 'day of week']
+regular_settings = {
+ 'INPUT_LEN': 12,
+ 'OUTPUT_LEN': 12,
+ 'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+ 'NORM_EACH_CHANNEL': False,
+ 'RESCALE': True,
+ 'METRICS': ['MAE', 'RMSE', 'MAPE'],
+ 'NULL_VAL': 0.0
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
+ df = pd.read_hdf(data_file_path)
+ data = np.expand_dims(df.values, axis=-1)
+ data = data[..., target_channel]
+ print(f'Raw time series shape: {data.shape}')
+ return data, df
+
+def add_temporal_features(data, df):
+ '''Add time of day, day of week, and optionally day of month/year as features to the data.'''
+ _, n, _ = data.shape
+ feature_list = [data]
+
+ if add_time_of_day:
+ time_of_day = (df.index.values - df.index.values.astype('datetime64[D]')) / np.timedelta64(1, 'D')
+ time_of_day_tiled = np.tile(time_of_day, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(time_of_day_tiled)
+
+ if add_day_of_week:
+ day_of_week = df.index.dayofweek / 7
+ day_of_week_tiled = np.tile(day_of_week, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_week_tiled)
+
+ if add_day_of_month:
+ # numerical day_of_month
+ day_of_month = (df.index.day - 1) / 31 # df.index.day starts from 1; subtract 1 so it starts from 0.
+ day_of_month_tiled = np.tile(day_of_month, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_month_tiled)
+
+ if add_day_of_year:
+ # numerical day_of_year
+ day_of_year = (df.index.dayofyear - 1) / 366 # df.index.dayofyear starts from 1; subtract 1 so it starts from 0.
+ day_of_year_tiled = np.tile(day_of_year, [1, n, 1]).transpose((2, 1, 0))
+ feature_list.append(day_of_year_tiled)
+
+ data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
+ return data_with_features
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_graph():
+ '''Save the adjacency matrix to the output directory.'''
+ output_graph_path = os.path.join(output_dir, 'adj_mx.pkl')
+ adj_mx = np.load(graph_file_path)
+ with open(output_graph_path, 'wb') as f:
+ pickle.dump(adj_mx, f)
+ print(f'Adjacency matrix saved to {output_graph_path}')
+
+def save_meta_data():
+ '''Save the metadata to the output directory.'''
+ output_meta_data_path = os.path.join(output_dir, 'meta.csv')
+ shutil.copyfile(meta_file_path, output_meta_data_path)
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data, df = load_and_preprocess_data()
+
+ # Add temporal features
+ data_with_features = add_temporal_features(data, df)
+
+ # Save processed data
+ save_data(data_with_features)
+
+ # Copy and save adjacency matrix
+ save_graph()
+
+ # Copy and save meta data
+ save_meta_data()
+
+ # Save dataset description
+ save_description(data_with_features)
+
+if __name__ == '__main__':
+ main()
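
A small demonstration (not part of the patch) of what the datetime64 arithmetic in add_temporal_features above computes: the fraction of the day elapsed at each timestamp, in [0, 1). The index values are illustrative:

    import numpy as np
    import pandas as pd

    idx = pd.date_range('2019-01-01', periods=4, freq='6h')  # illustrative index
    vals = idx.values  # datetime64[ns]
    tod = (vals - vals.astype('datetime64[D]')) / np.timedelta64(1, 'D')
    print(tod)  # [0.   0.25 0.5  0.75]
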
diff --git a/scripts/data_preparation/Traffic/generate_training_data.py b/scripts/data_preparation/Traffic/generate_training_data.py
index d96d81ef..dd855ab2 100644
--- a/scripts/data_preparation/Traffic/generate_training_data.py
+++ b/scripts/data_preparation/Traffic/generate_training_data.py
@@ -1,90 +1,56 @@
import os
-import sys
-import pickle
-import argparse
+import json
import numpy as np
import pandas as pd
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from basicts.data.transform import standard_transform
-
-
-def generate_data(args: argparse.Namespace):
- """Preprocess and generate train/valid/test datasets.
-
- Args:
- args (argparse): configurations of preprocessing
- """
-
- target_channel = args.target_channel
- future_seq_len = args.future_seq_len
- history_seq_len = args.history_seq_len
- add_time_of_day = args.tod
- add_day_of_week = args.dow
- add_day_of_month = args.dom
- add_day_of_year = args.doy
- output_dir = args.output_dir
- train_ratio = args.train_ratio
- valid_ratio = args.valid_ratio
- data_file_path = args.data_file_path
- steps_per_day = args.steps_per_day
- norm_each_channel = args.norm_each_channel
- if_rescale = not norm_each_channel # if evaluate on rescaled data. see `basicts.runner.base_tsf_runner.BaseTimeSeriesForecastingRunner.build_train_dataset` for details.
-
- # read data
+# Hyperparameters
+dataset_name = 'Traffic'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.csv'
+graph_file_path = None
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target channel (road occupancy rates)
+add_time_of_day = True # Add time of day as a feature
+add_day_of_week = True # Add day of the week as a feature
+add_day_of_month = True # Add day of the month as a feature
+add_day_of_year = True # Add day of the year as a feature
+steps_per_day = 24 # Number of time steps per day
+frequency = 1440 // steps_per_day
+domain = 'road occupancy rates'
+feature_description = [domain, 'time of day', 'day of week', 'day of month', 'day of year']
+regular_settings = {
+ 'INPUT_LEN': 336,
+ 'OUTPUT_LEN': 336,
+ 'TRAIN_VAL_TEST_RATIO': [0.7, 0.1, 0.2],
+ 'NORM_EACH_CHANNEL': True,
+ 'RESCALE': False,
+ 'METRICS': ['MAE', 'MSE'],
+ 'NULL_VAL': np.nan
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
df = pd.read_csv(data_file_path)
- df_index = pd.to_datetime(df["date"].values, format="%Y-%m-%d %H:%M").to_numpy()
+ df_index = pd.to_datetime(df['date'].values, format='%Y-%m-%d %H:%M').to_numpy()
df = df[df.columns[1:]]
df.index = df_index
-
data = np.expand_dims(df.values, axis=-1)
-
data = data[..., target_channel]
- print("raw time series shape: {0}".format(data.shape))
-
- # split data
- l, n, f = data.shape
- num_samples = l - (history_seq_len + future_seq_len) + 1
- train_num = round(num_samples * train_ratio)
- valid_num = round(num_samples * valid_ratio)
- test_num = num_samples - train_num - valid_num
- print("number of training samples:{0}".format(train_num))
- print("number of validation samples:{0}".format(valid_num))
- print("number of test samples:{0}".format(test_num))
-
- index_list = []
- for t in range(history_seq_len, num_samples + history_seq_len):
- index = (t-history_seq_len, t, t+future_seq_len)
- index_list.append(index)
-
- train_index = index_list[:train_num]
- valid_index = index_list[train_num: train_num + valid_num]
- test_index = index_list[train_num +
- valid_num: train_num + valid_num + test_num]
-
- # normalize data
- scaler = standard_transform
- # Following related works (e.g. informer and autoformer), we normalize each channel separately.
- data_norm = scaler(data, output_dir, train_index, history_seq_len, future_seq_len, norm_each_channel=norm_each_channel)
-
- # add temporal feature
- feature_list = [data_norm]
+ print(f'Raw time series shape: {data.shape}')
+ return data, df
+
+def add_temporal_features(data, df):
+ '''Add time of day, day of week, day of month, and day of year as features to the data.'''
+ _, n, _ = data.shape
+ feature_list = [data]
+
if add_time_of_day:
# numerical time_of_day
-
tod = (
- df.index.values - df.index.values.astype("datetime64[D]")) / np.timedelta64(1, "D")
+ df.index.values - df.index.values.astype('datetime64[D]')) / np.timedelta64(1, 'D')
tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(tod_tiled)
-
- # tod = [i % steps_per_day / steps_per_day for i in range(data_norm.shape[0])]
- # tod = np.array(tod)
- # tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0))
- # feature_list.append(tod_tiled)
-
if add_day_of_week:
# numerical day_of_week
dow = df.index.dayofweek / 7
@@ -93,7 +59,7 @@ def generate_data(args: argparse.Namespace):
if add_day_of_month:
# numerical day_of_month
- dom = (df.index.day - 1 ) / 31 # df.index.day starts from 1. We need to minus 1 to make it start from 0.
+ dom = (df.index.day - 1) / 31 # df.index.day starts from 1; subtract 1 so it starts from 0.
dom_tiled = np.tile(dom, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(dom_tiled)
@@ -103,78 +69,52 @@ def generate_data(args: argparse.Namespace):
doy_tiled = np.tile(doy, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(doy_tiled)
- processed_data = np.concatenate(feature_list, axis=-1)
-
- # save data
- index = {}
- index["train"] = train_index
- index["valid"] = valid_index
- index["test"] = test_index
- with open(output_dir + "/index_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(index, f)
-
- data = {}
- data["processed_data"] = processed_data
- with open(output_dir + "/data_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(data, f)
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 168
- FUTURE_SEQ_LEN = 96
-
- TRAIN_RATIO = 0.7
- VALID_RATIO = 0.1
- TARGET_CHANNEL = [0] # target channel(s)
- STEPS_PER_DAY = 24 # every 1 hour
-
- DATASET_NAME = "Electricity" # sampling frequency: every 1 hour
- TOD = True # if add time_of_day feature
- DOW = True # if add day_of_week feature
- DOM = True # if add day_of_month feature
- DOY = True # if add day_of_year feature
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.csv".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--steps_per_day", type=int,
- default=STEPS_PER_DAY, help="Sequence Length.")
- parser.add_argument("--tod", type=bool, default=TOD,
- help="Add feature time_of_day.")
- parser.add_argument("--dow", type=bool, default=DOW,
- help="Add feature day_of_week.")
- parser.add_argument("--dom", type=bool, default=DOM,
- help="Add feature day_of_week.")
- parser.add_argument("--doy", type=bool, default=DOY,
- help="Add feature day_of_week.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
+ data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
+ return data_with_features
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data, df = load_and_preprocess_data()
+
+ # Add temporal features
+ data_with_features = add_temporal_features(data, df)
+
+ # Save processed data
+ save_data(data_with_features)
+
+ # Save dataset description
+ save_description(data_with_features)
+
+if __name__ == '__main__':
+ main()
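
The Traffic settings above declare NORM_EACH_CHANNEL: True with RESCALE: False; normalization itself no longer happens in these scripts. A minimal sketch of per-channel z-score normalization fitted on the training portion only (the train-only-statistics convention is an assumption here, as is the helper name):

    import numpy as np

    def fit_channel_zscore(train):  # hypothetical helper
        # Per-node/per-channel statistics over the time axis of the training split.
        mean = train.mean(axis=0, keepdims=True)  # (1, N, C)
        std = train.std(axis=0, keepdims=True)
        std[std == 0] = 1.0  # guard against constant channels
        return mean, std

    # mean, std = fit_channel_zscore(data[:train_end])
    # data_norm = (data - mean) / std
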
diff --git a/scripts/data_preparation/Weather/generate_training_data.py b/scripts/data_preparation/Weather/generate_training_data.py
index c897afca..d531464e 100644
--- a/scripts/data_preparation/Weather/generate_training_data.py
+++ b/scripts/data_preparation/Weather/generate_training_data.py
@@ -1,79 +1,52 @@
import os
-import sys
-import pickle
-import argparse
+import json
import numpy as np
import pandas as pd
-# TODO: remove it when basicts can be installed by pip
-sys.path.append(os.path.abspath(__file__ + "/../../../.."))
-from basicts.data.transform import standard_transform
-
-
-def generate_data(args: argparse.Namespace):
- """Preprocess and generate train/valid/test datasets.
-
- Args:
- args (argparse): configurations of preprocessing
- """
-
- target_channel = args.target_channel
- future_seq_len = args.future_seq_len
- history_seq_len = args.history_seq_len
- add_time_of_day = args.tod
- add_day_of_week = args.dow
- add_day_of_month = args.dom
- add_day_of_year = args.doy
- output_dir = args.output_dir
- train_ratio = args.train_ratio
- valid_ratio = args.valid_ratio
- data_file_path = args.data_file_path
- steps_per_day = args.steps_per_day
- norm_each_channel = args.norm_each_channel
- if_rescale = not norm_each_channel # if evaluate on rescaled data. see `basicts.runner.base_tsf_runner.BaseTimeSeriesForecastingRunner.build_train_dataset` for details.
-
- # read data
+# Hyperparameters
+dataset_name = 'Weather'
+data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.csv'
+graph_file_path = None
+output_dir = f'datasets/{dataset_name}'
+target_channel = [0] # Target channel
+add_time_of_day = True # Add time of day as a feature
+add_day_of_week = True # Add day of the week as a feature
+add_day_of_month = True # Add day of the month as a feature
+add_day_of_year = True # Add day of the year as a feature
+steps_per_day = 144 # Number of time steps per day
+frequency = 1440 // steps_per_day
+domain = 'weather'
+feature_description = [domain, 'time of day', 'day of week', 'day of month', 'day of year']
+regular_settings = {
+ 'INPUT_LEN': 336,
+ 'OUTPUT_LEN': 336,
+ 'TRAIN_VAL_TEST_RATIO': [0.7, 0.1, 0.2],
+ 'NORM_EACH_CHANNEL': True,
+ 'RESCALE': False,
+ 'METRICS': ['MAE', 'MSE'],
+ 'NULL_VAL': np.nan
+}
+
+def load_and_preprocess_data():
+ '''Load and preprocess raw data, selecting the specified channel(s).'''
df = pd.read_csv(data_file_path)
- df_index = pd.to_datetime(df["date"].values, format="%Y/%m/%d %H:%M").to_numpy()
+ df_index = pd.to_datetime(df['date'].values, format='%Y-%m-%d %H:%M').to_numpy()
df = df[df.columns[1:]]
df.index = df_index
-
data = np.expand_dims(df.values, axis=-1)
-
data = data[..., target_channel]
- print("raw time series shape: {0}".format(data.shape))
-
- # split data
- l, n, f = data.shape
- num_samples = l - (history_seq_len + future_seq_len) + 1
- train_num = round(num_samples * train_ratio)
- valid_num = round(num_samples * valid_ratio)
- test_num = num_samples - train_num - valid_num
- print("number of training samples:{0}".format(train_num))
- print("number of validation samples:{0}".format(valid_num))
- print("number of test samples:{0}".format(test_num))
-
- index_list = []
- for t in range(history_seq_len, num_samples + history_seq_len):
- index = (t-history_seq_len, t, t+future_seq_len)
- index_list.append(index)
-
- train_index = index_list[:train_num]
- valid_index = index_list[train_num: train_num + valid_num]
- test_index = index_list[train_num +
- valid_num: train_num + valid_num + test_num]
-
- # normalize data
- scaler = standard_transform
- # Following related works (e.g. informer and autoformer), we normalize each channel separately.
- data_norm = scaler(data, output_dir, train_index, history_seq_len, future_seq_len, norm_each_channel=norm_each_channel)
-
- # add temporal feature
- feature_list = [data_norm]
+ print(f'Raw time series shape: {data.shape}')
+ return data, df
+
+def add_temporal_features(data, df):
+ '''Add temporal features (time of day, day of week, day of month, day of year) to the data.'''
+ l, n, _ = data.shape
+ feature_list = [data]
+
if add_time_of_day:
# numerical time_of_day
- tod = [i % steps_per_day / steps_per_day for i in range(data_norm.shape[0])]
+ tod = [i % steps_per_day / steps_per_day for i in range(l)]
tod = np.array(tod)
tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(tod_tiled)
@@ -96,78 +69,52 @@ def generate_data(args: argparse.Namespace):
doy_tiled = np.tile(doy, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(doy_tiled)
- processed_data = np.concatenate(feature_list, axis=-1)
-
- # save data
- index = {}
- index["train"] = train_index
- index["valid"] = valid_index
- index["test"] = test_index
- with open(output_dir + "/index_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(index, f)
-
- data = {}
- data["processed_data"] = processed_data
- with open(output_dir + "/data_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale), "wb") as f:
- pickle.dump(data, f)
-
-
-if __name__ == "__main__":
- # sliding window size for generating history sequence and target sequence
- HISTORY_SEQ_LEN = 96
- FUTURE_SEQ_LEN = 336
-
- TRAIN_RATIO = 0.7
- VALID_RATIO = 0.1
- TARGET_CHANNEL = [0] # target channel(s)
- STEPS_PER_DAY = 144 # sampling rate: every 1 hour
-
- DATASET_NAME = "Weather" # sampling frequency: every 1 hour
- TOD = True # if add time_of_day feature
- DOW = True # if add day_of_week feature
- DOM = True # if add day_of_month feature
- DOY = True # if add day_of_year feature
-
- OUTPUT_DIR = "datasets/" + DATASET_NAME
- DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.csv".format(DATASET_NAME)
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--output_dir", type=str,
- default=OUTPUT_DIR, help="Output directory.")
- parser.add_argument("--data_file_path", type=str,
- default=DATA_FILE_PATH, help="Raw traffic readings.")
- parser.add_argument("--history_seq_len", type=int,
- default=HISTORY_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--future_seq_len", type=int,
- default=FUTURE_SEQ_LEN, help="Sequence Length.")
- parser.add_argument("--steps_per_day", type=int,
- default=STEPS_PER_DAY, help="Sequence Length.")
- parser.add_argument("--tod", type=bool, default=TOD,
- help="Add feature time_of_day.")
- parser.add_argument("--dow", type=bool, default=DOW,
- help="Add feature day_of_week.")
- parser.add_argument("--dom", type=bool, default=DOM,
- help="Add feature day_of_week.")
- parser.add_argument("--doy", type=bool, default=DOY,
- help="Add feature day_of_week.")
- parser.add_argument("--target_channel", type=list,
- default=TARGET_CHANNEL, help="Selected channels.")
- parser.add_argument("--train_ratio", type=float,
- default=TRAIN_RATIO, help="Train ratio")
- parser.add_argument("--valid_ratio", type=float,
- default=VALID_RATIO, help="Validate ratio.")
- parser.add_argument("--norm_each_channel", type=float, help="Validate ratio.")
- args = parser.parse_args()
-
- # print args
- print("-"*(20+45+5))
- for key, value in sorted(vars(args).items()):
- print("|{0:>20} = {1:<45}|".format(key, str(value)))
- print("-"*(20+45+5))
-
- if not os.path.exists(args.output_dir):
- os.makedirs(args.output_dir)
- args.norm_each_channel = True
- generate_data(args)
- args.norm_each_channel = False
- generate_data(args)
+ data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
+ return data_with_features
+
+def save_data(data):
+ '''Save the preprocessed data to a binary file.'''
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ file_path = os.path.join(output_dir, 'data.dat')
+ fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
+ fp[:] = data[:]
+ fp.flush()
+ del fp
+ print(f'Data saved to {file_path}')
+
+def save_description(data):
+ '''Save a description of the dataset to a JSON file.'''
+ description = {
+ 'name': dataset_name,
+ 'domain': domain,
+ 'shape': data.shape,
+ 'num_time_steps': data.shape[0],
+ 'num_nodes': data.shape[1],
+ 'num_features': data.shape[2],
+ 'feature_description': feature_description,
+ 'has_graph': graph_file_path is not None,
+ 'frequency (minutes)': frequency,
+ 'regular_settings': regular_settings
+ }
+ description_path = os.path.join(output_dir, 'desc.json')
+ with open(description_path, 'w') as f:
+ json.dump(description, f, indent=4)
+ print(f'Description saved to {description_path}')
+ print(description)
+
+def main():
+ # Load and preprocess data
+ data, df = load_and_preprocess_data()
+
+ # Add temporal features
+ data_with_features = add_temporal_features(data, df)
+
+ # Save processed data
+ save_data(data_with_features)
+
+ # Save dataset description
+ save_description(data_with_features)
+
+if __name__ == '__main__':
+ main()
diff --git a/scripts/data_preparation/run.sh b/scripts/data_preparation/run.sh
index f602caf5..af1613e7 100755
--- a/scripts/data_preparation/run.sh
+++ b/scripts/data_preparation/run.sh
@@ -1,24 +1,29 @@
#!/bin/bash
# spatial-temporal forecasting
-python scripts/data_preparation/METR-LA/generate_training_data.py --history_seq_len 12 --future_seq_len 12
-python scripts/data_preparation/PEMS-BAY/generate_training_data.py --history_seq_len 12 --future_seq_len 12
-python scripts/data_preparation/PEMS03/generate_training_data.py --history_seq_len 12 --future_seq_len 12
-python scripts/data_preparation/PEMS04/generate_training_data.py --history_seq_len 12 --future_seq_len 12
-python scripts/data_preparation/PEMS07/generate_training_data.py --history_seq_len 12 --future_seq_len 12
-python scripts/data_preparation/PEMS08/generate_training_data.py --history_seq_len 12 --future_seq_len 12
+python scripts/data_preparation/METR-LA/generate_training_data.py
+python scripts/data_preparation/PEMS-BAY/generate_training_data.py
+python scripts/data_preparation/PEMS03/generate_training_data.py
+python scripts/data_preparation/PEMS04/generate_training_data.py
+python scripts/data_preparation/PEMS07/generate_training_data.py
+python scripts/data_preparation/PEMS08/generate_training_data.py
# long-term time series forecasting
-python scripts/data_preparation/ETTh1/generate_training_data.py --history_seq_len 336 --future_seq_len 336
-python scripts/data_preparation/ETTh2/generate_training_data.py --history_seq_len 336 --future_seq_len 336
-python scripts/data_preparation/ETTm1/generate_training_data.py --history_seq_len 336 --future_seq_len 336
-python scripts/data_preparation/ETTm2/generate_training_data.py --history_seq_len 336 --future_seq_len 336
-python scripts/data_preparation/Electricity/generate_training_data.py --history_seq_len 336 --future_seq_len 336
-python scripts/data_preparation/Weather/generate_training_data.py --history_seq_len 336 --future_seq_len 336
-python scripts/data_preparation/ExchangeRate/generate_training_data.py --history_seq_len 336 --future_seq_len 336
-python scripts/data_preparation/Illness/generate_training_data.py --history_seq_len 336 --future_seq_len 336
-python scripts/data_preparation/Traffic/generate_training_data.py --history_seq_len 336 --future_seq_len 336
+python scripts/data_preparation/ETTh1/generate_training_data.py
+python scripts/data_preparation/ETTh2/generate_training_data.py
+python scripts/data_preparation/ETTm1/generate_training_data.py
+python scripts/data_preparation/ETTm2/generate_training_data.py
+python scripts/data_preparation/Electricity/generate_training_data.py
+python scripts/data_preparation/Weather/generate_training_data.py
+python scripts/data_preparation/ExchangeRate/generate_training_data.py
+python scripts/data_preparation/Illness/generate_training_data.py
+python scripts/data_preparation/Traffic/generate_training_data.py
-python scripts/data_preparation/METR-LA/generate_training_data.py --history_seq_len 336 --future_seq_len 336
-python scripts/data_preparation/PEMS-BAY/generate_training_data.py --history_seq_len 336 --future_seq_len 336
-python scripts/data_preparation/PEMS04/generate_training_data.py --history_seq_len 336 --future_seq_len 336
-python scripts/data_preparation/PEMS08/generate_training_data.py --history_seq_len 336 --future_seq_len 336
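+# large-scale spatial-temporal forecasting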
+python scripts/data_preparation/CA/generate_training_data.py
+python scripts/data_preparation/GBA/generate_training_data.py
+python scripts/data_preparation/GLA/generate_training_data.py
+python scripts/data_preparation/SD/generate_training_data.py
+
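+# air quality forecasting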
+python scripts/data_preparation/BeijingAirQuality/generate_training_data.py
+
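+# synthetic datasets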
+python scripts/data_preparation/Gaussian/generate_training_data.py
+python scripts/data_preparation/Pulse/generate_training_data.py
diff --git a/tutorial/config_design.md b/tutorial/config_design.md
new file mode 100644
index 00000000..ff8fd1c9
--- /dev/null
+++ b/tutorial/config_design.md
@@ -0,0 +1,40 @@
+# 📜 Configuration Design
+
+The design philosophy of BasicTS is to be entirely configuration-based. Our goal is to allow users to focus on their models and data, without getting bogged down by the complexities of pipeline construction.
+
+The configuration file is a `.py` file where you can import your model and runner, and set all necessary options. BasicTS uses EasyDict as a parameter container, making it easy to extend and flexible to use.
+
+The configuration file typically includes the following sections:
+
+- **General Options**: Describes general settings such as configuration description, `GPU_NUM`, `RUNNER`, etc.
+- **Environment Options**: Includes settings like `TF32`, `SEED`, `CUDNN`, `DETERMINISTIC`, etc.
+- **Dataset Options**: Specifies `NAME`, `TYPE` (Dataset Class), `PARAMS` (Dataset Parameters), etc.
+- **Scaler Options**: Specifies `NAME`, `TYPE` (Scaler Class), `PARAMS` (Scaler Parameters), etc.
+- **Model Options**: Specifies `NAME`, `TYPE` (Model Class), `PARAMS` (Model Parameters), etc.
+- **Metrics Options**: Includes `FUNCS` (Metric Functions), `TARGET` (Target Metrics), `NULL_VAL` (Handling of Missing Values), etc.
+- **Train Options**:
+ - **General**: Specifies settings like `EPOCHS`, `LOSS`, `EARLY_STOPPING`, etc.
+ - **Optimizer**: Specifies `TYPE` (Optimizer Class), `PARAMS` (Optimizer Parameters), etc.
+ - **Scheduler**: Specifies `TYPE` (Scheduler Class), `PARAMS` (Scheduler Parameters), etc.
+ - **Curriculum Learning**: Includes settings like `CL_EPOCHS`, `WARMUP_EPOCHS`, `STEP_SIZE`, etc.
+ - **Data**: Specifies settings like `BATCH_SIZE`, `NUM_WORKERS`, `PIN_MEMORY`, etc.
+- **Valid Options**:
+ - **General**: Includes `INTERVAL` for validation frequency.
+ - **Data**: Specifies settings like `BATCH_SIZE`, `NUM_WORKERS`, `PIN_MEMORY`, etc.
+- **Test Options**:
+ - **General**: Includes `INTERVAL` for testing frequency.
+ - **Data**: Specifies settings like `BATCH_SIZE`, `NUM_WORKERS`, `PIN_MEMORY`, etc.
+
+For a complete guide on all configuration options and examples, refer to [examples/complete_config.py](../examples/complete_config.py).
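+
+As a quick, hypothetical illustration, a configuration skeleton might look like the sketch below. The field names mirror the sections listed above, but the exact keys, classes, and defaults are defined in [examples/complete_config.py](../examples/complete_config.py), which is the authoritative reference.
+
+```python
+# A minimal configuration sketch. MyModel, the import path, and all
+# parameter values are placeholders, not BasicTS defaults.
+from easydict import EasyDict
+
+from my_package.my_model import MyModel  # hypothetical model import
+
+CFG = EasyDict()
+
+# General options
+CFG.DESCRIPTION = 'My first configuration'
+CFG.GPU_NUM = 1
+
+# Model options
+CFG.MODEL = EasyDict()
+CFG.MODEL.NAME = 'MyModel'
+CFG.MODEL.TYPE = MyModel  # the model class itself, not an instance
+CFG.MODEL.PARAMS = {'hidden_dim': 64}
+
+# Train options
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.EPOCHS = 100
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+```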
+
+## 🧑💻 Explore Further
+
+- **🎉 [Getting Started](./getting_started.md)**
+- **💡 [Understanding the Overall Design Convention of BasicTS](./overall_design.md)**
+- **📦 [Exploring the Dataset Convention and Customizing Your Own Dataset](./dataset_design.md)**
+- **🛠️ [Navigating The Scaler Convention and Designing Your Own Scaler](./scaler_design.md)**
+- **🧠 [Diving into the Model Convention and Creating Your Own Model](./model_design.md)**
+- **📉 [Examining the Metrics Convention and Developing Your Own Loss & Metrics](./metrics_design.md)**
+- **🏃♂️ [Mastering The Runner Convention and Building Your Own Runner](./runner_design.md)**
+- **📜 [Interpreting the Config File Convention and Customizing Your Configuration](./config_design.md)**
+- **🔍 [Exploring a Variety of Baseline Models](../baselines/)**
diff --git a/tutorial/dataset_design.md b/tutorial/dataset_design.md
new file mode 100644
index 00000000..1e71a2ec
--- /dev/null
+++ b/tutorial/dataset_design.md
@@ -0,0 +1,69 @@
+# 📦 Dataset Design
+
+## ⏬ Data Download
+
+To get started with the datasets, download the `all_data.zip` file from either [Google Drive](https://drive.google.com/drive/folders/14EJVODCU48fGK0FkyeVom_9lETh80Yjp?usp=sharing) or [Baidu Netdisk](https://pan.baidu.com/s/1shA2scuMdZHlx6pj35Dl7A?pwd=s2xe). After downloading, unzip the files into the `datasets/` directory:
+
+```bash
+cd /path/to/BasicTS
+unzip /path/to/all_data.zip -d datasets/
+mv datasets/all_data/* datasets/
+rmdir datasets/all_data
+```
+
+These datasets are preprocessed and ready for immediate use.
+
+## 💿 Data Format
+
+Each dataset contains at least two essential files: `data.dat` and `desc.json`:
+
+- **`data.dat`**: This file stores the raw time series data in `numpy.memmap` format with a shape of [L, N, C] (see the loading sketch after this list).
+ - **L**: Number of time steps. Typically, the training, validation, and test sets are split along this dimension.
+ - **N**: Number of time series, also referred to as the number of nodes.
+ - **C**: Number of features. Usually, this includes [target feature, time of day, day of week, day of month, day of year], with the target feature being mandatory and the others optional.
+
+- **`desc.json`**: This file contains metadata about the dataset, including:
+ - Dataset name
+ - Domain of the dataset
+ - Shape of the data
+ - Number of time steps
+ - Number of nodes (i.e., the number of time series)
+ - Feature descriptions
+ - Presence of prior graph structures (if any)
+ - Regular settings:
+ - Input and output lengths
+ - Ratios for training, validation, and test sets
+ - Whether normalization is applied individually to each channel (i.e., time series)
+ - Whether to re-normalize during evaluation
+ - Evaluation metrics
+ - Null value used to mark missing data
+
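+As a quick sanity check, a dataset laid out this way can be loaded with plain NumPy. The snippet below is a sketch; the dataset name is illustrative:
+
+```python
+# Sketch: inspect a preprocessed dataset ('Weather' is just an example).
+import json
+import numpy as np
+
+dataset_dir = 'datasets/Weather'
+
+with open(f'{dataset_dir}/desc.json') as f:
+    desc = json.load(f)
+
+# The memmap needs the [L, N, C] shape recorded in desc.json to
+# interpret the flat binary file.
+data = np.memmap(f'{dataset_dir}/data.dat', dtype='float32', mode='r',
+                 shape=tuple(desc['shape']))
+print(desc['name'], data.shape)
+```
+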
+## 🧑💻 Dataset Class Design
+
+<img src="figures/DatasetDesign.jpeg" alt="Dataset Design" />
+
+In time series forecasting, datasets are typically generated from raw time series data using a sliding window approach. As illustrated above, the raw time series is split into training, validation, and test sets along the time dimension, and samples are generated using a sliding window of size `inputs + targets`. Most datasets adhere to this structure.
+
+BasicTS provides a built-in `Dataset` class, [`TimeSeriesForecastingDataset`](../basicts/data/simple_tsf_dataset.py), designed specifically for time series data. Each sample it generates is a dictionary with two entries: `inputs` (the historical input sequence) and `target` (the future sequence to forecast). Detailed documentation can be found in the class's comments.
+
+## 🧑🍳 How to Add or Customize Datasets
+
+If your dataset follows the structure described above, you can preprocess your data into the `data.dat` and `desc.json` format and place it in the `datasets/` directory, e.g., `datasets/YOUR_DATA/{data.dat, desc.json}`. BasicTS will then automatically recognize and utilize your dataset.
+
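+A minimal export sketch (the array contents and metadata values below are dummy placeholders) could look like this:
+
+```python
+# Sketch: write a custom dataset in the data.dat / desc.json layout.
+import json
+import os
+import numpy as np
+
+data = np.random.rand(1000, 3, 1).astype('float32')  # dummy [L, N, C] array
+output_dir = 'datasets/YOUR_DATA'
+os.makedirs(output_dir, exist_ok=True)
+
+# data.dat: the raw values as a memory-mapped binary file
+fp = np.memmap(os.path.join(output_dir, 'data.dat'), dtype='float32',
+               mode='w+', shape=data.shape)
+fp[:] = data[:]
+fp.flush()
+
+# desc.json: metadata describing the array above; add the remaining
+# fields listed in the Data Format section as needed.
+desc = {'name': 'YOUR_DATA', 'shape': list(data.shape),
+        'num_time_steps': data.shape[0], 'num_nodes': data.shape[1],
+        'num_features': data.shape[2]}
+with open(os.path.join(output_dir, 'desc.json'), 'w') as f:
+    json.dump(desc, f, indent=4)
+```
+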
+For reference, you can review the scripts in `scripts/data_preparation/`, which are used to process datasets from `raw_data.zip` ([Google Drive](https://drive.google.com/drive/folders/14EJVODCU48fGK0FkyeVom_9lETh80Yjp?usp=sharing), [Baidu Netdisk](https://pan.baidu.com/s/1shA2scuMdZHlx6pj35Dl7A?pwd=s2xe)).
+
+If your dataset does not conform to the standard format or has specific requirements, you can define your own dataset class by inheriting from `torch.utils.data.Dataset`. In this custom class, the `__getitem__` method should return a dictionary containing `inputs` and `target`.
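+
+For instance, a minimal custom dataset might look like the sketch below (the windowing parameters are illustrative, and the data is assumed to be an in-memory array of shape [L, N, C]):
+
+```python
+# Sketch: a custom dataset yielding {'inputs', 'target'} dictionaries.
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+
+class MyForecastingDataset(Dataset):
+    def __init__(self, data: np.ndarray, input_len: int = 336, output_len: int = 336):
+        self.data = torch.from_numpy(data).float()  # [L, N, C]
+        self.input_len = input_len
+        self.output_len = output_len
+
+    def __len__(self):
+        # number of valid sliding-window positions
+        return self.data.shape[0] - self.input_len - self.output_len + 1
+
+    def __getitem__(self, idx):
+        t = idx + self.input_len
+        return {
+            'inputs': self.data[idx:t],                  # [input_len, N, C]
+            'target': self.data[t:t + self.output_len],  # [output_len, N, C]
+        }
+```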
+
+## 🧑💻 Explore Further
+
+- **🎉 [Getting Started](./getting_started.md)**
+- **💡 [Understanding the Overall Design Convention of BasicTS](./overall_design.md)**
+- **📦 [Exploring the Dataset Convention and Customizing Your Own Dataset](./dataset_design.md)**
+- **🛠️ [Navigating The Scaler Convention and Designing Your Own Scaler](./scaler_design.md)**
+- **🧠 [Diving into the Model Convention and Creating Your Own Model](./model_design.md)**
+- **📉 [Examining the Metrics Convention and Developing Your Own Loss & Metrics](./metrics_design.md)**
+- **🏃♂️ [Mastering The Runner Convention and Building Your Own Runner](./runner_design.md)**
+- **📜 [Interpreting the Config File Convention and Customizing Your Configuration](./config_design.md)**
+- **🔍 [Exploring a Variety of Baseline Models](../baselines/)**
diff --git a/tutorial/figures/DatasetDesign.jpeg b/tutorial/figures/DatasetDesign.jpeg
new file mode 100644
index 0000000000000000000000000000000000000000..aff7d470b3b7ba83118d43c32164c147fc0f7ae8
GIT binary patch
literal 144113
[binary JPEG data omitted]