-
Notifications
You must be signed in to change notification settings - Fork 300
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
73e6827
commit 4f508ac
Showing
12 changed files
with
894 additions
and
437 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# @Time : Jan. 10, 2019 15:24 | ||
# @Author : Veritas YIN | ||
# @FileName : __init__.py | ||
# @Version : 1.0 | ||
# @IDE : PyCharm | ||
# @Github : https://github.com/VeritasYin/Project_Orion |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
# @Time : Jan. 10, 2019 15:26 | ||
# @Author : Veritas YIN | ||
# @FileName : data_utils.py | ||
# @Version : 1.0 | ||
# @IDE : PyCharm | ||
# @Github : https://github.com/VeritasYin/Project_Orion | ||
|
||
from utils.math_utils import z_score | ||
|
||
import numpy as np | ||
import pandas as pd | ||
|
||
|
||
class Dataset(object): | ||
def __init__(self, data, stats): | ||
self.__data = data | ||
self.mean = stats['mean'] | ||
self.std = stats['std'] | ||
|
||
def get_data(self, type): | ||
return self.__data[type] | ||
|
||
def get_stats(self): | ||
return {'mean': self.mean, 'std': self.std} | ||
|
||
def get_len(self, type): | ||
return len(self.__data[type]) | ||
|
||
def z_inverse(self, type): | ||
return self.__data[type] * self.std + self.mean | ||
|
||
|
||
def seq_gen(len_seq, data_seq, offset, n_frame, n_route, day_slot, C_0=1): | ||
''' | ||
Generate data in the form of standard sequence unit. | ||
:param len_seq: int, the length of target date sequence. | ||
:param data_seq: np.ndarray, source data / time-series. | ||
:param offset: int, the starting index of different dataset type. | ||
:param n_frame: int, the number of frame within a standard sequence unit, | ||
which contains n_his = 12 and n_pred = 9 (3 /15 min, 6 /30 min & 9 /45 min). | ||
:param n_route: int, the number of routes in the graph. | ||
:param day_slot: int, the number of time slots per day, controlled by the time window (5 min as default). | ||
:param C_0: int, the size of input channel. | ||
:return: np.ndarray, [len_seq, n_frame, n_route, C_0]. | ||
''' | ||
n_slot = day_slot - n_frame + 1 | ||
|
||
tmp_seq = np.zeros((len_seq * n_slot, n_frame, n_route, C_0)) | ||
for i in range(len_seq): | ||
for j in range(n_slot): | ||
sta = (i + offset) * day_slot + j | ||
end = sta + n_frame | ||
tmp_seq[i * n_slot + j, :, :, :] = np.reshape(data_seq[sta:end, :], [n_frame, n_route, C_0]) | ||
return tmp_seq | ||
|
||
|
||
def data_gen(file_path, data_config, n_route, n_frame=21, day_slot=288): | ||
''' | ||
Source file load and dataset generation. | ||
:param file_path: str, the file path of data source. | ||
:param data_config: tuple, the configs of dataset in train, validation, test. | ||
:param n_route: int, the number of routes in the graph. | ||
:param n_frame: int, the number of frame within a standard sequence unit, | ||
which contains n_his = 12 and n_pred = 9 (3 /15 min, 6 /30 min & 9 /45 min). | ||
:param day_slot: int, the number of time slots per day, controlled by the time window (5 min as default). | ||
:return: dict, dataset that contains training, validation and test with stats. | ||
''' | ||
n_train, n_val, n_test = data_config | ||
# generate training, validation and test data | ||
try: | ||
data_seq = pd.read_csv(file_path, header=None).values | ||
except FileNotFoundError: | ||
print(f'ERROR: input file was not found in {file_path}.') | ||
|
||
seq_train = seq_gen(n_train, data_seq, 0, n_frame, n_route, day_slot) | ||
seq_val = seq_gen(n_val, data_seq, n_train, n_frame, n_route, day_slot) | ||
seq_test = seq_gen(n_test, data_seq, n_train + n_val, n_frame, n_route, day_slot) | ||
|
||
# x_stats: dict, the stats for the train dataset, including the value of mean and standard deviation. | ||
x_stats = {'mean': np.mean(seq_train), 'std': np.std(seq_train)} | ||
|
||
# x_train, x_val, x_test: np.array, [sample_size, n_frame, n_route, channel_size]. | ||
x_train = z_score(seq_train, x_stats['mean'], x_stats['std']) | ||
x_val = z_score(seq_val, x_stats['mean'], x_stats['std']) | ||
x_test = z_score(seq_test, x_stats['mean'], x_stats['std']) | ||
|
||
x_data = {'train': x_train, 'val': x_val, 'test': x_test} | ||
dataset = Dataset(x_data, x_stats) | ||
return dataset | ||
|
||
|
||
def gen_batch(inputs, batch_size, dynamic_batch=False, shuffle=False): | ||
''' | ||
Data iterator in batch. | ||
:param inputs: np.ndarray, [len_seq, n_frame, n_route, C_0], standard sequence units. | ||
:param batch_size: int, the size of batch. | ||
:param dynamic_batch: bool, whether changes the batch size in the last batch if its length is less than the default. | ||
:param shuffle: bool, whether shuffle the batches. | ||
''' | ||
len_inputs = len(inputs) | ||
|
||
if shuffle: | ||
idx = np.arange(len_inputs) | ||
np.random.shuffle(idx) | ||
|
||
for start_idx in range(0, len_inputs, batch_size): | ||
end_idx = start_idx + batch_size | ||
if end_idx > len_inputs: | ||
if dynamic_batch: | ||
end_idx = len_inputs | ||
else: | ||
break | ||
if shuffle: | ||
slide = idx[start_idx:end_idx] | ||
else: | ||
slide = slice(start_idx, end_idx) | ||
|
||
yield inputs[slide] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
# @Time : Jan. 02, 2019 22:17 | ||
# @Author : Veritas YIN | ||
# @FileName : main.py | ||
# @Version : 1.0 | ||
# @Project : Orion | ||
# @IDE : PyCharm | ||
# @Github : https://github.com/VeritasYin/Project_Orion | ||
|
||
import os | ||
|
||
os.environ["CUDA_VISIBLE_DEVICES"] = "0" | ||
from os.path import join as pjoin | ||
|
||
import tensorflow as tf | ||
|
||
config = tf.ConfigProto() | ||
config.gpu_options.allow_growth = True | ||
tf.Session(config=config) | ||
|
||
from utils.math_graph import * | ||
from data_loader.data_utils import * | ||
from models.trainer import model_train | ||
from models.tester import model_test | ||
|
||
import argparse | ||
|
||
parser = argparse.ArgumentParser() | ||
parser.add_argument('--n_route', type=int, default=228) | ||
parser.add_argument('--n_his', type=int, default=12) | ||
parser.add_argument('--n_pred', type=int, default=9) | ||
parser.add_argument('--batch_size', type=int, default=50) | ||
parser.add_argument('--epoch', type=int, default=50) | ||
parser.add_argument('--save', type=int, default=10) | ||
parser.add_argument('--ks', type=int, default=3) | ||
parser.add_argument('--kt', type=int, default=3) | ||
parser.add_argument('--lr', type=float, default=1e-3) | ||
parser.add_argument('--opt', type=str, default='RMSProp') | ||
parser.add_argument('--graph', type=str, default='default') | ||
parser.add_argument('--inf_mode', type=str, default='merge') | ||
|
||
args = parser.parse_args() | ||
print(f'Training configs: {args}') | ||
|
||
n, n_his, n_pred = args.n_route, args.n_his, args.n_pred | ||
Ks, Kt = args.ks, args.kt | ||
# blocks: settings of channel size in st_conv_blocks / bottleneck design | ||
blocks = [[1, 32, 64], [64, 32, 128]] | ||
|
||
# Load wighted adjacency matrix W | ||
if args.graph == 'default': | ||
W = weight_matrix(pjoin('./dataset', f'PeMSD7_W_{n}.csv')) | ||
else: | ||
# load customized graph weight matrix | ||
W = weight_matrix(pjoin('./dataset', args.graph)) | ||
|
||
# Calculate graph kernel | ||
L = scaled_laplacian(W) | ||
# Alternative approximation method: 1st approx - first_approx(W, n). | ||
Lk = cheb_poly_approx(L, Ks, n) | ||
tf.add_to_collection(name='graph_kernel', value=tf.cast(tf.constant(Lk), tf.float32)) | ||
|
||
# Data Preprocessing | ||
data_file = f'PeMSD7_V_{n}.csv' | ||
n_train, n_val, n_test = 34, 5, 5 | ||
PeMS = data_gen(pjoin('./dataset', data_file), (n_train, n_val, n_test), n, n_his + n_pred) | ||
print(f'>> Loading dataset with Mean: {PeMS.mean:.2f}, STD: {PeMS.std:.2f}') | ||
|
||
if __name__ == '__main__': | ||
model_train(PeMS, blocks, args) | ||
model_test(PeMS, PeMS.get_len('test'), n_his, n_pred, args.inf_mode) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# @Time : Jan. 10, 2019 17:49 | ||
# @Author : Veritas YIN | ||
# @FileName : __init__.py | ||
# @Version : 1.0 | ||
# @IDE : PyCharm | ||
# @Github : https://github.com/VeritasYin/Project_Orion |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# @Time : Jan. 12, 2019 19:01 | ||
# @Author : Veritas YIN | ||
# @FileName : base_model.py | ||
# @Version : 1.0 | ||
# @IDE : PyCharm | ||
# @Github : https://github.com/VeritasYin/Project_Orion | ||
|
||
from models.layers import * | ||
from os.path import join as pjoin | ||
import tensorflow as tf | ||
|
||
|
||
def build_model(inputs, n_his, Ks, Kt, blocks, keep_prob): | ||
''' | ||
Build the base model. | ||
:param inputs: placeholder. | ||
:param n_his: int, size of historical records for training. | ||
:param Ks: int, kernel size of spatial convolution. | ||
:param Kt: int, kernel size of temporal convolution. | ||
:param blocks: list, channel configs of st_conv blocks. | ||
:param keep_prob: placeholder. | ||
''' | ||
x = inputs[:, 0:n_his, :, :] | ||
|
||
# Ko>0: kernel size of temporal convolution in the output layer. | ||
Ko = n_his | ||
# ST-Block | ||
for i, channels in enumerate(blocks): | ||
x = st_conv_block(x, Ks, Kt, channels, i, keep_prob, act_func='GLU') | ||
Ko -= 2 * (Ks - 1) | ||
|
||
# Output Layer | ||
if Ko > 1: | ||
y = output_layer(x, Ko, 'output_layer') | ||
else: | ||
raise ValueError(f'ERROR: kernel size Ko must be greater than 1, but received "{Ko}".') | ||
|
||
tf.add_to_collection(name='copy_loss', | ||
value=tf.nn.l2_loss(inputs[:, n_his - 1:n_his, :, :] - inputs[:, n_his:n_his + 1, :, :])) | ||
train_loss = tf.nn.l2_loss(y - inputs[:, n_his:n_his + 1, :, :]) | ||
single_pred = y[:, 0, :, :] | ||
tf.add_to_collection(name='y_pred', value=single_pred) | ||
return train_loss, single_pred | ||
|
||
|
||
def model_save(sess, global_steps, model_name, save_path='./output/models/'): | ||
''' | ||
Save the checkpoint of trained model. | ||
:param sess: tf.Session(). | ||
:param global_steps: tensor, record the global step of training in epochs. | ||
:param model_name: str, the name of saved model. | ||
:param save_path: str, the path of saved model. | ||
:return: | ||
''' | ||
saver = tf.train.Saver(max_to_keep=3) | ||
prefix_path = saver.save(sess, pjoin(save_path, model_name), global_step=global_steps) | ||
print(f'<< Saving model to {prefix_path} ...') |
Oops, something went wrong.