Commit
code update v1.0
VeritasYin committed Jan 14, 2019
1 parent 73e6827 commit 4f508ac
Showing 12 changed files with 894 additions and 437 deletions.
437 changes: 0 additions & 437 deletions STGCN_IJCAI.py

This file was deleted.

6 changes: 6 additions & 0 deletions data_loader/__init__.py
@@ -0,0 +1,6 @@
# @Time : Jan. 10, 2019 15:24
# @Author : Veritas YIN
# @FileName : __init__.py
# @Version : 1.0
# @IDE : PyCharm
# @Github : https://github.com/VeritasYin/Project_Orion
118 changes: 118 additions & 0 deletions data_loader/data_utils.py
@@ -0,0 +1,118 @@
# @Time : Jan. 10, 2019 15:26
# @Author : Veritas YIN
# @FileName : data_utils.py
# @Version : 1.0
# @IDE : PyCharm
# @Github : https://github.com/VeritasYin/Project_Orion

from utils.math_utils import z_score

import numpy as np
import pandas as pd


class Dataset(object):
def __init__(self, data, stats):
self.__data = data
self.mean = stats['mean']
self.std = stats['std']

def get_data(self, type):
return self.__data[type]

def get_stats(self):
return {'mean': self.mean, 'std': self.std}

def get_len(self, type):
return len(self.__data[type])

def z_inverse(self, type):
return self.__data[type] * self.std + self.mean


def seq_gen(len_seq, data_seq, offset, n_frame, n_route, day_slot, C_0=1):
    '''
    Generate data in the form of standard sequence units.
    :param len_seq: int, the length of the target date sequence (number of days).
    :param data_seq: np.ndarray, source data / time series.
    :param offset: int, the starting day index of the dataset split.
    :param n_frame: int, the number of frames within a standard sequence unit,
                    which contains n_his = 12 historical frames and n_pred = 9 prediction frames
                    (3 / 15 min, 6 / 30 min & 9 / 45 min ahead).
    :param n_route: int, the number of routes in the graph.
    :param day_slot: int, the number of time slots per day, determined by the time window (5 min by default).
    :param C_0: int, the number of input channels.
    :return: np.ndarray, [len_seq * n_slot, n_frame, n_route, C_0].
    '''
n_slot = day_slot - n_frame + 1

tmp_seq = np.zeros((len_seq * n_slot, n_frame, n_route, C_0))
for i in range(len_seq):
for j in range(n_slot):
sta = (i + offset) * day_slot + j
end = sta + n_frame
tmp_seq[i * n_slot + j, :, :, :] = np.reshape(data_seq[sta:end, :], [n_frame, n_route, C_0])
return tmp_seq
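
As a rough illustration (toy, made-up values rather than the real dataset), slicing one day of a two-route series with the default 21-frame window:

import numpy as np

toy_day = np.arange(288 * 2, dtype=float).reshape(288, 2)   # one day, 2 routes, 5-min slots
units = seq_gen(len_seq=1, data_seq=toy_day, offset=0,
                n_frame=21, n_route=2, day_slot=288)
print(units.shape)   # (268, 21, 2, 1): 288 - 21 + 1 overlapping windows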


def data_gen(file_path, data_config, n_route, n_frame=21, day_slot=288):
    '''
    Load the source file and generate the dataset.
    :param file_path: str, the file path of the data source.
    :param data_config: tuple, the number of days in the train, validation and test splits.
    :param n_route: int, the number of routes in the graph.
    :param n_frame: int, the number of frames within a standard sequence unit,
                    which contains n_his = 12 historical frames and n_pred = 9 prediction frames
                    (3 / 15 min, 6 / 30 min & 9 / 45 min ahead).
    :param day_slot: int, the number of time slots per day, determined by the time window (5 min by default).
    :return: Dataset, z-score normalized train / validation / test splits together with the stats of the training set.
    '''
n_train, n_val, n_test = data_config
# generate training, validation and test data
    try:
        data_seq = pd.read_csv(file_path, header=None).values
    except FileNotFoundError:
        raise FileNotFoundError(f'ERROR: input file was not found in {file_path}.')

seq_train = seq_gen(n_train, data_seq, 0, n_frame, n_route, day_slot)
seq_val = seq_gen(n_val, data_seq, n_train, n_frame, n_route, day_slot)
seq_test = seq_gen(n_test, data_seq, n_train + n_val, n_frame, n_route, day_slot)

# x_stats: dict, the stats for the train dataset, including the value of mean and standard deviation.
x_stats = {'mean': np.mean(seq_train), 'std': np.std(seq_train)}

# x_train, x_val, x_test: np.array, [sample_size, n_frame, n_route, channel_size].
x_train = z_score(seq_train, x_stats['mean'], x_stats['std'])
x_val = z_score(seq_val, x_stats['mean'], x_stats['std'])
x_test = z_score(seq_test, x_stats['mean'], x_stats['std'])

x_data = {'train': x_train, 'val': x_val, 'test': x_test}
dataset = Dataset(x_data, x_stats)
return dataset
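
With the split sizes that main.py (below) passes in (34 / 5 / 5 days, 228 routes, n_frame = n_his + n_pred = 21, default day_slot = 288), a quick back-of-the-envelope shape check, illustrative arithmetic only:

day_slot, n_frame, n_route = 288, 12 + 9, 228
n_slot = day_slot - n_frame + 1             # 268 sequence units per day
print((34 * n_slot, n_frame, n_route, 1))   # train split: (9112, 21, 228, 1)
print((5 * n_slot, n_frame, n_route, 1))    # val / test splits: (1340, 21, 228, 1)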


def gen_batch(inputs, batch_size, dynamic_batch=False, shuffle=False):
'''
Data iterator in batch.
:param inputs: np.ndarray, [len_seq, n_frame, n_route, C_0], standard sequence units.
:param batch_size: int, the size of batch.
    :param dynamic_batch: bool, whether to allow a smaller final batch when the remaining samples are fewer than batch_size.
    :param shuffle: bool, whether to shuffle the sample order before batching.
'''
len_inputs = len(inputs)

if shuffle:
idx = np.arange(len_inputs)
np.random.shuffle(idx)

for start_idx in range(0, len_inputs, batch_size):
end_idx = start_idx + batch_size
if end_idx > len_inputs:
if dynamic_batch:
end_idx = len_inputs
else:
break
if shuffle:
slide = idx[start_idx:end_idx]
else:
slide = slice(start_idx, end_idx)

yield inputs[slide]
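
As a rough usage sketch (toy array with made-up sizes standing in for a real split), iterating shuffled batches with a shorter trailing batch allowed:

import numpy as np

toy_units = np.random.randn(105, 21, 228, 1)
for x_batch in gen_batch(toy_units, batch_size=50, dynamic_batch=True, shuffle=True):
    print(x_batch.shape)   # (50, 21, 228, 1) twice, then (5, 21, 228, 1) for the tail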
70 changes: 70 additions & 0 deletions main.py
@@ -0,0 +1,70 @@
# @Time : Jan. 02, 2019 22:17
# @Author : Veritas YIN
# @FileName : main.py
# @Version : 1.0
# @Project : Orion
# @IDE : PyCharm
# @Github : https://github.com/VeritasYin/Project_Orion

import os

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
from os.path import join as pjoin

import tensorflow as tf

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
tf.Session(config=config)

from utils.math_graph import *
from data_loader.data_utils import *
from models.trainer import model_train
from models.tester import model_test

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--n_route', type=int, default=228)
parser.add_argument('--n_his', type=int, default=12)
parser.add_argument('--n_pred', type=int, default=9)
parser.add_argument('--batch_size', type=int, default=50)
parser.add_argument('--epoch', type=int, default=50)
parser.add_argument('--save', type=int, default=10)
parser.add_argument('--ks', type=int, default=3)
parser.add_argument('--kt', type=int, default=3)
parser.add_argument('--lr', type=float, default=1e-3)
parser.add_argument('--opt', type=str, default='RMSProp')
parser.add_argument('--graph', type=str, default='default')
parser.add_argument('--inf_mode', type=str, default='merge')

args = parser.parse_args()
print(f'Training configs: {args}')

n, n_his, n_pred = args.n_route, args.n_his, args.n_pred
Ks, Kt = args.ks, args.kt
# blocks: settings of channel size in st_conv_blocks / bottleneck design
blocks = [[1, 32, 64], [64, 32, 128]]

# Load weighted adjacency matrix W
if args.graph == 'default':
W = weight_matrix(pjoin('./dataset', f'PeMSD7_W_{n}.csv'))
else:
# load customized graph weight matrix
W = weight_matrix(pjoin('./dataset', args.graph))

# Calculate graph kernel
L = scaled_laplacian(W)
# Alternative approximation method: 1st approx - first_approx(W, n).
Lk = cheb_poly_approx(L, Ks, n)
tf.add_to_collection(name='graph_kernel', value=tf.cast(tf.constant(Lk), tf.float32))

# Data Preprocessing
data_file = f'PeMSD7_V_{n}.csv'
n_train, n_val, n_test = 34, 5, 5
PeMS = data_gen(pjoin('./dataset', data_file), (n_train, n_val, n_test), n, n_his + n_pred)
print(f'>> Loading dataset with Mean: {PeMS.mean:.2f}, STD: {PeMS.std:.2f}')

if __name__ == '__main__':
model_train(PeMS, blocks, args)
model_test(PeMS, PeMS.get_len('test'), n_his, n_pred, args.inf_mode)
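
The graph-kernel step above calls scaled_laplacian and cheb_poly_approx from utils.math_graph, which this commit does not show; the snippet below is only a sketch of the standard Chebyshev recursion such a routine typically computes (function name, arguments and the [n, Ks * n] stacking layout are assumptions, not the repository's code):

import numpy as np

def cheb_kernels_sketch(L_scaled, Ks, n):
    # Standard Chebyshev recursion: T_0 = I, T_1 = L~, T_k = 2 * L~ @ T_{k-1} - T_{k-2}.
    T = [np.identity(n), L_scaled.copy()]
    for _ in range(2, Ks):
        T.append(2 * np.matmul(L_scaled, T[-1]) - T[-2])
    # One common layout stacks the Ks kernels side by side -> shape [n, Ks * n].
    return np.concatenate(T[:Ks], axis=-1)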
6 changes: 6 additions & 0 deletions models/__init__.py
@@ -0,0 +1,6 @@
# @Time : Jan. 10, 2019 17:49
# @Author : Veritas YIN
# @FileName : __init__.py
# @Version : 1.0
# @IDE : PyCharm
# @Github : https://github.com/VeritasYin/Project_Orion
57 changes: 57 additions & 0 deletions models/base_model.py
@@ -0,0 +1,57 @@
# @Time : Jan. 12, 2019 19:01
# @Author : Veritas YIN
# @FileName : base_model.py
# @Version : 1.0
# @IDE : PyCharm
# @Github : https://github.com/VeritasYin/Project_Orion

from models.layers import *
from os.path import join as pjoin
import tensorflow as tf


def build_model(inputs, n_his, Ks, Kt, blocks, keep_prob):
    '''
    Build the base model.
    :param inputs: placeholder, input frames (the n_his historical frames followed by the frame to be predicted).
    :param n_his: int, the number of historical frames fed into the model.
    :param Ks: int, kernel size of spatial convolution.
    :param Kt: int, kernel size of temporal convolution.
    :param blocks: list, channel configs of st_conv blocks.
    :param keep_prob: placeholder, keep probability for dropout.
    '''
x = inputs[:, 0:n_his, :, :]

    # Ko: temporal length remaining for the output layer's temporal convolution (must stay > 1).
Ko = n_his
# ST-Block
for i, channels in enumerate(blocks):
x = st_conv_block(x, Ks, Kt, channels, i, keep_prob, act_func='GLU')
Ko -= 2 * (Ks - 1)

# Output Layer
if Ko > 1:
y = output_layer(x, Ko, 'output_layer')
else:
raise ValueError(f'ERROR: kernel size Ko must be greater than 1, but received "{Ko}".')

tf.add_to_collection(name='copy_loss',
value=tf.nn.l2_loss(inputs[:, n_his - 1:n_his, :, :] - inputs[:, n_his:n_his + 1, :, :]))
train_loss = tf.nn.l2_loss(y - inputs[:, n_his:n_his + 1, :, :])
single_pred = y[:, 0, :, :]
tf.add_to_collection(name='y_pred', value=single_pred)
return train_loss, single_pred
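
As a quick sanity check on the Ko bookkeeping with the defaults from main.py (n_his = 12, Ks = 3, two st_conv blocks), illustrative arithmetic only:

n_his, Ks, n_blocks = 12, 3, 2
Ko = n_his - n_blocks * 2 * (Ks - 1)   # 12 - 2 * 2 * (3 - 1) = 4 > 1, so output_layer is built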


def model_save(sess, global_steps, model_name, save_path='./output/models/'):
'''
Save the checkpoint of trained model.
:param sess: tf.Session().
    :param global_steps: tensor, records the global step of the training process (in epochs).
:param model_name: str, the name of saved model.
:param save_path: str, the path of saved model.
:return:
'''
saver = tf.train.Saver(max_to_keep=3)
prefix_path = saver.save(sess, pjoin(save_path, model_name), global_step=global_steps)
print(f'<< Saving model to {prefix_path} ...')
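
A rough sketch of how a checkpoint written by model_save might be restored later (the checkpoint prefix and the 'y_pred' collection follow the code above; the tester's actual restore logic is not shown in this commit, so treat this as an assumption):

import tensorflow as tf

ckpt = tf.train.latest_checkpoint('./output/models/')
saver = tf.train.import_meta_graph(f'{ckpt}.meta')
with tf.Session() as sess:
    saver.restore(sess, ckpt)
    y_pred = tf.get_collection('y_pred')[0]   # tensor registered in build_model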