Add CE to Sentiment (PaddlePaddle#2777)
* Update mnist_dygraph.py: fix bug

* add CE to se_resnext

* delete useless comments and fix unique_name bugs

* add sentiment CE

* delete some comments

* fix code style
DDDivano authored Jul 15, 2019
1 parent 701599c commit 011591d
Showing 6 changed files with 120 additions and 27 deletions.
8 changes: 8 additions & 0 deletions dygraph/sentiment/.run_ce.sh
@@ -0,0 +1,8 @@
#!/bin/bash

# This file is only used for continuous evaluation.
# dygraph single card
export FLAGS_cudnn_deterministic=True
export CUDA_VISIBLE_DEVICES=0
python main.py --ce --epoch 1 --random_seed 33 --validation_steps 600 | python _ce.py
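
For context on the pipe above: in --ce mode, main.py prints tab-separated KPI lines to stdout, and _ce.py (next file) reads them from stdin. A minimal sketch of that contract, with made-up values:

    # Illustrative only: the shape of the KPI lines main.py emits in --ce mode.
    loss, acc = 0.412, 0.873  # hypothetical epoch-level averages
    print("kpis\ttrain_loss\t%0.3f" % loss)  # kpis<TAB>train_loss<TAB>0.412
    print("kpis\ttrain_acc\t%0.3f" % acc)    # kpis<TAB>train_acc<TAB>0.873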

58 changes: 58 additions & 0 deletions dygraph/sentiment/_ce.py
@@ -0,0 +1,58 @@
#### This file is only used for continuous evaluation test!
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
# ceroot must be on sys.path before the kpi module can be imported.
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi

#### NOTE: kpi.py should be shared across models in some way!

train_acc = AccKpi('train_acc', 0.1, 0, actived=True, desc="train acc")
train_loss = CostKpi('train_loss', 0.1, 0, actived=True, desc="train loss")
tracking_kpis = [train_acc, train_loss]


def parse_log(log):
    '''
    This method should be implemented by model developers.
    The suggestion: each line in the log should be "key\tvalue", for example:
    "
    train_cost\t1.0
    test_cost\t1.0
    train_cost\t1.0
    train_cost\t1.0
    train_acc\t1.2
    "
    Here, only lines of the form "kpis\t<name>\t<value>" are picked up.
    '''
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            print("-----%s" % fs)
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value


def log_to_ce(log):
    kpi_tracker = {}
    for kpi in tracking_kpis:
        kpi_tracker[kpi.name] = kpi

    for (kpi_name, kpi_value) in parse_log(log):
        print(kpi_name, kpi_value)
        kpi_tracker[kpi_name].add_record(kpi_value)
        kpi_tracker[kpi_name].persist()


if __name__ == '__main__':
    log = sys.stdin.read()
    print("*****")
    print(log)
    print("****")
    log_to_ce(log)
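
A quick way to sanity-check parse_log in isolation, e.g. from a REPL in this directory (the sample log is made up; only the "kpis" lines are yielded):

    from _ce import parse_log  # assumes the kpi module and $ceroot are importable

    sample_log = "banner line\nkpis\ttrain_loss\t0.412\nkpis\ttrain_acc\t0.873"
    for kpi_name, kpi_value in parse_log(sample_log):
        print(kpi_name, kpi_value)  # ('train_loss', 0.412), then ('train_acc', 0.873)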
61 changes: 44 additions & 17 deletions dygraph/sentiment/main.py
@@ -52,12 +52,13 @@
 data_g.add_arg("random_seed", int, 0, "Random seed.")

 run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
-run_type_g.add_arg("use_cuda", bool, False, "If set, use GPU for training.")
+run_type_g.add_arg("use_cuda", bool, True, "If set, use GPU for training.")
 run_type_g.add_arg("do_train", bool, True, "Whether to perform training.")
 run_type_g.add_arg("do_val", bool, True, "Whether to perform evaluation.")
 run_type_g.add_arg("do_infer", bool, False, "Whether to perform inference.")
 run_type_g.add_arg("profile_steps", int, 15000,
                    "The steps interval to record the performance.")
+parser.add_argument("--ce", action="store_true", help="run ce")

 args = parser.parse_args()
@@ -81,8 +82,21 @@ def profile_context(profile=True):
         yield


+if args.ce:
+    print("ce mode")
+    seed = 90
+    np.random.seed(seed)
+    fluid.default_startup_program().random_seed = seed
+    fluid.default_main_program().random_seed = seed
+
 def train():
     with fluid.dygraph.guard(place):
+        if args.ce:
+            print("ce mode")
+            seed = 90
+            np.random.seed(seed)
+            fluid.default_startup_program().random_seed = seed
+            fluid.default_main_program().random_seed = seed
         processor = reader.SentaProcessor(
             data_dir=args.data_dir,
             vocab_path=args.vocab_path,
@@ -92,19 +106,31 @@ def train():
         num_train_examples = processor.get_num_examples(phase="train")

         max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

-        train_data_generator = processor.data_generator(
-            batch_size=args.batch_size,
-            phase='train',
-            epoch=args.epoch,
-            shuffle=True)
-
-        eval_data_generator = processor.data_generator(
-            batch_size=args.batch_size,
-            phase='dev',
-            epoch=args.epoch,
-            shuffle=False)
-
+        if not args.ce:
+            train_data_generator = processor.data_generator(
+                batch_size=args.batch_size,
+                phase='train',
+                epoch=args.epoch,
+                shuffle=True)
+
+            eval_data_generator = processor.data_generator(
+                batch_size=args.batch_size,
+                phase='dev',
+                epoch=args.epoch,
+                shuffle=False)
+        else:
+            train_data_generator = processor.data_generator(
+                batch_size=args.batch_size,
+                phase='train',
+                epoch=args.epoch,
+                shuffle=False)
+
+            eval_data_generator = processor.data_generator(
+                batch_size=args.batch_size,
+                phase='dev',
+                epoch=args.epoch,
+                shuffle=False)
         cnn_net = nets.CNN("cnn_net", args.vocab_size, args.batch_size,
                            args.padding_size)

@@ -137,7 +163,6 @@ def train():
                 cnn_net.train()
                 avg_cost, prediction, acc = cnn_net(doc, label)
                 avg_cost.backward()
-
                 np_mask = (doc.numpy() != args.vocab_size).astype('int32')
                 word_num = np.sum(np_mask)
                 sgd_optimizer.minimize(avg_cost)
@@ -200,14 +225,16 @@ def train():
                                / np.sum(total_eval_num_seqs),
                                eval_steps / used_time))
                         time_begin = time.time()
+                        if args.ce:
+                            print("kpis\ttrain_loss\t%0.3f" % (np.sum(total_eval_cost) / np.sum(total_eval_num_seqs)))
+                            print("kpis\ttrain_acc\t%0.3f" % (np.sum(total_eval_acc) / np.sum(total_eval_num_seqs)))

                 if steps % args.save_steps == 0:
                     save_path = "save_dir_" + str(steps)
                     print('save model to: ' + save_path)
                     fluid.dygraph.save_persistables(cnn_net.state_dict(),
                                                     save_path)
-
-        if enable_profile:
+        if enable_profile:
             print('save profile result into /tmp/profile_file')
             return
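
Taken together, the fixed seeds, shuffle=False, and FLAGS_cudnn_deterministic in .run_ce.sh are what make CE runs comparable across commits. A hypothetical way to verify that determinism locally (not part of this commit; Python 3.7+ for capture_output):

    import subprocess

    cmd = ["python", "main.py", "--ce", "--epoch", "1", "--random_seed", "33"]
    out1 = subprocess.run(cmd, capture_output=True, text=True).stdout
    out2 = subprocess.run(cmd, capture_output=True, text=True).stdout
    # Compare only the KPI lines; banners and timings may legitimately differ.
    kpis1 = [l for l in out1.splitlines() if l.startswith("kpis\t")]
    kpis2 = [l for l in out2.splitlines() if l.startswith("kpis\t")]
    assert kpis1 == kpis2, "CE run is not deterministic"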
1 change: 0 additions & 1 deletion dygraph/sentiment/nets.py
@@ -76,7 +76,6 @@ def forward(self, inputs, label=None):
         emb = fluid.layers.reshape(
             emb, shape=[-1, 1, self.seq_len, self.hid_dim])
         conv_3 = self._simple_conv_pool_1(emb)
-
         fc_1 = self._fc1(conv_3)
         prediction = self._fc_prediction(fc_1)

12 changes: 6 additions & 6 deletions dygraph/sentiment/reader.py
@@ -25,13 +25,13 @@ def __init__(self, data_dir, vocab_path, random_seed=None):
         self.num_examples = {"train": -1, "dev": -1, "infer": -1}
         np.random.seed(random_seed)

-    def get_train_examples(self, data_dir, epoch):
+    def get_train_examples(self, data_dir, epoch, shuffle):
         return data_reader((self.data_dir + "/train.tsv"), self.vocab,
-                           self.num_examples, "train", epoch)
+                           self.num_examples, "train", epoch, shuffle)

-    def get_dev_examples(self, data_dir, epoch):
+    def get_dev_examples(self, data_dir, epoch, shuffle):
         return data_reader((self.data_dir + "/dev.tsv"), self.vocab,
-                           self.num_examples, "dev", epoch)
+                           self.num_examples, "dev", epoch, shuffle)

     def get_test_examples(self, data_dir, epoch):
         return data_reader((self.data_dir + "/test.tsv"), self.vocab,
@@ -52,12 +52,12 @@ def get_train_progress(self):
     def data_generator(self, batch_size, phase='train', epoch=1, shuffle=True):
         if phase == "train":
             return paddle.batch(
-                self.get_train_examples(self.data_dir, epoch),
+                self.get_train_examples(self.data_dir, epoch, shuffle),
                 batch_size,
                 drop_last=True)
         elif phase == "dev":
             return paddle.batch(
-                self.get_dev_examples(self.data_dir, epoch),
+                self.get_dev_examples(self.data_dir, epoch, shuffle),
                 batch_size,
                 drop_last=True)
         elif phase == "infer":
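
The point of threading shuffle through these signatures: the flag chosen at data_generator now reaches data_reader in utils.py (below), so CE mode can disable shuffling end to end. A hypothetical CE-mode call (paths and batch size are placeholders):

    import reader

    processor = reader.SentaProcessor(
        data_dir="./senta_data",                   # placeholder path
        vocab_path="./senta_data/word_dict.txt",   # placeholder path
        random_seed=33)
    # shuffle=False propagates down to data_reader, so no random.shuffle runs.
    train_gen = processor.data_generator(
        batch_size=20, phase='train', epoch=1, shuffle=False)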
7 changes: 4 additions & 3 deletions dygraph/sentiment/utils.py
@@ -38,7 +38,7 @@ def add_arg(self, name, type, default, help, **kwargs):
             **kwargs)


-def data_reader(file_path, word_dict, num_examples, phrase, epoch):
+def data_reader(file_path, word_dict, num_examples, phrase, epoch, shuffle=False):
     unk_id = len(word_dict)
     all_data = []
     with io.open(file_path, "r", encoding='utf8') as fin:
@@ -56,8 +56,9 @@ def data_reader(file_path, word_dict, num_examples, phrase, epoch):
             ]
             all_data.append((wids, label))

-    if phrase == "train":
-        random.shuffle(all_data)
+    if shuffle:
+        if phrase == "train":
+            random.shuffle(all_data)

     num_examples[phrase] = len(all_data)
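
Note the nested test above is equivalent to a single combined condition; a functionally identical form would be:

    # Equivalent to the nested version: shuffle only train-phase data, and
    # only when the caller asked for shuffling.
    if shuffle and phrase == "train":
        random.shuffle(all_data)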
