fix max_step, update run.sh and run_ernie.sh

PaddlePaddle · kuke · Apr 22, 2019 · Apr 5, 2019 · Apr 8, 2019 · Apr 8, 2019
commit 29e112ab6d9788512c63d53ffa7375e475154093
diff --git a/PaddleNLP/paddle-nlp/emotion_detection/README.md b/PaddleNLP/paddle-nlp/emotion_detection/README.md
@@ -2,9 +2,9 @@
 
 对话情绪识别（Emotion Detection，简称EmoTect），专注于识别智能对话场景中用户的情绪，针对智能对话场景中的用户文本，自动判断该文本的情绪类别并给出相应的置信度，情绪类型分为积极、消极、中性。
 
-对话情绪识别适用于聊天、客服等多个场景，能够帮助企业更好地把握对话质量、改善产品的用户交互体验，也能分析客服服务质量、降低人工质检成本。可通过[AI开放平台-对话情绪识别](http://ai.baidu.com/tech/nlp_apply/emotion_detection) 线上体验。
+对话情绪识别适用于聊天、客服等多个场景，能够帮助企业更好地把握对话质量、改善产品的用户交互体验，也能分析客服服务质量、降低人工质检成本。可通过 [AI开放平台-对话情绪识别](http://ai.baidu.com/tech/nlp_apply/emotion_detection) 线上体验。
 
-效果上，我们基于百度自建测试集（包含闲聊、客服）和nlpcc2014微博情绪数据集，进行评测，效果如下表所示：
+效果上，我们基于百度自建测试集（包含闲聊、客服）和nlpcc2014微博情绪数据集进行评测，效果如下表所示。此外，我们还开源了百度基于海量数据训练好的模型，该模型在聊天对话语料上fine-tune之后，可以得到更好的效果。
 
 | 模型 | 闲聊 | 客服 | 微博 |
 | :------| :------ | :------ | :------ |
@@ -47,11 +47,11 @@ sh run.sh eval
 ```shell
 sh run.sh train
 ```
-训练完成后，可修改```run.sh```中init_checkpoint参数，进行模型评估和预测
+训练完成后，可修改```run.sh```中init_checkpoint参数，选择最优step的模型进行评估和预测。
 
 #### 模型预测
 
-基于预训练模型，可在新的数据集（infer.tsv）上进行预测，得到模型预测结果及概率
+在新的数据集（infer.tsv）上进行预测，得到模型预测结果及各label的概率
 ```shell
 sh run.sh infer
 ```
@@ -78,6 +78,7 @@ sh run.sh infer
 训练、预测、评估使用的数据示例如下，数据由两列组成，以制表符（'\t'）分隔，第一列是情绪分类的类别（0表示消极；1表示中性；2表示积极），第二列是以空格分词的中文文本，文件为utf8编码。
 
 ```text
+label   text_a
 0   谁 骂人 了 ？ 我 从来 不 骂人 ， 我 骂 的 都 不是 人 ， 你 是 人 吗 ？
 1   我 有事 等会儿 就 回来 和 你 聊
 2   我 见到 你 很高兴 谢谢 你 帮 我
@@ -131,6 +132,7 @@ TASK_DATA_PATH=./data
 ```
 sh run_ernie.sh train
 ```
+训练完成后，可修改```run_ernie.sh```中init_checkpoint参数，选择最优step的模型进行评估和预测。
 训练、评估、预测详细配置，请查看 ```run_ernie.sh```
 
 ## 如何贡献代码

diff --git a/PaddleNLP/paddle-nlp/emotion_detection/run.sh b/PaddleNLP/paddle-nlp/emotion_detection/run.sh
@@ -47,7 +47,7 @@ infer() {
         --batch_size 32 \
         --data_dir ${DATA_PATH} \
         --vocab_path ${VOCAB_PATH} \
-        --init_checkpoint ./save_models/textcnn/step_785/ \
+        --init_checkpoint ${CKPT_PATH}/step_785/ \
         --config_path ./config.json
 }
 

diff --git a/PaddleNLP/paddle-nlp/emotion_detection/run_classifier.py b/PaddleNLP/paddle-nlp/emotion_detection/run_classifier.py
@@ -182,7 +182,7 @@ def main(args):
             epoch=args.epoch)
 
         num_train_examples = processor.get_num_examples(phase="train")
-        max_train_steps = args.epoch * num_train_examples // args.batch_size
+        max_train_steps = args.epoch * num_train_examples // args.batch_size + 1
 
         print("Num train examples: %d" % num_train_examples)
         print("Max train steps: %d" % max_train_steps)

diff --git a/PaddleNLP/paddle-nlp/emotion_detection/run_ernie.sh b/PaddleNLP/paddle-nlp/emotion_detection/run_ernie.sh
@@ -51,7 +51,7 @@ infer() {
         --verbose true \
         --do_infer true \
         --batch_size 32 \
-        --init_checkpoint ${MODEL_PATH}/params \
+        --init_checkpoint ${CKPT_PATH}/step_943 \
         --infer_set ${TASK_DATA_PATH}/infer.tsv \
         --vocab_path ${MODEL_PATH}/vocab.txt \
         --max_seq_len 64 \

diff --git a/PaddleNLP/paddle-nlp/emotion_detection/run_ernie_classifier.py b/PaddleNLP/paddle-nlp/emotion_detection/run_ernie_classifier.py
@@ -185,7 +185,7 @@ def main(args):
 
         num_train_examples = reader.get_num_examples(args.train_set)
 
-        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
+        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count + 1
 
         print("Device count: %d" % dev_count)
         print("Num train examples: %d" % num_train_examples)