diff --git a/README.md b/README.md
index 74a762f..13fabf5 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,86 @@
-# Lexicon-NN
+# LGN
 
-Code for an EMNLP long paper (Under review).
+PyTorch implementation of "A Lexicon-Based Graph Neural Network for Chinese NER".
+
+The code is partially based on https://github.com/jiesutd/LatticeLSTM.
+
+## Requirements
+
+* Python 3.6 or higher
+* PyTorch 0.4.1 or higher
+
+## Input Format
+
+BMES tag scheme, with one character and its label per line. Sentences are separated by a blank line.
+
+    印 B-LOC
+    度 M-LOC
+    河 E-LOC
+    流 O
+    经 O
+    印 B-GPE
+    度 E-GPE
+
+## Usage
+
+* Training
+
+      python main.py --status train \
+             --train data/onto4ner.cn/train.char.bmes \
+             --dev data/onto4ner.cn/dev.char.bmes \
+             --test data/onto4ner.cn/test.char.bmes \
+             --saved_model saved_model/model_onto4ner \
+             --saved_set data/onto4ner.cn/saved.dset
+
+* Testing
+
+      python main.py --status test \
+             --test data/onto4ner.cn/test.char.bmes \
+             --saved_model saved_model/model_onto4ner \
+             --saved_set data/onto4ner.cn/saved.dset
+
+* Decoding (the raw file can be either labeled or unlabeled)
+
+      python main.py --status decode \
+             --raw data/onto4ner.cn/test.char.bmes \
+             --output tagged_file.txt \
+             --saved_model saved_model/model_onto4ner \
+             --saved_set data/onto4ner.cn/saved.dset
+
+## Data Downloads
+
+The pretrained character and word embeddings can be downloaded from [Lattice LSTM](https://github.com/jiesutd/LatticeLSTM).
+
+Datasets including OntoNotes, MSRA, Weibo and Resume are available at Google Drive or Baidu Pan.
+
+## Pretrained Model Downloads
+
+We also provide pretrained models on the four datasets, which are the same models as reported in the paper.
+If you retrain models from scratch under the same hyper-parameter settings, you may obtain a slightly
+lower or higher F1 score than the one reported in the paper (in our experiments we selected the model that performed best).
+
+Pretrained models and the corresponding hyper-parameter settings are available at Google Drive or Baidu Pan.
+
+When running main.py in test mode with the pretrained models, you should obtain the following results:
+
+| Datasets       | Precision | Recall | F1    |
+|:--------------:|:---------:|:------:|:-----:|
+| OntoNotes dev  | 74.00     | 70.03  | 71.96 |
+| OntoNotes test | 76.13     | 73.68  | 74.89 |
+| MSRA dev       | -         | -      | -     |
+| MSRA test      | 94.19     | 92.73  | 93.46 |
+| Weibo dev      | 66.09     | 59.13  | 62.42 |
+| Weibo test     | 65.71     | 55.56  | 60.21 |
+| Resume dev     | 94.27     | 94.59  | 94.43 |
+| Resume test    | 95.28     | 95.46  | 95.37 |
+
+## Cite
+
+    @inproceedings{gui2019lexicon,
+        title={A Lexicon-Based Graph Neural Network for Chinese NER},
+        author={Gui, Tao and Zou, Yicheng and Zhang, Qi and Peng, Minlong and
+                Fu, Jinlan and Wei, Zhongyu and Huang, Xuanjing},
+        booktitle={2019 Conference on Empirical Methods in Natural Language Processing and
+                   9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)},
+        year={2019}
+    }
\ No newline at end of file
diff --git a/main.py b/main.py
index e660f3f..0a3b846 100644
--- a/main.py
+++ b/main.py
@@ -87,6 +87,29 @@ def recover_label(pred_variable, gold_variable, mask_variable, label_alphabet):
     return pred_label, gold_label
 
 
+def print_args(args):
+    print("CONFIG SUMMARY:")
+    print(" Batch size: %s" % (args.batch_size))
+    print(" If use GPU: %s" % (args.use_gpu))
+    print(" If use CRF: %s" % (args.use_crf))
+    print(" Epoch number: %s" % (args.num_epoch))
+    print(" Learning rate: %s" % (args.lr))
+    print(" L2 normalization rate: %s" % (args.weight_decay))
+    print(" If use edge embedding: %s" % (args.use_edge))
+    print(" If use global node: %s" % (args.use_global))
+    print(" Bidirectional digraph: %s" % (args.bidirectional))
+    print(" Update step number: %s" % (args.iters))
+    print(" Attention dropout rate: %s" % (args.tf_drop_rate))
+    print(" Embedding dropout rate: %s" % (args.emb_drop_rate))
+    print(" Hidden state dimension: %s" % (args.hidden_dim))
+    print(" Learning rate decay ratio: %s" % (args.lr_decay))
+    print(" Aggregation module dropout rate: %s" % (args.cell_drop_rate))
+    print(" Head number of attention: %s" % (args.num_head))
+    print(" Head dimension of attention: %s" % (args.head_dim))
+    print("CONFIG SUMMARY END.")
+    sys.stdout.flush()
+
+
 def evaluate(data, args, model, name):
     if name == "train":
         instances = data.train_Ids
@@ -342,7 +365,7 @@ def load_model_decode(model_dir, data, args, name):
     parser.add_argument('--raw', help='Raw file for decoding.')
     parser.add_argument('--output', help='Output results for decoding.')
     parser.add_argument('--saved_set', help='Path of saved data set.', default='data/onto4ner.cn/saved.dset')
-    parser.add_argument('--saved_model', help='Path of saved model.', default="saved_model/model_ontonote")
+    parser.add_argument('--saved_model', help='Path of saved model.', default="saved_model/model_onto4ner")
     parser.add_argument('--char_emb', help='Path of character embedding file.', default="data/gigaword_chn.all.a2b.uni.ite50.vec")
     parser.add_argument('--word_emb', help='Path of word embedding file.', default="data/ctb.50d.vec")
@@ -352,7 +375,7 @@
     parser.add_argument('--bidirectional', type=str2bool, default=True, help='If use bidirectional digraph.')
     parser.add_argument('--seed', help='Random seed', default=1023, type=int)
-    parser.add_argument('--batch_size', help='Batch size. ', default=1, type=int)
+    parser.add_argument('--batch_size', help='Batch size.', default=1, type=int)
     parser.add_argument('--num_epoch', default=100, type=int, help="Epoch number.")
     parser.add_argument('--iters', default=4, type=int, help='The number of Graph iterations.')
     parser.add_argument('--hidden_dim', default=50, type=int, help='Hidden state size.')
@@ -378,7 +401,6 @@
     torch.manual_seed(seed_num)
     np.random.seed(seed_num)
 
-
     train_file = args.train
     dev_file = args.dev
     test_file = args.test
@@ -412,6 +434,7 @@
         args.label_alphabet_size = data.label_alphabet.size()
         args.char_dim = data.char_emb_dim
         args.word_dim = data.word_emb_dim
+        print_args(args)
 
         train(data, args, saved_model_path)
     elif status == 'test':
@@ -426,6 +449,8 @@
         data.generate_instance_with_words(test_file, 'test')
         with open(saved_model_path + "_best_HP.config", "rb") as f:
             args = pickle.load(f)
+        data.show_data_summary()
+        print_args(args)
         load_model_decode(saved_model_path, data, args, "test")
 
     elif status == 'decode':
@@ -440,6 +465,8 @@
         data.generate_instance_with_words(raw_file, 'raw')
         with open(saved_model_path + "_best_HP.config", "rb") as f:
             args = pickle.load(f)
+        data.show_data_summary()
+        print_args(args)
         decode_results = load_model_decode(saved_model_path, data, args, "raw")
         data.write_decoded_results(output_file, decode_results, 'raw')
     else:
diff --git a/utils/functions.py b/utils/functions.py
index 4f951f5..2038b74 100644
--- a/utils/functions.py
+++ b/utils/functions.py
@@ -3,7 +3,6 @@
 # @Last Modified by: Yicheng Zou,  Contact: yczou18@fudan.edu.cn
 
 import numpy as np
-import re
 
 
 def normalize_word(word):
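
As a supplementary illustration (not part of the patch above), the BMES input format described in the README hunk can be parsed with a few lines of Python. This is a minimal sketch assuming UTF-8 files with one whitespace-separated "character label" pair per line and a blank line between sentences; the helper name `read_bmes_file` is hypothetical and does not exist in the repository.

```python
# Illustrative sketch only: `read_bmes_file` is a hypothetical helper,
# not a function defined in this diff or in the repository.
def read_bmes_file(path):
    """Return a list of sentences, each a list of (character, label) pairs."""
    sentences, current = [], []
    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:          # a blank line closes the current sentence
                if current:
                    sentences.append(current)
                    current = []
                continue
            char, label = line.split()[:2]
            current.append((char, label))
    if current:                   # handle a file without a trailing blank line
        sentences.append(current)
    return sentences


# Example usage (path taken from the README's usage commands):
# sentences = read_bmes_file("data/onto4ner.cn/dev.char.bmes")
# print(sentences[0])  # e.g. [('印', 'B-LOC'), ('度', 'M-LOC'), ('河', 'E-LOC'), ...]
```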