add README introduction

mindspore-lab · Dec 6, 2022 · 7eb67cc · 7eb67cc
1 parent 376fd35
commit 7eb67cc
Show file tree

Hide file tree

Showing 3 changed files with 10 additions and 12 deletions.
diff --git a/README.md b/README.md
@@ -27,13 +27,15 @@
 
 ## Introduction
 
-MindNLP is an open source NLP library based on MindSpore.
+MindNLP is an open-source NLP library based on MindSpore. It provides a platform for solving natural language processing tasks and includes many common NLP approaches. It helps researchers and developers construct and train models more conveniently and rapidly.
 
 The master branch works with **MindSpore master**.
 
 ### Major Features
 
-- feature1: ...
+- **Comprehensive data processing**: Several classical NLP datasets, such as Multi30k, SQuAD, and CoNLL, are packaged into a friendly module for easy use.
+- **Friendly NLP model toolset**: MindNLP provides various configurable components, making it easy to customize models.
+- **Easy-to-use engine**: MindNLP simplifies the complicated training process in MindSpore. It provides Trainer and Evaluator interfaces to train and evaluate models easily.
 
 ## Quick Links
 

diff --git a/examples/sentiment_classification.py b/examples/sentiment_classification.py
@@ -72,12 +72,8 @@ def construct(self, text):
 lr = 0.001
 
 # load datasets
-imdb_train, imdb_test = load('imdb')
-embedding, vocab = Glove.from_pretrained('6B', 100, special_tokens=["<unk>", "<pad>"], dropout=drop)
-
-lookup_op = ds.text.Lookup(vocab, unknown_token='<unk>')
-pad_op = ds.transforms.PadEnd([500], pad_value=vocab.tokens_to_ids('<pad>'))
-type_cast_op = ds.transforms.TypeCast(ms.float32)
+imdb_train, imdb_test = load('imdb', shuffle=True)
+print(imdb_train.get_col_names())
 
 embedding, vocab = Glove.from_pretrained('6B', 100, special_tokens=["<unk>", "<pad>"], dropout=drop)
 tokenizer = BasicTokenizer(True)
@@ -90,9 +86,9 @@ def construct(self, text):
                      dropout=drop, bidirectional=bidirectional)
 sentiment_encoder = RNNEncoder(embedding, lstm_layer)
 sentiment_head = Head(hidden_size, output_size, drop)
-net = SentimentClassification(sentiment_encoder, sentiment_head)
 
-loss = nn.BCEWithLogitsLoss(reduction='mean')
+net = SentimentClassification(sentiment_encoder, sentiment_head)
+loss = nn.BCELoss(reduction='mean')
 optimizer = nn.Adam(net.trainable_params(), learning_rate=lr)
 
 # define metrics
@@ -102,4 +98,4 @@ def construct(self, text):
 trainer = Trainer(network=net, train_dataset=imdb_train, eval_dataset=imdb_valid, metrics=metric,
                   epochs=5, loss_fn=loss, optimizer=optimizer)
 trainer.run(tgt_columns="label", jit=False)
-print("end train")
+print("end train")
diff --git a/mindnlp/modules/embeddings/glove_embedding.py b/mindnlp/modules/embeddings/glove_embedding.py
@@ -74,7 +74,7 @@ def __init__(self, vocab: Vocab, init_embed, requires_grad: bool = True, dropout
 
     @classmethod
     def from_pretrained(cls, name='6B', dims=300, root=DEFAULT_ROOT,
-                        special_tokens=("<pad>", "<unk>"), special_first=False, **kwargs):
+                        special_tokens=("<pad>", "<unk>"), special_first=True, **kwargs):
         r"""
         Creates Embedding instance from given 2-dimensional FloatTensor.