Merge branch 'mindspore-lab:master' into master

mindspore-lab · Mar 21, 2023 · fe97d3f · fe97d3f
2 parents eb04d89 + 7e8d8d6
commit fe97d3f
Show file tree

Hide file tree

Showing 62 changed files with 4,084 additions and 463 deletions.
diff --git a/.github/install_mindspore.py b/.github/install_mindspore.py
@@ -0,0 +1,28 @@
+import re
+import requests
+import os
+
+def gen_url(os, py_version):
+    """Write the download URL of the matching MindSpore daily wheel to ``download.txt``.
+
+    Args:
+        os (str): GitHub Actions runner label; one of ``'ubuntu-latest'``,
+            ``'macos-latest'`` or ``'windows-latest'``.
+        py_version (str): Dotted Python version such as ``'3.7'``; the dots are
+            stripped to build the ``cp`` tags of the wheel file name.
+
+    Raises:
+        ValueError: If ``os`` is not one of the three supported runner labels.
+
+    Note:
+        Nothing is returned; the only result is the file written in the current
+        working directory. The ``os`` parameter shadows the imported ``os``
+        module inside this function.
+    """
+    hf_url = 'https://huggingface.co/lvyufeng/mindspore-daily/resolve/main/'
+    # Placeholders, in order: python tag, ABI tag, platform tag.
+    whl_name = 'mindspore-newest-cp{}-cp{}-{}.whl'
+    py_version = py_version.replace('.', '')  # '3.7' -> '37'
+
+    # Map the runner label to the platform tag used in the wheel file name.
+    if os == 'ubuntu-latest':
+        platform = 'linux_x86_64'
+    elif os == 'macos-latest':
+        platform = 'macosx_10_15_x86_64'
+    elif os == 'windows-latest':
+        platform = 'win_amd64'
+    else:
+        raise ValueError(f'not support this operate system {os}')
+
+    # Only the 3.7 wheel gets an 'm' suffix in the second tag (cp37-cp37m).
+    py_version2 = py_version if py_version != '37' else py_version + 'm'
+    whl_name = whl_name.format(py_version, py_version2, platform)
+
+    # The URL is written to a file rather than returned so that a later step can
+    # feed it to pip as a requirements file.
+    with open('download.txt', 'w', encoding='utf-8') as f:
+        f.write(hf_url + whl_name)
+
+# Script entry point: the runner label and Python version are read from the
+# OS and PYTHON environment variables; a missing variable raises KeyError.
+if __name__ == '__main__':
+    platform = os.environ['OS']
+    python = os.environ['PYTHON']
+    gen_url(platform, python)
diff --git a/.github/workflows/ci_pipeline.yaml b/.github/workflows/ci_pipeline.yaml
@@ -36,6 +36,14 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install -r requirements/dev_requirements.txt
+    # Generate download.txt with the MindSpore daily wheel URL, then install from it.
+    - name: Install MindSpore
+      shell: bash
+      env:
+        # Both variables are read by .github/install_mindspore.py.
+        OS: ubuntu-latest
+        # NOTE(review): unquoted 3.7 is a YAML float; quote the value if it is
+        # ever changed to something like 3.10.
+        PYTHON: 3.7
+      run: |
+        python .github/install_mindspore.py
+        pip install -r download.txt
     - name: Analysing the mindnlp code with pylint
       run: |
         pylint mindnlp --rcfile=.github/pylint.conf
@@ -60,6 +68,14 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install -r requirements/dev_requirements.txt
+    # Generate download.txt with the MindSpore daily wheel URL, then install from it.
+    - name: Install MindSpore
+      shell: bash
+      env:
+        # Forward the matrix entry to .github/install_mindspore.py, which reads
+        # the OS and PYTHON environment variables.
+        OS: ${{ matrix.os }}
+        PYTHON: ${{ matrix.python }}
+      run: |
+        python .github/install_mindspore.py
+        pip install -r download.txt
     - name: Test with pytest
       run: |
         pytest -m 'not dataset' tests/ut
@@ -81,6 +97,14 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install -r requirements/dev_requirements.txt
+    # Generate download.txt with the MindSpore daily wheel URL, then install from it.
+    - name: Install MindSpore
+      shell: bash
+      env:
+        # Forward the matrix entry to .github/install_mindspore.py, which reads
+        # the OS and PYTHON environment variables.
+        OS: ${{ matrix.os }}
+        PYTHON: ${{ matrix.python }}
+      run: |
+        python .github/install_mindspore.py
+        pip install -r download.txt
     - name: Test ST with pytest
       run: |
         pytest tests/st
diff --git a/.gitignore b/.gitignore
@@ -133,4 +133,5 @@ kernel_meta/
 rank_0/
 
 .vscode/
-*.ckpt
+*.ckpt
+download.txt
diff --git a/docs/api/metrics.rst b/docs/api/metrics.rst
@@ -0,0 +1,106 @@
+Metrics
+========
+
+accuracy
+---------------------------------------
+
+.. automodule:: mindnlp.metrics.accuracy
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+bleu
+---------------------------------------
+
+.. automodule:: mindnlp.metrics.bleu
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+confusion_matrix
+---------------------------------------
+
+.. automodule:: mindnlp.metrics.confusion_matrix
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+distinct
+---------------------------------------
+
+.. automodule:: mindnlp.metrics.distinct
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+em\_score
+---------------------------------------
+
+.. automodule:: mindnlp.metrics.em_score
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+f1
+---------------------------------------
+
+.. automodule:: mindnlp.metrics.f1
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+matthews
+---------------------------------------
+
+.. automodule:: mindnlp.metrics.matthews
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+pearson
+---------------------------------------
+
+.. automodule:: mindnlp.metrics.pearson
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+perplexity
+---------------------------------------
+
+.. automodule:: mindnlp.metrics.perplexity
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+precision
+---------------------------------------
+
+.. automodule:: mindnlp.metrics.precision
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+recall
+---------------------------------------
+
+.. automodule:: mindnlp.metrics.recall
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+rouge
+---------------------------------------
+
+.. automodule:: mindnlp.metrics.rouge
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+spearman
+---------------------------------------
+
+.. automodule:: mindnlp.metrics.spearman
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/api/scoring.rst b/docs/api/scoring.rst
diff --git a/docs/index.rst b/docs/index.rst
@@ -49,8 +49,8 @@ What is MindNLP
    api/abc
    api/dataset
    api/engine
+   api/metrics
    api/modules
    api/models
-   api/scoring
    api/transforms
    api/utils
diff --git a/docs/quick_start/use_callback.rst b/docs/quick_start/use_callback.rst
@@ -18,23 +18,25 @@ The code of using engine is as follows:
 
 .. code:: python
 
+    import numpy as np
     import mindspore.dataset as ds
 
     from mindspore import nn
 
     from mindnlp.engine.trainer import Trainer
+    from mindnlp.engine.callbacks.timer_callback import TimerCallback
     from mindnlp.engine.callbacks.earlystop_callback import EarlyStopCallback
 
     class MyDataset:
-    """Dataset"""
-    def __init__(self):
-        self.data = np.random.randn(20, 3).astype(np.float32)
-        self.label = list(np.random.choice([0, 1]).astype(np.float32) for i in range(20))
-        self.length = list(np.random.choice([0, 1]).astype(np.float32) for i in range(20))
-    def __getitem__(self, index):
-        return self.data[index], self.label[index], self.length[index]
-    def __len__(self):
-        return len(self.data)
+        """Dataset"""
+        def __init__(self):
+            self.data = np.random.randn(20, 3).astype(np.float32)
+            self.label = list(np.random.choice([0, 1]).astype(np.float32) for i in range(20))
+            self.length = list(np.random.choice([0, 1]).astype(np.float32) for i in range(20))
+        def __getitem__(self, index):
+            return self.data[index], self.label[index], self.length[index]
+        def __len__(self):
+            return len(self.data)
 
     class MyModel(nn.Cell):
         """Model"""
@@ -48,9 +50,7 @@ The code of using engine is as follows:
     # Define Dataset
     dataset_generator = MyDataset()
     train_dataset = ds.GeneratorDataset(dataset_generator, ["data", "label", "length"], shuffle=False)
-    eval_dataset = ds.GeneratorDataset(dataset_generator, ["data", "label", "length"], shuffle=False)
     train_dataset = train_dataset.batch(4)
-    eval_dataset = eval_dataset.batch(4)
     # Define Model
     net = MyModel()
     net.update_parameters_name('net.')
@@ -61,10 +61,10 @@ The code of using engine is as follows:
     # Define Callback
     timer_callback = TimerCallback(print_steps=2)
     # Define Trainer
-    trainer = Trainer(network=net, train_dataset=train_dataset, eval_dataset=eval_dataset,
+    trainer = Trainer(network=net, train_dataset=train_dataset, eval_dataset=None,
                       epochs=6, optimizer=optimizer, loss_fn=loss_fn, callbacks=timer_callback)
     # Run Trainer
-    trainer.run(tgt_columns='label', jit=True)
+    trainer.run(tgt_columns='label')
 
 Callbacks in MindNLP
 ^^^^^^^^^^^^^^^^^^^^^^^^
@@ -128,6 +128,6 @@ and the average loss value after each epoch.
             logging.info('Avg loss at epoch %d, %.6f', self.epoch, avg_loss)
 
     my_callback = MyCallBack()
-    trainer = Trainer(network=net, train_dataset=train_dataset, eval_dataset=eval_dataset,
+    trainer = Trainer(network=net, train_dataset=train_dataset, eval_dataset=None,
                       epochs=6, optimizer=optimizer, loss_fn=loss_fn, callbacks=my_callback)
-    trainer.run(tgt_columns='label', jit=True)
+    trainer.run(tgt_columns='label')
diff --git a/examples/language_model/fasttext.py b/examples/language_model/fasttext.py
@@ -22,7 +22,7 @@
 from mindspore.common.initializer import XavierUniform
 from mindspore.dataset.text.utils import Vocab
 from mindnlp.engine.trainer import Trainer
-from mindnlp.engine.metrics import Accuracy
+from mindnlp.metrics import Accuracy
 from mindnlp.modules.embeddings import Glove
 from mindnlp import load_dataset, process
 
@@ -65,7 +65,7 @@ def construct(self, text):
         return classifier
 
 
-ag_news_train, ag_news_test = load('ag_news', shuffle=True)
+ag_news_train, ag_news_test = load_dataset('ag_news', shuffle=True)
 
 vocab_size = 1383812
 embedding_dims = 16

diff --git a/mindnlp/__init__.py b/mindnlp/__init__.py
@@ -18,6 +18,7 @@
 
 from mindnlp.dataset import load_dataset, process
 from mindnlp.utils import less_min_pynative_first
+from mindnlp.workflow.workflow import Workflow
 if less_min_pynative_first:
     from mindspore import context
     from mindspore import ms_function as ms_jit

diff --git a/mindnlp/_legacy/functional.py b/mindnlp/_legacy/functional.py
@@ -32,6 +32,40 @@
 cast_ = ops.Cast()
 scalar_to_tensor_ = ops.ScalarToTensor()
 
+
+def masked_select(inputs, mask):
+    """
+    Returns a new 1-D Tensor which indexes the `inputs` tensor according to the boolean `mask`.
+    The shapes of the `mask` tensor and the `inputs` tensor don't need to match, but they must be broadcastable.
+
+    Args:
+        inputs (Tensor): The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.
+        mask (Tensor[bool]): The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.
+
+    Returns:
+        A 1-D Tensor, with the same type as `inputs`.
+
+    Raises:
+        TypeError: If `inputs` or `mask` is not a Tensor.
+        TypeError: If dtype of `mask` is not bool.
+
+    Supported Platforms:
+        ``Ascend`` ``GPU`` ``CPU``
+
+    Examples:
+        >>> import numpy as np
+        >>> import mindspore
+        >>> from mindspore import Tensor
+        >>> from mindnlp._legacy.functional import masked_select
+        >>> x = Tensor(np.array([1, 2, 3, 4]), mindspore.int64)
+        >>> mask = Tensor(np.array([1, 0, 1, 0]), mindspore.bool_)
+        >>> output = masked_select(x, mask)
+        >>> print(output)
+        [1 3]
+    """
+    # `_get_cache_prim` is defined outside this hunk; presumably it returns a
+    # cached primitive class, which is then instantiated and applied.
+    masked_select_ = _get_cache_prim(ops.MaskedSelect)()
+    return masked_select_(inputs, mask)
+
+
 def kl_div(inputs, target, reduction='none', log_target=False):
     """KLDiv function."""
     if log_target:

diff --git a/mindnlp/abc/modules/embedding.py b/mindnlp/abc/modules/embedding.py
@@ -21,24 +21,21 @@
 from abc import abstractmethod
 from mindspore import nn
 from mindspore import Parameter
-from mindspore.dataset.text.utils import Vocab
 
 
 class TokenEmbedding(nn.Cell):
     r"""
-    Create vocab and Embedding from a given pre-trained vector file.
+    Create Embedding from a given pre-trained vector file.
 
     Args:
-        vocab (Vocab): Passins into Vocab for initialization.
         init_embed (Tensor): Passing into Vocab and Tensor,use these values to initialize Embedding directly.
         requires_grad (bool): Whether this parameter needs to be gradient to update.
         dropout (float): Dropout of the output of Embedding.
 
     """
-    def __init__(self, vocab: Vocab, init_embed, requires_grad: bool = True, dropout=0.0):
+    def __init__(self, init_embed, requires_grad: bool = True, dropout=0.0):
         super().__init__()
 
-        self._word_vocab = vocab
         self.embed = Parameter(init_embed, name='embed', requires_grad=requires_grad)
         self.dropout_layer = nn.Dropout(1 - dropout)
         self._embed_size = self.embed.shape
@@ -72,13 +69,7 @@ def num_embeddings(self):
         """
         num embeddings
         """
-        return len(self._word_vocab.vocab())
-
-    def get_word_vocab(self):
-        """
-        get word vocab
-        """
-        return self._word_vocab.vocab()
+        return len(self.embed)
 
     @abstractmethod
     def construct(self, ids):
-Original file line number
+Diff line change
@@ Expand Up / @@ -133,4 +133,5 @@ kernel_meta/ @@
     rank_0/
     .vscode/
-    *.ckpt
+    *.ckpt
+    download.txt