Skip to content

Commit

Permalink
Merge branch 'mindspore-lab:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
Yicorner authored Mar 21, 2023
2 parents eb04d89 + 7e8d8d6 commit fe97d3f
Show file tree
Hide file tree
Showing 62 changed files with 4,084 additions and 463 deletions.
28 changes: 28 additions & 0 deletions .github/install_mindspore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import re
import requests
import os

def gen_url(os, py_version):
    """Write the MindSpore daily-wheel download URL to ``download.txt``.

    The URL is written to ``download.txt`` in the current working directory,
    overwriting any existing file.

    Args:
        os (str): CI runner label: ``'ubuntu-latest'``, ``'macos-latest'`` or
            ``'windows-latest'``. NOTE: this parameter shadows the ``os``
            module inside the function; the name is kept so existing callers
            keep working.
        py_version (str): Python version such as ``'3.7'``. Only the major and
            minor components are used, so ``'3.8.10'`` is treated as ``'3.8'``.

    Raises:
        ValueError: If ``os`` is not one of the supported runner labels.
    """
    hf_url = 'https://huggingface.co/lvyufeng/mindspore-daily/resolve/main/'
    platform_tags = {
        'ubuntu-latest': 'linux_x86_64',
        'macos-latest': 'macosx_10_15_x86_64',
        'windows-latest': 'win_amd64',
    }
    if os not in platform_tags:
        raise ValueError(f'not support this operate system {os}')

    # Wheel tags carry major+minor only; dropping a patch component keeps
    # '3.8.10' from turning into the bogus tag 'cp3810'.
    py_tag = ''.join(py_version.split('.')[:2])
    # The 3.7 wheel is published with the 'cp37m' ABI tag; other versions use
    # the bare interpreter tag.
    abi_tag = py_tag + 'm' if py_tag == '37' else py_tag
    whl_name = f'mindspore-newest-cp{py_tag}-cp{abi_tag}-{platform_tags[os]}.whl'

    with open('download.txt', 'w', encoding='utf-8') as f:
        f.write(hf_url + whl_name)

if __name__ == '__main__':
    # The CI workflow supplies the runner label and the Python version through
    # the OS and PYTHON environment variables; a missing variable raises
    # KeyError, OS being looked up first.
    platform, python = (os.environ[key] for key in ('OS', 'PYTHON'))
    gen_url(platform, python)
24 changes: 24 additions & 0 deletions .github/workflows/ci_pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements/dev_requirements.txt
- name: Install MindSpore
  shell: bash
  env:
    OS: ubuntu-latest
    # Quoted so YAML keeps the version as a string; an unquoted value is read
    # as a float, which would turn a later bump to 3.10 into 3.1.
    PYTHON: "3.7"
  run: |
    python .github/install_mindspore.py
    pip install -r download.txt
- name: Analysing the mindnlp code with pylint
run: |
pylint mindnlp --rcfile=.github/pylint.conf
Expand All @@ -60,6 +68,14 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements/dev_requirements.txt
# Resolve the MindSpore wheel URL for this matrix entry (install_mindspore.py
# writes it to download.txt), then have pip install from that file.
- name: Install MindSpore
shell: bash
env:
OS: ${{ matrix.os }}
PYTHON: ${{ matrix.python }}
run: |
python .github/install_mindspore.py
pip install -r download.txt
- name: Test with pytest
run: |
pytest -m 'not dataset' tests/ut
Expand All @@ -81,6 +97,14 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements/dev_requirements.txt
# Resolve the MindSpore wheel URL for this matrix entry (install_mindspore.py
# writes it to download.txt), then have pip install from that file.
- name: Install MindSpore
shell: bash
env:
OS: ${{ matrix.os }}
PYTHON: ${{ matrix.python }}
run: |
python .github/install_mindspore.py
pip install -r download.txt
- name: Test ST with pytest
run: |
pytest tests/st
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -133,4 +133,5 @@ kernel_meta/
rank_0/

.vscode/
*.ckpt
*.ckpt
download.txt
106 changes: 106 additions & 0 deletions docs/api/metrics.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
Metrics
========

accuracy
---------------------------------------

.. automodule:: mindnlp.metrics.accuracy
:members:
:undoc-members:
:show-inheritance:

bleu
---------------------------------------

.. automodule:: mindnlp.metrics.bleu
:members:
:undoc-members:
:show-inheritance:

confusion_matrix
---------------------------------------

.. automodule:: mindnlp.metrics.confusion_matrix
:members:
:undoc-members:
:show-inheritance:

distinct
---------------------------------------

.. automodule:: mindnlp.metrics.distinct
:members:
:undoc-members:
:show-inheritance:

em\_score
---------------------------------------

.. automodule:: mindnlp.metrics.em_score
:members:
:undoc-members:
:show-inheritance:

f1
---------------------------------------

.. automodule:: mindnlp.metrics.f1
:members:
:undoc-members:
:show-inheritance:

matthews
---------------------------------------

.. automodule:: mindnlp.metrics.matthews
:members:
:undoc-members:
:show-inheritance:

pearson
---------------------------------------

.. automodule:: mindnlp.metrics.pearson
:members:
:undoc-members:
:show-inheritance:

perplexity
---------------------------------------

.. automodule:: mindnlp.metrics.perplexity
:members:
:undoc-members:
:show-inheritance:

precision
---------------------------------------

.. automodule:: mindnlp.metrics.precision
:members:
:undoc-members:
:show-inheritance:

recall
---------------------------------------

.. automodule:: mindnlp.metrics.recall
:members:
:undoc-members:
:show-inheritance:

rouge
---------------------------------------

.. automodule:: mindnlp.metrics.rouge
:members:
:undoc-members:
:show-inheritance:

spearman
---------------------------------------

.. automodule:: mindnlp.metrics.spearman
:members:
:undoc-members:
:show-inheritance:
7 changes: 0 additions & 7 deletions docs/api/scoring.rst

This file was deleted.

2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ What is MindNLP
api/abc
api/dataset
api/engine
api/metrics
api/modules
api/models
api/scoring
api/transforms
api/utils
30 changes: 15 additions & 15 deletions docs/quick_start/use_callback.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,25 @@ The code of using engine is as follows:

.. code:: python
import numpy as np
import mindspore.dataset as ds
from mindspore import nn
from mindnlp.engine.trainer import Trainer
from mindnlp.engine.callbacks.timer_callback import TimerCallback
from mindnlp.engine.callbacks.earlystop_callback import EarlyStopCallback
class MyDataset:
"""Dataset"""
def __init__(self):
self.data = np.random.randn(20, 3).astype(np.float32)
self.label = list(np.random.choice([0, 1]).astype(np.float32) for i in range(20))
self.length = list(np.random.choice([0, 1]).astype(np.float32) for i in range(20))
def __getitem__(self, index):
return self.data[index], self.label[index], self.length[index]
def __len__(self):
return len(self.data)
"""Dataset"""
def __init__(self):
self.data = np.random.randn(20, 3).astype(np.float32)
self.label = list(np.random.choice([0, 1]).astype(np.float32) for i in range(20))
self.length = list(np.random.choice([0, 1]).astype(np.float32) for i in range(20))
def __getitem__(self, index):
return self.data[index], self.label[index], self.length[index]
def __len__(self):
return len(self.data)
class MyModel(nn.Cell):
"""Model"""
Expand All @@ -48,9 +50,7 @@ The code of using engine is as follows:
# Define Dataset
dataset_generator = MyDataset()
train_dataset = ds.GeneratorDataset(dataset_generator, ["data", "label", "length"], shuffle=False)
eval_dataset = ds.GeneratorDataset(dataset_generator, ["data", "label", "length"], shuffle=False)
train_dataset = train_dataset.batch(4)
eval_dataset = eval_dataset.batch(4)
# Define Model
net = MyModel()
net.update_parameters_name('net.')
Expand All @@ -61,10 +61,10 @@ The code of using engine is as follows:
# Define Callback
timer_callback = TimerCallback(print_steps=2)
# Define Trainer
trainer = Trainer(network=net, train_dataset=train_dataset, eval_dataset=eval_dataset,
trainer = Trainer(network=net, train_dataset=train_dataset, eval_dataset=None,
epochs=6, optimizer=optimizer, loss_fn=loss_fn, callbacks=timer_callback)
# Run Trainer
trainer.run(tgt_columns='label', jit=True)
trainer.run(tgt_columns='label')
Callbacks in MindNLP
^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down Expand Up @@ -128,6 +128,6 @@ and the average loss value after each epoch.
logging.info('Avg loss at epoch %d, %.6f', self.epoch, avg_loss)
my_callback = MyCallBack()
trainer = Trainer(network=net, train_dataset=train_dataset, eval_dataset=eval_dataset,
trainer = Trainer(network=net, train_dataset=train_dataset, eval_dataset=None,
epochs=6, optimizer=optimizer, loss_fn=loss_fn, callbacks=my_callback)
trainer.run(tgt_columns='label', jit=True)
trainer.run(tgt_columns='label')
4 changes: 2 additions & 2 deletions examples/language_model/fasttext.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from mindspore.common.initializer import XavierUniform
from mindspore.dataset.text.utils import Vocab
from mindnlp.engine.trainer import Trainer
from mindnlp.engine.metrics import Accuracy
from mindnlp.metrics import Accuracy
from mindnlp.modules.embeddings import Glove
from mindnlp import load_dataset, process

Expand Down Expand Up @@ -65,7 +65,7 @@ def construct(self, text):
return classifier


ag_news_train, ag_news_test = load('ag_news', shuffle=True)
ag_news_train, ag_news_test = load_dataset('ag_news', shuffle=True)

vocab_size = 1383812
embedding_dims = 16
Expand Down
1 change: 1 addition & 0 deletions mindnlp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

from mindnlp.dataset import load_dataset, process
from mindnlp.utils import less_min_pynative_first
from mindnlp.workflow.workflow import Workflow
if less_min_pynative_first:
from mindspore import context
from mindspore import ms_function as ms_jit
Expand Down
34 changes: 34 additions & 0 deletions mindnlp/_legacy/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,40 @@
cast_ = ops.Cast()
scalar_to_tensor_ = ops.ScalarToTensor()


def masked_select(inputs, mask):
    """
    Returns a new 1-D Tensor which indexes the `inputs` tensor according to the boolean `mask`.
    The shapes of the `mask` tensor and the `inputs` tensor don't need to match,
    but they must be broadcastable.

    Args:
        inputs (Tensor): The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.
        mask (Tensor[bool]): The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.

    Returns:
        A 1-D Tensor, with the same type as `inputs`.

    Raises:
        TypeError: If `inputs` or `mask` is not a Tensor.
        TypeError: If dtype of `mask` is not bool.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore
        >>> from mindspore import Tensor
        >>> from mindnlp._legacy.functional import masked_select
        >>> x = Tensor(np.array([1, 2, 3, 4]), mindspore.int64)
        >>> mask = Tensor(np.array([1, 0, 1, 0]), mindspore.bool_)
        >>> output = masked_select(x, mask)
        >>> print(output)
        [1 3]
    """
    # Obtain the MaskedSelect primitive through _get_cache_prim (defined outside
    # this block; presumably it caches primitive instances), instantiate it, and
    # apply it to the operands in a single expression.
    return _get_cache_prim(ops.MaskedSelect)()(inputs, mask)


def kl_div(inputs, target, reduction='none', log_target=False):
"""KLDiv function."""
if log_target:
Expand Down
15 changes: 3 additions & 12 deletions mindnlp/abc/modules/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,21 @@
from abc import abstractmethod
from mindspore import nn
from mindspore import Parameter
from mindspore.dataset.text.utils import Vocab


class TokenEmbedding(nn.Cell):
r"""
Create vocab and Embedding from a given pre-trained vector file.
Create Embedding from a given pre-trained vector file.
Args:
vocab (Vocab): Passins into Vocab for initialization.
init_embed (Tensor): Initial embedding values; used directly to initialize the Embedding parameter.
requires_grad (bool): Whether this parameter needs to be gradient to update.
dropout (float): Dropout of the output of Embedding.
"""
def __init__(self, vocab: Vocab, init_embed, requires_grad: bool = True, dropout=0.0):
def __init__(self, init_embed, requires_grad: bool = True, dropout=0.0):
super().__init__()

self._word_vocab = vocab
self.embed = Parameter(init_embed, name='embed', requires_grad=requires_grad)
self.dropout_layer = nn.Dropout(1 - dropout)
self._embed_size = self.embed.shape
Expand Down Expand Up @@ -72,13 +69,7 @@ def num_embeddings(self):
"""
num embeddings
"""
return len(self._word_vocab.vocab())

def get_word_vocab(self):
"""
get word vocab
"""
return self._word_vocab.vocab()
return len(self.embed)

@abstractmethod
def construct(self, ids):
Expand Down
Loading

0 comments on commit fe97d3f

Please sign in to comment.