refactor dir (mindspore-lab#294)
lvyufeng authored Mar 6, 2023
1 parent b7bd555 commit aa252c5
Showing 131 changed files with 444 additions and 283 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -103,7 +103,7 @@ imdb_train, imdb_test = load('imdb', shuffle=True)
Initializes the vocab and tokenizer for preprocessing:
```python
from mindnlp.modules import Glove
from mindnlp.dataset.transforms import BasicTokenizer
from mindnlp.transforms import BasicTokenizer

embedding, vocab = Glove.from_pretrained('6B', 100, special_tokens=["<unk>", "<pad>"], dropout=drop)
tokenizer = BasicTokenizer(True)
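For a quick end-to-end view, here is a minimal usage sketch under the refactored import paths (mirroring the notebook added later in this commit; the only change in this hunk is the `BasicTokenizer` import path):

```python
from mindnlp import load_dataset
from mindnlp.modules import Glove
from mindnlp.transforms import BasicTokenizer  # moved from mindnlp.dataset.transforms

# load the IMDB splits and the pretrained 100-dim GloVe 6B vectors
imdb_train, imdb_test = load_dataset('imdb', split=['train', 'test'], shuffle=True)
embedding, vocab = Glove.from_pretrained('6B', 100, special_tokens=["<unk>", "<pad>"])

tokenizer = BasicTokenizer(True)  # True enables lower-casing
imdb_train = imdb_train.map(tokenizer, 'text')  # tokenize the review text column
imdb_test = imdb_test.map(tokenizer, 'text')
```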
2 changes: 1 addition & 1 deletion docs/examples/fasttext.rst
@@ -126,7 +126,7 @@ Initializes the vocab and tokenizer for preprocessing:
.. code:: python
from mindnlp.modules import Glove
from mindnlp.dataset.transforms import BasicTokenizer
from mindnlp.transforms import BasicTokenizer
tokenizer = BasicTokenizer(True)
embedding, vocab = Glove.from_pretrained('6B', 100)
2 changes: 1 addition & 1 deletion docs/examples/machine_translation.rst
@@ -66,7 +66,7 @@ Initialize the vocab and process the data set:

.. code:: python
from mindnlp.dataset.transforms import BasicTokenizer
from mindnlp.transforms import BasicTokenizer
from mindspore.dataset import text
from mindnlp.dataset import process
2 changes: 1 addition & 1 deletion docs/examples/question_answer.rst
@@ -105,7 +105,7 @@ Then initialize the tokenizer:

.. code:: python
from mindnlp.dataset.transforms import BasicTokenizer
from mindnlp.transforms import BasicTokenizer
tokenizer = BasicTokenizer(True)
2 changes: 1 addition & 1 deletion docs/quick_start/dataset_and_transforms.rst
@@ -175,7 +175,7 @@ vocab:

.. code:: python
from mindnlp.dataset.transforms import BasicTokenizer
from mindnlp.transforms import BasicTokenizer
tokenizer = BasicTokenizer(True)
dataset_train= dataset_train.map([tokenizer], 'en')
2 changes: 1 addition & 1 deletion docs/quick_start/train_and_eval.rst
@@ -42,7 +42,7 @@ And then we initialize the tokenizer by instantiating the class

.. code-block:: python
from mindnlp.dataset.transforms import BasicTokenizer
from mindnlp.transforms import BasicTokenizer
tokenizer = BasicTokenizer(True)
5 changes: 3 additions & 2 deletions examples/fasttext.py
@@ -24,8 +24,9 @@
from mindnlp.engine.trainer import Trainer
from mindnlp.engine.metrics import Accuracy
from mindnlp.modules.embeddings import Glove
from mindnlp.dataset import load, process
from mindnlp.dataset.transforms import BasicTokenizer
from mindnlp import load_dataset, process

from mindnlp.transforms import BasicTokenizer


class FasttextModel(nn.Cell):
5 changes: 3 additions & 2 deletions examples/machine_translation.py
@@ -25,8 +25,9 @@
from mindnlp.engine.callbacks.timer_callback import TimerCallback
from mindnlp.engine.callbacks.earlystop_callback import EarlyStopCallback
from mindnlp.engine.callbacks.best_model_callback import BestModelCallback
from mindnlp.dataset import load, process
from mindnlp.dataset.transforms import BasicTokenizer
from mindnlp import load_dataset, process

from mindnlp.transforms import BasicTokenizer

# ms.set_context(device_target="GPU") # set GPU

4 changes: 2 additions & 2 deletions examples/question_answer.py
@@ -27,9 +27,9 @@

from mindnlp.abc import Seq2vecModel
from mindnlp.engine.trainer import Trainer
from mindnlp.dataset.register import load, process
from mindnlp.dataset.register import load_dataset, process
from mindnlp.modules.embeddings import Word2vec, Glove
from mindnlp.dataset.transforms import BasicTokenizer
from mindnlp.transforms import BasicTokenizer

# mindspore.set_context(mode=context.PYNATIVE_MODE ,max_call_depth=10000)
# mindspore.set_context(mode=context.GRAPH_MODE ,max_call_depth=10000, enable_graph_kernel=True)
6 changes: 6 additions & 0 deletions examples/sentiment_analysis/bilstm_imdb_concise.ipynb
@@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
105 changes: 105 additions & 0 deletions examples/sentiment_analysis/bilstm_imdb_scratch.ipynb
@@ -0,0 +1,105 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"id": "f29588be-fbc2-4101-8734-661a716ea1c7",
"metadata": {},
"outputs": [],
"source": [
"import mindspore\n",
"import mindnlp\n",
"from mindspore import nn\n",
"from mindnlp.modules import Glove\n",
"from mindnlp.transforms import BasicTokenizer"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "daab0038-dc5d-4b0f-ab3b-5e8793c7f5e8",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 80.2M/80.2M [00:21<00:00, 3.97MB/s]\n"
]
}
],
"source": [
"imdb_train, imdb_test = mindnlp.load_dataset('imdb', split=['train', 'test'], shuffle=True)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c6caea8f-e16f-4df9-a275-d5cb51c4b817",
"metadata": {},
"outputs": [],
"source": [
"tokenizer = BasicTokenizer(lower_case=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "9bae9d35-ba6b-40d7-9b7d-6d91ee2c5f46",
"metadata": {},
"outputs": [],
"source": [
"imdb_train = imdb_train.map(tokenizer, 'text')\n",
"imdb_test = imdb_test.map(tokenizer, 'text')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "206e4277-af8e-430b-9533-1a612daf0a80",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 822M/822M [03:40<00:00, 3.91MB/s]\n"
]
}
],
"source": [
"# load embedding and vocab\n",
"embedding, vocab = Glove.from_pretrained('6B', 100, special_tokens=[\"<unk>\", \"<pad>\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4f07b456-0277-4387-b438-7f7b1a083b5b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
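The notebook's last cell is left empty. One plausible continuation (an assumption, not part of the commit) is to convert tokens to vocab indices before building a model, for example with `mindspore.dataset.text.Lookup` plus the `TruncateSequence` transform that the new `mindnlp.transforms` package exports:

```python
from mindspore.dataset import text
from mindnlp.transforms import TruncateSequence

# assumed usage: cap reviews at 500 tokens, then map tokens to GloVe vocab ids
truncate_op = TruncateSequence(500)                    # 500 is an illustrative max length
lookup_op = text.Lookup(vocab, unknown_token="<unk>")  # vocab comes from Glove.from_pretrained

imdb_train = imdb_train.map([truncate_op, lookup_op], 'text')
imdb_test = imdb_test.map([truncate_op, lookup_op], 'text')
```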
5 changes: 3 additions & 2 deletions examples/sentiment_classification.py
@@ -25,9 +25,10 @@

from mindnlp.abc import Seq2vecModel
from mindnlp.engine import Trainer, Accuracy
from mindnlp.dataset import load, process
from mindnlp import load_dataset, process

from mindnlp.modules import Glove, RNNEncoder
from mindnlp.dataset.transforms import BasicTokenizer
from mindnlp.transforms import BasicTokenizer

# Hyper-parameters
hidden_size = 256
4 changes: 3 additions & 1 deletion mindnlp/__init__.py
@@ -16,6 +16,7 @@
MindNLP library.
"""

from mindnlp.dataset import load_dataset, process
from mindnlp.utils import less_min_pynative_first
if less_min_pynative_first:
from mindspore import context
@@ -24,4 +24,5 @@
else:
from mindspore import jit as ms_jit

__all__ = ['ms_jit']

__all__ = ['ms_jit', 'load_dataset', 'process']
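With `load_dataset` and `process` re-exported from the package root, callers no longer need the `mindnlp.dataset` prefix. A minimal sketch of both forms, as used by the notebook added in this commit:

```python
import mindnlp
from mindnlp import load_dataset

# both forms resolve to the same registered loader after this re-export
imdb_train, imdb_test = load_dataset('imdb', split=['train', 'test'], shuffle=True)
imdb_train, imdb_test = mindnlp.load_dataset('imdb', split=['train', 'test'], shuffle=True)
```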
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2022 Huawei Technologies Co., Ltd
# Copyright 2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,8 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Transforms
"""
from .tokenizers import BasicTokenizer
from .seq_process import TruncateSequence


"""Custom api for legacy mindspore"""
2 changes: 1 addition & 1 deletion mindnlp/common/amp.py → mindnlp/_legacy/amp.py
@@ -1,4 +1,4 @@
# Copyright 2022 Huawei Technologies Co., Ltd
# Copyright 2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
File renamed without changes.
File renamed without changes.
24 changes: 24 additions & 0 deletions mindnlp/_legacy/nn/__init__.py
@@ -0,0 +1,24 @@
# Copyright 2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""nn layer for legacy mindspore"""

from .transformer import Transformer, TransformerDecoder, TransformerEncoder, \
TransformerEncoderLayer, TransformerDecoderLayer, MultiheadAttention

__all__ = [
'Transformer', 'TransformerEncoder', 'TransformerDecoder',
'TransformerEncoderLayer', 'TransformerDecoderLayer',
'MultiheadAttention']
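Code that previously pulled these layers from the deleted `mindnlp.common.nn` package (see below) now imports them from `mindnlp._legacy.nn`, for example:

```python
# old path, removed in this commit:
#   from mindnlp.common.nn import MultiheadAttention
# new path:
from mindnlp._legacy.nn import Transformer, TransformerEncoderLayer, MultiheadAttention
```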
Original file line number Diff line number Diff line change
@@ -7,8 +7,8 @@
from mindspore import nn, ops, Parameter, Tensor
from mindspore.common.initializer import initializer, XavierUniform, HeUniform, Uniform, \
_calculate_fan_in_and_fan_out
from ..initializer import XavierNormal
from ..functional import multi_head_attention_forward, relu, gelu
from mindnlp._legacy.initializer import XavierNormal
from mindnlp._legacy.functional import multi_head_attention_forward, relu, gelu


class Linear(nn.Dense):
14 changes: 0 additions & 14 deletions mindnlp/common/nn/__init__.py

This file was deleted.

File renamed without changes.
1 change: 0 additions & 1 deletion mindnlp/configs/dataset_url.json

This file was deleted.

2 changes: 1 addition & 1 deletion mindnlp/dataset/__init__.py
@@ -21,4 +21,4 @@
from .question_answer import *
from .sequence_tagging import *
from .text_generation import *
from .register import load, process
from .register import load_dataset, process
4 changes: 2 additions & 2 deletions mindnlp/dataset/machine_translation/iwslt2016.py
@@ -21,7 +21,7 @@
from typing import Union, Tuple
from mindspore.dataset import IWSLT2016Dataset
from mindnlp.utils.download import cache_file
from mindnlp.dataset.register import load
from mindnlp.dataset.register import load_dataset
from mindnlp.configs import DEFAULT_ROOT
from mindnlp.utils import untar

@@ -54,7 +54,7 @@
}


@load.register
@load_dataset.register
def IWSLT2016(root: str = DEFAULT_ROOT,
split: Union[Tuple[str], str] = ("train", "valid", "test"),
language_pair=("de", "en"),
6 changes: 3 additions & 3 deletions mindnlp/dataset/machine_translation/iwslt2017.py
@@ -20,10 +20,10 @@
import os
from typing import Union, Tuple
from mindspore.dataset import IWSLT2017Dataset
from mindnlp.dataset.transforms import BasicTokenizer
from mindnlp.transforms import BasicTokenizer
from mindnlp.utils.download import cache_file
from mindnlp.dataset.process import common_process
from mindnlp.dataset.register import load, process
from mindnlp.dataset.register import load_dataset, process
from mindnlp.configs import DEFAULT_ROOT
from mindnlp.utils import untar

@@ -45,7 +45,7 @@
}


@load.register
@load_dataset.register
def IWSLT2017(root: str = DEFAULT_ROOT,
split: Union[Tuple[str], str] = ("train", "valid", "test"),
language_pair=("de", "en"), proxies=None):
4 changes: 2 additions & 2 deletions mindnlp/dataset/machine_translation/multi30k.py
@@ -24,7 +24,7 @@
from mindspore.dataset import TextFileDataset, transforms
from mindspore.dataset import text
from mindnlp.utils.download import cache_file
from mindnlp.dataset.register import load, process
from mindnlp.dataset.register import load_dataset, process
from mindnlp.configs import DEFAULT_ROOT
from mindnlp.utils import untar

@@ -41,7 +41,7 @@
}


@load.register
@load_dataset.register
def Multi30k(root: str = DEFAULT_ROOT, split: Union[Tuple[str], str] = ('train', 'valid', 'test'),
language_pair: Tuple[str] = ('de', 'en'), proxies=None):
r"""
6 changes: 3 additions & 3 deletions mindnlp/dataset/question_answer/squad1.py
@@ -27,8 +27,8 @@
from mindspore.dataset import GeneratorDataset, text, transforms

from mindnlp.utils.download import cache_file
from mindnlp.dataset.register import load, process
from mindnlp.dataset.transforms import BasicTokenizer
from mindnlp.dataset.register import load_dataset, process
from mindnlp.transforms import BasicTokenizer
from mindnlp.configs import DEFAULT_ROOT

URL = {
@@ -85,7 +85,7 @@ def __len__(self):
return len(self._anwsers)


@load.register
@load_dataset.register
def SQuAD1(
root: str = DEFAULT_ROOT,
split: Union[Tuple[str], str] = ('train', 'dev'),
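Each dataset module now registers its loader on the shared `load_dataset` entry point via `@load_dataset.register`. The commit does not show `mindnlp/dataset/register.py`, so the sketch below is only an illustration of how such a name-keyed registry could be structured, not the actual implementation:

```python
class _DatasetRegistry:
    """Dispatch load_dataset('name', ...) to a loader registered under that name."""

    def __init__(self):
        self._loaders = {}

    def register(self, func):
        # key the loader by its function name, e.g. IWSLT2016, Multi30k, SQuAD1
        self._loaders[func.__name__.lower()] = func
        return func  # leave the decorated function directly callable as well

    def __call__(self, name, *args, **kwargs):
        try:
            loader = self._loaders[name.lower()]
        except KeyError as exc:
            raise ValueError(f"unknown dataset: {name}") from exc
        return loader(*args, **kwargs)


load_dataset = _DatasetRegistry()

@load_dataset.register
def SQuAD1(root='.', split=('train', 'dev'), proxies=None):
    ...  # download, cache, and wrap the files in mindspore dataset objects
```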