add roberta (mindspore-lab#444)
lvyufeng authored Apr 19, 2023
1 parent d28edf3 commit 1c3e2b7
Showing 51 changed files with 553 additions and 166 deletions.
2 changes: 1 addition & 1 deletion .github/pylint.conf
@@ -341,7 +341,7 @@ indent-after-paren=4
indent-string=' '

# Maximum number of characters on a single line.
max-line-length=120
max-line-length=150

# Maximum number of lines in a module.
max-module-lines=1500
13 changes: 13 additions & 0 deletions README.md
@@ -25,6 +25,19 @@
[Tutorials](#tutorials) |
[Notes](#notes)

## News 📢

* 🔥 **Latest Features**
  * 📃 Support PreTrained Models, including **[BERT](./mindnlp/models/bert)**, **[GPT2](./mindnlp/models/gpt2)** and **[T5](./mindnlp/models/t5)**.
    You can use them with the following code snippet:
```python
from mindnlp.models import BertModel

model = BertModel.from_pretrained('bert-base-cased')
```



## Introduction

MindNLP is an open source NLP library based on MindSpore. It provides a platform for solving natural language processing tasks and contains many common NLP approaches, helping researchers and developers construct and train models more conveniently and rapidly.
2 changes: 1 addition & 1 deletion mindnlp/abc/backbones/__init__.py
@@ -20,4 +20,4 @@
from .base import BaseModel
from .seq2seq import Seq2seqModel
from .seq2vec import Seq2vecModel
from .pretrained import PretrainedConfig, PretrainedModel
from .pretrained import PreTrainedConfig, PreTrainedModel
94 changes: 47 additions & 47 deletions mindnlp/abc/backbones/pretrained.py
@@ -22,15 +22,15 @@
import os
import logging
from typing import Union, Optional, Tuple, Dict
import mindspore
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore import nn, ops

from ...utils.download import cached_path
from mindnlp.configs import HF_CONFIG_URL_BASE, HF_MODEL_URL_BASE
from mindnlp.utils.download import cached_path

logger = logging.getLogger(__name__)

class PretrainedConfig:
class PreTrainedConfig:
"""
Abstract class for Pretrained models config.
"""
@@ -103,7 +103,7 @@ def use_return_dict(self) -> bool:
return self.return_dict

@classmethod
def from_dict(cls, config_dict: Dict, **kwargs) -> "PretrainedConfig":
def from_dict(cls, config_dict: Dict, **kwargs) -> "PreTrainedConfig":
"""
Constructs a `Config` from a Python dictionary of parameters.
@@ -140,7 +140,7 @@ def from_dict(cls, config_dict: Dict, **kwargs) -> "PretrainedConfig":
return config

@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs) -> "PretrainedConfig":
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs) -> "PreTrainedConfig":
"""from_pretrained"""
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
return cls.from_dict(config_dict, **kwargs)
@@ -164,28 +164,38 @@ def get_config_dict(
"""
cache_dir = kwargs.pop("cache_dir", None)
kwargs.pop("force_download", False)
kwargs.pop("resume_download", False)
_ = kwargs.pop("force_download", False)
_ = kwargs.pop("resume_download", False)
proxies = kwargs.pop("proxies", None)
kwargs.pop("local_files_only", False)
_ = kwargs.pop("local_files_only", False)
from_pt = kwargs.pop("from_pt", False)

folder_name = None
if pretrained_config_archive_map is None:
pretrained_config_archive_map = cls.pretrained_config_archive_map

if pretrained_model_name_or_path in pretrained_config_archive_map:
config_file = pretrained_config_archive_map[pretrained_model_name_or_path]
folder_name = pretrained_model_name_or_path
elif os.path.isdir(pretrained_model_name_or_path):
config_file = os.path.join(pretrained_model_name_or_path, "config.json")
elif os.path.isfile(pretrained_model_name_or_path):
config_file = pretrained_model_name_or_path
elif from_pt:
config_file = HF_CONFIG_URL_BASE.format(pretrained_model_name_or_path)
folder_name = pretrained_model_name_or_path
else:
raise ValueError(f'not found config of {pretrained_model_name_or_path}')

try:
# Load from URL or cache if already cached
resolved_config_file = str(cached_path(
config_file,
cache_dir=cache_dir,
proxies=proxies,
))
folder_name=folder_name
)[0])

# Load config dict
if resolved_config_file is None:
raise EnvironmentError
@@ -198,7 +208,7 @@ def get_config_dict(
msg = (
f"Can't load '{pretrained_model_name_or_path}'. Make sure that:\n\n"
f"- '{pretrained_model_name_or_path}' "
f"is a correct model identifier listed on 'https://huggingface.co/models'\n\n"
f"is a correct model identifier listed on 'https://download.mindspore.cn/toolkits/mindnlp/models'\n\n"
f"- or '{pretrained_model_name_or_path}' "
f"is the correct path to a directory containing a config.json file\n\n"
)
@@ -236,7 +246,7 @@ def to_json_string(self):
return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"


class PretrainedModel(nn.Cell):
class PreTrainedModel(nn.Cell):
"""
Abstract class for Pretrained models
"""
@@ -428,53 +438,28 @@ def load(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]],
Params:
pretrained_model_name_or_path:
"""
return cls.from_pretrained(pretrained_model_name_or_path, args, kwargs)

# Todo: load huggingface checkpoint
config = kwargs.pop("config", None)
# load config
if not isinstance(config, PretrainedConfig):
config_path = config if config is not None else pretrained_model_name_or_path
config = cls.config_class.load(config_path)
model = cls(config, *args, **kwargs)
if os.path.exists(pretrained_model_name_or_path):
# File exists.
model_file = os.path.join(pretrained_model_name_or_path)
assert os.path.isfile(model_file)
else:
# Something unknown
raise ValueError(
f"unable to parse {pretrained_model_name_or_path} as a local path or model name")
# load ckpt
try:
param_dict = load_checkpoint(model_file)
except Exception as exc:
raise ValueError(f"File {model_file} is not a checkpoint file, "
f"please check the path.") from exc

param_not_load = load_param_into_net(model, param_dict)
if len(param_not_load) == len(model.trainable_params()):
raise KeyError(f"The following weights in model are not found: {param_not_load}")

return model

@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
"""from_pretrained"""
config = kwargs.pop("config", None)
state_dict = kwargs.pop("state_dict", None)
cache_dir = kwargs.pop("cache_dir", None)
kwargs.pop("from_tf", False)
from_pt = kwargs.pop("from_pt", False)
force_download = kwargs.pop("force_download", False)
resume_download = kwargs.pop("resume_download", False)
proxies = kwargs.pop("proxies", None)
local_files_only = kwargs.pop("local_files_only", False)

# Load config if we don't provide a configuration
if not isinstance(config, PretrainedConfig):
if not isinstance(config, PreTrainedConfig):
config_path = config if config is not None else pretrained_model_name_or_path
config, model_kwargs = cls.config_class.from_pretrained(
config_path,
*model_args,
from_pt=from_pt,
cache_dir=cache_dir,
return_unused_kwargs=True,
force_download=force_download,
@@ -486,28 +471,36 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
else:
model_kwargs = kwargs

folder_name = None
# Load model
if pretrained_model_name_or_path is not None:
if pretrained_model_name_or_path in cls.pretrained_model_archive_map:
if pretrained_model_name_or_path in cls.pretrained_model_archive_map and not from_pt:
archive_file = cls.pretrained_model_archive_map[pretrained_model_name_or_path]
folder_name = pretrained_model_name_or_path
elif os.path.isdir(pretrained_model_name_or_path):
archive_file = os.path.join(pretrained_model_name_or_path, "mindspore_model.ckpt")
elif os.path.isfile(pretrained_model_name_or_path):
archive_file = pretrained_model_name_or_path
elif from_pt:
archive_file = HF_MODEL_URL_BASE.format(pretrained_model_name_or_path)
folder_name = pretrained_model_name_or_path
else:
raise ValueError(f'not found model of {pretrained_model_name_or_path}.')

# redirect to the cache, if necessary
try:
resolved_archive_file = str(cached_path(
archive_file,
cache_dir=cache_dir,
proxies=proxies,
))
folder_name=folder_name
)[0])
except EnvironmentError as exc:
if pretrained_model_name_or_path in cls.pretrained_model_archive_map:
msg = f"Couldn't reach server at '{archive_file}' to download pretrained weights."
else:
format1 = ", ".join(cls.pretrained_model_archive_map.keys())
format2 = ["mindspore_model.ckpt"]
format2 = ["mindspore.ckpt"]
msg = (
f"Model name '{pretrained_model_name_or_path}' "
f"was not found in model name list ({format1}). "
@@ -522,19 +515,26 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
else:
logger.info("loading weights file %s from cache at %s", archive_file, resolved_archive_file)
else:
resolved_archive_file = None
raise ValueError("the argument 'pretrained_model_name_or_path' should be "
"a string of model name or checkpoint path, but got 'None'.")

# Instantiate model.
model = cls(config, *model_args, **model_kwargs)

if from_pt:
resolved_archive_file = cls.convert_torch_to_mindspore(resolved_archive_file)

if state_dict is None:
try:
state_dict = mindspore.load_checkpoint(resolved_archive_file)
state_dict = load_checkpoint(resolved_archive_file)
except Exception as exc:
raise OSError(
"Unable to load weights from mindspore checkpoint file. "
f"Unable to load weights from mindspore checkpoint file '{resolved_archive_file}'. "
) from exc

mindspore.load_param_into_net(model, state_dict)
not_loaded = load_param_into_net(model, state_dict)[0]

if not_loaded:
raise LookupError(f'found not loaded parameters {not_loaded}.')

return model
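
For context, the reworked `from_pretrained` above resolves weights either from the pretrained model archive map / a local path, or, when `from_pt=True`, from the Hugging Face URL templates followed by a torch-to-MindSpore conversion. A minimal usage sketch, assuming a concrete model class such as `BertModel` inherits this interface (the model identifier is illustrative):

```python
from mindnlp.models import BertModel

# Load a MindSpore checkpoint resolved from the pretrained model archive map
# (or from a local directory containing mindspore_model.ckpt).
model = BertModel.from_pretrained('bert-base-cased')

# Assumed flow for the new `from_pt` branch: download config.json and
# pytorch_model.bin from the Hugging Face URL templates, convert the checkpoint
# with convert_torch_to_mindspore, then load the parameters into the Cell.
model_from_torch = BertModel.from_pretrained('bert-base-cased', from_pt=True)
```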
6 changes: 6 additions & 0 deletions mindnlp/configs.py
@@ -18,3 +18,9 @@
import os

DEFAULT_ROOT = os.path.join(os.path.expanduser('~'), ".mindnlp")
# for huggingface url
HF_CONFIG_URL_BASE = 'https://huggingface.co/{}/raw/main/config.json'
HF_MODEL_URL_BASE = 'https://huggingface.co/{}/resolve/main/pytorch_model.bin'
# for mindnlp obs storage
MINDNLP_CONFIG_URL_BASE = "https://download.mindspore.cn/toolkits/mindnlp/models/{}/{}/config.json"
MINDNLP_MODEL_URL_BASE = "https://download.mindspore.cn/toolkits/mindnlp/models/{}/{}/mindspore.ckpt"
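
The new constants are plain `str.format` templates: the Hugging Face URLs take a single repository id, while the MindNLP OBS URLs have two placeholders (treated below as an architecture folder plus a model name, which is an assumption for illustration). A quick sketch of how they expand:

```python
HF_CONFIG_URL_BASE = 'https://huggingface.co/{}/raw/main/config.json'
MINDNLP_MODEL_URL_BASE = "https://download.mindspore.cn/toolkits/mindnlp/models/{}/{}/mindspore.ckpt"

# Hugging Face templates take the repo id only.
print(HF_CONFIG_URL_BASE.format('bert-base-cased'))
# https://huggingface.co/bert-base-cased/raw/main/config.json

# The OBS templates take two fields; <architecture>/<model name> is assumed here.
print(MINDNLP_MODEL_URL_BASE.format('bert', 'bert-base-cased'))
# https://download.mindspore.cn/toolkits/mindnlp/models/bert/bert-base-cased/mindspore.ckpt
```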
11 changes: 8 additions & 3 deletions mindnlp/models/__init__.py
@@ -15,9 +15,14 @@
"""
Models init
"""
from mindnlp.models import bert
from mindnlp.models.bert import *
from mindnlp.models.erine import *
from . import bert, gpt2, t5, ernie
from .bert import *
from .gpt2 import *
from .t5 import *
from .ernie import *

__all__ = []
__all__.extend(bert.__all__)
__all__.extend(gpt2.__all__)
__all__.extend(t5.__all__)
__all__.extend(ernie.__all__)
19 changes: 10 additions & 9 deletions mindnlp/models/albert/albert.py
@@ -21,7 +21,7 @@
from mindspore import nn
from mindspore import ops
from mindspore.common.initializer import TruncatedNormal
from mindnlp.abc.backbones.pretrained import PretrainedModel
from mindnlp.abc.backbones.pretrained import PreTrainedModel

activation_map = {
'relu': nn.ReLU(),
@@ -259,30 +259,30 @@ def construct(
return outputs # last-layer hidden state, (all hidden states), (all attentions)


class AlbertPretrainedModel(PretrainedModel):
class AlbertPretrainedModel(PreTrainedModel):
""" An abstract class to handle weights initialization and
a simple interface for downloading and loading pretrained models.
"""
def get_input_embeddings(self):
pass
"""get input embeddings"""

def get_position_embeddings(self):
pass
"""get position embeddings"""

def init_model_weights(self):
pass
"""init model weights"""

def post_init(self):
pass
"""post init"""

def resize_position_embeddings(self):
pass
"""resize position embeddings"""

def save(self):
pass
"""save"""

def set_input_embeddings(self):
pass
"""set input embeddings"""


class AlbertModel(AlbertPretrainedModel):
@@ -308,6 +308,7 @@ def construct(
output_attentions=None,
output_hidden_states=None,
):
"""construct"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
4 changes: 2 additions & 2 deletions mindnlp/models/albert/albert_config.py
@@ -15,12 +15,12 @@
"""
ALBERT model config
"""
from mindnlp.abc.backbones.pretrained import PretrainedConfig
from mindnlp.abc.backbones.pretrained import PreTrainedConfig

__all__ = ['AlbertConfig']


class AlbertConfig(PretrainedConfig):
class AlbertConfig(PreTrainedConfig):
"""
Configuration for Albert.
"""
6 changes: 3 additions & 3 deletions mindnlp/models/bert/__init__.py
@@ -15,9 +15,9 @@
"""
Bert Model.
"""
from mindnlp.models.bert import bert, bert_config
from mindnlp.models.bert.bert import *
from mindnlp.models.bert.bert_config import *
from . import bert, bert_config
from .bert import *
from .bert_config import *

__all__ = []
__all__.extend(bert.__all__)
(The remaining changed files in this commit are not shown here.)