forked from huggingface/transformers
Commit
adding conversion script
adding first draft of modeling & tokenization
adding placeholder for test files
bunch of changes
registering the tokenizer/model/etc
tests
change link; something is very VERY wrong here
weird end-of-word thingy going on
i think the tokenization works now; wrote the unit tests
overall structure works; load w next
the monster is alive! works after some cleanup as well
adding emacs autosave to gitignore
currently only supporting the 48 layer one; seems to infer fine on my macbook
cleanup
fixing some documentation
fixing some documentation
tests passing? now works on CUDA also
adding greedy? adding greedy sampling works well
1 parent 2dc8cb8 · commit dbed1c5
Showing 12 changed files with 1,129 additions and 23 deletions.
@@ -131,4 +131,7 @@ examples/runs
# data
/data
serialization_dir
serialization_dir

# emacs
*.*~
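As a quick sanity check on the new ignore rule (not part of the commit; it approximates gitignore globbing on bare file names with Python's fnmatch, which behaves the same for a simple pattern like `*.*~`):

from fnmatch import fnmatch

# `*.*~` covers the `~`-suffixed backup files Emacs leaves next to edited
# sources, as long as the file name contains a dot.
pattern = "*.*~"
for name in ["modeling_ctrl.py~", "README.md~", "notes~", "modeling_ctrl.py"]:
    print(name, fnmatch(name, pattern))
# modeling_ctrl.py~ True
# README.md~ True
# notes~ False   (no dot before the ~, so this backup would still show up)
# modeling_ctrl.py False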
@@ -0,0 +1,144 @@
# coding=utf-8
# Copyright 2018 Salesforce and HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Salesforce CTRL configuration """

from __future__ import absolute_import, division, print_function, unicode_literals

import json
import logging
import sys
from io import open

from .configuration_utils import PretrainedConfig

logger = logging.getLogger(__name__)

CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP = {"ctrl": "https://storage.googleapis.com/sf-ctrl/pytorch/ctrl-config.json"}

class CTRLConfig(PretrainedConfig):
"""Configuration class to store the configuration of a `CTRLModel`. | ||
Args: | ||
vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `CTRLModel` or a configuration json file. | ||
n_positions: Number of positional embeddings. | ||
n_ctx: Size of the causal mask (usually same as n_positions). | ||
dff: Size of the inner dimension of the FFN. | ||
n_embd: Dimensionality of the embeddings and hidden states. | ||
n_layer: Number of hidden layers in the Transformer encoder. | ||
n_head: Number of attention heads for each attention layer in | ||
the Transformer encoder. | ||
layer_norm_epsilon: epsilon to use in the layer norm layers | ||
resid_pdrop: The dropout probabilitiy for all fully connected | ||
layers in the embeddings, encoder, and pooler. | ||
attn_pdrop: The dropout ratio for the attention | ||
probabilities. | ||
embd_pdrop: The dropout ratio for the embeddings. | ||
initializer_range: The sttdev of the truncated_normal_initializer for | ||
initializing all weight matrices. | ||
""" | ||
    pretrained_config_archive_map = CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP

    def __init__(
        self,
        vocab_size_or_config_json_file=246534,
        n_positions=50000,
        n_ctx=512,
        n_embd=1280,
        dff=8192,
        n_layer=48,
        n_head=16,
        resid_pdrop=0.1,
        embd_pdrop=0.1,
        attn_pdrop=0.1,
        layer_norm_epsilon=1e-6,
        initializer_range=0.02,

        num_labels=1,
        summary_type='cls_index',
        summary_use_proj=True,
        summary_activation=None,
        summary_proj_to_labels=True,
        summary_first_dropout=0.1,
        **kwargs
    ):
"""Constructs CTRLConfig. | ||
Args: | ||
vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `CTRLModel` or a configuration json file. | ||
n_positions: Number of positional embeddings. | ||
n_ctx: Size of the causal mask (usually same as n_positions). | ||
dff: Size of the inner dimension of the FFN. | ||
n_embd: Dimensionality of the embeddings and hidden states. | ||
n_layer: Number of hidden layers in the Transformer encoder. | ||
n_head: Number of attention heads for each attention layer in | ||
the Transformer encoder. | ||
layer_norm_epsilon: epsilon to use in the layer norm layers | ||
resid_pdrop: The dropout probabilitiy for all fully connected | ||
layers in the embeddings, encoder, and pooler. | ||
attn_pdrop: The dropout ratio for the attention | ||
probabilities. | ||
embd_pdrop: The dropout ratio for the embeddings. | ||
initializer_range: The sttdev of the truncated_normal_initializer for | ||
initializing all weight matrices. | ||
""" | ||
        super(CTRLConfig, self).__init__(**kwargs)

        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
                and isinstance(vocab_size_or_config_json_file, unicode)):
            with open(vocab_size_or_config_json_file, "r", encoding="utf-8") as reader:
                json_config = json.loads(reader.read())
            for key, value in json_config.items():
                self.__dict__[key] = value
        elif isinstance(vocab_size_or_config_json_file, int):
            self.vocab_size = vocab_size_or_config_json_file
            self.n_ctx = n_ctx
            self.n_positions = n_positions
            self.n_embd = n_embd
            self.n_layer = n_layer
            self.n_head = n_head
            self.dff = dff
            self.resid_pdrop = resid_pdrop
            self.embd_pdrop = embd_pdrop
            self.attn_pdrop = attn_pdrop
            self.layer_norm_epsilon = layer_norm_epsilon
            self.initializer_range = initializer_range

            self.num_labels = num_labels
            self.summary_type = summary_type
            self.summary_use_proj = summary_use_proj
            self.summary_activation = summary_activation
            self.summary_first_dropout = summary_first_dropout
            self.summary_proj_to_labels = summary_proj_to_labels
        else:
            raise ValueError(
                "First argument must be either a vocabulary size (int) "
                "or the path to a pretrained model config file (str)"
            )

    @property
    def max_position_embeddings(self):
        return self.n_positions

    @property
    def hidden_size(self):
        return self.n_embd

    @property
    def num_attention_heads(self):
        return self.n_head

    @property
    def num_hidden_layers(self):
        return self.n_layer
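For orientation, a minimal usage sketch of the configuration class above (not part of the commit; it assumes `CTRLConfig` ends up importable from the top-level `transformers` package, which is what the "registering the tokenizer/model/etc" step in the commit message suggests):

from transformers import CTRLConfig  # assumed import path once registration lands

# Default CTRL configuration from this file: 48 layers, 1280-dim embeddings,
# 16 attention heads, a 246,534-token vocabulary, and 50,000 positions.
config = CTRLConfig()
print(config.n_layer)                  # 48
print(config.hidden_size)              # property alias for n_embd -> 1280
print(config.num_attention_heads)      # property alias for n_head -> 16
print(config.max_position_embeddings)  # property alias for n_positions -> 50000

# Any __init__ keyword can be overridden, e.g. a tiny config for local tests.
tiny = CTRLConfig(n_layer=2, n_embd=128, n_head=4, dff=512)

The read-only properties at the bottom of the file exist so that model-agnostic code can query hidden_size, num_attention_heads, and so on without knowing the CTRL-specific attribute names (n_embd, n_head, ...).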