
Commit

fix tests - bump up version
thomwolf committed Feb 17, 2019
1 parent ffd6238 commit 009ee86
Showing 6 changed files with 28 additions and 79 deletions.
2 changes: 1 addition & 1 deletion pytorch_pretrained_bert/__init__.py
@@ -1,4 +1,4 @@
__version__ = "0.5.1"
__version__ = "0.6.0"
from .tokenization import BertTokenizer, BasicTokenizer, WordpieceTokenizer
from .tokenization_openai import OpenAIGPTTokenizer
from .tokenization_transfo_xl import (TransfoXLTokenizer, TransfoXLCorpus)
36 changes: 22 additions & 14 deletions pytorch_pretrained_bert/modeling_gpt2.py
@@ -64,20 +64,24 @@ def load_tf_weights_in_gpt2(model, gpt2_checkpoint_path):
print("Loading TF weight {} with shape {}".format(name, shape))
array = tf.train.load_variable(tf_path, name)
names.append(name)
arrays.append(array)
arrays.append(array.squeeze())

for name, array in zip(names, arrays):
name = name[6:] # skip "model/"
name = name.split('/')
pointer = model
for m_name in name:
if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
l = re.split(r'_(\d+)', m_name)
if re.fullmatch(r'[A-Za-z]+\d+', m_name):
l = re.split(r'(\d+)', m_name)
else:
l = [m_name]
if l[0] == 'w' or l[0] == 'g':
pointer = getattr(pointer, 'weight')
elif l[0] == 'b':
pointer = getattr(pointer, 'bias')
elif l[0] == 'wpe' or l[0] == 'wte':
pointer = getattr(pointer, l[0])
pointer = getattr(pointer, 'weight')
else:
pointer = getattr(pointer, l[0])
if len(l) >= 2:
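As context for the regex change above (illustrative only, not part of the diff): GPT-2 checkpoint scope names fuse the layer index to the letters (e.g. "h0" inside "model/h0/attn/c_attn/w"), whereas the previous pattern expected an underscore before the index. A minimal sketch of the new splitting:

import re

# "h0" stands in for one component of a GPT-2 scope name such as "model/h0/attn/c_attn/w"
m_name = "h0"
if re.fullmatch(r'[A-Za-z]+\d+', m_name):
    parts = re.split(r'(\d+)', m_name)
else:
    parts = [m_name]
print(parts)  # ['h', '0', ''] -> attribute "h", then layer index 0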
@@ -107,7 +111,7 @@ class GPT2Config(object):

def __init__(
self,
vocab_size_or_config_json_file=40478,
vocab_size_or_config_json_file=50257,
n_positions=1024,
n_ctx=1024,
n_embd=768,
@@ -273,10 +277,10 @@ def __init__(self, n_ctx, config, scale=False):
self.ln_2 = LayerNorm(nx, eps=config.layer_norm_epsilon)
self.mlp = MLP(4 * nx, config)

def forward(self, x, past):
def forward(self, x, past=None):
a, present = self.attn(self.ln_1(x), past=past)
x = x + a
m = self.mlp(self.ln_2(c))
m = self.mlp(self.ln_2(x))
x = x + m
return x, present

@@ -522,8 +526,12 @@ def __init__(self, config):

self.apply(self.init_weights)

def forward(self, input_ids, position_ids=None, token_type_ids=None, past=None):
past_length = 0 if past is None else past[0][0].size(-2)
def forward(self, input_ids, position_ids=None, token_type_ids=None, pasts=None):
if pasts is None:
past_length = 0
pasts = [None] * len(self.h)
else:
past_length = pasts[0][0].size(-2)
if position_ids is None:
position_ids = torch.arange(past_length, input_ids.size(-1) + past_length, dtype=torch.long, device=input_ids.device)
position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
@@ -541,8 +549,8 @@ def forward(self, input_ids, position_ids=None, token_type_ids=None, past=None):
token_type_embeds = 0
hidden_states = inputs_embeds + position_embeds + token_type_embeds
presents = []
for block in self.h:
hidden_states, present = block(hidden_states)
for block, past in zip(self.h, pasts):
hidden_states, present = block(hidden_states, past)
presents.append(present)
hidden_states = self.ln_f(hidden_states)
output_shape = input_shape + (hidden_states.size(-1),)
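A minimal usage sketch of the new pasts/presents plumbing (not part of the diff; the small config values mirror the test fixtures and the token ids are placeholders): the presents returned by one forward pass are fed back as pasts, so the next step only processes the newest token and position ids are offset by the cached length.

import torch
from pytorch_pretrained_bert.modeling_gpt2 import GPT2Config, GPT2Model

# Small hypothetical config, not the real GPT-2 sizes
config = GPT2Config(vocab_size_or_config_json_file=99, n_positions=33,
                    n_embd=32, n_layer=5, n_head=4)
model = GPT2Model(config)
model.eval()

prompt = torch.tensor([[40, 2, 17]])                  # placeholder token ids
hidden, presents = model(prompt)                      # first pass, pasts=None
next_token = torch.tensor([[25]])                     # only the newest token
hidden, presents = model(next_token, pasts=presents)  # reuses cached keys/values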
@@ -599,8 +607,8 @@ def set_tied(self):
"""
self.lm_head.set_embeddings_weights(self.transformer.wte.weight)

def forward(self, input_ids, position_ids=None, token_type_ids=None, lm_labels=None, past=None):
hidden_states, presents = self.transformer(input_ids, position_ids, token_type_ids, past)
def forward(self, input_ids, position_ids=None, token_type_ids=None, lm_labels=None, pasts=None):
hidden_states, presents = self.transformer(input_ids, position_ids, token_type_ids, pasts)
lm_logits = self.lm_head(hidden_states)
if lm_labels is not None:
loss_fct = CrossEntropyLoss(ignore_index=-1)
@@ -665,8 +673,8 @@ def set_tied(self):
"""
self.lm_head.set_embeddings_weights(self.transformer.wte.weight)

def forward(self, input_ids, mc_token_ids, lm_labels=None, mc_labels=None, token_type_ids=None, position_ids=None, past=None):
hidden_states, presents = self.transformer(input_ids, position_ids, token_type_ids, past)
def forward(self, input_ids, mc_token_ids, lm_labels=None, mc_labels=None, token_type_ids=None, position_ids=None, pasts=None):
hidden_states, presents = self.transformer(input_ids, position_ids, token_type_ids, pasts)
lm_logits = self.lm_head(hidden_states)
mc_logits = self.multiple_choice_head(hidden_states, mc_token_ids)
losses = []
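For orientation (illustrative, not part of the diff), the double-heads model takes inputs with a choice dimension, matching the [batch, n_choices, seq_length] convention in the updated tests below; the class name, shapes, and values here are assumptions drawn from that test file.

import torch
from pytorch_pretrained_bert.modeling_gpt2 import GPT2Config, GPT2DoubleHeadsModel

# Hypothetical shapes in the tests' [batch, n_choices, seq_length] convention
config = GPT2Config(vocab_size_or_config_json_file=99, n_positions=33,
                    n_embd=32, n_layer=5, n_head=4)
model = GPT2DoubleHeadsModel(config)
model.eval()

input_ids = torch.randint(0, 99, (2, 3, 7))              # 2 examples, 3 choices, 7 tokens
mc_token_ids = torch.full((2, 3), 6, dtype=torch.long)   # classify from the last token
lm_logits, mc_logits, presents = model(input_ids, mc_token_ids)
# lm_logits: [2, 3, 7, 99]; mc_logits: [2, 3]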
2 changes: 1 addition & 1 deletion pytorch_pretrained_bert/modeling_openai.py
@@ -56,7 +56,7 @@ def load_tf_weights_in_openai_gpt(model, openai_checkpoint_folder_path):
init_params = np.split(np.concatenate(init_params, 0), offsets)[:-1]
init_params = [param.reshape(shape) for param, shape in zip(init_params, shapes)]

# Thsi as used when we had a single embedding matrix for positions and tokens
# This was used when we had a single embedding matrix for positions and tokens
# init_params[0] = np.concatenate([init_params[1], init_params[0]], 0)
# del init_params[1]
init_params = [arr.squeeze() for arr in init_params]
2 changes: 1 addition & 1 deletion setup.py
@@ -38,7 +38,7 @@

setup(
name="pytorch_pretrained_bert",
version="0.5.1",
version="0.6.0",
author="Thomas Wolf, Victor Sanh, Tim Rault, Google AI Language Team Authors, Open AI team Authors",
author_email="thomas@huggingface.co",
description="PyTorch version of Google AI BERT model with script to load Google pre-trained models",
9 changes: 3 additions & 6 deletions tests/modeling_gpt2_test.py
@@ -38,7 +38,6 @@ def __init__(self,
use_token_type_ids=True,
use_labels=True,
vocab_size=99,
n_special=1,
n_positions=33,
n_embd=32,
n_layer=5,
@@ -56,7 +55,6 @@ def __init__(self,
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.n_special = n_special
self.n_positions = n_positions
self.n_embd = n_embd
self.n_layer = n_layer
@@ -76,7 +74,7 @@ def prepare_config_and_inputs(self):

token_type_ids = None
if self.use_token_type_ids:
total_voc = self.vocab_size + self.n_special
total_voc = self.vocab_size
token_type_ids = GPT2ModelTest.ids_tensor([self.batch_size, self.n_choices, self.seq_length], total_voc)

mc_labels = None
@@ -90,7 +88,6 @@ def prepare_config_and_inputs(self):
config = GPT2Config(
vocab_size_or_config_json_file=self.vocab_size,
n_positions=self.n_positions,
n_special=self.n_special,
n_embd=self.n_embd,
n_layer=self.n_layer,
n_head=self.n_head,
@@ -130,7 +127,7 @@ def create_gpt2_lm_head(self, config, input_ids, token_type_ids, position_ids,
return outputs

def check_gpt2_lm_head_output(self, result):
total_voc = self.n_special + self.vocab_size
total_voc = self.vocab_size
self.parent.assertListEqual(
list(result["lm_logits"].size()),
[self.batch_size, self.n_choices, self.seq_length, total_voc])
@@ -157,7 +154,7 @@ def create_gpt2_double_heads(self, config, input_ids, token_type_ids, position_i
return outputs

def check_gpt2_double_heads_output(self, result):
total_voc = self.n_special + self.vocab_size
total_voc = self.vocab_size
self.parent.assertListEqual(
list(result["lm_logits"].size()),
[self.batch_size, self.n_choices, self.seq_length, total_voc])
56 changes: 0 additions & 56 deletions tests/tokenization_gpt2_test.py

This file was deleted.
