Tweak conv-rnn model #75

Merged (4 commits), Oct 29, 2017
Changes from 3 commits
2 changes: 1 addition & 1 deletion conv_rnn/README.md
@@ -17,7 +17,7 @@ You may then run `python train.py` and `python test.py` for training and testing
### Empirical results
Best dev | Test
-- | --
-51.1 | 50.7
+52.04359673024523 | 50.85972850678733

### References
[1] Chenglong Wang, Feijun Jiang, and Hongxia Yang. 2017. A Hybrid Framework for Text Modeling with Convolutional RNN. In Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD '17).
9 changes: 1 addition & 8 deletions conv_rnn/data.py
@@ -5,13 +5,7 @@
import torch.utils.data as data

def sst_tokenize(sentence):
-    extraneous_pattern = re.compile(r"^(--lrb--|--rrb--|``|''|--|\.)$")
-    words = []
-    for word in sentence.split():
-        if re.match(extraneous_pattern, word):
-            continue
-        words.append(word)
-    return words
+    return sentence.split()
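
Note that this simplification changes tokenization behavior, not just style: bracket and punctuation tokens that the old pattern filtered out are now kept. A small illustration on a made-up SST-style sentence:

```python
# Hypothetical sentence in SST's lowercased, escaped-bracket style.
sentence = "--lrb-- a gripping film --rrb-- ."

# New behavior: a plain whitespace split keeps every token.
print(sentence.split())
# ['--lrb--', 'a', 'gripping', 'film', '--rrb--', '.']

# The old tokenizer would have dropped tokens matching
# r"^(--lrb--|--rrb--|``|''|--|\.)$", leaving:
# ['a', 'gripping', 'film']
```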

class SSTEmbeddingLoader(object):
    def __init__(self, dirname, fmt="stsa.fine.{}", word2vec_file="word2vec.sst-1"):
@@ -26,7 +20,6 @@ def load_embed_data(self):
        with open(os.path.join(self.dirname, self.word2vec_file)) as f:
            for i, line in enumerate(f.readlines()):
                word, vec = line.replace("\n", "").split(" ", 1)
-                word = word.replace("#", "")
                vec = np.array([float(v) for v in vec.split(" ")])
                weights.append(vec)
                id_dict[word] = i
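
The loader assumes one `<word> <space-separated floats>` entry per line; with the `#`-stripping gone, words are stored exactly as they appear in `word2vec.sst-1`. A minimal stand-alone sketch of the same parsing logic on fabricated data:

```python
import numpy as np

# Two fabricated lines in the assumed "<word> <vec>" format of word2vec.sst-1.
lines = ["the 0.41 0.25 -0.41\n", "movie 0.01 0.24 -0.17\n"]

weights, id_dict = [], {}
for i, line in enumerate(lines):
    word, vec = line.replace("\n", "").split(" ", 1)    # split off the word
    vec = np.array([float(v) for v in vec.split(" ")])  # parse the vector
    weights.append(vec)
    id_dict[word] = i

print(id_dict)           # {'the': 0, 'movie': 1}
print(weights[0].shape)  # (3,)
```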
4 changes: 0 additions & 4 deletions conv_rnn/model.py
@@ -34,8 +34,6 @@ def __init__(self, word_model, **config):
        else:
            raise ValueError("RNN type must be one of LSTM or GRU")
        self.conv = nn.Conv2d(1, n_fmaps, (1, self.hidden_size * 2))
-        if dropout:
-            self.dropout = nn.Dropout(dropout)

Member: does this hurt performance? if we are removing it why are we adding --dropout_prob to train.py?

Member (Author): Paper says no dropout for SST1/2.

        self.fc1 = nn.Linear(n_fmaps + 2 * self.hidden_size, fc_size)
        self.fc2 = nn.Linear(fc_size, config["n_labels"])

@@ -77,8 +75,6 @@ def forward(self, x):
        out = [t.squeeze(1) for t in rnn_out.chunk(2, 1)]
        out.append(x)
        x = torch.cat(out, 1).squeeze(2)
-        if hasattr(self, "dropout"):
-            x = self.dropout(x)
        x = nn_func.relu(self.fc1(x))
        return self.fc2(x)

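The thread above leaves `--dropout_prob` in train.py while deleting the dropout layer here. One way to reconcile the two, sketched under the assumption of a PyTorch version with `nn.Identity` (the class and names below are hypothetical, not the merged code): keep the layer but make probability 0 a no-op, so the paper's no-dropout SST-1/2 setting is just `--dropout_prob 0`.

```python
import torch.nn as nn
import torch.nn.functional as nn_func

class ClassifierHead(nn.Module):  # hypothetical stand-in for the model's tail
    def __init__(self, n_fmaps, hidden_size, fc_size, n_labels, dropout=0.0):
        super().__init__()
        # nn.Identity when dropout == 0: --dropout_prob 0 reproduces the
        # paper's no-dropout setting without a separate code path.
        self.dropout = nn.Dropout(dropout) if dropout > 0 else nn.Identity()
        self.fc1 = nn.Linear(n_fmaps + 2 * hidden_size, fc_size)
        self.fc2 = nn.Linear(fc_size, n_labels)

    def forward(self, x):
        x = self.dropout(x)
        x = nn_func.relu(self.fc1(x))
        return self.fc2(x)
```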
15 changes: 8 additions & 7 deletions conv_rnn/test.py
@@ -5,6 +5,7 @@
import numpy as np
import torch
import torch.nn as nn
+import torch.utils as utils

import data
import model
@@ -17,19 +18,19 @@ def main():
parser.add_argument("--gpu_number", default=0, type=int)
args = parser.parse_args()

model.set_seed(5, no_cuda=args.no_cuda)
data_loader = data.SSTDataLoader(args.data_dir)
model.set_seed(3, no_cuda=args.no_cuda)
Member: make seed configurable?

Member (Author): Actually the seed is useless in test.py, I'll remove it later.
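
`model.set_seed` is defined outside this diff; a plausible implementation (an assumption, not the repository's actual code) seeds every RNG involved, which is what would make a configurable seed meaningful:

```python
import random

import numpy as np
import torch

def set_seed(seed, no_cuda=False):
    # Seed Python, NumPy, and Torch so runs are reproducible.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if not no_cuda:
        torch.cuda.manual_seed_all(seed)  # CUDA RNGs keep separate state
```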

    conv_rnn = torch.load(args.input_file)
    if not args.no_cuda:
        torch.cuda.set_device(args.gpu_number)
        conv_rnn.cuda()
-    _, _, test_set = data_loader.load_sst_sets()
+    _, _, test_set = data.SSTDataset.load_sst_sets("data")
+    test_loader = utils.data.DataLoader(test_set, batch_size=len(test_set), collate_fn=conv_rnn.convert_dataset)

    conv_rnn.eval()
-    test_in, test_out = conv_rnn.convert_dataset(test_set)
-    scores = conv_rnn(test_in)
-    n_correct = (torch.max(scores, 1)[1].view(len(test_set)).data == test_out.data).sum()
-    accuracy = n_correct / len(test_set)
+    for test_in, test_out in test_loader:
+        scores = conv_rnn(test_in)
+        n_correct = (torch.max(scores, 1)[1].view(-1).data == test_out.data).sum()
+        accuracy = n_correct / len(test_set)
    print("Test set accuracy: {}".format(accuracy))

if __name__ == "__main__":
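Note that the loop above computes a correct accuracy only because `batch_size=len(test_set)` yields a single batch; with smaller batches, the last iteration would overwrite `accuracy` with a partial result. A sketch of the accumulating variant (the batch size of 256 is illustrative; `conv_rnn` and `test_set` are as in test.py above):

```python
import torch
import torch.utils as utils

test_loader = utils.data.DataLoader(test_set, batch_size=256,
                                    collate_fn=conv_rnn.convert_dataset)
conv_rnn.eval()
n_correct = 0
for test_in, test_out in test_loader:
    scores = conv_rnn(test_in)
    # Accumulate correct predictions across batches instead of overwriting.
    n_correct += (torch.max(scores, 1)[1].view(-1).data == test_out.data).sum()
accuracy = n_correct / len(test_set)
print("Test set accuracy: {}".format(accuracy))
```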
33 changes: 18 additions & 15 deletions conv_rnn/train.py
@@ -2,10 +2,11 @@
import os
import random

+from torch import utils
+from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
import torch
import torch.nn as nn
-from torch import utils

import data
import model
@@ -58,7 +59,6 @@ def train(**kwargs):
    verbose = not kwargs["quiet"]
    lr = kwargs["lr"]
    weight_decay = kwargs["weight_decay"]
-    gradient_clip = kwargs["gradient_clip"]
    seed = kwargs["seed"]

    if not kwargs["no_cuda"]:
@@ -79,7 +79,8 @@
    conv_rnn.train()
    criterion = nn.CrossEntropyLoss()
    parameters = list(filter(lambda p: p.requires_grad, conv_rnn.parameters()))
-    optimizer = torch.optim.Adadelta(parameters, lr=lr, weight_decay=weight_decay)
+    optimizer = torch.optim.SGD(parameters, lr=lr, weight_decay=weight_decay, momentum=0.9)
+    scheduler = ReduceLROnPlateau(optimizer, patience=kwargs["dev_per_epoch"] * 4)
    train_set, dev_set, test_set = data.SSTDataset.load_sst_sets("data")

    collate_fn = conv_rnn.convert_dataset
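
For reference, a self-contained sketch of the SGD-plus-ReduceLROnPlateau pattern introduced here. One caveat worth flagging: `ReduceLROnPlateau` defaults to `mode="min"`, so when it is stepped with an accuracy (a higher-is-better metric, as in `evaluate` below), `mode="max"` is the setting that matches the intent. The model and numbers are placeholders.

```python
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import ReduceLROnPlateau

model = nn.Linear(300, 5)  # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=0.1,
                            momentum=0.9, weight_decay=1e-4)
# patience counts scheduler.step() calls; with dev_per_epoch=9 evaluations
# per epoch, patience = 9 * 4 waits roughly four stagnant epochs.
scheduler = ReduceLROnPlateau(optimizer, mode="max", patience=9 * 4)

for step in range(100):
    # ... training steps ...
    dev_accuracy = 0.5            # placeholder for a real dev-set evaluation
    scheduler.step(dev_accuracy)  # cuts lr 10x after `patience` flat steps
```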
@@ -92,26 +93,26 @@ def evaluate(loader, dev=True):
        conv_rnn.eval()
        for m_in, m_out in loader:
            scores = conv_rnn(m_in)
-            loss = criterion(scores, m_out)
+            loss = criterion(scores, m_out).cpu().data[0]
            n_correct = (torch.max(scores, 1)[1].view(m_in.size(0)).data == m_out.data).sum()
            accuracy = n_correct / m_in.size(0)
-            if dev and accuracy > evaluate.best_dev:
+            scheduler.step(accuracy)
+            if dev and accuracy >= evaluate.best_dev:
                evaluate.best_dev = accuracy
                print("Saving best model ({})...".format(accuracy))
                torch.save(conv_rnn, kwargs["output_file"])
            if verbose:
-                print("{} set accuracy: {}, loss: {}".format("dev" if dev else "test", accuracy, loss.cpu().data[0]))
+                print("{} set accuracy: {}, loss: {}".format("dev" if dev else "test", accuracy, loss))
        conv_rnn.train()
    evaluate.best_dev = 0

    for epoch in range(n_epochs):
-        optimizer.zero_grad()
        print("Epoch number: {}".format(epoch), end="\r")
        if verbose:
            print()
        i = 0
        for j, (train_in, train_out) in enumerate(train_loader):
-            if verbose and i % (mbatch_size * 10) == 0:
-                print("{} / {}".format(j * mbatch_size, len(train_set)), end="\r")
+            optimizer.zero_grad()

            if not kwargs["no_cuda"]:
                train_in.cuda()
@@ -120,10 +121,12 @@ def evaluate(loader, dev=True):
            scores = conv_rnn(train_in)
            loss = criterion(scores, train_out)
            loss.backward()
-            torch.nn.utils.clip_grad_norm(parameters, gradient_clip)
            optimizer.step()
+            accuracy = (torch.max(scores, 1)[1].view(-1).data == train_out.data).sum() / mbatch_size
+            if verbose and i % (mbatch_size * 10) == 0:
+                print("accuracy: {}, {} / {}".format(accuracy, j * mbatch_size, len(train_set)))
            i += mbatch_size
-            if i % (mbatch_size * 256) == 0:
+            if i % (len(train_set) // kwargs["dev_per_epoch"]) < mbatch_size:
                evaluate(dev_loader)
    evaluate(test_loader, dev=False)
    return evaluate.best_dev
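
The new condition replaces a fixed every-256-minibatches check with one tied to the dataset size: since `i` advances in steps of `mbatch_size`, the test `i % (len(train_set) // dev_per_epoch) < mbatch_size` fires once per interval of `len(train_set) // dev_per_epoch` samples, i.e. about `dev_per_epoch` dev evaluations per epoch. A quick check with illustrative numbers (8544 is SST-1's training-set size):

```python
len_train_set = 8544  # SST-1 fine-grained training set, for illustration
mbatch_size = 64
dev_per_epoch = 9

interval = len_train_set // dev_per_epoch  # 949 samples between evaluations
evals = sum(1 for i in range(0, len_train_set, mbatch_size)
            if i % interval < mbatch_size)
print(evals)  # 9: one dev evaluation per ~949 training samples
```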
@@ -147,13 +150,13 @@ def do_random_search(given_params):

def main():
    parser = argparse.ArgumentParser()
-    parser.add_argument("--dropout_prob", default=0.5, type=float)
+    parser.add_argument("--dev_per_epoch", default=9, type=int)
+    parser.add_argument("--dropout_prob", default=0.2, type=float)
    parser.add_argument("--fc_size", default=200, type=int)
    parser.add_argument("--gpu_number", default=0, type=int)
-    parser.add_argument("--gradient_clip", default=5, type=float)
    parser.add_argument("--hidden_size", default=200, type=int)
    parser.add_argument("--input_file", default="saves/model.pt", type=str)
-    parser.add_argument("--lr", default=5E-2, type=float)
+    parser.add_argument("--lr", default=1E-1, type=float)
    parser.add_argument("--mbatch_size", default=64, type=int)
    parser.add_argument("--n_epochs", default=30, type=int)
    parser.add_argument("--n_feature_maps", default=200, type=float)
@@ -165,7 +168,7 @@
parser.add_argument("--rnn_type", choices=["lstm", "gru"], default="lstm", type=str)
parser.add_argument("--seed", default=3, type=int)
parser.add_argument("--quiet", action="store_true", default=False)
parser.add_argument("--weight_decay", default=1E-3, type=float)
parser.add_argument("--weight_decay", default=1E-4, type=float)
args = parser.parse_args()
if args.random_search:
do_random_search(vars(args))