From 213519e17fd8dd64ffcb3106a2eb41e6fb60661b Mon Sep 17 00:00:00 2001 From: nnnyt <793313994@qq.com> Date: Tue, 2 Mar 2021 22:31:07 +0800 Subject: [PATCH] add NCD model and MAAT strategy --- CAT/dataset/train_dataset.py | 7 +- CAT/model/IRT.py | 59 +++++++- CAT/model/NCD.py | 276 ++++++++++++++++++++++++++++++++++ CAT/model/__init__.py | 3 +- CAT/strategy/KLI_strategy.py | 5 +- CAT/strategy/MAAT_strategy.py | 38 +++++ CAT/strategy/MFI_strategy.py | 1 - CAT/strategy/__init__.py | 3 +- 8 files changed, 377 insertions(+), 15 deletions(-) create mode 100644 CAT/model/NCD.py create mode 100644 CAT/strategy/MAAT_strategy.py diff --git a/CAT/dataset/train_dataset.py b/CAT/dataset/train_dataset.py index e7af6c0..6f2d586 100644 --- a/CAT/dataset/train_dataset.py +++ b/CAT/dataset/train_dataset.py @@ -1,3 +1,4 @@ +import torch from torch.utils import data try: @@ -25,7 +26,11 @@ def __init__(self, data, concept_map, def __getitem__(self, item): sid, qid, score = self.raw_data[item] - return sid, qid, score + concepts = self.concept_map[qid] + concepts_emb = [0.] * self.num_concepts + for concept in concepts: + concepts_emb[concept] = 1.0 + return sid, qid, torch.Tensor(concepts_emb), score def __len__(self): return len(self.raw_data) \ No newline at end of file diff --git a/CAT/model/IRT.py b/CAT/model/IRT.py index 250a01c..0693b99 100644 --- a/CAT/model/IRT.py +++ b/CAT/model/IRT.py @@ -66,7 +66,7 @@ def train(self, train_data: TrainDataset): for ep in range(1, epochs + 1): loss = 0.0 log_step = 1 - for cnt, (student_ids, question_ids, labels) in enumerate(train_loader): + for cnt, (student_ids, question_ids, _, labels) in enumerate(train_loader): student_ids = student_ids.to(device) question_ids = question_ids.to(device) labels = labels.to(device).float() @@ -110,7 +110,7 @@ def adaptest_update(self, adaptest_data: AdapTestDataset): for ep in range(1, epochs + 1): loss = 0.0 log_steps = 100 - for cnt, (student_ids, question_ids, labels) in enumerate(dataloader): + for cnt, (student_ids, question_ids, _, labels) in enumerate(dataloader): student_ids = student_ids.to(device) question_ids = question_ids.to(device) labels = labels.to(device).float() @@ -221,11 +221,11 @@ def get_kli(self, student_id, question_id, n): device = self.config['device'] sid = torch.LongTensor([student_id]).to(device) qid = torch.LongTensor([question_id]).to(device) - theta = self.model.theta(sid).clone().detach().numpy()[0][0] - alpha = self.model.alpha(qid).clone().detach().numpy()[0][0] - beta = self.model.beta(qid).clone().detach().numpy()[0][0] - pred_estimate = alpha * theta + beta - pred_estimate = 1 / (1 + np.exp(-pred_estimate)) + theta = self.model.theta(sid).clone().detach().numpy()[0] # (10, ) + alpha = self.model.alpha(qid).clone().detach().numpy()[0] # (10, ) + beta = self.model.beta(qid).clone().detach().numpy()[0][0] # float value + # pred_estimate = 1 / (1 + np.exp(-np.dot(alpha, theta.T) - beta)) + pred_estimate = self.model(sid, qid).data.numpy()[0][0] # float value c = 3 low = theta - c / np.sqrt(n) high = theta + c / np.sqrt(n) @@ -248,6 +248,51 @@ def get_fisher(self, student_id, question_id): q = 1 - pred fisher_info = (q*pred*(alpha * alpha.T)).numpy() return fisher_info + + def expected_model_change(self, sid: int, qid: int, adaptest_data: AdapTestDataset): + + epochs = self.config['num_epochs'] + lr = self.config['learning_rate'] + device = self.config['device'] + optimizer = torch.optim.Adam(self.model.parameters(), lr=lr) + + for name, param in self.model.named_parameters(): + if 'theta' not in 
name: + param.requires_grad = False + + original_weights = self.model.theta.weight.data.clone() + + student_id = torch.LongTensor([sid]).to(device) + question_id = torch.LongTensor([qid]).to(device) + correct = torch.LongTensor([1]).to(device).float() + wrong = torch.LongTensor([0]).to(device).float() + + for ep in range(epochs): + optimizer.zero_grad() + pred = self.model(student_id, question_id) + loss = self._loss_function(pred, correct) + loss.backward() + optimizer.step() + + pos_weights = self.model.theta.weight.data.clone() + self.model.theta.weight.data.copy_(original_weights) + + for ep in range(epochs): + optimizer.zero_grad() + pred = self.model(student_id, question_id) + loss = self._loss_function(pred, wrong) + loss.backward() + optimizer.step() + + neg_weights = self.model.theta.weight.data.clone() + self.model.theta.weight.data.copy_(original_weights) + + for param in self.model.parameters(): + param.requires_grad = True + + pred = self.model(student_id, question_id).item() + return pred * torch.norm(pos_weights - original_weights).item() + \ + (1 - pred) * torch.norm(neg_weights - original_weights).item() diff --git a/CAT/model/NCD.py b/CAT/model/NCD.py new file mode 100644 index 0000000..e223a6b --- /dev/null +++ b/CAT/model/NCD.py @@ -0,0 +1,276 @@ +import torch +import logging +import numpy as np +import torch.nn as nn +import torch.utils.data as data +from sklearn.metrics import roc_auc_score + +from CAT.model.abstract_model import AbstractModel +from CAT.dataset import AdapTestDataset, TrainDataset, Dataset + + +class NCD(nn.Module): + ''' + NeuralCDM + ''' + def __init__(self, student_n, exer_n, knowledge_n): + self.knowledge_dim = knowledge_n + self.exer_n = exer_n + self.emb_num = student_n + self.stu_dim = self.knowledge_dim + self.prednet_input_len = self.knowledge_dim + self.prednet_len1, self.prednet_len2 = 512, 256 # changeable + + super(NCD, self).__init__() + + # network structure + self.student_emb = nn.Embedding(self.emb_num, self.stu_dim) + self.k_difficulty = nn.Embedding(self.exer_n, self.knowledge_dim) + self.e_discrimination = nn.Embedding(self.exer_n, 1) + self.prednet_full1 = nn.Linear(self.prednet_input_len, self.prednet_len1) + self.drop_1 = nn.Dropout(p=0.5) + self.prednet_full2 = nn.Linear(self.prednet_len1, self.prednet_len2) + self.drop_2 = nn.Dropout(p=0.5) + self.prednet_full3 = nn.Linear(self.prednet_len2, 1) + + # initialization + for name, param in self.named_parameters(): + if 'weight' in name: + nn.init.xavier_normal_(param) + + def forward(self, stu_id, exer_id, kn_emb): + ''' + :param stu_id: LongTensor + :param exer_id: LongTensor + :param kn_emb: FloatTensor, the knowledge relevancy vectors + :return: FloatTensor, the probabilities of answering correctly + ''' + # before prednet + stu_emb = torch.sigmoid(self.student_emb(stu_id)) + k_difficulty = torch.sigmoid(self.k_difficulty(exer_id)) + e_discrimination = torch.sigmoid(self.e_discrimination(exer_id)) * 10 + # prednet + input_x = e_discrimination * (stu_emb - k_difficulty) * kn_emb + input_x = self.drop_1(torch.sigmoid(self.prednet_full1(input_x))) + input_x = self.drop_2(torch.sigmoid(self.prednet_full2(input_x))) + output = torch.sigmoid(self.prednet_full3(input_x)) + + return output + + def apply_clipper(self): + clipper = NoneNegClipper() + self.prednet_full1.apply(clipper) + self.prednet_full2.apply(clipper) + self.prednet_full3.apply(clipper) + + def get_knowledge_status(self, stu_id): + stat_emb = torch.sigmoid(self.student_emb(stu_id)) + return stat_emb.data + + def 
get_exer_params(self, exer_id):
+        k_difficulty = torch.sigmoid(self.k_difficulty(exer_id))
+        e_discrimination = torch.sigmoid(self.e_discrimination(exer_id)) * 10
+        return k_difficulty.data, e_discrimination.data
+
+
+class NoneNegClipper(object):
+    def __init__(self):
+        super(NoneNegClipper, self).__init__()
+
+    def __call__(self, module):
+        if hasattr(module, 'weight'):
+            w = module.weight.data
+            a = torch.relu(torch.neg(w))
+            w.add_(a)
+
+
+class NCDModel(AbstractModel):
+
+    def __init__(self, **config):
+        super().__init__()
+        self.config = config
+        self.model = None
+
+    @property
+    def name(self):
+        return 'NeuralCD Model'
+
+    def init_model(self, data: Dataset):
+        self.model = NCD(data.num_students, data.num_questions, data.num_concepts)
+
+    def train(self, train_data: TrainDataset):
+        lr = self.config['learning_rate']
+        batch_size = self.config['batch_size']
+        epochs = self.config['num_epochs']
+        device = self.config['device']
+        self.model.to(device)
+        logging.info('train on {}'.format(device))
+
+        train_loader = data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
+        optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
+
+        for ep in range(1, epochs + 1):
+            loss = 0.0
+            log_step = 1
+            for cnt, (student_ids, question_ids, concepts_emb, labels) in enumerate(train_loader):
+                student_ids = student_ids.to(device)
+                question_ids = question_ids.to(device)
+                concepts_emb = concepts_emb.to(device)
+                labels = labels.to(device)
+                pred = self.model(student_ids, question_ids, concepts_emb)
+                bz_loss = self._loss_function(pred, labels)
+                optimizer.zero_grad()
+                bz_loss.backward()
+                optimizer.step()
+                self.model.apply_clipper()
+                loss += bz_loss.data.float()
+                if (cnt + 1) % log_step == 0:
+                    logging.info('Epoch [{}] Batch [{}]: loss={:.5f}'.format(ep, cnt, loss / (cnt + 1)))
+
+    def _loss_function(self, pred, real):
+        pred_0 = torch.ones(pred.size()).to(self.config['device']) - pred
+        output = torch.cat((pred_0, pred), 1)
+        criteria = nn.NLLLoss()
+        return criteria(torch.log(output), real)
+
+    def adaptest_save(self, path):
+        """
+        Save the model. 
Only save the parameters of questions (k_difficulty, e_discrimination)
+        """
+        model_dict = self.model.state_dict()
+        model_dict = {k: v for k, v in model_dict.items() if 'student' not in k}
+        torch.save(model_dict, path)
+
+    def adaptest_load(self, path):
+        """
+        Reload the saved model
+        """
+        self.model.load_state_dict(torch.load(path), strict=False)
+        self.model.to(self.config['device'])
+
+    def adaptest_update(self, adaptest_data: AdapTestDataset):
+        lr = self.config['learning_rate']
+        batch_size = self.config['batch_size']
+        epochs = self.config['num_epochs']
+        device = self.config['device']
+        optimizer = torch.optim.Adam(self.model.student_emb.parameters(), lr=lr)
+
+        tested_dataset = adaptest_data.get_tested_dataset(last=True)
+        dataloader = torch.utils.data.DataLoader(tested_dataset, batch_size=batch_size, shuffle=True)
+
+        for ep in range(1, epochs + 1):
+            loss = 0.0
+            log_steps = 100
+            for cnt, (student_ids, question_ids, concepts_emb, labels) in enumerate(dataloader):
+                student_ids = student_ids.to(device)
+                question_ids = question_ids.to(device)
+                labels = labels.to(device)
+                pred = self.model(student_ids, question_ids, concepts_emb.to(device))
+                bz_loss = self._loss_function(pred, labels)
+                optimizer.zero_grad()
+                bz_loss.backward()
+                optimizer.step()
+                self.model.apply_clipper()
+                loss += bz_loss.data.float()
+                # if cnt % log_steps == 0:
+                #     print('Epoch [{}] Batch [{}]: loss={:.3f}'.format(ep, cnt, loss / cnt))
+
+    def evaluate(self, adaptest_data: AdapTestDataset):
+        data = adaptest_data.data
+        concept_map = adaptest_data.concept_map
+        device = self.config['device']
+
+        real = []
+        pred = []
+        with torch.no_grad():
+            self.model.eval()
+            for sid in data:
+                student_ids = [sid] * len(data[sid])
+                question_ids = list(data[sid].keys())
+                concepts_embs = []
+                for qid in question_ids:
+                    concepts = concept_map[qid]
+                    concepts_emb = [0.] * adaptest_data.num_concepts
+                    for concept in concepts:
+                        concepts_emb[concept] = 1.0
+                    concepts_embs.append(concepts_emb)
+                real += [data[sid][qid] for qid in question_ids]
+                student_ids = torch.LongTensor(student_ids).to(device)
+                question_ids = torch.LongTensor(question_ids).to(device)
+                concepts_embs = torch.Tensor(concepts_embs).to(device)
+                output = self.model(student_ids, question_ids, concepts_embs).view(-1)
+                pred += output.tolist()
+            self.model.train()
+
+        coverages = []
+        for sid in data:
+            all_concepts = set()
+            tested_concepts = set()
+            for qid in data[sid]:
+                all_concepts.update(set(concept_map[qid]))
+            for qid in adaptest_data.tested[sid]:
+                tested_concepts.update(set(concept_map[qid]))
+            coverage = len(tested_concepts) / len(all_concepts)
+            coverages.append(coverage)
+        cov = sum(coverages) / len(coverages)
+
+        real = np.array(real)
+        pred = np.array(pred)
+        auc = roc_auc_score(real, pred)
+
+        return {
+            'auc': auc,
+            'cov': cov,
+        }
+
+    def expected_model_change(self, sid: int, qid: int, adaptest_data: AdapTestDataset):
+
+        epochs = self.config['num_epochs']
+        lr = self.config['learning_rate']
+        device = self.config['device']
+        optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
+
+        for name, param in self.model.named_parameters():
+            if 'student' not in name:
+                param.requires_grad = False
+
+        original_weights = self.model.student_emb.weight.data.clone()
+
+        student_id = torch.LongTensor([sid]).to(device)
+        question_id = torch.LongTensor([qid]).to(device)
+        concepts = adaptest_data.concept_map[qid]
+        concepts_emb = [0.] 
* adaptest_data.num_concepts
+        for concept in concepts:
+            concepts_emb[concept] = 1.0
+        concepts_emb = torch.Tensor([concepts_emb]).to(device)
+        correct = torch.LongTensor([1]).to(device)
+        wrong = torch.LongTensor([0]).to(device)
+
+        for ep in range(epochs):
+            optimizer.zero_grad()
+            pred = self.model(student_id, question_id, concepts_emb)
+            loss = self._loss_function(pred, correct)
+            loss.backward()
+            optimizer.step()
+            # self.model.apply_clipper()
+
+        pos_weights = self.model.student_emb.weight.data.clone()
+        self.model.student_emb.weight.data.copy_(original_weights)
+
+        for ep in range(epochs):
+            optimizer.zero_grad()
+            pred = self.model(student_id, question_id, concepts_emb)
+            loss = self._loss_function(pred, wrong)
+            loss.backward()
+            optimizer.step()
+            # self.model.apply_clipper()
+
+        neg_weights = self.model.student_emb.weight.data.clone()
+        self.model.student_emb.weight.data.copy_(original_weights)
+
+        for param in self.model.parameters():
+            param.requires_grad = True
+
+        pred = self.model(student_id, question_id, concepts_emb).item()
+        return pred * torch.norm(pos_weights - original_weights).item() + \
+            (1 - pred) * torch.norm(neg_weights - original_weights).item()
\ No newline at end of file
diff --git a/CAT/model/__init__.py b/CAT/model/__init__.py
index 79a423a..690356c 100644
--- a/CAT/model/__init__.py
+++ b/CAT/model/__init__.py
@@ -1,2 +1,3 @@
 from .abstract_model import AbstractModel
-from .IRT import IRTModel
\ No newline at end of file
+from .IRT import IRTModel
+from .NCD import NCDModel
\ No newline at end of file
diff --git a/CAT/strategy/KLI_strategy.py b/CAT/strategy/KLI_strategy.py
index 7420635..0a4d1b6 100644
--- a/CAT/strategy/KLI_strategy.py
+++ b/CAT/strategy/KLI_strategy.py
@@ -1,5 +1,4 @@
 import numpy as np
-import torch
 
 from CAT.strategy.abstract_strategy import AbstractStrategy
 from CAT.model import AbstractModel
@@ -23,9 +22,7 @@ def adaptest_select(self, model: AbstractModel, adaptest_data: AdapTestDataset):
         for sid in range(adaptest_data.num_students):
             theta = model.get_theta(sid)
             untested_questions = np.array(list(adaptest_data.untested[sid]))
-            untested_kli = []
-            for qid in untested_questions:
-                untested_kli.append(model.get_kli(sid, qid, n))
+            untested_kli = [model.get_kli(sid, qid, n) for qid in untested_questions]
             j = np.argmax(untested_kli)
             selection[sid] = untested_questions[j]
         return selection
\ No newline at end of file
diff --git a/CAT/strategy/MAAT_strategy.py b/CAT/strategy/MAAT_strategy.py
new file mode 100644
index 0000000..8f635b9
--- /dev/null
+++ b/CAT/strategy/MAAT_strategy.py
@@ -0,0 +1,38 @@
+import numpy as np
+
+from CAT.strategy.abstract_strategy import AbstractStrategy
+from CAT.model import AbstractModel
+from CAT.dataset import AdapTestDataset
+
+
+class MAATStrategy(AbstractStrategy):
+
+    def __init__(self, n_candidates=10):
+        super().__init__()
+        self.n_candidates = n_candidates
+
+    @property
+    def name(self):
+        return 'Model Agnostic Adaptive Testing'
+
+    def _compute_coverage_gain(self, sid, qid, adaptest_data: AdapTestDataset):
+        concept_cnt = {}
+        for q in adaptest_data.data[sid]:
+            for c in adaptest_data.concept_map[q]:
+                concept_cnt[c] = 0
+        for q in list(adaptest_data.tested[sid]) + [qid]:
+            for c in adaptest_data.concept_map[q]:
+                concept_cnt[c] += 1
+        return (sum(cnt / (cnt + 1) for cnt in concept_cnt.values())
+                / len(concept_cnt))
+
+    def adaptest_select(self, model: AbstractModel, adaptest_data: AdapTestDataset):
+        assert hasattr(model, 'expected_model_change'), \
+            'the model must implement the 
expected_model_change method' + selection = {} + for sid in range(adaptest_data.num_students): + untested_questions = np.array(list(adaptest_data.untested[sid])) + emc_arr = [model.expected_model_change(sid, qid, adaptest_data) for qid in untested_questions] + candidates = untested_questions[np.argsort(emc_arr)[::-1][:self.n_candidates]] + selection[sid] = max(candidates, key=lambda qid: self._compute_coverage_gain(sid, qid, adaptest_data)) + return selection \ No newline at end of file diff --git a/CAT/strategy/MFI_strategy.py b/CAT/strategy/MFI_strategy.py index f4fde54..4af35dc 100644 --- a/CAT/strategy/MFI_strategy.py +++ b/CAT/strategy/MFI_strategy.py @@ -1,5 +1,4 @@ import numpy as np -import torch from CAT.strategy.abstract_strategy import AbstractStrategy from CAT.model import AbstractModel diff --git a/CAT/strategy/__init__.py b/CAT/strategy/__init__.py index 6263c57..3eeb5a2 100644 --- a/CAT/strategy/__init__.py +++ b/CAT/strategy/__init__.py @@ -1,4 +1,5 @@ from .abstract_strategy import AbstractStrategy from .random_strategy import RandomStrategy from .MFI_strategy import MFIStrategy -from .KLI_strategy import KLIStrategy \ No newline at end of file +from .KLI_strategy import KLIStrategy +from .MAAT_strategy import MAATStrategy \ No newline at end of file
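
-- 
Reviewer note (appended after the diff, not part of the patch): the new pieces
compose as follows. expected_model_change scores an untested question q by
EMC(q) = p * ||W_pos - W|| + (1 - p) * ||W_neg - W||, where W is the student's
embedding, W_pos / W_neg are the embeddings refit on a hypothetical correct /
wrong response, and p is the predicted probability of answering correctly;
MAATStrategy then reranks the top n_candidates questions by knowledge-concept
coverage gain. Below is a minimal driver sketch under stated assumptions: the
dataset objects and the apply_selection call are guesses at the surrounding
repo API (untouched by this patch), while the imports and config keys are
exactly the ones this patch exports and reads.

    from CAT.model import NCDModel         # exported by CAT/model/__init__.py above
    from CAT.strategy import MAATStrategy  # exported by CAT/strategy/__init__.py above

    config = {
        'learning_rate': 0.002,  # the four keys NCDModel.train/adaptest_update read
        'batch_size': 32,
        'num_epochs': 2,
        'device': 'cpu',
    }

    model = NCDModel(**config)
    model.init_model(test_data)           # test_data: an AdapTestDataset built elsewhere (assumed)
    model.adaptest_load('ckpt/ncd.pt')    # hypothetical checkpoint path
    strategy = MAATStrategy(n_candidates=10)

    for step in range(5):                 # e.g. a 5-item adaptive test
        selection = strategy.adaptest_select(model, test_data)
        for sid, qid in selection.items():
            test_data.apply_selection(sid, qid)  # hypothetical name for recording an administered item
        model.adaptest_update(test_data)
        print(model.evaluate(test_data))  # returns {'auc': ..., 'cov': ...}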