
Commit

clean code
Wang committed Mar 21, 2024
1 parent 415c255 commit 35053ef
Showing 8 changed files with 5 additions and 3,206 deletions.
51 changes: 0 additions & 51 deletions CIFAR10_LMC_search_batch_lam1e6.log

This file was deleted.

135 changes: 0 additions & 135 deletions CIFAR10_SGD_compare_baseline_nonconvergent.log

This file was deleted.

1,313 changes: 0 additions & 1,313 deletions CIFAR10_SGD_paint_unlearning_sigma_001.log

This file was deleted.

51 changes: 0 additions & 51 deletions MNIST_LMC_search_batch_lam1e6.log

This file was deleted.

135 changes: 0 additions & 135 deletions MNIST_SGD_compare_baseline_nonconvergent.log

This file was deleted.

1,313 changes: 0 additions & 1,313 deletions MNIST_SGD_paint_unlearning_sigma_001.log

This file was deleted.

38 changes: 0 additions & 38 deletions main.sh
@@ -1,51 +1,13 @@
# The scripts below are for SGD painting

# search burn in
#nohup python -u main_sgd.py --lam 1e-6 --dataset MNIST --search_burnin 1 --gpu 6 >./MNIST_LMC_search_burnin_lam1e6.log 2>&1 </dev/null &
#nohup python -u main_sgd.py --lam 1e-6 --dataset CIFAR10 --search_burnin 1 --gpu 7 >./CIFAR10_LMC_search_burnin_lam1e6.log 2>&1 </dev/null &
#nohup python -u main_sgd.py --lam 1e-7 --dataset MNIST_multiclass --search_burnin 1 --gpu 6 >./MNIST_multiclass_search_burnin_lam1e7.log 2>&1 </dev/null &

# search batch size and utility
nohup python -u main_sgd.py --lam 1e-6 --sigma 0.03 --dataset MNIST --projection 0 --search_batch 1 --gpu 0 >./MNIST_LMC_search_batch_lam1e6.log 2>&1 </dev/null &
nohup python -u main_sgd.py --lam 1e-6 --sigma 0.03 --dataset CIFAR10 --projection 0 --search_batch 1 --gpu 1 >./CIFAR10_LMC_search_batch_lam1e6.log 2>&1 </dev/null &

# compare with LMC and D2D baseline nonconvergent
#nohup python -u main_sgd.py --lam 1e-6 --dataset MNIST --projection 0 --compare_baseline_nonconvergent 1 --gpu 6 >./MNIST_SGD_compare_baseline_nonconvergent.log 2>&1 </dev/null &
#nohup python -u main_sgd.py --lam 1e-6 --dataset CIFAR10 --projection 0 --compare_baseline_nonconvergent 1 --gpu 7 >./CIFAR10_SGD_compare_baseline_nonconvergent.log 2>&1 </dev/null &

# compare with LMC and D2D baseline
#nohup python -u main_sgd.py --lam 1e-6 --dataset MNIST --projection 0 --compare_baseline 1 --gpu 4 >./MNIST_SGD_compare_baseline.log 2>&1 </dev/null &
#nohup python -u main_sgd.py --lam 1e-6 --dataset CIFAR10 --projection 0 --compare_baseline 1 --gpu 5 >./CIFAR10_SGD_compare_baseline.log 2>&1 </dev/null &

# compare sequential unlearning removal
#nohup python -u main_sgd.py --lam 1e-6 --dataset MNIST --projection 0 --sequential 1 --gpu 6 >./MNIST_SGD_sequential_32_64.log 2>&1 </dev/null &
#nohup python -u main_sgd.py --lam 1e-6 --dataset CIFAR10 --projection 0 --sequential 1 --gpu 7 >./CIFAR10_SGD_sequential_32_64.log 2>&1 </dev/null &

# paint unlearning utility - sigma figure
#nohup python -u main_sgd.py --lam 1e-6 --dataset MNIST --projection 0 --paint_unlearning_sigma 1 --gpu 0 >./MNIST_SGD_paint_unlearning_sigma_001.log 2>&1 </dev/null &
#nohup python -u main_sgd.py --lam 1e-6 --dataset CIFAR10 --projection 0 --paint_unlearning_sigma 1 --gpu 1 >./CIFAR10_SGD_paint_unlearning_sigma_001.log 2>&1 </dev/null &


# paint utility - epsilon figure
#nohup python -u main_sgd.py --lam 1e-6 --dataset MNIST --paint_utility_epsilon 1 --gpu 1 >./MNIST_SGD_paint_utility_epsilon.log 2>&1 </dev/null &
#nohup python -u main_sgd.py --lam 1e-6 --dataset CIFAR10 --paint_utility_epsilon 1 --gpu 6 >./CIFAR10_SGD_paint_utility_epsilon.log 2>&1 </dev/null &

# paint utility - s figure
#nohup python -u main_lmc.py --lam 1e-6 --sigma 0.03 --dataset MNIST --paint_utility_s 1 --gpu 1 >./MNIST_LMC_paint_utility_s.log 2>&1 </dev/null &
#nohup python -u main_lmc.py --lam 1e-6 --sigma 0.03 --dataset CIFAR10 --paint_utility_s 1 --gpu 6 >./CIFAR10_LMC_paint_utility_s.log 2>&1 </dev/null &





# how much retrain
#nohup python -u main_lmc.py --lam 1e-6 --dataset MNIST --how_much_retrain 1 --gpu 6 >./MNIST_how_much_retrain.log 2>&1 </dev/null &
#nohup python -u main_lmc.py --lam 1e-6 --dataset CIFAR10 --how_much_retrain 1 --gpu 7 >./CIFAR10_how_much_retrain.log 2>&1 </dev/null &


# calculate unlearning step between our bound and the baseline bound
#nohup python -u main_lmc.py --lam 1e-6 --dataset MNIST --compare_k 1 --gpu 2 >./MNIST_LMC_compare_k.log 2>&1 </dev/null &

# find the best batch size b per gradient for sgd
#nohup python -u main_lmc.py --lam 1e-6 --dataset MNIST --find_best_batch 1 --gpu 6 >./MNIST_LMC_find_best_batch.log 2>&1 </dev/null &
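
For orientation, the following is a minimal, hypothetical sketch (not code from this repository) of how the 0/1 flags passed by the commands above select an experiment branch inside main_sgd.py. The flag names are taken from the argparse definitions later in this diff; the dispatcher body and its print statements are placeholders.

# Hypothetical sketch of the flag-to-experiment dispatch that the nohup
# commands above rely on. Flag names match the argparse definitions in
# main_sgd.py; the print statements stand in for the real experiment code.
import argparse

def build_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='MNIST')
    parser.add_argument('--lam', type=float, default=1e-6)
    parser.add_argument('--sigma', type=float, default=0.03)
    parser.add_argument('--projection', type=int, default=0)
    parser.add_argument('--gpu', type=int, default=0)
    # one 0/1 switch per experiment, as in the commands above
    parser.add_argument('--search_batch', type=int, default=0)
    parser.add_argument('--compare_baseline_nonconvergent', type=int, default=0)
    parser.add_argument('--paint_unlearning_sigma', type=int, default=0)
    parser.add_argument('--sequential', type=int, default=0)
    return parser

if __name__ == '__main__':
    args = build_parser().parse_args()
    if args.search_batch:
        print('batch-size / utility search')
    elif args.compare_baseline_nonconvergent:
        print('non-convergent baseline comparison')
    elif args.paint_unlearning_sigma:
        print('unlearning utility vs. sigma figure')
    elif args.sequential:
        print('sequential unlearning experiment')
    else:
        print('check!')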

175 changes: 5 additions & 170 deletions main_sgd.py
@@ -2,23 +2,14 @@
import time
import numpy as np
import argparse
import os
from sklearn.linear_model import LogisticRegression
from prettytable import PrettyTable
import matplotlib.pyplot as plt
from tqdm import tqdm
import math
from scipy.optimize import minimize_scalar
import sympy as sp
import re

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

from utils import load_features, generate_gaussian, plot_2dgaussian, plot_w_2dgaussian, create_nested_folder
from utils import load_features, create_nested_folder
from sgd import stochastic_gradient_descent_algorithm


@@ -28,7 +19,6 @@ def __init__(self, args):
self.args = args
if args.dataset == 'MNIST' or args.dataset == 'MNIST_multiclass':
self.X_train, self.X_test, self.y_train, self.y_train_onehot, self.y_test = load_features(args)

self.dim_w = 784
if args.dataset == 'MNIST':
self.num_class = 2
@@ -69,7 +59,6 @@ def get_metadata(self):
self.M = self.args.M
print('M lipschitz constant:'+str(self.M))
# calculate step size
#max_eta = min( 1 / self.m, 2 / self.L)
self.eta = 1 / self.L
print('step size eta:'+str(self.eta))
# calculate RDP delta
@@ -91,61 +80,7 @@
print('have shuffled batch idx')

def train(self):
if self.args.search_burnin:
# this is for full-batch
if self.args.dataset == 'MNIST':
sigma_list = [0.03]
burn_in_list = [1, 10, 20, 50, 100, 150, 200, 300, 500, 750, 1000]
elif self.args.dataset == 'MNIST_multiclass':
sigma_list = [0.005, 0.01]
burn_in_list = [1, 10, 20, 50, 100, 150, 200]
elif self.args.dataset == 'CIFAR10':
sigma_list = [0.03]
burn_in_list = [1, 10, 20, 50, 100, 150, 200, 300, 500, 750, 1000]
_ = self.search_burnin(sigma_list, burn_in_list)
elif self.args.search_batch:
batch_list = [1, 32, 128, 512]
burn_in_list = [1, 2, 3, 4, 5]
_ = self.search_batch(burn_in_list, batch_list)
elif self.args.compare_baseline:
# compare with the baseline (remove 1 data see sigma and utility)
epsilon_list = [0.05, 0.1, 0.5, 1, 2, 5]
batch_list = [128, 256, 0]
burn_in_list = [100, 150, 1000]
create_nested_folder('./result/SGD/'+str(self.args.dataset)+'/baseline/')
X_train_removed, y_train_removed = self.get_removed_data(1)
target_k_list = [1]
for batch_size, burn_in in zip(batch_list, burn_in_list):
print('working on batch:'+str(batch_size))
# for each type of batch size
for target_k in target_k_list:
# for each target k
sigma_list = []
for target_epsilon in epsilon_list:
sigma_list.append(self.search_alpha(target_k, target_epsilon, batch_size))
print('batch: '+str(batch_size)+'target k:'+str(target_k) + ' sigma: '+str(sigma_list))
# if the search returned None, fall back to this default value
sigma_list = [x if x is not None else 7.450581596923812e-9 for x in sigma_list]
# know the required k, and epsilon, sigma
for epsilon, sigma in zip(epsilon_list, sigma_list):
print('working on epsilon:'+str(epsilon))
create_nested_folder('./result/SGD/'+str(self.args.dataset)+'/baseline/'+str(target_k)+'/')
sgd_learn_scratch_acc, mean_time, sgd_w_list = self.get_mean_performance(self.X_train, self.y_train, burn_in, sigma, None,
self.projection, batch_size, self.batch_idx, len_list = 1, return_w = True)
print('SGD learn scratch acc: ' + str(np.mean(sgd_learn_scratch_acc)))
print('SGD learn scratch acc std: ' + str(np.std(sgd_learn_scratch_acc)))
np.save('./result/SGD/'+str(self.args.dataset)+'/baseline/'+str(target_k)+'/sgd_acc_learn_scratch_b'+str(batch_size)+'_eps'+str(epsilon)+'.npy', sgd_learn_scratch_acc)
sgd_unlearn_scratch_acc, mean_time = self.get_mean_performance(X_train_removed, y_train_removed, burn_in, sigma, None,
self.projection, batch_size, self.batch_idx, len_list = 1)
print('SGD unlearn scratch acc: ' + str(np.mean(sgd_unlearn_scratch_acc)))
print('SGD unlearn scratch acc std: ' + str(np.std(sgd_unlearn_scratch_acc)))
np.save('./result/SGD/'+str(self.args.dataset)+'/baseline/'+str(target_k)+'/sgd_acc_unlearn_scratch_b'+str(batch_size)+'_eps'+str(epsilon)+'.npy', sgd_unlearn_scratch_acc)
sgd_unlearn_finetune_acc, mean_time = self.get_mean_performance(X_train_removed, y_train_removed, target_k_list[0], sigma, sgd_w_list,
self.projection, batch_size, self.batch_idx, len_list = 1)
print('SGD unlearn finetune acc: ' + str(np.mean(sgd_unlearn_finetune_acc)))
print('SGD unlearn finetune acc std: ' + str(np.std(sgd_unlearn_finetune_acc)))
np.save('./result/SGD/'+str(self.args.dataset)+'/baseline/'+str(target_k)+'/sgd_acc_unlearn_finetune_b'+str(batch_size)+'_eps'+str(epsilon)+'.npy', sgd_unlearn_finetune_acc)
elif self.args.compare_baseline_nonconvergent:
if self.args.compare_baseline_nonconvergent:
# compare with the baseline nonconvergent (remove 1 data see sigma and utility)
epsilon_list = [0.05, 0.1, 0.5, 1, 2, 5]
batch_list = [128, 0]
@@ -187,10 +122,8 @@ def train(self):
target_epsilon = 1
create_nested_folder('./result/SGD/'+str(self.args.dataset)+'/sequential/')
sigma = 0.03
#batch_list = [128, 256, 512, 0]
#burn_in_list = [100, 150, 200, 1000]
batch_list = [32, 64]
burn_in_list = [50, 75]
batch_list = [128, 256, 512, 0]
burn_in_list = [100, 150, 200, 1000]
for batch_size, burn_in in zip(batch_list, burn_in_list):
print('working on batch size '+str(batch_size))
self.k_list = np.zeros(num_step+1).astype(int)
@@ -274,45 +207,6 @@ def train(self):
print('SGD unlearn finetune acc: ' + str(np.mean(sgd_unlearn_finetune_acc)))
print('SGD unlearn finetune acc std: ' + str(np.std(sgd_unlearn_finetune_acc)))
np.save('./result/SGD/'+str(self.args.dataset)+'/paint_unlearning_sigma/sgd_acc_finetune_b'+str(batch_size)+'_sigma'+str(sigma)+'_step'+str(sgd_step+1)+'.npy', sgd_unlearn_finetune_acc)

elif self.args.paint_utility_epsilon:
epsilon_list = [0.1, 0.5, 1, 2, 5]
batch_size_list = [32, 64, 128, 256, 0]
num_remove_list = [1, 50, 100]
create_nested_folder('./result/SGD/'+str(self.args.dataset)+'/paint_utility_epsilon/')
for batch_size in batch_size_list:
accuracy_scratch_D, mean_time, w_list = self.get_mean_performance(self.X_train, self.y_train, self.args.burn_in, self.args.sigma,
None, self.args.projection, batch_size, self.batch_idx,
len_list = 1, return_w = True, )
np.save('./result/LMC/'+str(self.args.dataset)+'/paint_utility_epsilon/w_from_scratch_b'+str(batch_size)+'.npy', w_list)
np.save('./result/LMC/'+str(self.args.dataset)+'/paint_utility_epsilon/acc_scratch_D_b'+str(batch_size)+'.npy', accuracy_scratch_D)
# calculate K
K_dict, _ = self.search_finetune_step(self.args.sigma, epsilon_list, batch_size_list)
np.save('./result/LMC/'+str(self.args.dataset)+'/paint_utility_epsilon/K_list.npy', K_dict)
for remove_idx, num_remove in enumerate(num_remove_list):
K_list = []
for epsilon in epsilon_list:
X_train_removed, y_train_removed = self.get_removed_data(num_remove_list[remove_idx])
accuracy_finetune, mean_time = self.get_mean_performance(X_train_removed, y_train_removed, K_dict[num_remove_list[remove_idx]][epsilon], self.args.sigma, w_list)
create_nested_folder('./result/LMC/'+str(self.args.dataset)+'/paint_utility_epsilon/'+str(num_remove)+'/')
np.save('./result/LMC/'+str(self.args.dataset)+'/paint_utility_epsilon/'+str(num_remove)+'/acc_finetune_epsilon'+str(epsilon)+'.npy', accuracy_finetune)
K_list.append(K_dict[num_remove_list[0]][epsilon])

elif self.args.how_much_retrain == 1:
sigma_list = [0.05, 0.1, 0.2, 0.5, 1]
if self.args.dataset == 'MNIST':
K_list = [1301, 1031, 751, 351, 1]
elif self.args.dataset =='CIFAR10':
K_list = [1541, 1251, 951, 521, 151]
num_remove_list = [100]
X_train_removed, y_train_removed = self.get_removed_data(num_remove_list[0])
create_nested_folder('./result/LMC/'+str(self.args.dataset)+'/retrain/')
for sigma_idx, sigma in enumerate(sigma_list):
accuracy_scratch_D, mean_time = self.get_mean_performance(X_train_removed, y_train_removed, K_list[sigma_idx], sigma, None, len_list = 1)
np.save('./result/LMC/'+str(self.args.dataset)+'/retrain/'+str(sigma)+'_acc_scratch_D.npy', accuracy_scratch_D)
print('sigma:'+str(sigma))
print('mean acc:'+str(np.mean(accuracy_scratch_D)))
print('std acc:'+str(np.std(accuracy_scratch_D)))
else:
print('check!')

@@ -498,60 +392,6 @@ def get_mean_performance(self, X, y, step, sigma, w_list, projection, batch_size
return trial_list, mean_time, new_w_list
else:
return trial_list, mean_time

def search_burnin(self, sigma_list, burn_in_list, fig_path = '_search_burnin.pdf'):
acc_dict = {}
for sigma in sigma_list:
acc_list = []
this_w_list = None
for idx in range(len(burn_in_list)):
if idx == 0:
step = burn_in_list[idx]
else:
step = burn_in_list[idx] - burn_in_list[idx - 1]
accuracy, _, new_w_list = self.get_mean_performance(self.X_train, self.y_train, step, sigma, this_w_list, return_w = True,
projection = self.projection, batch_size = self.batch_size, batch_idx = self.batch_idx)
this_w_list = new_w_list
acc_list.append(np.mean(accuracy))
print(acc_list)
plt.plot(burn_in_list, acc_list, label='sigma :'+str(sigma))
acc_dict[sigma] = acc_list
for i in range(len(burn_in_list)):
plt.text(burn_in_list[i], acc_list[i], f'{acc_list[i]:.3f}', ha='right', va='bottom')
plt.legend()
plt.title(str(self.args.dataset)+'search burn in')
plt.xlabel('burn in steps')
plt.ylabel('accuracy')
plt.savefig(str(self.args.dataset)+fig_path)
plt.clf()
return acc_dict

def search_batch(self, burn_in_list, batch_list, fig_path = '_search_batch.pdf'):
acc_dict = {}
for batch in batch_list:
acc_list = []
this_w_list = None
for idx in range(len(burn_in_list)):
if idx == 0:
step = burn_in_list[idx]
else:
step = burn_in_list[idx] - burn_in_list[idx - 1]
accuracy, _, new_w_list = self.get_mean_performance(self.X_train, self.y_train, step, self.args.sigma, this_w_list, return_w = True,
projection = self.projection, batch_size = batch, batch_idx = self.batch_idx)
this_w_list = new_w_list
acc_list.append(np.mean(accuracy))
print(acc_list)
plt.plot(burn_in_list, acc_list, label='batch:'+str(batch))
acc_dict[batch] = acc_list
for i in range(len(burn_in_list)):
plt.text(burn_in_list[i], acc_list[i], f'{acc_list[i]:.3f}', ha='right', va='bottom')
plt.legend()
plt.title(str(self.args.dataset)+' search burning in steps')
plt.xlabel('burn in steps')
plt.ylabel('accuracy')
plt.savefig(str(self.args.dataset)+str(batch)+fig_path)
plt.clf()
return acc_dict

def test_accuracy(self, w_list):
w = torch.tensor(w_list[0])
@@ -593,19 +433,14 @@ def main():
parser.add_argument('--sigma', type = float, default = 0.03, help = 'the parameter sigma')
parser.add_argument('--burn_in', type = int, default = 1000, help = 'burn in step number of LMC')

parser.add_argument('--search_burnin', type = int, default = 0, help = 'whether grid search to paint for burn-in')
parser.add_argument('--search_batch', type = int, default = 0, help = 'paint the batch size utility - s figure')
parser.add_argument('--paint_utility_epsilon', type = int, default = 0, help = 'paint utility - epsilon figure')
parser.add_argument('--paint_unlearning_sigma', type = int, default = 0, help = 'paint unlearning utility - sigma figure')
parser.add_argument('--how_much_retrain', type = int, default = 0, help = 'supplementary for unlearning sigma')
parser.add_argument('--compare_baseline', type = int, default = 0, help = 'compare with baseline')
parser.add_argument('--compare_baseline_nonconvergent', type = int, default = 0, help = 'compare with the baselines with nonconvergent calculation')
parser.add_argument('--sequential', type = int, default = 0, help = 'sequential unlearning')
args = parser.parse_args()
print(args)
runner = Runner(args)
runner.get_metadata()
#import pdb; pdb.set_trace()
runner.train()

if __name__ == '__main__':
main()
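
The deleted search_burnin and search_batch helpers above share one technique worth noting: rather than re-running the sampler from scratch for every burn-in length, they advance the current iterates by only the incremental number of steps (step = burn_in_list[idx] - burn_in_list[idx - 1]) and evaluate along the way. A minimal sketch of that warm-start pattern, assuming generic run_steps and evaluate callables (hypothetical names, not part of this codebase):

# Hypothetical sketch of the incremental warm-start evaluation used by the
# deleted search_burnin/search_batch helpers: each extra sampler step is run
# only once, and accuracy is recorded at every requested burn-in length.
import numpy as np

def evaluate_burn_in_curve(run_steps, evaluate, burn_in_list, w_init):
    # run_steps(w, n) is assumed to advance the iterate w by n sampler steps;
    # evaluate(w) is assumed to return a scalar accuracy;
    # burn_in_list must be sorted in increasing order.
    acc_list = []
    w = w_init
    prev = 0
    for burn_in in burn_in_list:
        w = run_steps(w, burn_in - prev)  # only the incremental steps
        prev = burn_in
        acc_list.append(evaluate(w))
    return np.array(acc_list)

With an increasing burn_in_list this yields the accuracy-vs-burn-in curve in a single pass over the total number of steps.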
