
Commit

clean code
Wang committed Mar 21, 2024
1 parent 415c255 commit 35053ef
Showing 8 changed files with 5 additions and 3,206 deletions.
51 changes: 0 additions & 51 deletions CIFAR10_LMC_search_batch_lam1e6.log

This file was deleted.

135 changes: 0 additions & 135 deletions CIFAR10_SGD_compare_baseline_nonconvergent.log

This file was deleted.

1,313 changes: 0 additions & 1,313 deletions CIFAR10_SGD_paint_unlearning_sigma_001.log

This file was deleted.

51 changes: 0 additions & 51 deletions MNIST_LMC_search_batch_lam1e6.log

This file was deleted.

135 changes: 0 additions & 135 deletions MNIST_SGD_compare_baseline_nonconvergent.log

This file was deleted.

1,313 changes: 0 additions & 1,313 deletions MNIST_SGD_paint_unlearning_sigma_001.log

This file was deleted.

38 changes: 0 additions & 38 deletions main.sh
@@ -1,51 +1,13 @@
# The scripts below are for SGD painting

# search burn in
#nohup python -u main_sgd.py --lam 1e-6 --dataset MNIST --search_burnin 1 --gpu 6 >./MNIST_LMC_search_burnin_lam1e6.log 2>&1 </dev/null &
#nohup python -u main_sgd.py --lam 1e-6 --dataset CIFAR10 --search_burnin 1 --gpu 7 >./CIFAR10_LMC_search_burnin_lam1e6.log 2>&1 </dev/null &
#nohup python -u main_sgd.py --lam 1e-7 --dataset MNIST_multiclass --search_burnin 1 --gpu 6 >./MNIST_multiclass_search_burnin_lam1e7.log 2>&1 </dev/null &

# search batch size and utility
nohup python -u main_sgd.py --lam 1e-6 --sigma 0.03 --dataset MNIST --projection 0 --search_batch 1 --gpu 0 >./MNIST_LMC_search_batch_lam1e6.log 2>&1 </dev/null &
nohup python -u main_sgd.py --lam 1e-6 --sigma 0.03 --dataset CIFAR10 --projection 0 --search_batch 1 --gpu 1 >./CIFAR10_LMC_search_batch_lam1e6.log 2>&1 </dev/null &

# compare with LMC and D2D baseline nonconvergent
#nohup python -u main_sgd.py --lam 1e-6 --dataset MNIST --projection 0 --compare_baseline_nonconvergent 1 --gpu 6 >./MNIST_SGD_compare_baseline_nonconvergent.log 2>&1 </dev/null &
#nohup python -u main_sgd.py --lam 1e-6 --dataset CIFAR10 --projection 0 --compare_baseline_nonconvergent 1 --gpu 7 >./CIFAR10_SGD_compare_baseline_nonconvergent.log 2>&1 </dev/null &

# compare with LMC and D2D baseline
#nohup python -u main_sgd.py --lam 1e-6 --dataset MNIST --projection 0 --compare_baseline 1 --gpu 4 >./MNIST_SGD_compare_baseline.log 2>&1 </dev/null &
#nohup python -u main_sgd.py --lam 1e-6 --dataset CIFAR10 --projection 0 --compare_baseline 1 --gpu 5 >./CIFAR10_SGD_compare_baseline.log 2>&1 </dev/null &

# compare sequential unlearning removal
#nohup python -u main_sgd.py --lam 1e-6 --dataset MNIST --projection 0 --sequential 1 --gpu 6 >./MNIST_SGD_sequential_32_64.log 2>&1 </dev/null &
#nohup python -u main_sgd.py --lam 1e-6 --dataset CIFAR10 --projection 0 --sequential 1 --gpu 7 >./CIFAR10_SGD_sequential_32_64.log 2>&1 </dev/null &

# paint unlearning utility - sigma figure
#nohup python -u main_sgd.py --lam 1e-6 --dataset MNIST --projection 0 --paint_unlearning_sigma 1 --gpu 0 >./MNIST_SGD_paint_unlearning_sigma_001.log 2>&1 </dev/null &
#nohup python -u main_sgd.py --lam 1e-6 --dataset CIFAR10 --projection 0 --paint_unlearning_sigma 1 --gpu 1 >./CIFAR10_SGD_paint_unlearning_sigma_001.log 2>&1 </dev/null &


# paint utility - epsilon figure
#nohup python -u main_sgd.py --lam 1e-6 --dataset MNIST --paint_utility_epsilon 1 --gpu 1 >./MNIST_SGD_paint_utility_epsilon.log 2>&1 </dev/null &
#nohup python -u main_sgd.py --lam 1e-6 --dataset CIFAR10 --paint_utility_epsilon 1 --gpu 6 >./CIFAR10_SGD_paint_utility_epsilon.log 2>&1 </dev/null &

# paint utility - s figure
#nohup python -u main_lmc.py --lam 1e-6 --sigma 0.03 --dataset MNIST --paint_utility_s 1 --gpu 1 >./MNIST_LMC_paint_utility_s.log 2>&1 </dev/null &
#nohup python -u main_lmc.py --lam 1e-6 --sigma 0.03 --dataset CIFAR10 --paint_utility_s 1 --gpu 6 >./CIFAR10_LMC_paint_utility_s.log 2>&1 </dev/null &





# how much retrain
#nohup python -u main_lmc.py --lam 1e-6 --dataset MNIST --how_much_retrain 1 --gpu 6 >./MNIST_how_much_retrain.log 2>&1 </dev/null &
#nohup python -u main_lmc.py --lam 1e-6 --dataset CIFAR10 --how_much_retrain 1 --gpu 7 >./CIFAR10_how_much_retrain.log 2>&1 </dev/null &


# calculate unlearning step between our bound and the baseline bound
#nohup python -u main_lmc.py --lam 1e-6 --dataset MNIST --compare_k 1 --gpu 2 >./MNIST_LMC_compare_k.log 2>&1 </dev/null &

# find the best batch size b per gradient for sgd
#nohup python -u main_lmc.py --lam 1e-6 --dataset MNIST --find_best_batch 1 --gpu 6 >./MNIST_LMC_find_best_batch.log 2>&1 </dev/null &
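
For orientation, the following is a minimal, hypothetical sketch (not code from this repository) of how the 0/1 flags passed by the commands above select an experiment branch inside main_sgd.py. The flag names are taken from the argparse definitions later in this diff; the dispatcher body and its print statements are placeholders.

# Hypothetical sketch of the flag-to-experiment dispatch that the nohup
# commands above rely on. Flag names match the argparse definitions in
# main_sgd.py; the print statements stand in for the real experiment code.
import argparse

def build_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='MNIST')
    parser.add_argument('--lam', type=float, default=1e-6)
    parser.add_argument('--sigma', type=float, default=0.03)
    parser.add_argument('--projection', type=int, default=0)
    parser.add_argument('--gpu', type=int, default=0)
    # one 0/1 switch per experiment, as in the commands above
    parser.add_argument('--search_batch', type=int, default=0)
    parser.add_argument('--compare_baseline_nonconvergent', type=int, default=0)
    parser.add_argument('--paint_unlearning_sigma', type=int, default=0)
    parser.add_argument('--sequential', type=int, default=0)
    return parser

if __name__ == '__main__':
    args = build_parser().parse_args()
    if args.search_batch:
        print('batch-size / utility search')
    elif args.compare_baseline_nonconvergent:
        print('non-convergent baseline comparison')
    elif args.paint_unlearning_sigma:
        print('unlearning utility vs. sigma figure')
    elif args.sequential:
        print('sequential unlearning experiment')
    else:
        print('check!')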

175 changes: 5 additions & 170 deletions main_sgd.py
@@ -2,23 +2,14 @@
import time
import numpy as np
import argparse
import os
from sklearn.linear_model import LogisticRegression
from prettytable import PrettyTable
import matplotlib.pyplot as plt
from tqdm import tqdm
import math
from scipy.optimize import minimize_scalar
import sympy as sp
import re

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

from utils import load_features, generate_gaussian, plot_2dgaussian, plot_w_2dgaussian, create_nested_folder
from utils import load_features, create_nested_folder
from sgd import stochastic_gradient_descent_algorithm


@@ -28,7 +19,6 @@ def __init__(self, args):
self.args = args
if args.dataset == 'MNIST' or args.dataset == 'MNIST_multiclass':
self.X_train, self.X_test, self.y_train, self.y_train_onehot, self.y_test = load_features(args)

self.dim_w = 784
if args.dataset == 'MNIST':
self.num_class = 2
@@ -69,7 +59,6 @@ def get_metadata(self):
self.M = self.args.M
print('M lipschitz constant:'+str(self.M))
# calculate step size
#max_eta = min( 1 / self.m, 2 / self.L)
self.eta = 1 / self.L
print('step size eta:'+str(self.eta))
# calculate RDP delta
@@ -91,61 +80,7 @@
print('have shuffled batch idx')

def train(self):
if self.args.search_burnin:
# this is for full-batch
if self.args.dataset == 'MNIST':
sigma_list = [0.03]
burn_in_list = [1, 10, 20, 50, 100, 150, 200, 300, 500, 750, 1000]
elif self.args.dataset == 'MNIST_multiclass':
sigma_list = [0.005, 0.01]
burn_in_list = [1, 10, 20, 50, 100, 150, 200]
elif self.args.dataset == 'CIFAR10':
sigma_list = [0.03]
burn_in_list = [1, 10, 20, 50, 100, 150, 200, 300, 500, 750, 1000]
_ = self.search_burnin(sigma_list, burn_in_list)
elif self.args.search_batch:
batch_list = [1, 32, 128, 512]
burn_in_list = [1, 2, 3, 4, 5]
_ = self.search_batch(burn_in_list, batch_list)
elif self.args.compare_baseline:
# compare with the baseline (remove 1 data see sigma and utility)
epsilon_list = [0.05, 0.1, 0.5, 1, 2, 5]
batch_list = [128, 256, 0]
burn_in_list = [100, 150, 1000]
create_nested_folder('./result/SGD/'+str(self.args.dataset)+'/baseline/')
X_train_removed, y_train_removed = self.get_removed_data(1)
target_k_list = [1]
for batch_size, burn_in in zip(batch_list, burn_in_list):
print('working on batch:'+str(batch_size))
# for each type of batch size
for target_k in target_k_list:
# for each target k
sigma_list = []
for target_epsilon in epsilon_list:
sigma_list.append(self.search_alpha(target_k, target_epsilon, batch_size))
print('batch: '+str(batch_size)+'target k:'+str(target_k) + ' sigma: '+str(sigma_list))
# if the search returned None, fall back to this default value
sigma_list = [x if x is not None else 7.450581596923812e-9 for x in sigma_list]
# know the required k, and epsilon, sigma
for epsilon, sigma in zip(epsilon_list, sigma_list):
print('working on epsilon:'+str(epsilon))
create_nested_folder('./result/SGD/'+str(self.args.dataset)+'/baseline/'+str(target_k)+'/')
sgd_learn_scratch_acc, mean_time, sgd_w_list = self.get_mean_performance(self.X_train, self.y_train, burn_in, sigma, None,
self.projection, batch_size, self.batch_idx, len_list = 1, return_w = True)
print('SGD learn scratch acc: ' + str(np.mean(sgd_learn_scratch_acc)))
print('SGD learn scratch acc std: ' + str(np.std(sgd_learn_scratch_acc)))
np.save('./result/SGD/'+str(self.args.dataset)+'/baseline/'+str(target_k)+'/sgd_acc_learn_scratch_b'+str(batch_size)+'_eps'+str(epsilon)+'.npy', sgd_learn_scratch_acc)
sgd_unlearn_scratch_acc, mean_time = self.get_mean_performance(X_train_removed, y_train_removed, burn_in, sigma, None,
self.projection, batch_size, self.batch_idx, len_list = 1)
print('SGD unlearn scratch acc: ' + str(np.mean(sgd_unlearn_scratch_acc)))
print('SGD unlearn scratch acc std: ' + str(np.std(sgd_unlearn_scratch_acc)))
np.save('./result/SGD/'+str(self.args.dataset)+'/baseline/'+str(target_k)+'/sgd_acc_unlearn_scratch_b'+str(batch_size)+'_eps'+str(epsilon)+'.npy', sgd_unlearn_scratch_acc)
sgd_unlearn_finetune_acc, mean_time = self.get_mean_performance(X_train_removed, y_train_removed, target_k_list[0], sigma, sgd_w_list,
self.projection, batch_size, self.batch_idx, len_list = 1)
print('SGD unlearn finetune acc: ' + str(np.mean(sgd_unlearn_finetune_acc)))
print('SGD unlearn finetune acc std: ' + str(np.std(sgd_unlearn_finetune_acc)))
np.save('./result/SGD/'+str(self.args.dataset)+'/baseline/'+str(target_k)+'/sgd_acc_unlearn_finetune_b'+str(batch_size)+'_eps'+str(epsilon)+'.npy', sgd_unlearn_finetune_acc)
elif self.args.compare_baseline_nonconvergent:
if self.args.compare_baseline_nonconvergent:
# compare with the baseline nonconvergent (remove 1 data see sigma and utility)
epsilon_list = [0.05, 0.1, 0.5, 1, 2, 5]
batch_list = [128, 0]
@@ -187,10 +122,8 @@ def train(self):
target_epsilon = 1
create_nested_folder('./result/SGD/'+str(self.args.dataset)+'/sequential/')
sigma = 0.03
#batch_list = [128, 256, 512, 0]
#burn_in_list = [100, 150, 200, 1000]
batch_list = [32, 64]
burn_in_list = [50, 75]
batch_list = [128, 256, 512, 0]
burn_in_list = [100, 150, 200, 1000]
for batch_size, burn_in in zip(batch_list, burn_in_list):
print('working on batch size '+str(batch_size))
self.k_list = np.zeros(num_step+1).astype(int)
@@ -274,45 +207,6 @@ def train(self):
print('SGD unlearn finetune acc: ' + str(np.mean(sgd_unlearn_finetune_acc)))
print('SGD unlearn finetune acc std: ' + str(np.std(sgd_unlearn_finetune_acc)))
np.save('./result/SGD/'+str(self.args.dataset)+'/paint_unlearning_sigma/sgd_acc_finetune_b'+str(batch_size)+'_sigma'+str(sigma)+'_step'+str(sgd_step+1)+'.npy', sgd_unlearn_finetune_acc)

elif self.args.paint_utility_epsilon:
epsilon_list = [0.1, 0.5, 1, 2, 5]
batch_size_list = [32, 64, 128, 256, 0]
num_remove_list = [1, 50, 100]
create_nested_folder('./result/SGD/'+str(self.args.dataset)+'/paint_utility_epsilon/')
for batch_size in batch_size_list:
accuracy_scratch_D, mean_time, w_list = self.get_mean_performance(self.X_train, self.y_train, self.args.burn_in, self.args.sigma,
None, self.args.projection, batch_size, self.batch_idx,
len_list = 1, return_w = True, )
np.save('./result/LMC/'+str(self.args.dataset)+'/paint_utility_epsilon/w_from_scratch_b'+str(batch_size)+'.npy', w_list)
np.save('./result/LMC/'+str(self.args.dataset)+'/paint_utility_epsilon/acc_scratch_D_b'+str(batch_size)+'.npy', accuracy_scratch_D)
# calculate K
K_dict, _ = self.search_finetune_step(self.args.sigma, epsilon_list, batch_size_list)
np.save('./result/LMC/'+str(self.args.dataset)+'/paint_utility_epsilon/K_list.npy', K_dict)
for remove_idx, num_remove in enumerate(num_remove_list):
K_list = []
for epsilon in epsilon_list:
X_train_removed, y_train_removed = self.get_removed_data(num_remove_list[remove_idx])
accuracy_finetune, mean_time = self.get_mean_performance(X_train_removed, y_train_removed, K_dict[num_remove_list[remove_idx]][epsilon], self.args.sigma, w_list)
create_nested_folder('./result/LMC/'+str(self.args.dataset)+'/paint_utility_epsilon/'+str(num_remove)+'/')
np.save('./result/LMC/'+str(self.args.dataset)+'/paint_utility_epsilon/'+str(num_remove)+'/acc_finetune_epsilon'+str(epsilon)+'.npy', accuracy_finetune)
K_list.append(K_dict[num_remove_list[0]][epsilon])

elif self.args.how_much_retrain == 1:
sigma_list = [0.05, 0.1, 0.2, 0.5, 1]
if self.args.dataset == 'MNIST':
K_list = [1301, 1031, 751, 351, 1]
elif self.args.dataset =='CIFAR10':
K_list = [1541, 1251, 951, 521, 151]
num_remove_list = [100]
X_train_removed, y_train_removed = self.get_removed_data(num_remove_list[0])
create_nested_folder('./result/LMC/'+str(self.args.dataset)+'/retrain/')
for sigma_idx, sigma in enumerate(sigma_list):
accuracy_scratch_D, mean_time = self.get_mean_performance(X_train_removed, y_train_removed, K_list[sigma_idx], sigma, None, len_list = 1)
np.save('./result/LMC/'+str(self.args.dataset)+'/retrain/'+str(sigma)+'_acc_scratch_D.npy', accuracy_scratch_D)
print('sigma:'+str(sigma))
print('mean acc:'+str(np.mean(accuracy_scratch_D)))
print('std acc:'+str(np.std(accuracy_scratch_D)))
else:
print('check!')

@@ -498,60 +392,6 @@ def get_mean_performance(self, X, y, step, sigma, w_list, projection, batch_size
return trial_list, mean_time, new_w_list
else:
return trial_list, mean_time

def search_burnin(self, sigma_list, burn_in_list, fig_path = '_search_burnin.pdf'):
acc_dict = {}
for sigma in sigma_list:
acc_list = []
this_w_list = None
for idx in range(len(burn_in_list)):
if idx == 0:
step = burn_in_list[idx]
else:
step = burn_in_list[idx] - burn_in_list[idx - 1]
accuracy, _, new_w_list = self.get_mean_performance(self.X_train, self.y_train, step, sigma, this_w_list, return_w = True,
projection = self.projection, batch_size = self.batch_size, batch_idx = self.batch_idx)
this_w_list = new_w_list
acc_list.append(np.mean(accuracy))
print(acc_list)
plt.plot(burn_in_list, acc_list, label='sigma :'+str(sigma))
acc_dict[sigma] = acc_list
for i in range(len(burn_in_list)):
plt.text(burn_in_list[i], acc_list[i], f'{acc_list[i]:.3f}', ha='right', va='bottom')
plt.legend()
plt.title(str(self.args.dataset)+'search burn in')
plt.xlabel('burn in steps')
plt.ylabel('accuracy')
plt.savefig(str(self.args.dataset)+fig_path)
plt.clf()
return acc_dict

def search_batch(self, burn_in_list, batch_list, fig_path = '_search_batch.pdf'):
acc_dict = {}
for batch in batch_list:
acc_list = []
this_w_list = None
for idx in range(len(burn_in_list)):
if idx == 0:
step = burn_in_list[idx]
else:
step = burn_in_list[idx] - burn_in_list[idx - 1]
accuracy, _, new_w_list = self.get_mean_performance(self.X_train, self.y_train, step, self.args.sigma, this_w_list, return_w = True,
projection = self.projection, batch_size = batch, batch_idx = self.batch_idx)
this_w_list = new_w_list
acc_list.append(np.mean(accuracy))
print(acc_list)
plt.plot(burn_in_list, acc_list, label='batch:'+str(batch))
acc_dict[batch] = acc_list
for i in range(len(burn_in_list)):
plt.text(burn_in_list[i], acc_list[i], f'{acc_list[i]:.3f}', ha='right', va='bottom')
plt.legend()
plt.title(str(self.args.dataset)+' search burning in steps')
plt.xlabel('burn in steps')
plt.ylabel('accuracy')
plt.savefig(str(self.args.dataset)+str(batch)+fig_path)
plt.clf()
return acc_dict

def test_accuracy(self, w_list):
w = torch.tensor(w_list[0])
@@ -593,19 +433,14 @@ def main():
parser.add_argument('--sigma', type = float, default = 0.03, help = 'the parameter sigma')
parser.add_argument('--burn_in', type = int, default = 1000, help = 'burn in step number of LMC')

parser.add_argument('--search_burnin', type = int, default = 0, help = 'whether grid search to paint for burn-in')
parser.add_argument('--search_batch', type = int, default = 0, help = 'paint the batch size utility - s figure')
parser.add_argument('--paint_utility_epsilon', type = int, default = 0, help = 'paint utility - epsilon figure')
parser.add_argument('--paint_unlearning_sigma', type = int, default = 0, help = 'paint unlearning utility - sigma figure')
parser.add_argument('--how_much_retrain', type = int, default = 0, help = 'supplementary for unlearning sigma')
parser.add_argument('--compare_baseline', type = int, default = 0, help = 'compare with baseline')
parser.add_argument('--compare_baseline_nonconvergent', type = int, default = 0, help = 'compare with the baselines with nonconvergent calculation')
parser.add_argument('--sequential', type = int, default = 0, help = 'sequential unlearning')
args = parser.parse_args()
print(args)
runner = Runner(args)
runner.get_metadata()
#import pdb; pdb.set_trace()
runner.train()

if __name__ == '__main__':
main()
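
The deleted search_burnin and search_batch helpers above share one technique worth noting: rather than re-running the sampler from scratch for every burn-in length, they advance the current iterates by only the incremental number of steps (step = burn_in_list[idx] - burn_in_list[idx - 1]) and evaluate along the way. A minimal sketch of that warm-start pattern, assuming generic run_steps and evaluate callables (hypothetical names, not part of this codebase):

# Hypothetical sketch of the incremental warm-start evaluation used by the
# deleted search_burnin/search_batch helpers: each extra sampler step is run
# only once, and accuracy is recorded at every requested burn-in length.
import numpy as np

def evaluate_burn_in_curve(run_steps, evaluate, burn_in_list, w_init):
    # run_steps(w, n) is assumed to advance the iterate w by n sampler steps;
    # evaluate(w) is assumed to return a scalar accuracy;
    # burn_in_list must be sorted in increasing order.
    acc_list = []
    w = w_init
    prev = 0
    for burn_in in burn_in_list:
        w = run_steps(w, burn_in - prev)  # only the incremental steps
        prev = burn_in
        acc_list.append(evaluate(w))
    return np.array(acc_list)

With an increasing burn_in_list this yields the accuracy-vs-burn-in curve in a single pass over the total number of steps.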
