Optimizer decomposing Neural Net matrices as sums of structured matrices #64
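This PR develops chop.stochastic.SplittingProxFW, a stochastic optimizer that splits each parameter as p = x + y and handles one block with a proximal step and the other with a Frank-Wolfe (LMO) step. Below is a minimal usage sketch adapted from the Robust PCA example added in this PR; the matrix size and ball radii are placeholders chosen only for illustration.

import torch
import chop

# Observed matrix to decompose into sparse + low-rank components.
M = torch.randn(1000, 1000)
Z = torch.zeros_like(M, requires_grad=True)

# One constraint is handled via its LMO (Frank-Wolfe step),
# the other via its prox (proximal step).
lmo = chop.constraints.NuclearNormBall(100.).lmo   # radius is a placeholder
prox = chop.constraints.L1Ball(1000.).prox         # radius is a placeholder

optimizer = chop.stochastic.SplittingProxFW(
    [Z], lmo=[lmo], prox=[prox],
    lr_lmo='sublinear', lr_prox='sublinear',
    normalization='none')

for _ in range(100):
    optimizer.zero_grad()
    loss = .5 / M.numel() * torch.linalg.norm(Z - M, ord='fro') ** 2
    loss.backward()
    optimizer.step()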

Draft · wants to merge 56 commits into master from geoff/stochastic_hybrid_prox_lmo

Commits (56)
ef6e2e6
First draft Stochastic Hybrid Prox LMO
GeoffNN Mar 31, 2021
6afc510
Added stochastic Robust PCA example first draft
GeoffNN Mar 31, 2021
1d7b945
Stochastic Robust PCA example fixed
GeoffNN Apr 1, 2021
fa60642
Added layer nuclear norm constraints + tests
GeoffNN Apr 2, 2021
7d0005e
Bug fixes; slowly migrating to param_groups API for Optimizer
GeoffNN Apr 6, 2021
aa447e0
MNIST example LR+Sparse
GeoffNN Apr 6, 2021
6b53c43
Merge branch 'master' into geoff/stochastic_hybrid_prox_lmo
GeoffNN Apr 7, 2021
4f52df2
Merge branch 'master' into geoff/stochastic_hybrid_prox_lmo
GeoffNN Apr 7, 2021
578e8b7
Bug fix NuclearNorm prox
GeoffNN Apr 13, 2021
c921483
slowly migrating optimizers to correct **params API
GeoffNN Apr 13, 2021
a672e64
Stochastic Robust PCA example
GeoffNN Apr 13, 2021
3061108
Deleted obsolete LR + sparse example
GeoffNN Apr 13, 2021
2182446
Merge branch 'master' into geoff/stochastic_hybrid_prox_lmo
GeoffNN Apr 20, 2021
bbe9470
fixes to splitting method -- added gradient normalization
GeoffNN Apr 24, 2021
7671cf7
removed redundant code
GeoffNN Apr 28, 2021
b41a17b
Enforcing relationship between lr, lr_prox and lipschitz
GeoffNN May 1, 2021
227b627
Removed lr_prox parameter; using lipschitz estimate, consistently wit…
GeoffNN May 3, 2021
ccc4059
state is initialized at every step in hybrid optimizer
GeoffNN May 4, 2021
6bf2b13
Prox/LMO are now Modules, to ensure pickle-ability
GeoffNN May 7, 2021
346dd32
Fixed initialization bug: making y feasible w/ correct projection
GeoffNN May 7, 2021
4200710
Fixed stochastic robust PCA example + code snippet for extracting com…
GeoffNN May 8, 2021
22a6c19
Minor fixes
GeoffNN May 8, 2021
6cb9a32
Fix when prox2 is None
GeoffNN May 9, 2021
fb6ec7d
Fixed view/reshape issue
GeoffNN May 10, 2021
28eff81
Allowing 0 constraints
GeoffNN May 10, 2021
34b99a7
Changed dataloader + loss for efficiency
GeoffNN May 14, 2021
9afbd5d
Removed debug statements
GeoffNN May 14, 2021
e95dbed
L1 penalty prox w/ stochastic hybrid splitting
GeoffNN May 14, 2021
601b738
Penalized version of RobustPCA w/ generalized LMO
GeoffNN May 14, 2021
633eee6
Hybrid Prox method now works for penalty LMO
GeoffNN May 15, 2021
10b7c4e
Slight change
GeoffNN May 15, 2021
3c99631
Slight algo modification
GeoffNN May 18, 2021
5106cf3
Fixed penalty to use torch.svd instead of torch.linalg.svd + step siz…
GeoffNN May 18, 2021
2b2f669
Fixed svd calls
GeoffNN May 18, 2021
4291e1f
SVD call updated
GeoffNN May 18, 2021
e5671d0
Fixed penalized training + changes to svd calls
GeoffNN May 18, 2021
b75436b
took out print statement
utrerf May 18, 2021
5460b80
Merge pull request #67 from utrerf/patch-2
GeoffNN May 19, 2021
69bec20
Added penalty initialization
GeoffNN May 20, 2021
c0be0ff
removed todo
GeoffNN May 21, 2021
7028526
removed print, redundant computation
GeoffNN May 21, 2021
34409b1
removed comment
GeoffNN May 21, 2021
88d72b1
Updated CIFAR
utrerf May 23, 2021
24f2faa
Merge pull request #68 from utrerf/patch-3
GeoffNN May 23, 2021
9393c27
Minor rewrites
GeoffNN May 26, 2021
65da92f
Merge branch 'geoff/stochastic_hybrid_prox_lmo' of https://github.com…
GeoffNN May 26, 2021
14378dc
vectorized L1/Simplex projections
GeoffNN May 26, 2021
cd283ca
special case when nuclear norm is of diameter 0
GeoffNN May 27, 2021
429129b
Made Frank-Wolfe savable
GeoffNN May 27, 2021
d575a09
Updated ImageNet
utrerf Jun 1, 2021
8634690
Merge pull request #69 from utrerf/patch-4
GeoffNN Jun 3, 2021
68129af
took out the ch
utrerf Aug 6, 2021
1b9fd33
Merge pull request #70 from utrerf/patch-5
GeoffNN Aug 6, 2021
86ba4bd
Added tqdm for iters in optim
GeoffNN Aug 6, 2021
47aa745
Bug fix for penalized stochastic FW
GeoffNN Sep 17, 2021
ca1bb2b
Merge branch 'geoff/stochastic_hybrid_prox_lmo' of github.com:openopt…
GeoffNN Sep 17, 2021
Added stochastic Robust PCA example first draft
GeoffNN committed Mar 31, 2021
commit 6afc5104a15217ff6fb818d96f6f8f929f65d26a
64 changes: 44 additions & 20 deletions chop/stochastic.py
@@ -569,35 +569,50 @@ def __init__(self, params, lmo, prox=None,
momentum=0., weight_decay=0.,
normalization='none'):

self.lmo = []
# initialize proxes
if prox is None:
prox = [None] * len(list(params))

prox_candidates = []
for prox_el in prox:
if prox_el is not None:
prox_candidates.append(lambda x, s=None: prox_el(x.unsqueeze(0), s).squeeze(0))
else:
prox_candidates.append(lambda x, s=None: x)
# initialize lmos
lmo_candidates = []
for oracle in lmo:
if oracle is None:
# Then FW will not be used on this parameter
raise ValueError("LMOs cannot be None for this optimizer.")
_lmo = None
else:
def _lmo(u, x):
update_direction, max_step_size = oracle(u.unsqueeze(0), x.unsqueeze(0))
return update_direction.squeeze(dim=0), max_step_size
self.lmo.append(_lmo)

if prox is None:
prox = [None] * len(list(params))
lmo_candidates.append(_lmo)

self.lmo = []
self.prox = []
for prox_el in prox:
if prox_el is not None:
self.prox.append(lambda x, s=None: prox_el(x.unsqueeze(0), s).squeeze(0))
useable_params = []
for param, lmo_oracle, prox_oracle in zip(params, lmo_candidates, prox_candidates):
if lmo_oracle:
useable_params.append(param)
self.lmo.append(lmo_oracle)
self.prox.append(prox_oracle)
else:
self.prox.append(lambda x, s=None: x)
msg = (f"No LMO was provided for parameter {param}. "
f"This optimizer will not optimize this parameter. "
f"Please pass this parameter to another optimizer.")
warnings.warn(msg)

for name, lr in (('lr_lmo', lr_lmo),
('lr_prox', lr_prox)):
if not type(lr) == float:
msg = f"{name} should be a float, got {lr}."
if not ((type(lr) == float) or lr == 'sublinear'):
msg = f"{name} should be a float or 'sublinear', got {lr}."
raise ValueError(msg)

if not(0. <= momentum <= 1.):
raise ValueError("omentum must be in [0., 1.].")
raise ValueError("momentum must be in [0., 1.].")

if not (weight_decay >= 0):
raise ValueError("weight_decay must be nonnegative.")
@@ -631,25 +646,34 @@ def step(self, closure=None):
for p in group['params']:
if p.grad is None:
continue
grad = p.grad + self.weight_decay * p

grad = p.grad
state = self.state[p]
if grad.is_sparse:
raise RuntimeError("We do not yet support sparse gradients.")
# Keep track of the step
state = self.state[p]

grad += group['weight_decay'] * p
# Initialization
if len(state) == 0:
state['step'] = 0.
# split variable: p = x + y
state['x'] = .5 * p.detach().clone()
state['y'] = .5 * p.detach().clone()
# initialize grad estimate
state['grad_est'] = grad
# initialize learning rates
state['lr_prox'] = group['lr_prox'] if type(group['lr_prox']) == float else 0.
state['lr_lmo'] = group['lr_lmo'] if type(group['lr_lmo']) == float else 0.
state['step'] += 1.
state['grad_est'].add_(grad, alpha=1. - group['momentum'])

for lr in ('lr_prox', 'lr_lmo'):
if group[lr] == 'sublinear':
state[lr] = 2. / (state['step'] + 2)

y_update, max_step_size = self.lmo[idx](-grad, state['y'])
state['lr_lmo'] = torch.minimum(state['lr_lmo'], max_step_size)
y_update, max_step_size = group['lmo'][idx](-state['grad_est'], state['y'])
state['lr_lmo'] = min(state['lr_lmo'], max_step_size)
w = y_update + state['y']
v = self.prox[idx](state['x'] + state['y'] - w - grad / state['lr_prox'], state['lr_prox'])
v = group['prox'][idx](state['x'] + state['y'] - w - state['grad_est'] / state['lr_prox'], group['lr_prox'])

state['y'].add_(y_update, alpha=state['lr_lmo'])
x_update = v - state['x']
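For readability, here is a condensed sketch of the per-parameter update implemented by the step() hunk above, written against hypothetical standalone lmo(u, x) and prox(x, s) callables. Momentum, weight decay, the 'sublinear' schedules and state bookkeeping are elided, and how x_update is applied to x lies outside the hunk shown.

def splitting_update(x, y, grad_est, lmo, prox, lr_lmo, lr_prox):
    # Frank-Wolfe / LMO step on the y block of the split p = x + y
    y_update, max_step_size = lmo(-grad_est, y)
    lr_lmo = min(lr_lmo, max_step_size)
    w = y + y_update
    # proximal step on the x block
    v = prox(x + y - w - grad_est / lr_prox, lr_prox)
    y = y + lr_lmo * y_update
    x_update = v - x
    return x_update, y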
9 changes: 6 additions & 3 deletions examples/plot_robust_PCA.py
@@ -25,8 +25,10 @@
m = 1000
n = 1000

r_p = [(5, 1e-3), (5, 3e-3), (25, 1e-3), (25, 3e-3),
(25, 3e-2), (130, 1e-2)]
r_p = [(5, 1e-3),
# (5, 3e-3), (25, 1e-3), (25, 3e-3),
# (25, 3e-2), (130, 1e-2)
]

for r, p in r_p:
print(f'r={r} and p={p}')
@@ -49,7 +51,7 @@

@utils.closure
def sqloss(Z):
return .5 * torch.linalg.norm((Z - M).squeeze(), ord='fro') ** 2
return .5 / M.numel() * torch.linalg.norm((Z - M).squeeze(), ord='fro') ** 2

rnuc = torch.linalg.norm(L.squeeze(), ord='nuc')
sL1 = abs(S).sum()
@@ -102,6 +104,7 @@ def line_search(kwargs):
fig.suptitle(f'r={r} and p={p}')

axes[0].plot(f_vals)
axes[0].set_ylim(0, 250)
axes[0].set_title("Function values")

axes[1].plot(sparse_comp)
102 changes: 102 additions & 0 deletions examples/plot_stochastic_robust_PCA.py
@@ -0,0 +1,102 @@

"""
Stochastic Robust PCA
===========

This example fits a Robust PCA model to data.
It uses a stochastic hybrid Frank-Wolfe and proximal method.
See description in :func:`chop.stochastic.SplittingProxFW`.


We reproduce the synthetic experimental setting from `[Garber et al. 2018] <https://arxiv.org/pdf/1802.05581.pdf>`_.
We aim to recover :math:`M = L + S + N`, where :math:`L` is rank :math:`p`,
:math:`S` is :math:`p` sparse, and :math:`N` is standard Gaussian elementwise.
"""


import matplotlib.pyplot as plt
import torch
import chop
from chop import utils
from chop.utils.logging import Trace


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

m = 1000
n = 1000

r_p = [(5, 1e-3),
# (5, 3e-3), (25, 1e-3), (25, 3e-3),
# (25, 3e-2), (130, 1e-2)
]

n_epochs = 100

for r, p in r_p:
print(f'r={r} and p={p}')
U = torch.normal(torch.zeros(m, r))
V = torch.normal(torch.zeros(r, n))

# Low rank component
L = 10 * utils.bmm(U, V)

# Sparse component
S = 100 * torch.normal(torch.zeros(m, n))

S *= (torch.rand_like(S) <= p)

# Add noise
N = torch.normal(torch.zeros(m, n))

M = L + S + N
M = M.to(device)

def sqloss(Z, M):
return .5 / M.numel() * torch.linalg.norm((Z - M).squeeze(), ord='fro') ** 2

rnuc = torch.linalg.norm(L.squeeze(), ord='nuc')
sL1 = abs(S).sum()

print(f"Initial L1 norm: {sL1}")
print(f"Initial Nuclear norm: {rnuc}")

rank_constraint = chop.constraints.NuclearNormBall(rnuc)
sparsity_constraint = chop.constraints.L1Ball(sL1)

lmo = rank_constraint.lmo
prox = sparsity_constraint.prox

Z = torch.zeros_like(M, device=device)
Z.requires_grad_(True)

sampler = torch.utils.data.BatchSampler(torch.utils.data.RandomSampler(range(M.size(0))),
batch_size=100,
drop_last=False)

optimizer = chop.stochastic.SplittingProxFW([Z], lmo=[lmo], prox=[prox],
lr_lmo='sublinear',
lr_prox='sublinear',
normalization='none')

train_losses = []
losses = []

for it in range(n_epochs):
for idx in sampler:
optimizer.zero_grad()
loss = sqloss(Z[idx], M[idx])
# for logging
with torch.no_grad():
full_loss = sqloss(Z, M)
losses.append(full_loss.item())
train_losses.append(loss.item())
loss.backward()
optimizer.step()


plt.plot(train_losses, label='training_losses')
plt.plot(losses, label='loss')
plt.ylim(0, 250)
plt.legend()
print("Done.")