-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlab_utils_week3.py
296 lines (245 loc) · 11.3 KB
/
lab_utils_week3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
"""
lab_utils_common
contains common routines and variable definitions
used by all the labs in this week.
by contrast, specific, large plotting routines will be in separate files
and are generally imported into the week where they are used.
those files will import this file
"""
import copy
import math
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import FancyArrowPatch
from ipywidgets import Output
np.set_printoptions(precision=2)
dlc = dict(dlblue = '#0096ff', dlorange = '#FF9300', dldarkred='#C00000', dlmagenta='#FF40FF', dlpurple='#7030A0')
dlblue = '#0096ff'; dlorange = '#FF9300'; dldarkred='#C00000'; dlmagenta='#FF40FF'; dlpurple='#7030A0'
dlcolors = [dlblue, dlorange, dldarkred, dlmagenta, dlpurple]
plt.style.use('./deeplearning.mplstyle')
def sigmoid(z):
"""
Compute the sigmoid of z
Parameters
----------
z : array_like
A scalar or numpy array of any size.
Returns
-------
g : array_like
sigmoid(z)
"""
z = np.clip( z, -500, 500 ) # protect against overflow
g = 1.0/(1.0+np.exp(-z))
return g
##########################################################
# Regression Routines
##########################################################
def predict_logistic(X, w, b):
""" performs prediction """
return sigmoid(X @ w + b)
def predict_linear(X, w, b):
""" performs prediction """
return X @ w + b
def compute_cost_logistic(X, y, w, b, lambda_=0, safe=False):
"""
Computes cost using logistic loss, non-matrix version
Args:
X (ndarray): Shape (m,n) matrix of examples with n features
y (ndarray): Shape (m,) target values
w (ndarray): Shape (n,) parameters for prediction
b (scalar): parameter for prediction
lambda_ : (scalar, float) Controls amount of regularization, 0 = no regularization
safe : (boolean) True-selects under/overflow safe algorithm
Returns:
cost (scalar): cost
"""
m,n = X.shape
cost = 0.0
for i in range(m):
z_i = np.dot(X[i],w) + b #(n,)(n,) or (n,) ()
if safe: #avoids overflows
cost += -(y[i] * z_i ) + log_1pexp(z_i)
else:
f_wb_i = sigmoid(z_i) #(n,)
cost += -y[i] * np.log(f_wb_i) - (1 - y[i]) * np.log(1 - f_wb_i) # scalar
cost = cost/m
reg_cost = 0
if lambda_ != 0:
for j in range(n):
reg_cost += (w[j]**2) # scalar
reg_cost = (lambda_/(2*m))*reg_cost
return cost + reg_cost
def log_1pexp(x, maximum=20):
''' approximate log(1+exp^x)
https://stats.stackexchange.com/questions/475589/numerical-computation-of-cross-entropy-in-practice
Args:
x : (ndarray Shape (n,1) or (n,) input
out : (ndarray Shape matches x output ~= np.log(1+exp(x))
'''
out = np.zeros_like(x,dtype=float)
i = x <= maximum
ni = np.logical_not(i)
out[i] = np.log(1 + np.exp(x[i]))
out[ni] = x[ni]
return out
def compute_cost_matrix(X, y, w, b, logistic=False, lambda_=0, safe=True):
"""
Computes the cost using using matrices
Args:
X : (ndarray, Shape (m,n)) matrix of examples
y : (ndarray Shape (m,) or (m,1)) target value of each example
w : (ndarray Shape (n,) or (n,1)) Values of parameter(s) of the model
b : (scalar ) Values of parameter of the model
verbose : (Boolean) If true, print out intermediate value f_wb
Returns:
total_cost: (scalar) cost
"""
m = X.shape[0]
y = y.reshape(-1,1) # ensure 2D
w = w.reshape(-1,1) # ensure 2D
if logistic:
if safe: #safe from overflow
z = X @ w + b #(m,n)(n,1)=(m,1)
cost = -(y * z) + log_1pexp(z)
cost = np.sum(cost)/m # (scalar)
else:
f = sigmoid(X @ w + b) # (m,n)(n,1) = (m,1)
cost = (1/m)*(np.dot(-y.T, np.log(f)) - np.dot((1-y).T, np.log(1-f))) # (1,m)(m,1) = (1,1)
cost = cost[0,0] # scalar
else:
f = X @ w + b # (m,n)(n,1) = (m,1)
cost = (1/(2*m)) * np.sum((f - y)**2) # scalar
reg_cost = (lambda_/(2*m)) * np.sum(w**2) # scalar
total_cost = cost + reg_cost # scalar
return total_cost # scalar
def compute_gradient_matrix(X, y, w, b, logistic=False, lambda_=0):
"""
Computes the gradient using matrices
Args:
X : (ndarray, Shape (m,n)) matrix of examples
y : (ndarray Shape (m,) or (m,1)) target value of each example
w : (ndarray Shape (n,) or (n,1)) Values of parameters of the model
b : (scalar ) Values of parameter of the model
logistic: (boolean) linear if false, logistic if true
lambda_: (float) applies regularization if non-zero
Returns
dj_dw: (array_like Shape (n,1)) The gradient of the cost w.r.t. the parameters w
dj_db: (scalar) The gradient of the cost w.r.t. the parameter b
"""
m = X.shape[0]
y = y.reshape(-1,1) # ensure 2D
w = w.reshape(-1,1) # ensure 2D
f_wb = sigmoid( X @ w + b ) if logistic else X @ w + b # (m,n)(n,1) = (m,1)
err = f_wb - y # (m,1)
dj_dw = (1/m) * (X.T @ err) # (n,m)(m,1) = (n,1)
dj_db = (1/m) * np.sum(err) # scalar
dj_dw += (lambda_/m) * w # regularize # (n,1)
return dj_db, dj_dw # scalar, (n,1)
def gradient_descent(X, y, w_in, b_in, alpha, num_iters, logistic=False, lambda_=0, verbose=True):
"""
Performs batch gradient descent to learn theta. Updates theta by taking
num_iters gradient steps with learning rate alpha
Args:
X (ndarray): Shape (m,n) matrix of examples
y (ndarray): Shape (m,) or (m,1) target value of each example
w_in (ndarray): Shape (n,) or (n,1) Initial values of parameters of the model
b_in (scalar): Initial value of parameter of the model
logistic: (boolean) linear if false, logistic if true
lambda_: (float) applies regularization if non-zero
alpha (float): Learning rate
num_iters (int): number of iterations to run gradient descent
Returns:
w (ndarray): Shape (n,) or (n,1) Updated values of parameters; matches incoming shape
b (scalar): Updated value of parameter
"""
# An array to store cost J and w's at each iteration primarily for graphing later
J_history = []
w = copy.deepcopy(w_in) #avoid modifying global w within function
b = b_in
w = w.reshape(-1,1) #prep for matrix operations
y = y.reshape(-1,1)
for i in range(num_iters):
# Calculate the gradient and update the parameters
dj_db,dj_dw = compute_gradient_matrix(X, y, w, b, logistic, lambda_)
# Update Parameters using w, b, alpha and gradient
w = w - alpha * dj_dw
b = b - alpha * dj_db
# Save cost J at each iteration
if i<100000: # prevent resource exhaustion
J_history.append( compute_cost_matrix(X, y, w, b, logistic, lambda_) )
# Print cost every at intervals 10 times or as many iterations if < 10
if i% math.ceil(num_iters / 10) == 0:
if verbose: print(f"Iteration {i:4d}: Cost {J_history[-1]} ")
return w.reshape(w_in.shape), b, J_history #return final w,b and J history for graphing
def zscore_normalize_features(X):
"""
computes X, zcore normalized by column
Args:
X (ndarray): Shape (m,n) input data, m examples, n features
Returns:
X_norm (ndarray): Shape (m,n) input normalized by column
mu (ndarray): Shape (n,) mean of each feature
sigma (ndarray): Shape (n,) standard deviation of each feature
"""
# find the mean of each column/feature
mu = np.mean(X, axis=0) # mu will have shape (n,)
# find the standard deviation of each column/feature
sigma = np.std(X, axis=0) # sigma will have shape (n,)
# element-wise, subtract mu for that column from each example, divide by std for that column
X_norm = (X - mu) / sigma
return X_norm, mu, sigma
#check our work
#from sklearn.preprocessing import scale
#scale(X_orig, axis=0, with_mean=True, with_std=True, copy=True)
######################################################
# Common Plotting Routines
######################################################
def plot_data(X, y, ax, pos_label="y=1", neg_label="y=0", s=80, loc='best' ):
""" plots logistic data with two axis """
# Find Indices of Positive and Negative Examples
pos = y == 1
neg = y == 0
pos = pos.reshape(-1,) #work with 1D or 1D y vectors
neg = neg.reshape(-1,)
# Plot examples
ax.scatter(X[pos, 0], X[pos, 1], marker='x', s=s, c = 'red', label=pos_label)
ax.scatter(X[neg, 0], X[neg, 1], marker='o', s=s, label=neg_label, facecolors='none', edgecolors=dlblue, lw=3)
ax.legend(loc=loc)
ax.figure.canvas.toolbar_visible = False
ax.figure.canvas.header_visible = False
ax.figure.canvas.footer_visible = False
def plt_tumor_data(x, y, ax):
""" plots tumor data on one axis """
pos = y == 1
neg = y == 0
ax.scatter(x[pos], y[pos], marker='x', s=80, c = 'red', label="malignant")
ax.scatter(x[neg], y[neg], marker='o', s=100, label="benign", facecolors='none', edgecolors=dlblue,lw=3)
ax.set_ylim(-0.175,1.1)
ax.set_ylabel('y')
ax.set_xlabel('Tumor Size')
ax.set_title("Logistic Regression on Categorical Data")
ax.figure.canvas.toolbar_visible = False
ax.figure.canvas.header_visible = False
ax.figure.canvas.footer_visible = False
# Draws a threshold at 0.5
def draw_vthresh(ax,x):
""" draws a threshold """
ylim = ax.get_ylim()
xlim = ax.get_xlim()
ax.fill_between([xlim[0], x], [ylim[1], ylim[1]], alpha=0.2, color=dlblue)
ax.fill_between([x, xlim[1]], [ylim[1], ylim[1]], alpha=0.2, color=dldarkred)
ax.annotate("z >= 0", xy= [x,0.5], xycoords='data',
xytext=[30,5],textcoords='offset points')
d = FancyArrowPatch(
posA=(x, 0.5), posB=(x+3, 0.5), color=dldarkred,
arrowstyle='simple, head_width=5, head_length=10, tail_width=0.0',
)
ax.add_artist(d)
ax.annotate("z < 0", xy= [x,0.5], xycoords='data',
xytext=[-50,5],textcoords='offset points', ha='left')
f = FancyArrowPatch(
posA=(x, 0.5), posB=(x-3, 0.5), color=dlblue,
arrowstyle='simple, head_width=5, head_length=10, tail_width=0.0',
)
ax.add_artist(f)