Skip to content

Commit

Permalink
HW2 Completed
Browse files Browse the repository at this point in the history
  • Loading branch information
PieroMacaluso committed Jan 15, 2019
1 parent 7aa8373 commit 6115317
Show file tree
Hide file tree
Showing 14 changed files with 334 additions and 150 deletions.
227 changes: 113 additions & 114 deletions hw2/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,11 @@
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from mpl_toolkits.mplot3d import Axes3D
from sklearn import svm, datasets
import pandas as pd
import matplotlib
from IPython.display import display, HTML

# TODO: Set r_state to 252894 to reproduce the results of the report
r_state = 252894
r_state = None


def make_meshgrid(x, y, h=.02):
Expand Down Expand Up @@ -60,20 +57,15 @@ def plot_contours(ax, clf, xx, yy, **params):

def plot_data(ax, x, y, xx, yy, clf):
    """Scatter the three iris classes on ``ax`` and overlay ``clf``'s decision regions.

    Each class ``j`` in ``0..2`` is drawn as its own scatter series so that a
    later ``ax.legend()`` call shows one entry per species. The decision
    regions are then drawn through ``plot_contours`` with the same three
    colours, so a region and the points of its class share a colour.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on.
    x : array
        Samples; only columns 0 and 1 are plotted. Must support boolean-mask
        row indexing (``x[y == j, 0]``).
    y : array
        Class label per sample; values 0, 1 and 2 are plotted.
    xx, yy : arrays
        Grid coordinates forwarded unchanged to ``plot_contours``
        (callers in this file obtain them from ``make_meshgrid``).
    clf : classifier
        Forwarded unchanged to ``plot_contours``; presumably already fitted
        -- verify against the caller.
    """
    label = ("setosa", "versicolor", "virginica")
    color = ["red", "blue", "lightgreen"]
    # Pin the colour scale to the label range 0..2 so that class j always
    # maps to color[j] in the contour fill, whatever classes are predicted.
    norm = matplotlib.colors.Normalize(vmin=0, vmax=2)
    cmap_plot = matplotlib.colors.ListedColormap(color)
    for j, (class_name, class_color) in enumerate(zip(label, color)):
        mask = y == j
        ax.scatter(x[mask, 0], x[mask, 1], c=class_color, label=class_name, s=20, edgecolors='k')
    # Semi-transparent fill so the scatter points stay visible underneath.
    plot_contours(ax, clf, xx, yy, norm=norm, cmap=cmap_plot, alpha=0.5)


def main():
legend_data = ("setosa", "versicolor", "virginica")
legend_color = ["red", "blue", "green"]
norm = matplotlib.colors.Normalize(vmin=0, vmax=2)
cmap_plot = matplotlib.colors.ListedColormap(legend_color)

# Section 1
(x, y) = datasets.load_iris(return_X_y=True)
x = x[:, :2] # we only take the first two features. Skip PCA
Expand All @@ -84,16 +76,16 @@ def main():
# Fig01a
fig = plt.figure(figsize=(8, 12))
acc = np.empty(7)
c = 1e-3
c_i = 1e-3
c_best = 1e-3
a_best = 0
i = 1
xx, yy = make_meshgrid(x[:, 0], x[:, 1])
while c <= 1e3:
clf = svm.LinearSVC(C=c, random_state=r_state)
while c_i <= 1e3:
clf = svm.LinearSVC(C=c_i, random_state=r_state)
acc[i - 1] = clf.fit(x_train, y_train).score(x_val, y_val) * 100
if acc[i - 1] > a_best:
c_best = c
c_best = c_i
a_best = acc[i - 1]
ax = fig.add_subplot(4, 2, i)
plot_data(ax, x_train, y_train, xx, yy, clf)
Expand All @@ -104,21 +96,20 @@ def main():
ax.set_xticks(())
ax.set_yticks(())
ax.legend()
ax.set_title('C=%2.2E A=%2.1f%% ' % (c, acc[i - 1]))

c = c * 10
ax.set_title('C=%2.2E A=%2.1f%% ' % (c_i, acc[i - 1]))
c_i = c_i * 10
i = i + 1
fig.suptitle("Linear SVM - C tuning - C_best = %2.2E A_best = %2.1f%%" %
(c_best, a_best), fontsize=14, fontweight='bold')

fig.suptitle("Linear SVM - C tuning - C_best = %2.2E A_best = %2.1f%%" % (c_best, a_best), fontsize=14,
fontweight='bold')
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0, rect=[0, 0.03, 1, 0.95])
plt.savefig("report/img/fig01a.png", transparent=False, dpi=300)
# plt.savefig("report/img/fig01a.png", transparent=False, dpi=300)
fig.show()

# fig01b
fig = plt.figure(figsize=(7, 5))
fig.suptitle("Linear SVM - C tuning - C_best = %2.2E A_best = %2.2f%%" %
(c_best, a_best), fontsize=14, fontweight='bold')
fig.suptitle("Linear SVM - C tuning - C_best = %2.2E A_best = %2.2f%%" % (c_best, a_best), fontsize=14,
fontweight='bold')
ax = fig.add_subplot(1, 1, 1)
V = np.array(['1e-3', '1e-2', '1e-1', '1e0', '1e1', '1e2', '1e3'])
ax.bar(V, acc)
Expand All @@ -127,177 +118,185 @@ def main():
plt.grid(True)
ax.set_yticks(acc)
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0, rect=[0, 0.03, 1, 0.95])
plt.savefig("report/img/fig01b.png", transparent=False, dpi=300)
# plt.savefig("report/img/fig01b.png", transparent=False, dpi=300)
fig.show()

fig = plt.figure() # 3
ax = fig.add_subplot(1, 1, 1)

# fig01c
fig.suptitle("Linear SVM with validation accuracy of %2.2f%%" %
a_best, fontsize=14, fontweight='bold')
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
fig.suptitle("Linear SVM with validation accuracy of %2.2f%%" % a_best, fontsize=14, fontweight='bold')
clf = svm.LinearSVC(C=c_best, random_state=r_state)
a = clf.fit(x_train, y_train).score(x_test, y_test)*100
a = clf.fit(x_train, y_train).score(x_test, y_test) * 100
plot_data(ax, x_test, y_test, xx, yy, clf)
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())
ax.set_xlabel('Sepal length')
ax.set_ylabel('Sepal width')
ax.set_xticks(())
ax.set_yticks(())
ax.legend()
ax.set_title('C=%2.2E A=%2.2f%% ' % (c_best, a))
ax.legend()
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0, rect=[0, 0.03, 1, 0.95])
plt.savefig("report/img/fig01c.png", transparent=False, dpi=300)
# plt.savefig("report/img/fig01c.png", transparent=False, dpi=300)
fig.show()

fig = plt.figure() # 4
# fig02a
fig = plt.figure(figsize=(8, 12))
acc = np.empty(7)
c = 1e-3
c_i = 1e-3
c_best = 1e-3
a_best = 0
i = 1

xx, yy = make_meshgrid(x[:, 0], x[:, 1])
while c <= 1e3:
clf = svm.SVC(kernel='rbf', C=c, random_state=r_state)
acc[i - 1] = clf.fit(x_train, y_train).score(x_val, y_val)
while c_i <= 1e3:
clf = svm.SVC(kernel='rbf', C=c_i, random_state=r_state)
acc[i - 1] = clf.fit(x_train, y_train).score(x_val, y_val) * 100
if acc[i - 1] > a_best:
c_best = c
c_best = c_i
a_best = acc[i - 1]
ax = fig.add_subplot(3, 3, i)
plot_contours(ax, clf, xx, yy, cmap=plt.get_cmap("coolwarm"), alpha=0.8)
ax.scatter(x_train[:, 0], x_train[:, 1], c=y_train,
cmap=plt.get_cmap("coolwarm"), s=20, edgecolors='k')
ax = fig.add_subplot(4, 2, i)
plot_data(ax, x_train, y_train, xx, yy, clf)
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())
ax.set_xlabel('Sepal length')
ax.set_ylabel('Sepal width')
ax.set_xticks(())
ax.set_yticks(())
Z = clf.predict(x_val)
ax.set_title('C=%2.2E A=%.2f ' % (c, acc[i - 1]))

c = c * 10
ax.legend()
ax.set_title('C=%2.2E A=%.2f%%' % (c_i, acc[i - 1]))
c_i = c_i * 10
i = i + 1

fig.suptitle("RBF Kernel - C/G tuning - C_best=%2.2E A_best=%2.2f" %
(c_best, a_best), fontsize=14, fontweight='bold')
fig.suptitle("RBF Kernel - C/G tuning - C_best=%2.2E A_best=%2.2f%%" % (c_best, a_best), fontsize=14,
fontweight='bold')
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0, rect=[0, 0.03, 1, 0.95])
# plt.savefig("report/img/fig02a.png", transparent=False, dpi=300)
fig.show()

fig = plt.figure() # 5
fig.suptitle("RBF Kernel - C/G tuning - C_best=%2.2E A_best=%2.2f" %
(c_best, a_best), fontsize=14, fontweight='bold')
# fig02b
fig = plt.figure()
fig.suptitle("RBF Kernel - C/G tuning - C_best=%2.2E A_best=%2.2f%%" % (c_best, a_best), fontsize=14,
fontweight='bold')
ax = fig.add_subplot(1, 1, 1)
ax.bar(np.array(['1e-3', '1e-2', '1e-1', '1e0', '1e1', '1e2', '1e3']), acc)
ax.set_xlabel('C')
ax.set_ylabel('Accuracy %')
plt.grid(True)
ax.set_yticks(acc)
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0, rect=[0, 0.03, 1, 0.95])
# plt.savefig("report/img/fig02b.png", transparent=False, dpi=300)
fig.show()

fig = plt.figure() # 6
# fig02c
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

# https://www.quora.com/What-is-the-purpose-for-using-slack-variable-in-SVM
fig.suptitle("RBF Kernel with validation accuracy of %2.2f" %
a_best, fontsize=14, fontweight='bold')
fig.suptitle("RBF Kernel with validation accuracy of %2.2f%%" % a_best, fontsize=14, fontweight='bold')

clf = svm.SVC(kernel='rbf', C=c_best, random_state=r_state)
a = clf.fit(x_train, y_train).score(x_test, y_test)
plot_contours(ax, clf, xx, yy, cmap=plt.get_cmap("coolwarm"), alpha=0.8)
ax.scatter(x_test[:, 0], x_test[:, 1], c=y_test,
cmap=plt.get_cmap("coolwarm"), s=20, edgecolors='k')
a = clf.fit(x_train, y_train).score(x_test, y_test) * 100
plot_data(ax, x_test, y_test, xx, yy, clf)
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())
ax.set_xlabel('Sepal length')
ax.set_ylabel('Sepal width')
ax.set_xticks(())
ax.set_yticks(())
ax.set_title('C=%2.2E A=%.2f ' % (c_best, a))
ax.set_title('C=%2.2E A=%2.2f%% ' % (c_best, a))
ax.legend()
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0, rect=[0, 0.03, 1, 0.95])
# plt.savefig("report/img/fig02c.png", transparent=False, dpi=300)
fig.show()

# Grid Search of C and Gamma
c = np.array([1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3])
Gamma = np.array(
[1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
1e10])
gamma = np.array(
[1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9])
c_best = c.min()
G_best = Gamma.min()
g_best = gamma.min()
a_best = 0
res = np.zeros([c.shape[0], Gamma.shape[0]])
for c, i in zip(c, range(0, c.shape[0])):
for gamma, j in zip(Gamma, range(0, Gamma.shape[0])):
clf = svm.SVC(kernel='rbf', gamma=gamma, C=c, random_state=r_state)
res[i, j] = clf.fit(x_train, y_train).score(x_val, y_val)
res = np.zeros([c.shape[0], gamma.shape[0]])
for c_i, i in zip(c, range(0, c.shape[0])):
for gamma_i, j in zip(gamma, range(0, gamma.shape[0])):
clf = svm.SVC(kernel='rbf', gamma=gamma_i, C=c_i, random_state=r_state)
res[i, j] = clf.fit(x_train, y_train).score(x_val, y_val) * 100
if res[i, j] > a_best:
c_best = c
G_best = gamma
c_best = c_i
g_best = gamma_i
a_best = res[i, j]

df = pd.DataFrame(res, index=c, columns=Gamma)

fig, ax = plt.subplots() # 7
fig.suptitle("RBF Kernel - C/G tuning - C_best=%2.2E A_best=%2.2f" %
(c_best, a_best), fontsize=14, fontweight='bold')
# fig02d
fig, ax = plt.subplots()
fig.suptitle("RBF Kernel - C/G tuning - Validation Accuracy", fontsize=14, fontweight='bold')
fig.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95)
plt.imshow(res.reshape(len(c), len(Gamma)),
interpolation='nearest', cmap=plt.get_cmap("hot"))
plt.imshow(res.reshape(len(c), len(gamma)), interpolation='nearest', cmap=plt.get_cmap("hot"))
ax.set_xlabel('gamma')
ax.set_ylabel('C')
plt.colorbar()
ax.grid(True)
plt.xticks(np.arange(len(Gamma)), Gamma, rotation=90)
ax.ticklabel_format(axis='both', style='sci')
plt.xticks(np.arange(len(gamma)), gamma, rotation=90)
plt.yticks(np.arange(len(c)), c)

ax.set_title('Validation accuracy')
ax.set_title('C_best=%2.2E G_best=%2.2E A_best=%2.2f%%' % (c_best, g_best, a_best))
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0, rect=[0, 0.03, 1, 0.95])
# plt.savefig("report/img/fig02d.png", transparent=False, dpi=300)
fig.show()

clf = svm.SVC(kernel='rbf', gamma=G_best, C=c_best, random_state=r_state)

acc = clf.fit(x_train, y_train).score(x_test, y_test)
# fig02e
clf = svm.SVC(kernel='rbf', gamma=g_best, C=c_best, random_state=r_state)
acc = clf.fit(x_train, y_train).score(x_test, y_test) * 100
fig, ax = plt.subplots()
fig.suptitle("RBF Kernel with validation accuracy of %2.2f" %
a_best, fontsize=14, fontweight='bold')
plot_contours(ax, clf, xx, yy, cmap=plt.get_cmap("coolwarm"), alpha=0.8)
ax.scatter(x_test[:, 0], x_test[:, 1], c=y_test,
cmap=plt.get_cmap("coolwarm"), s=20, edgecolors='k')
fig.suptitle("RBF Kernel with validation accuracy of %2.2f%%" % a_best, fontsize=14, fontweight='bold')
plot_data(ax, x_test, y_test, xx, yy, clf)
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())
ax.set_xlabel('Sepal length')
ax.set_ylabel('Sepal width')
ax.set_title('C=%2.2E Gamma=%2.2E A=%.2f ' % (c_best, G_best, acc))
ax.set_title('C=%2.2E Gamma=%2.2E A=%2.2f%%' % (c_best, g_best, acc))
ax.legend()
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0, rect=[0, 0.03, 1, 0.95])
# plt.savefig("report/img/fig02e.png", transparent=False, dpi=300)
fig.show()

# Merging training and validation sets
x_train = np.vstack([x_train, x_val])
y_train = np.concatenate([y_train, y_val])

# 5-fold validation
c_best = c.min()
G_best = Gamma.min()
g_best = gamma.min()
a_best = 0
kf = KFold(n_splits=5, shuffle=True, random_state=r_state)
scores = np.zeros([len(Gamma), len(c), kf.get_n_splits(x_train)])
for i, gamma in enumerate(Gamma):
for j, c in enumerate(c):
for k, (train_index, test_index) in enumerate(kf.split(x_train)):
# print("TRAIN:", train_index, "TEST:", test_index)
clf = svm.SVC(kernel='rbf', gamma=gamma, C=c)
scores[i, j, k] = clf.fit(x_train[train_index], y_train[train_index]).score(
x_train[test_index], y_train[test_index])
if scores[i, j, k] > a_best:
c_best = c
G_best = gamma
a_best = scores[i, j, k]

clf = svm.SVC(kernel='rbf', gamma=G_best, C=c_best, random_state=r_state)
acc = clf.fit(x_train, y_train).score(x_test, y_test)
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=r_state)
scores = np.zeros([len(gamma), len(c)])
for i, gamma_i in enumerate(gamma):
for j, c_i in enumerate(c):
temp = np.zeros(kf.n_splits)
for k_i, (train_index, test_index) in enumerate(kf.split(x_train)):
clf = svm.SVC(kernel='rbf', gamma=gamma_i, C=c_i)
clf.fit(x_train[train_index], y_train[train_index])
temp[k_i] = clf.score(x_train[test_index], y_train[test_index]) * 100
acc_av = np.average(temp)
if acc_av > a_best:
c_best = c_i
g_best = gamma_i
a_best = acc_av

clf = svm.SVC(kernel='rbf', gamma=g_best, C=c_best, random_state=r_state)
acc = clf.fit(x_train, y_train).score(x_test, y_test) * 100

# fig03
fig, ax = plt.subplots()
fig.suptitle("RBF Kernel K-Fold with validation accuracy of %2.2f" %
fig.suptitle("RBF Kernel K-Fold with validation accuracy of %2.2f%%" %
a_best, fontsize=14, fontweight='bold')
plot_contours(ax, clf, xx, yy, cmap=plt.get_cmap("coolwarm"), alpha=0.8)
ax.scatter(x_test[:, 0], x_test[:, 1], c=y_test,
cmap=plt.get_cmap("coolwarm"), s=20, edgecolors='k')
plot_data(ax, x_test, y_test, xx, yy, clf)
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())
ax.set_xlabel('Sepal length')
ax.set_ylabel('Sepal width')
ax.set_title('C=%2.2E Gamma=%2.2E A=%.2f ' % (c_best, G_best, acc))
ax.set_title('C=%2.2E Gamma=%2.2E A=%2.2f%% ' % (c_best, g_best, acc))
ax.legend()
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0, rect=[0, 0.03, 1, 0.95])
# plt.savefig("report/img/fig03.png", transparent=False, dpi=300)
fig.show()
plt.show()

Expand Down
Binary file removed hw2/report/img/fig01.png
Binary file not shown.
Binary file modified hw2/report/img/fig01a.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified hw2/report/img/fig01c.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added hw2/report/img/fig02a.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added hw2/report/img/fig02b.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added hw2/report/img/fig02c.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added hw2/report/img/fig02d.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added hw2/report/img/fig02e.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added hw2/report/img/fig03.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified hw2/report/reporthw2.pdf
Binary file not shown.
Loading

0 comments on commit 6115317

Please sign in to comment.