Hierarchical Clustering (aribis369#52)
* UCB (Reinforcement Learning)

Upper Confidence Bound (UCB) learning. The given code finds which of the 10 ads should be displayed on the website to maximise the users' click-through rate. The dataset is simulated: it records what the ith user would have done had each of the 10 ads been shown, i.e. 1 means the user would have clicked the ad and 0 means the user would have ignored it. (A minimal sketch of the UCB selection rule follows below these notes.)

* Hierarchical Clustering

Hierarchical clustering is a clustering technique in which every data point initially forms its own separate cluster. The two closest clusters are then merged, leaving N-1 clusters (initially there are N data points), and this process continues until a single cluster is formed. A dendrogram plot shows which clusters were merged at each step, and by studying the dendrogram we decide the number of clusters to form. (A short sketch of this dendrogram-cutting step also follows below.)
To form the clusters I have used the "Ward" method, which works on the principle that the increase in within-cluster variance is minimised when two clusters are merged.
To get a brief overview I have provided a link: http://www.stat.cmu.edu/~ryantibs/datamining/lectures/05-clus2-marked.pdf
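A minimal sketch of the UCB selection rule described above (an illustration, not the committed file; the dataset file name 'Ads_CTR_Optimisation.csv' and its layout, one row per user and one 0/1 column per ad, are assumptions taken from the description):

# UCB sketch: at each round, show the ad with the highest upper confidence bound.
# Assumed file name and layout; not part of this commit.
import math
import pandas as pd

dataset = pd.read_csv('Ads_CTR_Optimisation.csv')
N, d = dataset.shape                 # N rounds (users), d ads
numbers_of_selections = [0] * d
sums_of_rewards = [0] * d

for n in range(N):
    best_ad, best_ucb = 0, -1.0
    for i in range(d):
        if numbers_of_selections[i] > 0:
            mean_reward = sums_of_rewards[i] / numbers_of_selections[i]
            # Confidence width shrinks as an ad is selected more often
            delta = math.sqrt(3 / 2 * math.log(n + 1) / numbers_of_selections[i])
            ucb = mean_reward + delta
        else:
            ucb = float('inf')       # try every ad at least once
        if ucb > best_ucb:
            best_ad, best_ucb = i, ucb
    reward = dataset.values[n, best_ad]   # what the nth user would have done
    numbers_of_selections[best_ad] += 1
    sums_of_rewards[best_ad] += reward

print('Total reward:', sum(sums_of_rewards))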
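And a short sketch of the dendrogram-cutting step: with Ward linkage, scipy's fcluster turns the merge tree into flat cluster labels once a cluster count has been read off the dendrogram (the toy data below is illustrative only, not the Mall_Customers data):

# Cutting a Ward-linkage dendrogram into flat clusters (toy data for illustration)
import numpy as np
import scipy.cluster.hierarchy as sch

rng = np.random.default_rng(0)
# Two well-separated 2-D blobs, so the "right" cluster count is clearly 2
x = np.vstack([rng.normal(0, 0.5, (20, 2)), rng.normal(5, 0.5, (20, 2))])

# Ward linkage: each merge minimises the increase in within-cluster variance
z = sch.linkage(x, method='ward')

# The largest vertical gap in the dendrogram suggests the number of clusters;
# criterion='maxclust' then cuts the tree into exactly that many clusters
labels = sch.fcluster(z, t=2, criterion='maxclust')
print(labels)   # label 1 for one blob, label 2 for the other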
devanshb26 authored and aribis369 committed Dec 16, 2017
1 parent 2cf08b5 commit 1b4c063
Showing 4 changed files with 300 additions and 0 deletions.
@@ -0,0 +1,201 @@
CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
0001,Male,19,15,39
0002,Male,21,15,81
0003,Female,20,16,6
0004,Female,23,16,77
0005,Female,31,17,40
0006,Female,22,17,76
0007,Female,35,18,6
0008,Female,23,18,94
0009,Male,64,19,3
0010,Female,30,19,72
0011,Male,67,19,14
0012,Female,35,19,99
0013,Female,58,20,15
0014,Female,24,20,77
0015,Male,37,20,13
0016,Male,22,20,79
0017,Female,35,21,35
0018,Male,20,21,66
0019,Male,52,23,29
0020,Female,35,23,98
0021,Male,35,24,35
0022,Male,25,24,73
0023,Female,46,25,5
0024,Male,31,25,73
0025,Female,54,28,14
0026,Male,29,28,82
0027,Female,45,28,32
0028,Male,35,28,61
0029,Female,40,29,31
0030,Female,23,29,87
0031,Male,60,30,4
0032,Female,21,30,73
0033,Male,53,33,4
0034,Male,18,33,92
0035,Female,49,33,14
0036,Female,21,33,81
0037,Female,42,34,17
0038,Female,30,34,73
0039,Female,36,37,26
0040,Female,20,37,75
0041,Female,65,38,35
0042,Male,24,38,92
0043,Male,48,39,36
0044,Female,31,39,61
0045,Female,49,39,28
0046,Female,24,39,65
0047,Female,50,40,55
0048,Female,27,40,47
0049,Female,29,40,42
0050,Female,31,40,42
0051,Female,49,42,52
0052,Male,33,42,60
0053,Female,31,43,54
0054,Male,59,43,60
0055,Female,50,43,45
0056,Male,47,43,41
0057,Female,51,44,50
0058,Male,69,44,46
0059,Female,27,46,51
0060,Male,53,46,46
0061,Male,70,46,56
0062,Male,19,46,55
0063,Female,67,47,52
0064,Female,54,47,59
0065,Male,63,48,51
0066,Male,18,48,59
0067,Female,43,48,50
0068,Female,68,48,48
0069,Male,19,48,59
0070,Female,32,48,47
0071,Male,70,49,55
0072,Female,47,49,42
0073,Female,60,50,49
0074,Female,60,50,56
0075,Male,59,54,47
0076,Male,26,54,54
0077,Female,45,54,53
0078,Male,40,54,48
0079,Female,23,54,52
0080,Female,49,54,42
0081,Male,57,54,51
0082,Male,38,54,55
0083,Male,67,54,41
0084,Female,46,54,44
0085,Female,21,54,57
0086,Male,48,54,46
0087,Female,55,57,58
0088,Female,22,57,55
0089,Female,34,58,60
0090,Female,50,58,46
0091,Female,68,59,55
0092,Male,18,59,41
0093,Male,48,60,49
0094,Female,40,60,40
0095,Female,32,60,42
0096,Male,24,60,52
0097,Female,47,60,47
0098,Female,27,60,50
0099,Male,48,61,42
0100,Male,20,61,49
0101,Female,23,62,41
0102,Female,49,62,48
0103,Male,67,62,59
0104,Male,26,62,55
0105,Male,49,62,56
0106,Female,21,62,42
0107,Female,66,63,50
0108,Male,54,63,46
0109,Male,68,63,43
0110,Male,66,63,48
0111,Male,65,63,52
0112,Female,19,63,54
0113,Female,38,64,42
0114,Male,19,64,46
0115,Female,18,65,48
0116,Female,19,65,50
0117,Female,63,65,43
0118,Female,49,65,59
0119,Female,51,67,43
0120,Female,50,67,57
0121,Male,27,67,56
0122,Female,38,67,40
0123,Female,40,69,58
0124,Male,39,69,91
0125,Female,23,70,29
0126,Female,31,70,77
0127,Male,43,71,35
0128,Male,40,71,95
0129,Male,59,71,11
0130,Male,38,71,75
0131,Male,47,71,9
0132,Male,39,71,75
0133,Female,25,72,34
0134,Female,31,72,71
0135,Male,20,73,5
0136,Female,29,73,88
0137,Female,44,73,7
0138,Male,32,73,73
0139,Male,19,74,10
0140,Female,35,74,72
0141,Female,57,75,5
0142,Male,32,75,93
0143,Female,28,76,40
0144,Female,32,76,87
0145,Male,25,77,12
0146,Male,28,77,97
0147,Male,48,77,36
0148,Female,32,77,74
0149,Female,34,78,22
0150,Male,34,78,90
0151,Male,43,78,17
0152,Male,39,78,88
0153,Female,44,78,20
0154,Female,38,78,76
0155,Female,47,78,16
0156,Female,27,78,89
0157,Male,37,78,1
0158,Female,30,78,78
0159,Male,34,78,1
0160,Female,30,78,73
0161,Female,56,79,35
0162,Female,29,79,83
0163,Male,19,81,5
0164,Female,31,81,93
0165,Male,50,85,26
0166,Female,36,85,75
0167,Male,42,86,20
0168,Female,33,86,95
0169,Female,36,87,27
0170,Male,32,87,63
0171,Male,40,87,13
0172,Male,28,87,75
0173,Male,36,87,10
0174,Male,36,87,92
0175,Female,52,88,13
0176,Female,30,88,86
0177,Male,58,88,15
0178,Male,27,88,69
0179,Male,59,93,14
0180,Male,35,93,90
0181,Female,37,97,32
0182,Female,32,97,86
0183,Male,46,98,15
0184,Female,29,98,88
0185,Female,41,99,39
0186,Male,30,99,97
0187,Female,54,101,24
0188,Male,28,101,68
0189,Female,41,103,17
0190,Female,36,103,85
0191,Female,34,103,23
0192,Female,32,103,69
0193,Male,33,113,8
0194,Female,38,113,91
0195,Female,47,120,16
0196,Female,35,120,79
0197,Female,45,126,28
0198,Male,32,126,74
0199,Male,32,137,18
0200,Male,30,137,83
@@ -0,0 +1,23 @@
# Data Preprocessing Template

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 3].values

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# Feature Scaling
"""from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)"""
48 changes: 48 additions & 0 deletions Section 25 - Hierarchical Clustering/Hierarchical_Clustering/hc.py
@@ -0,0 +1,48 @@
# Hierarchical Clustering

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Mall_Customers.csv')
X = dataset.iloc[:, [3, 4]].values
# y = dataset.iloc[:, 3].values

# Splitting the dataset into the Training set and Test set
"""from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)"""

# Feature Scaling
"""from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)"""

# Using the dendrogram to find the optimal number of clusters
import scipy.cluster.hierarchy as sch
dendrogram = sch.dendrogram(sch.linkage(X, method = 'ward'))
plt.title('Dendrogram')
plt.xlabel('Customers')
plt.ylabel('Euclidean distances')
plt.show()

# Fitting Hierarchical Clustering to the dataset
from sklearn.cluster import AgglomerativeClustering
hc = AgglomerativeClustering(n_clusters = 5, linkage = 'ward') # Ward linkage implies Euclidean distances
y_hc = hc.fit_predict(X)

# Visualising the clusters
plt.scatter(X[y_hc == 0, 0], X[y_hc == 0, 1], s = 100, c = 'red', label = 'Cluster 1')
plt.scatter(X[y_hc == 1, 0], X[y_hc == 1, 1], s = 100, c = 'blue', label = 'Cluster 2')
plt.scatter(X[y_hc == 2, 0], X[y_hc == 2, 1], s = 100, c = 'green', label = 'Cluster 3')
plt.scatter(X[y_hc == 3, 0], X[y_hc == 3, 1], s = 100, c = 'cyan', label = 'Cluster 4')
plt.scatter(X[y_hc == 4, 0], X[y_hc == 4, 1], s = 100, c = 'magenta', label = 'Cluster 5')
plt.title('Clusters of customers')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.legend()
plt.show()
@@ -0,0 +1,28 @@
# Hierarchical Clustering (standalone script)

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset; columns 3 and 4 are Annual Income and Spending Score
dataset = pd.read_csv('Mall_Customers.csv')
x = dataset.iloc[:, [3, 4]].values

# Using the dendrogram to choose the number of clusters
import scipy.cluster.hierarchy as sch
dendrogram = sch.dendrogram(sch.linkage(x, method='ward'))
plt.title('Dendrograms')
plt.xlabel('Customers')
plt.ylabel('Euclidean distances')
plt.show()

# Fitting hierarchical (agglomerative) clustering with Ward linkage
from sklearn.cluster import AgglomerativeClustering
hc = AgglomerativeClustering(n_clusters=5, linkage='ward')  # Ward linkage implies Euclidean distances
y_hc = hc.fit_predict(x)

# Visualising the five clusters
plt.scatter(x[y_hc == 0, 0], x[y_hc == 0, 1], s=50, c='red', label='cluster 1')
plt.scatter(x[y_hc == 1, 0], x[y_hc == 1, 1], s=50, c='blue', label='cluster 2')
plt.scatter(x[y_hc == 2, 0], x[y_hc == 2, 1], s=50, c='green', label='cluster 3')
plt.scatter(x[y_hc == 3, 0], x[y_hc == 3, 1], s=50, c='cyan', label='cluster 4')
plt.scatter(x[y_hc == 4, 0], x[y_hc == 4, 1], s=50, c='magenta', label='cluster 5')
plt.legend()
plt.show()
