Skip to content

Commit

Permalink
print() is a function in Python 3
Browse files Browse the repository at this point in the history
  • Loading branch information
cclauss authored Jun 6, 2018
1 parent 73b04e5 commit 4f12cc3
Showing 1 changed file with 13 additions and 13 deletions.
26 changes: 13 additions & 13 deletions src/py3.x/ML/13.PCA/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ def pca(dataMat, topNfeat=9999999):

# 计算每一列的均值
meanVals = mean(dataMat, axis=0)
# print ('meanVals', meanVals)
# print('meanVals', meanVals)

# 每个向量同时都减去 均值
meanRemoved = dataMat - meanVals
# print ('meanRemoved=', meanRemoved)
# print('meanRemoved=', meanRemoved)

# cov协方差=[(x1-x均值)*(y1-y均值)+(x2-x均值)*(y2-y均值)+...+(xn-x均值)*(yn-y均值)]/(n-1)
'''
Expand All @@ -53,8 +53,8 @@ def pca(dataMat, topNfeat=9999999):

# eigVals为特征值, eigVects为特征向量
eigVals, eigVects = linalg.eig(mat(covMat))
# print ('eigVals=', eigVals)
# print( 'eigVects=', eigVects)
# print('eigVals=', eigVals)
# print('eigVects=', eigVects)
# 对特征值,进行从小到大的排序,返回从小到大的index序号
# 特征值的逆序就可以得到topNfeat个最大的特征向量
'''
Expand All @@ -70,20 +70,20 @@ def pca(dataMat, topNfeat=9999999):
array([0, 2, 1])
'''
eigValInd = argsort(eigVals)
# print ('eigValInd1=', eigValInd)
# print('eigValInd1=', eigValInd)

# -1表示倒序,返回topN的特征值[-1 到 -(topNfeat+1) 但是不包括-(topNfeat+1)本身的倒序]
eigValInd = eigValInd[:-(topNfeat+1):-1]
# print ('eigValInd2=', eigValInd)
# print('eigValInd2=', eigValInd)
# 重组 eigVects 最大到最小
redEigVects = eigVects[:, eigValInd]
# print ('redEigVects=', redEigVects.T)
# print('redEigVects=', redEigVects.T)
# 将数据转换到新空间
# print( "---", shape(meanRemoved), shape(redEigVects))
lowDDataMat = meanRemoved * redEigVects
reconMat = (lowDDataMat * redEigVects.T) + meanVals
# print ('lowDDataMat=', lowDDataMat)
# print ('reconMat=', reconMat)
# print('lowDDataMat=', lowDDataMat)
# print('reconMat=', reconMat)
return lowDDataMat, reconMat


Expand Down Expand Up @@ -130,7 +130,7 @@ def analyse_data(dataMat):
最后,我们可能会注意到有一些小的负值,它们主要源自数值误差,应该四舍五入成0.
'''
print '主成分:%s, 方差占比:%s%%, 累积方差占比:%s%%' % (format(i+1, '2.0f'), format(line_cov_score/cov_all_score*100, '4.2f'), format(sum_cov_score/cov_all_score*100, '4.1f'))
print('主成分:%s, 方差占比:%s%%, 累积方差占比:%s%%' % (format(i+1, '2.0f'), format(line_cov_score/cov_all_score*100, '4.2f'), format(sum_cov_score/cov_all_score*100, '4.1f')))


if __name__ == "__main__":
Expand All @@ -140,14 +140,14 @@ def analyse_data(dataMat):
# lowDmat, reconMat = pca(dataMat, 1)
# # 只需要2个特征向量,和原始数据一致,没任何变化
# # lowDmat, reconMat = pca(dataMat, 2)
# # print (shape(lowDmat))
# # print(shape(lowDmat))
# show_picture(dataMat, reconMat)

# 利用PCA对半导体制造数据降维
dataMat = replaceNanWithMean()
print (shape(dataMat))
print(shape(dataMat))
# 分析数据
analyse_data(dataMat)
# lowDmat, reconMat = pca(dataMat, 20)
# print (shape(lowDmat))
# print(shape(lowDmat))
# show_picture(dataMat, reconMat)

0 comments on commit 4f12cc3

Please sign in to comment.