Skip to content

Commit

Permalink
Merge branch 'dev' of https://github.com/apachecn/MachineLearning int…
Browse files Browse the repository at this point in the history
…o dev
  • Loading branch information
jiangzhonglian committed May 8, 2018
2 parents c35db2e + 1e59a11 commit a7ee33f
Show file tree
Hide file tree
Showing 8 changed files with 54 additions and 10 deletions.
2 changes: 1 addition & 1 deletion docs/11.使用Apriori算法进行关联分析.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ Apriori 算法流程步骤:
* 分析数据:使用任意方法。
* 训练数据:使用Apiori算法来找到频繁项集。
* 测试算法:不需要测试过程。
* 使用算法:用语发现频繁项集以及物品之间的关联规则
* 使用算法:用于发现频繁项集以及物品之间的关联规则
```

## Apriori 算法的使用
Expand Down
4 changes: 4 additions & 0 deletions src/py2.x/ML/15.BigData_MapReduce/proximalSVM.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@
@author: Peter/ApacheCN-xy/片刻
《机器学习实战》更新地址:https://github.com/apachecn/MachineLearning
'''
import base64
import pickle

import numpy


def map(key, value):
# input key= class for one training example, e.g. "-1.0"
classes = [float(item) for item in key.split(",")] # e.g. [-1.0]
Expand Down
29 changes: 29 additions & 0 deletions src/py2.x/ML/3.DecisionTree/DecisionTree.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,9 @@ def fishTest():
print myTree
# [1, 1]表示要取的分支上的节点位置,对应的结果值
print classify(myTree, labels, [1, 1])

# 获得树的高度
print get_tree_height(myTree)

# 画图可视化展现
dtPlot.createPlot(myTree)
Expand All @@ -353,6 +356,32 @@ def ContactLensesTest():
print lensesTree
# 画图可视化展现
dtPlot.createPlot(lensesTree)


def get_tree_height(tree):
    """Recursively compute the height of a decision tree.

    Args:
        tree: a decision-tree node -- either a leaf label (any non-dict
              value) or a dict of the form
              {feature_name: {feature_value: subtree, ...}}.

    Returns:
        int: height of the tree; a single leaf counts as height 1.
    """
    # A non-dict node is a leaf label.
    if not isinstance(tree, dict):
        return 1

    # The single top-level key is the split feature; its value maps each
    # feature value to a subtree.  Wrapping in list() keeps this working on
    # both Python 2 and Python 3, where dict.values() returns a view that
    # is not subscriptable.
    child_trees = list(tree.values())[0].values()

    # Height is this node plus the tallest subtree (degenerate empty
    # branch dict still counts as height 1).
    child_heights = [get_tree_height(child) for child in child_trees]
    return max(child_heights) + 1 if child_heights else 1


if __name__ == "__main__":
Expand Down
4 changes: 3 additions & 1 deletion src/py3.x/16.RecommenderSystems/test_evaluation_model.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import math
import random


def SplitData(data, M, k, seed):
test = []
Expand Down Expand Up @@ -69,4 +72,3 @@ def Popularity(train, test, N):
n += 1
ret /= n * 1.0
return ret

7 changes: 4 additions & 3 deletions src/py3.x/16.RecommenderSystems/test_基于物品.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import math
from operator import itemgetter


def ItemSimilarity1(train):
#calculate co-rated users between items
Expand Down Expand Up @@ -28,7 +31,7 @@ def ItemSimilarity2(train):
N[i] += 1
for j in users:
if i == j:
continue
continue
C[i][j] += 1 / math.log(1 + len(items) * 1.0)

#calculate finial similarity matrix W
Expand Down Expand Up @@ -60,5 +63,3 @@ def Recommendation2(train, user_id, W, K):
rank[j].weight += pi * wj
rank[j].reason[i] = pi * wj
return rank


4 changes: 3 additions & 1 deletion src/py3.x/16.RecommenderSystems/test_基于用户.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import math
from operator import itemgetter


def UserSimilarity1(train):
W = dict()
Expand Down Expand Up @@ -75,4 +78,3 @@ def Recommend(user, train, W):
continue
rank[i] += wuv * rvi
return rank

12 changes: 8 additions & 4 deletions src/py3.x/ML/15.BigData_MapReduce/proximalSVM.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@
@author: Peter/ApacheCN-xy/片刻
《机器学习实战》更新地址:https://github.com/apachecn/MachineLearning
'''
import base64
import pickle

import numpy


def map(key, value):
# input key= class for one training example, e.g. "-1.0"
classes = [float(item) for item in key.split(",")] # e.g. [-1.0]
Expand All @@ -19,16 +23,16 @@ def map(key, value):

# create matrix E and vector e
e = numpy.matrix(numpy.ones(len(A)).reshape(len(A), 1))
E = numpy.matrix(numpy.append(A, -e, axis=1))
E = numpy.matrix(numpy.append(A, -e, axis=1))

# create a tuple with the values to be used by reducer
# and encode it with base64 to avoid potential trouble with '\t' and '\n' used
# as default separators in Hadoop Streaming
producedvalue = base64.b64encode(pickle.dumps( (E.T*E, E.T*D*e))
producedvalue = base64.b64encode(pickle.dumps((E.T*E, E.T*D*e)))

# note: a single constant key "producedkey" sends to only one reducer
# somewhat "atypical" due to low degree of parallism on reducer side
print "producedkey\t%s" % (producedvalue)
print("producedkey\t%s" % (producedvalue))

def reduce(key, values, mu=0.1):
sumETE = None
Expand All @@ -52,4 +56,4 @@ def reduce(key, values, mu=0.1):
# note: omega = result[:-1] and gamma = result[-1]
# but printing entire vector as output
result = sumETE.I*sumETDe
print "%s\t%s" % (key, str(result.tolist()))
print("%s\t%s" % (key, str(result.tolist())))
2 changes: 2 additions & 0 deletions src/py3.x/ML/8.Regression/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,3 +582,5 @@ def regression5():
# regression3()
# regression4()
# regression5()
pass

0 comments on commit a7ee33f

Please sign in to comment.