Skip to content

Commit

Permalink
Merge branch 'dev' of https://github.com/apachecn/MachineLearning int…
Browse files Browse the repository at this point in the history
…o dev
  • Loading branch information
jiangzhonglian committed May 8, 2018
2 parents c35db2e + 1e59a11 commit a7ee33f
Show file tree
Hide file tree
Showing 8 changed files with 54 additions and 10 deletions.
2 changes: 1 addition & 1 deletion docs/11.使用Apriori算法进行关联分析.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ Apriori 算法流程步骤:
* 分析数据:使用任意方法。
* 训练数据:使用Apiori算法来找到频繁项集。
* 测试算法:不需要测试过程。
* 使用算法:用语发现频繁项集以及物品之间的关联规则
* 使用算法:用于发现频繁项集以及物品之间的关联规则
```

## Apriori 算法的使用
Expand Down
4 changes: 4 additions & 0 deletions src/py2.x/ML/15.BigData_MapReduce/proximalSVM.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@
@author: Peter/ApacheCN-xy/片刻
《机器学习实战》更新地址:https://github.com/apachecn/MachineLearning
'''
import base64
import pickle

import numpy


def map(key, value):
# input key= class for one training example, e.g. "-1.0"
classes = [float(item) for item in key.split(",")] # e.g. [-1.0]
Expand Down
29 changes: 29 additions & 0 deletions src/py2.x/ML/3.DecisionTree/DecisionTree.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,9 @@ def fishTest():
print myTree
# [1, 1]表示要取的分支上的节点位置,对应的结果值
print classify(myTree, labels, [1, 1])

# 获得树的高度
print get_tree_height(myTree)

# 画图可视化展现
dtPlot.createPlot(myTree)
Expand All @@ -353,6 +356,32 @@ def ContactLensesTest():
print lensesTree
# 画图可视化展现
dtPlot.createPlot(lensesTree)


def get_tree_height(tree):
    """Recursively compute the height of a decision tree.

    Args:
        tree: a decision-tree node -- either a leaf label (any non-dict
              value) or a dict of the form
              {feature_name: {feature_value: subtree, ...}}.

    Returns:
        int: height of the tree; a single leaf counts as height 1.
    """
    # A non-dict node is a leaf label.
    if not isinstance(tree, dict):
        return 1

    # The single top-level key is the split feature; its value maps each
    # feature value to a subtree.  Wrapping in list() keeps this working on
    # both Python 2 and Python 3, where dict.values() returns a view that
    # is not subscriptable.
    child_trees = list(tree.values())[0].values()

    # Height is this node plus the tallest subtree (degenerate empty
    # branch dict still counts as height 1).
    child_heights = [get_tree_height(child) for child in child_trees]
    return max(child_heights) + 1 if child_heights else 1


if __name__ == "__main__":
Expand Down
4 changes: 3 additions & 1 deletion src/py3.x/16.RecommenderSystems/test_evaluation_model.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import math
import random


def SplitData(data, M, k, seed):
test = []
Expand Down Expand Up @@ -69,4 +72,3 @@ def Popularity(train, test, N):
n += 1
ret /= n * 1.0
return ret

7 changes: 4 additions & 3 deletions src/py3.x/16.RecommenderSystems/test_基于物品.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import math
from operator import itemgetter


def ItemSimilarity1(train):
#calculate co-rated users between items
Expand Down Expand Up @@ -28,7 +31,7 @@ def ItemSimilarity2(train):
N[i] += 1
for j in users:
if i == j:
continue
continue
C[i][j] += 1 / math.log(1 + len(items) * 1.0)

#calculate finial similarity matrix W
Expand Down Expand Up @@ -60,5 +63,3 @@ def Recommendation2(train, user_id, W, K):
rank[j].weight += pi * wj
rank[j].reason[i] = pi * wj
return rank


4 changes: 3 additions & 1 deletion src/py3.x/16.RecommenderSystems/test_基于用户.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import math
from operator import itemgetter


def UserSimilarity1(train):
W = dict()
Expand Down Expand Up @@ -75,4 +78,3 @@ def Recommend(user, train, W):
continue
rank[i] += wuv * rvi
return rank

12 changes: 8 additions & 4 deletions src/py3.x/ML/15.BigData_MapReduce/proximalSVM.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@
@author: Peter/ApacheCN-xy/片刻
《机器学习实战》更新地址:https://github.com/apachecn/MachineLearning
'''
import base64
import pickle

import numpy


def map(key, value):
# input key= class for one training example, e.g. "-1.0"
classes = [float(item) for item in key.split(",")] # e.g. [-1.0]
Expand All @@ -19,16 +23,16 @@ def map(key, value):

# create matrix E and vector e
e = numpy.matrix(numpy.ones(len(A)).reshape(len(A), 1))
E = numpy.matrix(numpy.append(A, -e, axis=1))
E = numpy.matrix(numpy.append(A, -e, axis=1))

# create a tuple with the values to be used by reducer
# and encode it with base64 to avoid potential trouble with '\t' and '\n' used
# as default separators in Hadoop Streaming
producedvalue = base64.b64encode(pickle.dumps( (E.T*E, E.T*D*e))
producedvalue = base64.b64encode(pickle.dumps((E.T*E, E.T*D*e)))

# note: a single constant key "producedkey" sends to only one reducer
# somewhat "atypical" due to low degree of parallism on reducer side
print "producedkey\t%s" % (producedvalue)
print("producedkey\t%s" % (producedvalue))

def reduce(key, values, mu=0.1):
sumETE = None
Expand All @@ -52,4 +56,4 @@ def reduce(key, values, mu=0.1):
# note: omega = result[:-1] and gamma = result[-1]
# but printing entire vector as output
result = sumETE.I*sumETDe
print "%s\t%s" % (key, str(result.tolist()))
print("%s\t%s" % (key, str(result.tolist())))
2 changes: 2 additions & 0 deletions src/py3.x/ML/8.Regression/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,3 +582,5 @@ def regression5():
# regression3()
# regression4()
# regression5()
pass

0 comments on commit a7ee33f

Please sign in to comment.