添加捐赠链接+免责声明

DallasAutumn · May 16, 2018 · a5e452f · a5e452f
1 parent f8c1e2e
commit a5e452f
Show file tree

Hide file tree

Showing 4 changed files with 83 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -10,6 +10,8 @@
 * -- 视频网站：优酷 ／bilibili / Acfun / 网易云课堂，可直接在线播放。（最下方有相应链接）
 * -- [群小哥哥：红色石头的机器学习之路](https://github.com/RedstoneWill): [台湾大学林轩田机器学习基石课程 - **系列学习笔记**](https://mp.weixin.qq.com/s/xb0i55zIQVzCiSZALbvncg)
 
+**<font color=red>ApacheCN 网友捐赠页面</font>：http://www.apachecn.org/organization/664.html**
+
 ## 机器学习
 
 <table>
@@ -450,6 +452,11 @@
 | 优酷 | 网易云课堂 |
 | <a title="YouKu（机器学习视频）" href="http://i.youku.com/apachecn" target="_blank"><img width="290" src="images/MainPage/ApacheCM-ML-youku.jpg"></a> | <a title="WangYiYunKeTang（机器学习视频）" href="http://study.163.com/course/courseMain.htm?courseId=1004582003" target="_blank"><img width="290" src="images/MainPage/ApacheCM-ML-WangYiYunKeTang.png"></a> |
 
+## 免责声明
+
+ApacheCN 纯粹出于学习目的与个人兴趣翻译本书。
+
+本译文只供学习研究参考之用。ApacheCN 保留对此版本译文的署名权及其它相关权利。
 
 ## [ApacheCN 组织资源](http://www.apachecn.org/)
 

diff --git a/src/py3.x/NLP/1.语言处理与Python/test.py b/src/py3.x/NLP/1.语言处理与Python/test.py
@@ -49,6 +49,33 @@
 print(text3.count("smote"))
 print(100*text4.count('a')/len(text4))
 
+
+# Lexical diversity: the average number of times each distinct item occurs in a text.
+def lexical_diversity(text):
+    """Return the mean number of occurrences per distinct item in ``text``.
+
+    Computed as ``len(text) / len(set(text))``.
+
+    Args:
+        text: A sized iterable of hashable items (for example a list of
+            word tokens or a plain string).
+
+    Returns:
+        The ratio as a float, or ``0.0`` when ``text`` is empty.
+    """
+    total = len(text)
+    if total == 0:
+        # An empty text has no distinct items; return 0.0 instead of
+        # dividing by len(set(text)) == 0.
+        return 0.0
+    return total / len(set(text))
+
+
+def percentage(count, total):
+    """Return ``count`` expressed as a percentage of ``total``.
+
+    The result is ``100 * count / total``; for numeric arguments a zero
+    ``total`` raises ``ZeroDivisionError``.
+    """
+    scaled = 100 * count
+    return scaled / total
+
+
+"""
+测试案例
+
+In [32]: ex1 = ['Monty', 'Python', 'and', 'the', 'Holy', 'Grail']
+In [34]: sorted(ex1)
+Out[34]: ['Grail', 'Holy', 'Monty', 'Python', 'and', 'the']
+
+In [35]: len(set(ex1))
+Out[35]: 6
+
+In [36]: ex1.count("the")
+Out[36]: 1
+
+In [37]: ['Monty', 'Python'] + ['and', 'the', 'Holy', 'Grail']
+Out[37]: ['Monty', 'Python', 'and', 'the', 'Holy', 'Grail']
+"""
+
 # # 词的频率分布
 fdist1 = FreqDist(text1)
 # # 输出总的词数

diff --git a/src/py3.x/NLP/2.获得文本语料和词汇资源/test.py b/src/py3.x/NLP/2.获得文本语料和词汇资源/test.py
diff --git a/src/py3.x/NLP/play/test.py b/src/py3.x/NLP/play/test.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-  
+""" 
+Created on Mon Jun  5 09:04:16 2017 
+ 
+@author: Owner 
+"""  
+
+from os import path  
+from PIL import Image  
+import numpy as np  
+import matplotlib.pyplot as plt  
+
+from wordcloud import WordCloud, STOPWORDS  
+
+d = path.dirname(__file__)
+
+# Read the whole text.
+# The file loaded here is already word-segmented; if the input is not
+# segmented, run it through a word-segmentation tool first.
+# The context manager makes sure the file handle is closed after reading.
+with open(path.join(d, 'ctest2.txt'), encoding='utf-8') as text_file:
+    text = text_file.read()
+
+# read the mask image
+# taken from
+# http://www.stencilry.org/stencils/movies/alice%20in%20wonderland/255fk.jpg
+alice_mask = np.array(Image.open(path.join(d, "alice_mask.png")))
+
+stopwords = set(STOPWORDS)
+stopwords.add("said")
+
+wc = WordCloud(
+    # Set the font; without it the text is rendered garbled. This font file
+    # has to be downloaded separately.
+    font_path="HYQiHei-25JF.ttf",
+    background_color="white",
+    max_words=2000,
+    mask=alice_mask,
+    stopwords=stopwords)
+
+# generate word cloud
+wc.generate(text)
+
+# store to file
+wc.to_file(path.join(d, "alice_cloud.png"))
+
+# show the generated cloud, then the mask image in a second figure
+plt.imshow(wc, interpolation='bilinear')
+plt.axis("off")
+plt.figure()
+plt.imshow(alice_mask, cmap=plt.cm.gray, interpolation='bilinear')
+plt.axis("off")
+plt.show()