python 统计词频 nltk库用法

##import nltk
##nltk.set_proxy('http://127.0.0.1:1080',('<proxy-user>', '<proxy-password>'))  # credentials redacted; never commit real ones
##nltk.download()
#家里的电脑环境,无法链接nltk语料库

from nltk.corpus import gutenberg
from nltk.probability import *
# Word-frequency walkthrough over Shakespeare's "Hamlet" from the NLTK
# Gutenberg corpus. The corpus data must already be available locally.

# Returns the ids of the texts in the corpus; the value is only echoed when
# run interactively (in a plain script it is discarded).
gutenberg.fileids()

# Every token of the text, punctuation tokens included.
allwords = gutenberg.words('shakespeare-hamlet.txt')
print("Total words is:", len(allwords))

# Count lower-cased alphabetic tokens only. A generator expression lets
# FreqDist consume the tokens without building a throwaway list first.
fd2 = FreqDist(sx.lower() for sx in allwords if sx.isalpha())
print("单词个数:", fd2.B())  # number of distinct words (bins)
print("单词总数:", fd2.N())  # total number of counted words

fd2.tabulate(20)  # text table limited to the first 20 samples
fd2.plot(20)  # frequency plot limited to 20 samples
fd2.plot(20, cumulative=True)  # same plot with cumulative counts

For more information see: https://www.nltk.org/data.html