Using Word Clouds
Installing the wordcloud package can be a bit of a hassle!
It also does not support Chinese out of the box: you have to point font_path at a Chinese font, otherwise Chinese characters render as empty boxes.
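Before building anything, it is worth confirming that a suitable Chinese font actually exists on the machine. The lines below are a minimal sketch, assuming matplotlib is installed; the SimHei path is the one used throughout this section, and the name-based fallback is only a heuristic you may need to adjust.
import os
from matplotlib import font_manager
font_path = r'C:\Windows\Fonts\simhei.ttf'  # SimHei ships with most Windows installs
if not os.path.exists(font_path):
    # Heuristic fallback: pick any installed font whose name hints at CJK support
    candidates = [f.fname for f in font_manager.fontManager.ttflist
                  if any(key in f.name for key in ('Hei', 'Song', 'Kai', 'CJK'))]
    font_path = candidates[0] if candidates else None
print(font_path)  # None means no obvious Chinese font was found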
import wordcloud
myfont = r'C:\Windows\Fonts\simhei.ttf'
text = 'this is shanghai, 郭靖, 和, 哀牢山 三十六剑'
cloudobj = wordcloud.WordCloud(font_path=myfont).generate(text)
cloudobj  # evaluating the object in a notebook cell only shows its repr; the plot below actually displays it
import matplotlib.pyplot as plt
plt.imshow(cloudobj)
# plt.axis('off')  # uncomment to hide the axis ticks and frame
plt.show()
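After generate has run, the fitted word weights are kept on the object, which is handy for a quick sanity check: words_ maps each retained token to a weight relative to the most frequent one, and layout_ records the placement chosen for each word.
print(cloudobj.words_)
print(cloudobj.layout_[:2])  # (word, weight), font size, position, orientation, colour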
import wordcloud
import matplotlib.pyplot as plt
# Tweak the word cloud settings: a smaller canvas and a transparent background (mode='RGBA' with background_color=None)
myfont = r'C:\Windows\Fonts\simhei.ttf'
text = 'this is shanghai, 郭靖, 和, 哀牢山 三十六剑'
cloudobj = wordcloud.WordCloud(font_path=myfont, width=360, height=180, mode='RGBA', background_color=None).generate(text)
plt.imshow(cloudobj)
plt.axis('off')
plt.show()
# Save the word cloud to disk
cloudobj.to_file("词云.png")
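to_file infers the image format from the file extension; with the transparent RGBA background used above, PNG is the right choice. If you would rather post-process the result than write it straight to disk, the object can also be turned into a PIL image or a NumPy array (a small sketch reusing cloudobj from above):
img = cloudobj.to_image()   # PIL.Image, e.g. for further editing or img.save(...)
arr = cloudobj.to_array()   # NumPy array, height x width x 4 here because of mode='RGBA'
print(img.size, arr.shape)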
import pandas as pd
import jieba
import wordcloud
import matplotlib.pyplot as plt
myfont = r'C:\Windows\Fonts\simhei.ttf'
text = chapter.txt[1]  # text produced by the preprocessing earlier in these notes (the chapter variable comes from that step)
pd_stop_word = pd.read_csv('../data/停用词.txt', names=['w', 'z'], encoding='utf-8')
pd_stop_word_list = list(pd_stop_word.w.astype(str))  # astype(str) guards against non-string values that would otherwise raise errors
def m_cut(text):
    return [word for word in jieba.cut(text) if word not in pd_stop_word_list]
# Here the raw jieba tokens are passed in and WordCloud's stopwords argument does the filtering
cloudobj = wordcloud.WordCloud(font_path=myfont, width=1200, height=800, mode='RGBA', background_color=None, stopwords=pd_stop_word_list).generate(' '.join(jieba.lcut(text)))
plt.imshow(cloudobj)
plt.axis("off")
plt.show()
The following variant, which filters the stop words with m_cut before calling generate, works just as well; note that the colours and word positions change every time the cloud is regenerated.
import pandas as pd
import jieba
import wordcloud
import matplotlib.pyplot as plt
myfont = r'C:\Windows\Fonts\simhei.ttf'
text = chapter.txt[1]  # same preprocessed text as above
pd_stop_word = pd.read_csv('../data/停用词.txt', names=['w', 'z'], encoding='utf-8')
pd_stop_word_list = list(pd_stop_word.w.astype(str))  # astype(str) avoids type errors
def m_cut(text):
    return [word for word in jieba.cut(text) if word not in pd_stop_word_list]
cloudobj = wordcloud.WordCloud(font_path=myfont, width=1200, height=800, mode='RGBA', background_color=None).generate(' '.join(m_cut(text)))
plt.imshow(cloudobj)
plt.axis("off")
plt.show()
# Save the image
cloudobj.to_file('词云2.png')
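As noted above, the layout and colours are randomised on every run. If a reproducible picture is needed, the WordCloud constructor accepts a random_state argument; a short sketch, reusing myfont, m_cut and text from the block above (the output filename is just an example):
cloudobj = wordcloud.WordCloud(font_path=myfont, width=1200, height=800, mode='RGBA', background_color=None, random_state=42).generate(' '.join(m_cut(text)))
cloudobj.to_file('词云2_fixed.png')  # example filename; with the fixed seed the layout should no longer change between runs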
# Draw a word cloud directly from a word-frequency dictionary
text_freq = {'张三': 200, '李四': 50, '王五': 130, '刘七':90}
cloudobj = wordcloud.WordCloud(font_path=myfont).fit_words(text_freq)
plt.imshow(cloudobj)
plt.axis('off')
plt.show()
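fit_words is documented as an alias of generate_from_frequencies, so any dict-like mapping from word to weight is accepted, and the weights are treated as relative rather than absolute counts. The equivalent direct call looks like this:
cloudobj = wordcloud.WordCloud(font_path=myfont).generate_from_frequencies(text_freq)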
import nltk
import jieba
import wordcloud
import matplotlib.pyplot as plt
from nltk import FreqDist
myfont = r'C:\Windows\Fonts\simhei.ttf'
def m_cut(text):
    return [word for word in jieba.cut(text) if word not in pd_stop_word_list]
tokens = m_cut(chapter.txt[1])
fdist = FreqDist(tokens)  # the complete word-frequency dictionary for the chapter
print(type(fdist))
cloudobj = wordcloud.WordCloud(font_path=myfont).fit_words(fdist)
plt.imshow(cloudobj)
plt.axis('off')
plt.show()
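Because fit_words only needs a word-to-count mapping, the standard library's collections.Counter is a lighter alternative to FreqDist when nltk is not otherwise required; a sketch using the same tokens:
from collections import Counter
freq = Counter(tokens)  # word -> count, the same information FreqDist provides here
cloudobj = wordcloud.WordCloud(font_path=myfont).fit_words(freq)
plt.imshow(cloudobj)
plt.axis('off')
plt.show()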