In [1]:
from os import path
from PIL import Image
from wordcloud import WordCloud, ImageColorGenerator
import jieba
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import numpy as np
import datawash
In [2]:
# Walk every user record and collect, for each metric, a {user name: value}
# mapping (upvotes, thanks, followers, favorites, answers, articles, questions).
keys = ['voteupCount','thankedCount','followerCount','favoritedCount','answerCount','articlesCount','questionCount']
# dicts[i] holds the values for keys[i]; the two lists stay index-aligned.
dicts = [dict() for _ in keys]
jsons = datawash.datajsons()
for user in jsons:
    # NOTE(review): assumes each record is a dict-like mapping with a 'name' entry — confirm against datawash.
    name = user['name']
    for key, bucket in zip(keys, dicts):
        # The first record seen for a given name wins; later duplicates are ignored.
        if name in bucket:
            continue
        try:
            bucket[name] = user[key]
        except KeyError:
            # This record has no value for this metric: leave the user out of
            # this bucket only, instead of silently hiding every kind of error.
            continue
In [3]:
# Render one word cloud per metric collected in `dicts` and save each one as
# image/<metric>.png.

# Font path (resolved against the current working directory).
font = path.join(path.abspath('.'), 'fonts','fangzhengqingkebenyuesongjianti.TTF')
# Read the mask into an array inside a `with` block so the image file is
# closed explicitly instead of being left to the garbage collector.
with Image.open(path.join('image', "mask1.png")) as mask_image:
    mask = np.array(mask_image)
# Built from the mask; the rendering below does not apply it.
image_colors = ImageColorGenerator(mask)
keys = ['voteupCount','thankedCount','followerCount','favoritedCount','answerCount','articlesCount','questionCount']
for key, frequencies in zip(keys, dicts):
    if not frequencies:
        # generate_from_frequencies raises ValueError when given no words;
        # skip this metric rather than aborting the remaining clouds.
        print('skipping %s: no data collected' % key)
        continue
    # Set the font and maximum size, then generate the word cloud.
    wc = WordCloud(font_path=font,background_color='white',max_font_size=250,mask=mask).generate_from_frequencies(frequencies)
    # Plot.
    fig,ax = plt.subplots(1,1)
    ax.imshow(wc, interpolation="bilinear")
    ax.axis("off")
    # Save.
    wc.to_file(path.join('image',key+'.png'))
    fig.show()
In [4]:
# Walk every user record and count how often each school, major, company,
# job and business name occurs.
def _count_field(counter, record, field_path):
    """Increment ``counter[value]`` for the value found at ``field_path``.

    ``field_path`` is a sequence of keys/indexes applied to ``record`` one
    after another (e.g. ``('educations', 0, 'school', 'name')``).

    Records where the path cannot be followed are skipped: a missing key
    (KeyError), an empty list (IndexError), or a non-subscriptable or
    unhashable intermediate value (TypeError). Any other exception propagates.
    """
    try:
        value = record
        for step in field_path:
            value = value[step]
        counter[value] = counter.get(value, 0) + 1
    except (KeyError, IndexError, TypeError):
        pass


# Lookup path per counter; dicts2[i] counts the values found at field_paths[i]
# (order: school, major, company, job, business).
field_paths = [
    ('educations', 0, 'school', 'name'),
    ('educations', 0, 'major', 'name'),
    ('employments', 0, 'company', 'name'),
    ('employments', 0, 'job', 'name'),
    ('business', 'name'),
]
dicts2 = [dict() for _ in field_paths]
jsons = datawash.datajsons()
for user in jsons:
    for counter, field_path in zip(dicts2, field_paths):
        _count_field(counter, user, field_path)
In [5]:
# Render one word cloud per profile field counted in `dicts2` and save each
# one as image/<field>.png.

# Font path (resolved against the current working directory).
font = path.join(path.abspath('.'), 'fonts','fangzhengqingkebenyuesongjianti.TTF')
# Read the mask into an array inside a `with` block so the image file is
# closed explicitly instead of being left to the garbage collector.
with Image.open(path.join('image', "mask2.png")) as mask_image:
    mask = np.array(mask_image)
# Built from the mask; the rendering below does not apply it.
image_colors = ImageColorGenerator(mask)
keys = ['school','major','company','job','business']
for key, frequencies in zip(keys, dicts2):
    if not frequencies:
        # generate_from_frequencies raises ValueError when given no words;
        # skip this field rather than aborting the remaining clouds.
        print('skipping %s: no data collected' % key)
        continue
    # Set the font and maximum size, then generate the word cloud.
    wc = WordCloud(font_path=font,background_color='white',max_font_size=200,mask=mask).generate_from_frequencies(frequencies)
    # Plot.
    fig,ax = plt.subplots(1,1)
    ax.imshow(wc, interpolation="bilinear")
    ax.axis("off")
    # Save.
    wc.to_file(path.join('image',key+'.png'))
    fig.show()
In [73]:
# NOTE: this entire cell is commented out (disabled); nothing below is executed.
# # Iterate over all users and extract the user name, headline and personal description
# keys = ['name','headline','description']
# lists = [list() for i in range(len(keys))]
# jsons = datawash.datajsons()
# for user in jsons:
# for i in range(len(keys)):
# try:
# lists[i].append(user[keys[i]])
# except:
# pass
# dicts3 = [dict() for i in range(len(keys))]
# for i in range(len(lists)):
# for j in lists[i]:
# seg_list = jieba.cut(j, cut_all=False)
# for k in seg_list:
# if not k in dicts3[i]:
# dicts3[i][k] = 1
# else:
# dicts3[i][k] += 1
In [78]:
# NOTE: this entire cell is commented out (disabled); nothing below is executed.
# # Font path
# font = path.join(path.abspath('.'), 'fonts','fangzhengqingkebenyuesongjianti.TTF')
# mask = np.array(Image.open(path.join('image', "mask2.png")))
# image_colors = ImageColorGenerator(mask)
# keys = ['name','headline','description']
# for i in range(len(dicts3)):
# # Set the font and size, then generate the word cloud
# wc = WordCloud(font_path=font,background_color='white',mask=mask).generate_from_frequencies(dicts3[i])
# # Plot
# fig,ax = plt.subplots(1,1)
# ax.imshow(wc.recolor(color_func=image_colors), interpolation="bilinear")
# ax.axis("off")
# # Save
# wc.to_file(path.join('image',keys[i]+'.png'))
# fig.show()