notebook.community

Edit and run



In [1]:

    
from os import path
from PIL import Image
from wordcloud import WordCloud, ImageColorGenerator
import jieba
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import numpy as np
import datawash



In [2]:

    
# Walk every user record and collect the per-user counters (upvotes, thanks,
# followers, favorites, answers, articles, questions).
# Result: dicts[k] maps user name -> value of keys[k] for that user.
keys = ['voteupCount','thankedCount','followerCount','favoritedCount','answerCount','articlesCount','questionCount']
dicts = [dict() for _ in keys]
jsons = datawash.datajsons()  # presumably an iterable of per-user dicts -- TODO confirm against datawash
for user in jsons:
    try:
        user_name = user['name']
    except KeyError:
        # A record without a name cannot be used as a word-cloud label; skip it
        # instead of aborting the whole pass.
        continue
    for metric, counts in zip(keys, dicts):
        # The first record seen for a given name wins; later duplicates are ignored.
        if user_name in counts:
            continue
        try:
            counts[user_name] = user[metric]
        except KeyError:
            # This user has no value for the metric. Only the missing-key case
            # is tolerated, so unrelated errors are no longer silently hidden.
            pass



In [3]:

    
# Font file handed to WordCloud
font = path.join(path.abspath('.'), 'fonts','fangzhengqingkebenyuesongjianti.TTF')
# Load the mask image; the context manager releases the file once the pixels
# have been copied into the array.
with Image.open(path.join('image', "mask1.png")) as mask_image:
    mask = np.array(mask_image)
image_colors = ImageColorGenerator(mask)  # not used in this cell; kept so the name stays defined
keys = ['voteupCount','thankedCount','followerCount','favoritedCount','answerCount','articlesCount','questionCount']

# One word cloud per metric: keys[k] names the output file for dicts[k].
for metric, frequencies in zip(keys, dicts):
    if not frequencies:
        # generate_from_frequencies raises ValueError on an empty mapping;
        # skip it so the remaining metrics are still rendered.
        print('skip %s: no data to draw' % metric)
        continue

    # Configure font, size and mask, then build the cloud
    wc = WordCloud(font_path=font,background_color='white',max_font_size=250,mask=mask).generate_from_frequencies(frequencies)

    # Plot
    fig,ax = plt.subplots(1,1)
    ax.imshow(wc, interpolation="bilinear")
    ax.axis("off")
    # Save the rendered cloud next to the mask images
    wc.to_file(path.join('image',metric+'.png'))
    fig.show()



In [4]:

    
# Walk every user record and count how often each school, major, company,
# job title and business sector occurs.
# Result: dicts2[k] maps a name -> number of users whose record holds that
# name at FIELD_PATHS[k].

# Lookup path (sequence of keys / list indices) for each counted attribute.
# Only the first education / employment entry of a user is considered.
FIELD_PATHS = [
    ('educations', 0, 'school', 'name'),
    ('educations', 0, 'major', 'name'),
    ('employments', 0, 'company', 'name'),
    ('employments', 0, 'job', 'name'),
    ('business', 'name'),
]


def _follow_path(record, steps):
    """Return the value reached by subscripting `record` with each step in turn.

    Raises KeyError, IndexError or TypeError when a step cannot be followed.
    """
    node = record
    for step in steps:
        node = node[step]
    return node


dicts2 = [dict() for _ in FIELD_PATHS]
jsons = datawash.datajsons()  # presumably an iterable of per-user dicts -- TODO confirm against datawash
for user in jsons:
    for counts, steps in zip(dicts2, FIELD_PATHS):
        try:
            label = _follow_path(user, steps)
            counts[label] = counts.get(label, 0) + 1
        except (KeyError, IndexError, TypeError):
            # Attribute missing, empty list, or an unexpected/unhashable value:
            # this user simply does not contribute to this counter.
            continue



In [5]:

    
# Font file handed to WordCloud
font = path.join(path.abspath('.'), 'fonts','fangzhengqingkebenyuesongjianti.TTF')
# Load the mask image; the context manager releases the file once the pixels
# have been copied into the array.
with Image.open(path.join('image', "mask2.png")) as mask_image:
    mask = np.array(mask_image)
image_colors = ImageColorGenerator(mask)  # not used in this cell; kept so the name stays defined
keys = ['school','major','company','job','business']

# One word cloud per attribute: keys[k] names the output file for dicts2[k].
for attribute, frequencies in zip(keys, dicts2):
    if not frequencies:
        # generate_from_frequencies raises ValueError on an empty mapping;
        # skip it so the remaining attributes are still rendered.
        print('skip %s: no data to draw' % attribute)
        continue

    # Configure font, size and mask, then build the cloud
    wc = WordCloud(font_path=font,background_color='white',max_font_size=200,mask=mask).generate_from_frequencies(frequencies)

    # Plot
    fig,ax = plt.subplots(1,1)
    ax.imshow(wc, interpolation="bilinear")
    ax.axis("off")
    # Save the rendered cloud next to the mask images
    wc.to_file(path.join('image',attribute+'.png'))
    fig.show()



In [73]:

    
# NOTE: this entire cell is commented out (disabled); nothing here runs.
# # Walk every user record and collect the name, headline and description strings
# keys = ['name','headline','description']
# lists = [list() for i in range(len(keys))]
# jsons = datawash.datajsons()
# for user in jsons:
#     for i in range(len(keys)):
#         try:
#             lists[i].append(user[keys[i]])
#         except:
#             pass

# # Segment each collected string with jieba and count every resulting token
# dicts3 = [dict() for i in range(len(keys))]
# for i in range(len(lists)):
#     for j in lists[i]:
#         seg_list = jieba.cut(j, cut_all=False)
#         for k in seg_list:
#             if not k in dicts3[i]:
#                 dicts3[i][k] = 1
#             else:
#                 dicts3[i][k] += 1



In [78]:

    
# NOTE: this entire cell is commented out (disabled); nothing here runs.
# It would need `dicts3`, which is only built by another commented-out cell.
# # Font path
# font = path.join(path.abspath('.'), 'fonts','fangzhengqingkebenyuesongjianti.TTF')
# mask = np.array(Image.open(path.join('image', "mask2.png")))
# image_colors = ImageColorGenerator(mask)
# keys = ['name','headline','description']

# for i in range(len(dicts3)):
#     # Configure font and mask, then build the word cloud
#     wc = WordCloud(font_path=font,background_color='white',mask=mask).generate_from_frequencies(dicts3[i])
    
#     # Plot (recolored with the colors taken from the mask image)
#     fig,ax = plt.subplots(1,1)
#     ax.imshow(wc.recolor(color_func=image_colors), interpolation="bilinear")
#     ax.axis("off")
#     # Save
#     wc.to_file(path.join('image',keys[i]+'.png'))
#     fig.show()