In [2]:
# Import the itchat library.
import itchat
# Log in to WeChat; a QR code is shown and must be scanned.
itchat.login()
# Fetch the friends' profile information (update=True is passed to get_friends).
# The [0:] slice keeps every entry; later cells index each entry by string keys
# such as "Sex" and treat friends[0] as the logged-in user's own record.
friends = itchat.get_friends(update=True)[0:]


Getting uuid of QR code.
Downloading QR code.
Please scan the QR code to log in.
Please press confirm on your phone.
Loading the contact, this may take a little while.
Login successfully as 🍋盛娇娇

In [3]:
# Tally contacts by the "Sex" field: 1 is counted as male, 2 as female,
# and any other value as unspecified.
male = female = other = 0
# friends[0] is the logged-in user's own record, so the tally starts at [1].
contacts = friends[1:]
for i in contacts:
    sex = i["Sex"]
    if sex == 1:
        male += 1
    elif sex == 2:
        female += 1
    else:
        other += 1
# Total number of friends (excluding oneself).
total = len(contacts)
# With no friends every counter is 0; dividing by 1 instead of 0 then reports
# 0.00% for each category rather than raising ZeroDivisionError.
denominator = total or 1
# Print the gender ratio, one category per line.
print("男性好友: %.2f%%" % (float(male) / denominator * 100) + "\n" +
      "女性好友: %.2f%%" % (float(female) / denominator * 100) + "\n" +
      "不明性别好友: %.2f%%" % (float(other) / denominator * 100))


男性好友: 43.81%
女性好友: 50.21%
不明性别好友: 5.98%

In [ ]:
# Helper that extracts one profile field from every contact record.
def get_var(var, contacts=None):
    """Collect the value stored under key ``var`` in every contact record.

    Args:
        var: Key to look up in each record (e.g. "NickName", "Sex").
        contacts: Iterable of mapping-like records to read from. When omitted
            (None), the notebook-level ``friends`` list is used, which is the
            original behaviour.

    Returns:
        A list with one value per record, in the same order as ``contacts``.

    Raises:
        KeyError: If a dict record does not contain ``var``.
    """
    if contacts is None:
        # Resolved at call time so the helper keeps working after `friends`
        # is refreshed in another cell.
        contacts = friends
    return [contact[var] for contact in contacts]
# Extract each profile field into its own list, then save them all to a CSV file.
NickName, Sex, Province, City, Signature = (
    get_var(field)
    for field in ("NickName", "Sex", "Province", "City", "Signature")
)
from pandas import DataFrame
# One column per field, in the same order as the lists above.
data = dict(
    NickName=NickName,
    Sex=Sex,
    Province=Province,
    City=City,
    Signature=Signature,
)
frame = DataFrame(data)
# index=True writes the row index as the first CSV column.
frame.to_csv('data.csv', index=True)

In [ ]:
import re

import jieba
import matplotlib.pyplot as plt
import numpy as np
import PIL.Image as Image
from wordcloud import WordCloud, ImageColorGenerator

# Machine-specific resources: the image used as mask/colour source and a font
# file passed to WordCloud as font_path. Adjust these paths to your environment.
MASK_IMAGE_PATH = "/Users/Lea/Downloads/wechat.JPG"
FONT_PATH = "/Library/Fonts/Microsoft/SimHei.ttf"

# Removes tokens starting with "1f" followed by digits/word characters, plus the
# characters < > / =. Written as a raw string so \d and \w are not treated as
# (invalid) Python string escapes, and compiled once instead of once per friend.
rep = re.compile(r"1f\d+\w*|[<>/=]")

siglist = []
# Strip emoji markup and other irrelevant characters from every signature.
for i in friends:
    signature = i["Signature"].strip().replace("span", "").replace("class", "").replace("emoji", "")
    signature = rep.sub("", signature)
    siglist.append(signature)
# NOTE(review): signatures are concatenated without a separator, so text from
# adjacent signatures is contiguous when segmented — confirm this is intended.
text = "".join(siglist)

# Segment the combined text with jieba (cut_all=True) and join tokens with spaces.
wordlist = jieba.cut(text, cut_all=True)
word_space_split = " ".join(wordlist)

# Build the keyword word cloud, using the image both as mask and colour source.
coloring = np.array(Image.open(MASK_IMAGE_PATH))
my_wordcloud = WordCloud(background_color="white", max_words=2000,
                         mask=coloring, max_font_size=60, random_state=42, scale=2,
                         font_path=FONT_PATH).generate(word_space_split)

image_colors = ImageColorGenerator(coloring)
# recolor() updates my_wordcloud in place and returns it, so a single imshow
# of the recoloured cloud is enough; drawing my_wordcloud again would only
# repeat the same image.
plt.imshow(my_wordcloud.recolor(color_func=image_colors))
plt.axis("off")
plt.show()


Building prefix dict from the default dictionary ...
Loading model from cache /var/folders/36/82mxc7_d48x3hf___5qvfhkm0000gn/T/jieba.cache
Loading model cost 1.466 seconds.
Prefix dict has been built succesfully.
LOG OUT!