In [1]:
# Enable inline plotting for the notebook.
# NOTE(review): %pylab also star-imports NumPy and matplotlib names into the
# global namespace, so later cells may rely on names that are never imported
# explicitly -- check for such uses before switching to `%matplotlib inline`.
%pylab inline
In [2]:
# The Graph API access token has to be refreshed regularly to stay usable.
# Supply it through the FB_ACCESS_TOKEN environment variable so the secret is
# not saved inside the notebook; the placeholder below is only a fallback and
# must be replaced with a real token if the variable is not set.
import os

ACCESS_TOKEN = os.environ.get("FB_ACCESS_TOKEN", "your TOKEN")
In [31]:
import facebook # pip install facebook-sdk
import requests
from IPython.display import Image
import json
import re
import dateutil.parser as dateparser
import datetime
import matplotlib.pyplot as plt
def pp(o):
    """Pretty-print a Python object as JSON, indented by one space per level.

    o -- any object that json.dumps can serialise.
    """
    # print is called as a function so the helper runs on Python 3 as well as
    # Python 2, matching the print(...) calls used in the rest of the notebook.
    print(json.dumps(o, indent=1))
# Create a Graph API client authenticated with ACCESS_TOKEN; `g` is the
# handle the following cells use for every Graph API request.
g = facebook.GraphAPI(ACCESS_TOKEN)
In [32]:
# Dcard is used as the example page; its id can be read from the page URL.
about = g.get_object('dcard.tw')

# Numeric fields (like count, talking-about count) are converted with str()
# before being joined to their label.
for label, field in (('點讚數:', 'likes'), ('談論數:', 'talking_about_count')):
    print(label + str(about[field]))

# Text fields (category, name, id, founding date) are joined to their label as-is.
for label, field in (('分類:', 'category'), ('name:', 'name'),
                     ('id:', 'id'), ('成立時間:', 'founded')):
    print(label + about[field])

# The description gets a heading line of its own.
print('描述')
print(about['description'])
In [5]:
# Fetch the 'feed' connection of the page looked up above.
# NOTE(review): limit=4 is forwarded to the API call, so presumably at most
# four posts are returned here and later cells only ever see those -- confirm.
connection_type = 'feed'
feed = g.get_connections(about['id'], connection_type, limit=4)
In [6]:
# Display a local PNG inline. The absolute path is specific to the author's
# machine, so this cell fails elsewhere unless the file is supplied.
Image(filename='/Users/wy/Desktop/fb1.png')
Out[6]:
In [7]:
def resUrl(content, total_posts, fetch=None):
    """Append the items of a Graph API result page, and of every page after it, to a list.

    content     -- decoded JSON of one result page: a dict that may hold a
                   'data' list and a 'paging' dict with a 'next' URL.
    total_posts -- list that is extended in place with every item found.
    fetch       -- optional callable taking a URL and returning the decoded
                   JSON of that page; defaults to requests.get(url).json().

    Returns None; the result is delivered through total_posts.
    """
    if fetch is None:
        fetch = lambda url: requests.get(url).json()

    # Walk the pages iteratively: each page is appended exactly once (the last
    # page included) and a long chain of pages cannot hit the recursion limit.
    while True:
        # A page without 'data' (for example an error payload) contributes nothing.
        total_posts.extend(content.get('data', []))
        # The final page carries no 'next' link, and may lack 'paging' entirely.
        next_url = content.get('paging', {}).get('next')
        if not next_url:
            break
        content = fetch(next_url)
def likesSum(message):
    """Return every like entry of a post as a new list, following pagination.

    message -- decoded JSON of one feed post. The 'likes' field, its 'data'
               list and its 'paging' dict are all treated as optional, so a
               post without any of them yields an empty or partial list
               instead of raising KeyError.
    """
    likes = message.get('likes') or {}
    # Copy the first page so the caller's post dict is never mutated.
    total_posts = list(likes.get('data', []))
    # Further pages exist only when the first page advertises a 'next' URL.
    next_url = (likes.get('paging') or {}).get('next')
    if next_url:
        content = requests.get(next_url).json()
        resUrl(content, total_posts)
    return total_posts
def commentsSum(message):
    """Return every comment entry of a post as a new list, following pagination.

    message -- decoded JSON of one feed post. The 'comments' field, its 'data'
               list and its 'paging' dict are all treated as optional, so a
               post without any of them yields an empty or partial list
               instead of raising KeyError.
    """
    comments = message.get('comments') or {}
    # Copy the first page so the caller's post dict is never mutated.
    total_posts = list(comments.get('data', []))
    # Further pages exist only when the first page advertises a 'next' URL.
    next_url = (comments.get('paging') or {}).get('next')
    if next_url:
        content = requests.get(next_url).json()
        resUrl(content, total_posts)
    return total_posts
def messageInfo(message):
    """Build a flat summary dict for one feed post.

    message -- decoded JSON of one feed post; 'created_time', 'id' and
               'updated_time' are required, 'message' and 'shares' are optional.

    Returns a dict with the keys created_time, message, id, shares_count,
    updated_time, likes_messages, likes_count, comments_messages and
    comments_count.
    """
    sumLikes = likesSum(message)
    sumComments = commentsSum(message)

    # 'shares' is optional on a post. Only the "field is missing" cases are
    # caught, and the fallback is the integer 0 so shares_count always has the
    # same type as a real count.
    try:
        sharesCount = message['shares']['count']
    except (KeyError, TypeError):
        sharesCount = 0

    return {
        # Creation time of the post
        'created_time': message['created_time'],
        # Wall text; falls back to an empty string when the post has no text
        'message': message.get('message', ''),
        'id': message['id'],
        # Number of shares
        'shares_count': sharesCount,
        # Last update time of the post
        'updated_time': message['updated_time'],
        # Like entries and how many there are
        'likes_messages': sumLikes,
        'likes_count': len(sumLikes),
        # Comment entries and how many there are
        'comments_messages': sumComments,
        'comments_count': len(sumComments),
    }
In [14]:
# Demo: collect the posts created between 04/22 14:00 and 04/22 16:00 (2015, +0000).
# Each input list is [month, day, hour], given as zero-padded strings.
startTimeInput = ['04','22','14']
endTimeInput = ['04','22','16']

# Build the two window boundaries as ISO-8601 timestamps, then parse them.
startTime = '2015-{0}-{1}T{2}:00:00+0000'.format(*startTimeInput)
endTime = '2015-{0}-{1}T{2}:00:00+0000'.format(*endTimeInput)
sd = dateparser.parse(startTime)
fd = dateparser.parse(endTime)

# Keep a summary of every fetched post whose creation time lies inside the
# window (both boundaries included).
messageInfoAll = []
for post in feed['data']:
    if sd <= dateparser.parse(post['created_time']) <= fd:
        messageInfoAll.append(messageInfo(post))
In [15]:
# Number of posts that fell inside the time window (only two were captured
# in the original run).
len(messageInfoAll)
Out[15]:
In [16]:
# List the creation timestamp of every fetched post.
for post in feed['data']:
    print(post['created_time'])
In [29]:
# Display a local PNG inline. The absolute path is specific to the author's
# machine, so this cell fails elsewhere unless the file is supplied.
Image(filename='/Users/wy/Desktop/fb2.png')
Out[29]:
In [30]:
# Display a local PNG inline. The absolute path is specific to the author's
# machine, so this cell fails elsewhere unless the file is supplied.
Image(filename='/Users/wy/Desktop/fb3.png')
Out[30]:
In [25]:
# Imported under an alias so the draw below is guaranteed to use the standard
# library and no existing global named `random` is rebound.
import random as stdlib_random

# The first collected post is the one to draw from: remember its id and show its text.
targetId = messageInfoAll[0]['id']
print(messageInfoAll[0]['message'])

# Draw commenter names from that post at random. Draws are made with
# replacement, so the same name can come up more than once.
# Number of names to draw
peopleNum = 5
print('----------------------------')
for post in messageInfoAll:
    if post['id'] != targetId:
        continue
    comments = post['comments_messages']
    if not comments:
        # No comments on this post, so there is nothing to draw from.
        continue
    for _ in range(peopleNum):
        # choice() can return any entry, the last one included. The previous
        # randint(0, len - 1) call -- presumably NumPy's, whose upper bound is
        # exclusive -- could never pick the last comment and raised when the
        # post had a single comment.
        print(stdlib_random.choice(comments)[u'from'][u'name'])
In [65]:
def plotFrequent(num):
    """Scatter-plot how many comments a collected post received in each hour of the day.

    num -- index of the post in the module-level messageInfoAll list.

    The hour is taken from each comment's parsed 'created_time', i.e. it is
    expressed in whatever UTC offset that timestamp carries. Returns None.
    """
    from collections import Counter

    comments = messageInfoAll[num]['comments_messages']
    # Count the comments per hour of day.
    perHour = Counter(
        dateparser.parse(comment['created_time']).hour for comment in comments
    )
    # Hours in ascending order, with the matching counts.
    x = sorted(perHour)
    y = [perHour[hour] for hour in x]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(x, y)
    ax.set_xlabel('hour of day')
    ax.set_ylabel('number of comments')
    ax.set_title('Comments per hour (post index %d)' % num)
    # plt.show() works with the inline backend as well as GUI backends,
    # whereas Figure.show() is only meant for GUI backends.
    plt.show()
In [71]:
# Hourly comment distribution of the first collected post (index 0).
plotFrequent(0)
In [72]:
# Hourly comment distribution of the second collected post (index 1).
plotFrequent(1)
In [73]:
# Persist the collected post summaries as JSON for later analysis.
# NOTE: the absolute path is specific to the author's machine.
with open('/Users/wy/Desktop/fbreport.json', 'w') as reportFile:
    json.dump(messageInfoAll, reportFile, indent=1)