練習


In [1]:
import os
import requests
import pandas as pd

from datetime import datetime

In [2]:
# Fan page ID and access token used for the Graph API requests below.
fanpage_id = '136845026417486'
# Read the token from the environment so the credential is never saved inside
# the notebook; falls back to the empty string when FB_ACCESS_TOKEN is unset.
token = os.environ.get('FB_ACCESS_TOKEN', '')

In [3]:
# First-page request for the fan page's posts. Every following page is reached
# through the `paging.next` URL contained in the previous response.
url = 'https://graph.facebook.com/v2.11/{}/posts?fields={}&access_token={}'.format(
    fanpage_id, 'id,created_time,name,likes.limit(0).summary(true),shares,message', token
)

posts = []   # raw post dicts accumulated over all pages
pages = 0    # number of pages fetched so far

while True:
    # A timeout keeps a stalled connection from hanging the cell forever.
    resp = requests.get(url, timeout=30)

    # Stop on HTTP errors with the status and response body, instead of
    # failing further down with a bare KeyError on 'data'. The request URL is
    # deliberately left out of the message because it contains the token.
    if not resp.ok:
        raise RuntimeError(
            'Graph API request failed ({}): {}'.format(resp.status_code, resp.text)
        )

    data = resp.json()
    posts += data.get('data', [])
    pages += 1

    # A response without a 'paging' object or without a 'next' link is
    # treated as the last page.
    next_url = data.get('paging', {}).get('next')
    if not next_url:
        print('EOF')
        break

    url = next_url
    print('page {}'.format(pages))


page 1
page 2
page 3
page 4
page 5
page 6
page 7
page 8
page 9
page 10
page 11
page 12
page 13
page 14
page 15
page 16
page 17
page 18
page 19
page 20
page 21
page 22
page 23
page 24
page 25
page 26
page 27
page 28
page 29
page 30
page 31
page 32
page 33
page 34
page 35
page 36
page 37
page 38
page 39
page 40
page 41
page 42
page 43
page 44
page 45
page 46
page 47
page 48
page 49
page 50
page 51
page 52
page 53
page 54
page 55
page 56
page 57
EOF

In [4]:
def _summarise_field(field, value):
    """Map one raw post field to the (column, value) pair kept in the summary.

    A 'likes' entry that carries summary.total_count becomes 'total_likes',
    a 'shares' entry that carries count becomes 'total_shares'; every other
    field is passed through unchanged.
    """
    if field == 'likes' and 'summary' in value and 'total_count' in value['summary']:
        return 'total_likes', value['summary']['total_count']
    if field == 'shares' and 'count' in value:
        return 'total_shares', value['count']
    return field, value


# One flat record per post, with fields kept in the order they were received.
posts_summary = [
    dict(_summarise_field(field, value) for field, value in post.items())
    for post in posts
]

df = pd.DataFrame.from_records(posts_summary)
df.head()


Out[4]:
created_time id message name total_likes total_shares
0 2018-01-29T10:07:27+0000 136845026417486_1230167763751868 來荷蘭烏特勒支市走一走,看看他們如何創造一個友善的自行車通行環境。\n\n---\nPart... 直播|考察荷蘭自行車設施(Part 2) 9022 131.0
1 2018-01-29T09:40:44+0000 136845026417486_1230143707087607 來荷蘭烏特勒支市走一走,看看他們如何創造一座全世界最大的自行車停車場。\n\n---\nPa... 直播|考察荷蘭自行車設施(Part 1) 10470 181.0
2 2018-01-28T03:30:00+0000 136845026417486_1228976073871037 每次出訪,都是一次難得的學習機會,這一趟歐洲行也不例外。\n\n荷蘭一直是我想去好好研究的地... Timeline Photos 38594 400.0
3 2018-01-27T13:15:49+0000 136845026417486_1228569593911685 很多人都聽過「順手捐發票,救救老殘窮」,也看過在路上推著烤爐賣烤地瓜的「地瓜媽媽」,這些都是... NaN 12317 192.0
4 2018-01-26T09:29:05+0000 136845026417486_1227573790677932 政治就是落實在人民的每一天生活之中,讓人民有好的居住環境,應當是中央和地方一致認同的進步價值... Photos from 柯文哲's post 9145 86.0

In [5]:
# Directory that receives the exported CSV, resolved relative to the
# notebook's working directory.
results = os.path.abspath('../results')
# exist_ok=True creates the directory when it is missing and is a no-op when
# it already exists, without a separate existence check that could race with
# another process creating the same directory.
os.makedirs(results, exist_ok=True)

# Write one CSV per fan page, named after the page ID, without the index column.
filename = os.path.join(results, 'fanpage_{}.csv'.format(fanpage_id))
df.to_csv(filename, index=False)
print('Save file - {}'.format(filename))


Save file - /home/dirl/github/Python-Crawling-Tutorial/results/fanpage_136845026417486.csv