In [1]:
import os
import requests
import pandas as pd
from datetime import datetime
In [2]:
# Post ID and access token, both obtained through the Graph API.
# The post ID is the Graph API node whose comments are fetched below.
article_id = '232633627068_10156769966527069'
# Read the token from the environment so the secret never has to be pasted
# into (and accidentally committed with) the notebook. Falls back to an empty
# string when FB_ACCESS_TOKEN is not set.
token = os.environ.get('FB_ACCESS_TOKEN', '')
In [3]:
# Fetch every comment of the post using a nested field query combined with
# cursor-based pagination.
#
# The query is handed to requests through `params`, so requests performs the
# percent-encoding that would otherwise have to be written by hand:
#   { => %7B
#   } => %7D
#   , => %2C
# reference: https://www.w3schools.com/tags/ref_urlencode.asp
COMMENTS_PER_PAGE = 10  # value passed to comments.limit(...)
MESSAGE_LIMIT = 100     # value passed to message.limit(...) inside the nested query
REQUEST_TIMEOUT = 30    # seconds; keeps a stalled connection from hanging the cell

comments = []
pages = 0
base_url = 'https://graph.facebook.com/v2.11/{}'.format(article_id)
cursors_after = None  # the first request carries no cursor

while True:
    pages += 1

    # Only requests after the first one include an `.after(<cursor>)` modifier.
    after_clause = '.after({})'.format(cursors_after) if cursors_after else ''
    fields = 'comments.limit({}){}{{attachment,application,message.limit({})}}'.format(
        COMMENTS_PER_PAGE, after_clause, MESSAGE_LIMIT
    )
    resp = requests.get(
        base_url,
        params={'fields': fields, 'access_token': token},
        timeout=REQUEST_TIMEOUT,
    )
    data = resp.json()

    # Surface API failures (e.g. a missing or expired token) instead of
    # silently finishing with zero comments.
    if 'error' in data:
        error = data['error']
        detail = error.get('message', error) if isinstance(error, dict) else error
        raise RuntimeError('Graph API error: {}'.format(detail))

    # A response without a `comments` field ends the pagination.
    if 'comments' not in data:
        break

    comments += data['comments'].get('data', [])

    # Tolerate a missing `paging` / `cursors` object rather than raising KeyError.
    cursors_after = data['comments'].get('paging', {}).get('cursors', {}).get('after')
    if not cursors_after:
        print('EOF')
        break

print('pages {}'.format(pages))
print('comments length = {}'.format(len(comments)))
In [4]:
def flatten_comment(comment):
    """Return a flat copy of one comment dict.

    The nested ``application`` dict is expanded into ``application_<key>``
    entries and the nested ``attachment`` dict into ``attachment_type`` and
    ``attachment_url``. All other keys are copied unchanged. The input dict
    is not modified, so this cell can be re-run safely on the raw data.

    Parameters
    ----------
    comment : dict
        One element of ``comments`` as collected by the fetch cell.

    Returns
    -------
    dict
        New dict with the top-level keys first, then the ``application_*``
        keys, then the ``attachment_*`` keys.
    """
    flat = {
        key: value
        for key, value in comment.items()
        if key not in ('application', 'attachment')
    }
    if 'application' in comment:
        for key, value in comment['application'].items():
            flat['application_{}'.format(key)] = value
    if 'attachment' in comment:
        attachment = comment['attachment']
        # .get() keeps a single attachment lacking `type` or `url` from
        # aborting the whole conversion; the missing value becomes None.
        flat['attachment_type'] = attachment.get('type')
        flat['attachment_url'] = attachment.get('url')
    return flat


comment_records = [flatten_comment(comment) for comment in comments]
df = pd.DataFrame.from_records(comment_records)
df.head()
Out[4]:
In [5]:
# Write the flattened comments to ../results/<article_id>.csv.
results = os.path.abspath('../results')
# exist_ok=True creates the directory only when needed, without the separate
# exists() check that could race with another process creating it.
os.makedirs(results, exist_ok=True)
filename = os.path.join(results, '{}.csv'.format(article_id))
# index=False: the DataFrame's row index is not written as a column.
df.to_csv(filename, index=False)
print('Save file - {}'.format(filename))