Collecting Facebook pages' data hourly

Import libraries and create functions:


In [5]:
import facebook # for connecting to Facebook Graph API
import pprint
import datetime
import pandas as pd
import logging

logger = logging.getLogger('catch_all') # getLogger() registers the logger with the logging system

# send a request to the Facebook Graph API, fetching the last `lim` posts of a page:
def collector(page, token, lim):
    graph = facebook.GraphAPI(access_token = token, version = '2.7')
    # fetch and transform JSON to dict :
    posts = graph.get_connections(id = page, connection_name = 'posts',
                                  fields = 'id,message,link,shares,from,type,created_time,updated_time,'
                                  'comments.limit(0).summary(true),likes.limit(0).summary(true),reactions.limit(0).summary(true)',
                                  limit = lim)
    data = posts['data'] # list of post dicts
    mylist = []
    
    # parse data from dict :
    for el in data:
        mydict = {}
        for key in el:
            if key == 'comments':
                commentcount = el[key]['summary']['total_count']
                mydict['comment_count'] = commentcount
            elif key == 'from':
                creator = el[key]['name']
                mydict['from'] = creator
            elif key == 'likes':
                likecount = el[key]['summary']['total_count']
                mydict['like_count'] = likecount
            elif key == 'reactions':
                reactcount = el[key]['summary']['total_count']
                mydict['reaction_count'] = reactcount
            elif key == 'shares':
                share = el[key]['count']
                mydict['share_count'] = share
            else:
                mydict[key] = el[key]
        time = datetime.datetime.now() # add timestamp
        mydict['time_checked'] = str(time)
        mylist.append(mydict)
    df = pd.DataFrame(mylist) # pandas dataframe
    return df

# create a new csv file for a page:
def initiator(page, token, lim):
    df = collector(page, token, lim)
    df.to_csv(page + '.csv', index = False, encoding = 'utf_8_sig')

# receive post ids and collect fresh data for these extra posts:
def collector_post(post_ids, token):
    graph = facebook.GraphAPI(access_token = token, version = '2.7')
    # filter out sentinel ids written by earlier error handling; a list
    # comprehension avoids removing items from a list while iterating over it:
    sentinels = {'post deleted', 'ERROR in exception',
                 'Something wrong', 'Unexpected condition in appending()'}
    post_ids = [idvalue for idvalue in post_ids if idvalue not in sentinels]
    try:
        posts = graph.get_objects(ids = post_ids,
                            fields = 'id,message,link,shares,from,type,created_time,updated_time,'
                            'comments.limit(0).summary(true),likes.limit(0).summary(true),reactions.limit(0).summary(true)')
    except Exception as e:
        logger.error(e, exc_info=True)
        time = str(datetime.datetime.now())
        return pd.DataFrame([{'id' : 'ERROR in exception', 'time_checked' : time}])
    #--------------------------------------------
    if len(posts) > 0:
        mylist = []
        for id_key in posts:
            mydict = {}
            for key in posts[id_key]:
                if key == 'comments':
                    commentcount = posts[id_key][key]['summary']['total_count']
                    mydict['comment_count'] = commentcount
                elif key == 'from':
                    creator = posts[id_key][key]['name']
                    mydict['from'] = creator
                elif key == 'likes':
                    likecount = posts[id_key][key]['summary']['total_count']
                    mydict['like_count'] = likecount
                elif key == 'reactions':
                    reactcount = posts[id_key][key]['summary']['total_count']
                    mydict['reaction_count'] = reactcount
                elif key == 'shares':
                    share = posts[id_key][key]['count']
                    mydict['share_count'] = share
                else:
                    mydict[key] = posts[id_key][key]
            time = datetime.datetime.now()
            mydict['time_checked'] = str(time)
            mylist.append(mydict)
        df = pd.DataFrame(mylist)
        return df
    else: # an empty response means the requested posts no longer exist
        return pd.DataFrame([{'id' : 'post deleted', 'time_checked' : str(datetime.datetime.now())}])

# fetch new data and update an existing csv file when this function is called:
def appending(page, token, lim):
    df = collector(page, token, lim) # fetch the last `lim` posts
    df_old = pd.read_csv(page + '.csv', encoding = 'utf_8_sig')
    df_new = pd.concat([df_old, df], axis = 0)
    # find ids of posts that exist in the csv file but are not among the latest posts:
    id_old = df_old['id'].unique()
    id_extra = list(set(id_old) - set(df['id']))
    # slice the extra ids into chunks of at most 50 ids per request
    # and send each chunk to collector_post():
    if len(id_extra) > 0:
        chunks = [id_extra[x:x+50] for x in range(0, len(id_extra), 50)]
        for ids in chunks:
            extrarow = collector_post(ids, token)
            df_new = pd.concat([df_new, extrarow], axis = 0)
    # update csv
    df_new.to_csv(page + '.csv', index = False, encoding = 'utf_8_sig')
    print('to csv page : {} done !'.format(page))
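
If more history is needed than a single request returns, recent versions of the facebook-sdk library also provide get_all_connections(), a generator that follows the paging cursors automatically. A minimal sketch, not part of the original script: collector_all and max_posts are names invented here, and the fields list is shortened for brevity.

def collector_all(page, token, max_posts):
    graph = facebook.GraphAPI(access_token = token, version = '2.7')
    mylist = []
    # get_all_connections() yields one post at a time, following 'paging' links:
    for post in graph.get_all_connections(id = page, connection_name = 'posts',
                                          fields = 'id,message,created_time'):
        mylist.append(post)
        if len(mylist) >= max_posts: # stop after max_posts posts
            break
    return pd.DataFrame(mylist)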

Set up variables:

Note: Put your Facebook access token into the 'token' variable before running.


In [6]:
# all page ids I want to collect data from:
pages = ['DramaAdd', 'ejeab', 'cartooneggcat', 'BBCThai', 'in.one.zaroop', 'HighlightsHD.tv', 'khobsanam', '1447102878929950',
         'powerofhusbands', 'basementkaraoke', 'cartoon5natee', 'AjahnBuddhadasa', 'Toodsdiary', 'ceclip', 'beargirlfriend',
         'jaytherabbitofficial', 'Darlingboredom', 'v.vajiramedhi', '334236760084743', 'kingdomoftigers', 'underbedstar', 'pantipded',
         'Pantip.KratooDed', 'nut.ped', '9gaginthai']
# a Facebook access token:
token = 'Your Secret Token'

# number of posts to collect on the first run:
lim = 50
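
Hard-coding the token works for a quick notebook session, but a safer option is to read it from an environment variable. A minimal sketch, assuming the token was exported beforehand as FB_ACCESS_TOKEN (a name chosen here for illustration):

import os

# fall back to the hard-coded value if the environment variable is not set:
token = os.environ.get('FB_ACCESS_TOKEN', token)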

First, run this code to create a new csv file for each page:

Note: All files will be created in the same directory as this script.


In [7]:
print(str(datetime.datetime.now()))
for page in pages:
    initiator(page, token, lim)
print(str(datetime.datetime.now()))


2017-01-08 18:51:47.276207
2017-01-08 18:52:08.342411

An example csv file:


In [9]:
df = pd.read_csv('BBCThai.csv', encoding = 'utf_8_sig')
df.head(5)


Out[9]:
comment_count created_time from id like_count link message reaction_count share_count time_checked type updated_time
0 3 2017-01-08T11:36:18+0000 บีบีซีไทย - BBC Thai 1526071940947174_1872082379679460 205 https://www.facebook.com/BBCThai/videos/187208... มิเชล โอบามา กล่าวสุนทรพจน์อย่างเป็นทางการครั้... 213 30 2017-01-08 18:51:50.754405 video 2017-01-08T11:45:06+0000
1 8 2017-01-08T11:13:47+0000 บีบีซีไทย - BBC Thai 1526071940947174_1872076769680021 370 http://bbc.in/2i2CjZQ พระสงฆ์รูปหนึ่งจุดไฟเผาตัวเองประท้วงรัฐบาลเกาห... 414 25 2017-01-08 18:51:50.754405 link 2017-01-08T11:46:32+0000
2 2 2017-01-08T10:06:32+0000 บีบีซีไทย - BBC Thai 1526071940947174_1872059309681767 182 https://www.facebook.com/BBCThai/videos/187205... แก้ไข : คำผิดในวิดีโอช่วงนาทีที่ 0.56 \r\nจากค... 186 13 2017-01-08 18:51:50.754405 video 2017-01-08T10:28:13+0000
3 30 2017-01-08T09:44:38+0000 บีบีซีไทย - BBC Thai 1526071940947174_1872055029682195 723 http://bbc.in/2j50LGO หนังสือ “การต่อสู้ของข้าพเจ้า” (Mein Kampf) ขอ... 756 78 2017-01-08 18:51:50.754405 link 2017-01-08T11:39:12+0000
4 8 2017-01-08T09:24:53+0000 บีบีซีไทย - BBC Thai 1526071940947174_1872050469682651 434 https://www.facebook.com/BBCThai/videos/187205... อนาคตพนักงานพิมพ์ดีดริบหรี่ ในยุคไอทีรุกเมียนม... 456 59 2017-01-08 18:51:50.754405 video 2017-01-08T10:25:27+0000

Then, run this code hourly to update all of the csv files:


In [10]:
print(str(datetime.datetime.now()))
for page in pages :
    appending(page, token, lim)
print(str(datetime.datetime.now()))


2017-01-08 18:54:48.491571
to csv page : DramaAdd done !
to csv page : ejeab done !
to csv page : cartooneggcat done !
to csv page : BBCThai done !
to csv page : in.one.zaroop done !
to csv page : HighlightsHD.tv done !
to csv page : khobsanam done !
to csv page : 1447102878929950 done !
to csv page : powerofhusbands done !
to csv page : basementkaraoke done !
to csv page : cartoon5natee done !
to csv page : AjahnBuddhadasa done !
to csv page : Toodsdiary done !
to csv page : ceclip done !
to csv page : beargirlfriend done !
to csv page : jaytherabbitofficial done !
to csv page : Darlingboredom done !
to csv page : v.vajiramedhi done !
to csv page : 334236760084743 done !
to csv page : kingdomoftigers done !
to csv page : underbedstar done !
to csv page : pantipded done !
to csv page : Pantip.KratooDed done !
to csv page : nut.ped done !
to csv page : 9gaginthai done !
2017-01-08 18:55:11.133865
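
To automate these hourly rounds, one option is a simple loop that sleeps between rounds; a cron job or OS task scheduler is usually more robust for long-running collection. A minimal sketch, assuming the cells above have already been run in the same session:

import time

while True:
    print(str(datetime.datetime.now()))
    for page in pages:
        appending(page, token, lim)
    time.sleep(3600) # wait one hour before the next round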

An updated csv example:


In [11]:
df = pd.read_csv('BBCThai.csv', encoding = 'utf_8_sig')
df.head(5)


Out[11]:
comment_count created_time from id like_count link message reaction_count share_count time_checked type updated_time
0 3 2017-01-08T11:36:18+0000 บีบีซีไทย - BBC Thai 1526071940947174_1872082379679460 205 https://www.facebook.com/BBCThai/videos/187208... มิเชล โอบามา กล่าวสุนทรพจน์อย่างเป็นทางการครั้... 213 30 2017-01-08 18:51:50.754405 video 2017-01-08T11:45:06+0000
1 8 2017-01-08T11:13:47+0000 บีบีซีไทย - BBC Thai 1526071940947174_1872076769680021 370 http://bbc.in/2i2CjZQ พระสงฆ์รูปหนึ่งจุดไฟเผาตัวเองประท้วงรัฐบาลเกาห... 414 25 2017-01-08 18:51:50.754405 link 2017-01-08T11:46:32+0000
2 2 2017-01-08T10:06:32+0000 บีบีซีไทย - BBC Thai 1526071940947174_1872059309681767 182 https://www.facebook.com/BBCThai/videos/187205... แก้ไข : คำผิดในวิดีโอช่วงนาทีที่ 0.56 \r\r\nจา... 186 13 2017-01-08 18:51:50.754405 video 2017-01-08T10:28:13+0000
3 30 2017-01-08T09:44:38+0000 บีบีซีไทย - BBC Thai 1526071940947174_1872055029682195 723 http://bbc.in/2j50LGO หนังสือ “การต่อสู้ของข้าพเจ้า” (Mein Kampf) ขอ... 756 78 2017-01-08 18:51:50.754405 link 2017-01-08T11:39:12+0000
4 8 2017-01-08T09:24:53+0000 บีบีซีไทย - BBC Thai 1526071940947174_1872050469682651 434 https://www.facebook.com/BBCThai/videos/187205... อนาคตพนักงานพิมพ์ดีดริบหรี่ ในยุคไอทีรุกเมียนม... 456 59 2017-01-08 18:51:50.754405 video 2017-01-08T10:25:27+0000
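
Note that every hourly round appends a fresh snapshot of each post, so one post id accumulates one row per round; this is what makes the counts trackable over time. To analyse only the latest state of each post, a small sketch using the column names shown above:

df = pd.read_csv('BBCThai.csv', encoding = 'utf_8_sig')
# keep only the most recent snapshot of each post id:
latest = df.sort_values('time_checked').drop_duplicates(subset = 'id', keep = 'last')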
