Typing cd tutorials/paa_2017_social_media will change to the paa_2017_social_media folder within the tutorials directory; running jupyter notebook from that folder then opens the notebook interface.
Type conda create -n py27 python=2.7 in the terminal to create a Python 2.7 environment named py27, then source activate py27 to activate the py27 environment. Install the remaining dependencies with:
conda install notebook ipykernel matplotlib
ipython kernel install --user
pip install -r requirements.txt
python setup.py install
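Once the notebook is running on the py27 kernel, a quick sanity check (not part of the original setup) confirms that Python 2.7 is the active interpreter:
import sys
# the rest of the tutorial assumes a Python 2.7 kernel (the py27 environment created above)
assert sys.version_info[:2] == (2, 7), "expected Python 2.7, got %s.%s" % sys.version_info[:2]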
import loads a Python module into the namespace; collections is a Python module, and from collections import OrderedDict brings in just the OrderedDict class from it. import pandas as pd imports the pandas module but assigns this module the name "pd" in the namespace, so instead of typing pandas.read_csv() to read a .csv file you can now type pd.read_csv(). import can also be used to load custom code/functions from a Python .py file in the current directory, as in from utils import *.
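As a small, self-contained illustration of the import variants described above (the example DataFrame is made up just for this snippet):
import pandas as pd                     # whole module, bound to the short name "pd"
from collections import OrderedDict     # a single name pulled into the namespace

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})   # pd.DataFrame is the same object as pandas.DataFrame
d = OrderedDict([("x", 1), ("y", 2)])           # usable without the collections. prefix
print(df.shape)
print(list(d.keys()))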
In [1]:
# uncomment the line below to view the functions in utils.py
#% cat utils.py
In [2]:
import os
import re
import sys
import csv
import json
import glob
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from collections import OrderedDict
from pysocialwatcher import watcherAPI
from utils import *
%matplotlib inline
os.getcwd() returns the current working directory, similar to getwd() in R.
In [ ]:
os.getcwd()
Click "Extend Access Token" button (at the bottom) (the button may not show up right away, just refresh the page)
Finally, you want to get your Ads Manager Account ID.
watcherAPI() is a class within the pySocialWatcher module that makes requests with the Facebook Marketing API. watcherAPI() has multiple attributes, including credentials. load_credentials_file is a function within the watcher class that allows users to upload a .csv of Facebook Marketing API credentials. check_tokens_account_valid is a function that sends a generic example request to the Facebook Marketing API to assess the validity of the credentials loaded with the load_credentials_file function.
In [3]:
watcher = watcherAPI()
watcher.load_credentials_file("../credentials/facebook_credentials.csv")
watcher.check_tokens_account_valid()
The print_geo_locations_given_query_and_location_type function searches the Facebook Marketing API for geo locations matching a query string and a list of location types, and prints the matches.
In [ ]:
watcherAPI.print_geo_locations_given_query_and_location_type("new", ["city"])
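The same helper can be pointed at other location types; for example, a region-level search is one way to look up the state-level keys that appear in the FB_key column used below (a small variant of the call above, treating "region" as the Marketing API's location type for states):
# look up region-level (state) keys by name instead of city-level keys
watcherAPI.print_geo_locations_given_query_and_location_type("Illinois", ["region"])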
In [4]:
watcher.print_bad_joke()
In [ ]:
watcher.print_behaviors_list()
Use the pandas read_table function to read in the table of US states and their Facebook location keys.
In [5]:
US_states = pd.read_table("../data/US_states.csv", sep = ",", header=0)
US_states.head()
Out[5]:
In [6]:
US_states.iloc[12]
Out[6]:
The run_data_collection function takes a .json file as an input and creates an API request; the output of run_data_collection is a pandas dataframe and .csv files. The cell below builds an example input dictionary for Illinois and writes it out as a .json file for the run_data_collection function.
In [7]:
illinois_dic={"name": "IL_example", "geo_locations": [{"name": "regions",
"values": [{"key":str(US_states["FB_key"][12])}],
"location_types": ["home"]}],
"genders": [0,1,2],
"ages_ranges": [{"min":13, "max":65}]}
illinois_dic_ordered = OrderedDict(sorted(illinois_dic.items(),key=lambda t: len(t[0])))
illinois_dic_json=json.dumps(illinois_dic_ordered, indent = 4)
print(illinois_dic_json)
file_name = "IL_example.json"
with open('../data/%s' % file_name, 'w') as outfile:
    outfile.write(json.dumps(illinois_dic_ordered, indent = 4))
make a request to the Marketing API with the example dictionary created above
In [ ]:
watcher.run_data_collection("../data/%s" % file_name)
From the behaviors list, the behavior id for new movers is 6029662272682: "This Life Event segment contains consumers who are likely to be new movers in the last 6 months." (New mover)
In [ ]:
illinois_dic_new_movers={"name": "IL_example", "geo_locations": [{"name": "regions",
"values": [{"key":str(US_states["FB_key"][12])}],
"location_types": ["home"]}],
"genders": [0,1,2],
"ages_ranges": [{"min":13, "max":65}],
"behavior":[{"or": [6029662272682],"name": "new_mover"}]}
illinois_dic_new_movers_ordered = OrderedDict(sorted(illinois_dic_new_movers.items(),key=lambda t: len(t[0])))
illinois_dic_new_movers_json=json.dumps(illinois_dic_new_movers_ordered, indent = 4)
print(illinois_dic_new_movers_json)
file_name = "IL_example_new_movers.json"
with open('../data/%s' % file_name, 'w') as outfile:
    outfile.write(json.dumps(illinois_dic_new_movers_ordered, indent = 4))
make a request to the Marketing API with the new movers dictionary created above
In [ ]:
watcher.run_data_collection("../data/%s" % file_name)
In [8]:
state_subset = US_states.loc[US_states['Abbreviation'].isin(["CA","NY", "TX", "IL"])]
# reindex state_subset
state_subset.index = [0,1,2,3]
state_subset
Out[8]:
In [ ]:
for state in range(0, len(state_subset["State Name"])):
    # create a folder for each state's new-mover data if it does not already exist
    if not os.path.exists('../data/new_movers_by_state/%s' % state_subset["State Name"][state]):
        os.makedirs('../data/new_movers_by_state/%s' % state_subset["State Name"][state])
In [ ]:
UN_age_min = [13,15, 20, 25, 30, 35, 40, 45, 50, 55, 60]
UN_age_max = [65,19, 24, 29, 34, 39, 44, 49, 54, 59, 65]
UN_age_table = {'age_min': UN_age_min, 'age_max': UN_age_max}
UN_age_table_df = pd.DataFrame(data=UN_age_table, index=None)
UN_age_table_df = UN_age_table_df[["age_min", "age_max"]]
UN_age_table_df.to_csv("../data/UN_age_table.csv", index=None)
In [ ]:
ctr = 0
for state in range(0, len(state_subset["State Name"])):
    # build the request dictionary for this state, targeting the new-mover behavior
    state_dic={"name": str(state_subset["State Name"][state])+"_new_movers",
               "geo_locations": [{"name": "regions", "values": [{"key":str(state_subset["FB_key"][state])}],
                                  "location_types": ["home"]}],
               "genders": [0,1,2],
               "ages_ranges": [{"min":13, "max":65}],
               "behavior":[{"or": [6029662272682],"name": "new_movers"}]}
    state_dic_ordered = OrderedDict(sorted(state_dic.items(),key=lambda t: len(t[0])))
    state_dic_json=json.dumps(state_dic_ordered, indent = 4)
    gender_dict = {'0':'female_male_total_pop'}
    file_name = str(state_subset["State Name"][state])+"_new_movers"+".json"
    state_folder = state_subset["State Name"][state]
    # make sure the per-state json and csv subfolders exist
    if not os.path.exists('../data/new_movers_by_state/%s/api_requests_json' % state_folder):
        os.makedirs('../data/new_movers_by_state/%s/api_requests_json' % state_folder)
    if not os.path.exists('../data/new_movers_by_state/%s/api_requests_csv' % state_folder):
        os.makedirs('../data/new_movers_by_state/%s/api_requests_csv' % state_folder)
    # write the request .json, send it to the Marketing API, and save the returned dataframe as .csv
    with open('../data/new_movers_by_state/%s/api_requests_json/%s' % (state_folder,file_name), 'w') as outfile:
        outfile.write(json.dumps(state_dic_ordered, indent = 4))
    state_api_request = watcher.run_data_collection('../data/new_movers_by_state/%s/api_requests_json/%s' % (state_folder,file_name))
    state_api_request.insert(0, "state",state_subset["State Name"][state])
    csv_filename = file_name.split(".")[0]+".csv"
    state_api_request.to_csv("../data/new_movers_by_state/%s/api_requests_csv/%s" % (state_folder,csv_filename),
                             index=False)
    ctr = ctr +1
    total =len(state_subset["State Name"])
    print("file %.f of %.f " % (ctr,total))
    print(file_name)
In [ ]:
%rm collect_finished_* dataframe_collecting_* dataframe_skeleton_*
Use the merge_subdirectories function from utils to merge all of the per-state folders in the new_movers_by_state directory into one .csv file.
In [ ]:
merge_subdirectories('../data/new_movers_by_state/',
'raw_new_movers_data.csv')
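utils.py is not reproduced in this notebook, so purely as an illustration (not the actual merge_subdirectories implementation), a merge over the folder layout created above could look roughly like this, reusing the glob and pandas imports from the top of the notebook:
def merge_state_csvs(root_dir, out_name):
    # illustrative sketch: gather every per-state csv written by the collection loop
    # (state_folder/api_requests_csv/*.csv) and concatenate them into one file
    paths = sorted(glob.glob(root_dir + "*/api_requests_csv/*.csv"))
    frames = [pd.read_csv(p) for p in paths]
    merged = pd.concat(frames, ignore_index=True)
    merged.to_csv("../data/" + out_name, index=False)
    return merged

# merge_state_csvs('../data/new_movers_by_state/', 'raw_new_movers_data.csv')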
Read in the merged .csv file of new movers by state.
In [9]:
raw_new_movers_data = pd.read_csv("../data/raw_new_movers_data.csv")
raw_new_movers_data.head()
Out[9]:
In [10]:
print("the data has %s rows and %s columns" % (raw_new_movers_data.shape[0], raw_new_movers_data.shape[1]))
print(raw_new_movers_data.dtypes)
Use the process_facebook_data function from utils to munge the merged dataframe of new movers.
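utils.py is again not shown here, so only as a hypothetical sketch of the kind of munging such a step might perform (judging from the columns used later, e.g. a readable genders label), with no claim that this matches the real process_facebook_data:
def process_facebook_data_sketch(df):
    # hypothetical munging step: map the numeric gender codes used in the
    # requests (0, 1, 2) to readable labels and return a tidied copy
    gender_labels = {0: "total population", 1: "male", 2: "female"}
    out = df.copy()
    out["genders"] = out["genders"].map(gender_labels)
    return out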
In [11]:
merged_new_movers_data = process_facebook_data(raw_new_movers_data)
merged_new_movers_data.head()
Out[11]:
Use the pandas groupby function to group the data by a column.
In [12]:
merged_new_movers_data_genders = merged_new_movers_data.groupby("genders")
merged_new_movers_data_genders["audience"].sum()
Out[12]:
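groupby also accepts a list of columns; for example, summing the audience estimates by state and gender together (a small variation on the cell above, using the same merged_new_movers_data):
# group by both state and gender before summing the audience estimates
merged_new_movers_data.groupby(["state", "genders"])["audience"].sum()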
In [13]:
new_movers_total_pop = merged_new_movers_data.loc[merged_new_movers_data["genders"]=="total population"][["state", "audience"]]
new_movers_total_pop
Out[13]:
In [14]:
new_movers_audience = new_movers_total_pop["audience"]
new_movers_audience
Out[14]:
In [15]:
labels = [i for i in state_subset["Abbreviation"]]
labels
Out[15]:
In [16]:
new_movers_audience.mean()
Out[16]:
In [17]:
plt.subplots(1, figsize=(12, 8))
plt.subplots_adjust(bottom = 0.1)
plt.scatter(np.arange(len(new_movers_audience)), new_movers_audience)
for label, x, y in zip(labels,np.arange(len(new_movers_audience)), new_movers_audience):
    # annotate each point with its state abbreviation
    plt.annotate(
        label,
        xy=(x, y), xytext=(-20, 20),
        textcoords='offset points', ha='right', va='bottom',
        bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
        arrowprops=dict(arrowstyle = '->', connectionstyle='arc3,rad=0'))
plt.xticks([])
plt.axhline(new_movers_audience.mean(), linestyle='--', color='red', alpha=0.3)
plt.title("Total Population of New Movers by State")
plt.xlabel("State")
plt.ylabel("Total New Movers")
plt.show()
In [18]:
import matplotlib
print("System and module version information: \n")
print('Python version: %s' % str(sys.version_info))
print('numpy version: %s' % np.__version__)
print('pandas version: %s' % pd.__version__)
print('matplotlib version: %s' % matplotlib.__version__)