In [2]:
import pandas as pd
import numpy as np
import csv
%pylab inline
from matplotlib import pyplot


Populating the interactive namespace from numpy and matplotlib

In [3]:
df = pd.read_csv('~/Downloads/all-content.csv')

In [4]:
df.tail()


Out[4]:
Publisher Title Url Published Page Views Uniques Total Engaged Time Avg Engaged Time Social Actions Social Referrals ... Sharethrough Paid Desktop Referrals Amplify Paid Referrals Amplify Paid Mobile Referrals Amplify Paid Tablet Referrals Amplify Paid Desktop Referrals Gravity Paid Referrals Gravity Paid Mobile Referrals Gravity Paid Tablet Referrals Gravity Paid Desktop Referrals Nativo Paid Referrals
4020 Atlas Obscura Kunsthaus Graz http://www.atlasobscura.com/places/kunsthaus-g... 2016-04-04T09:00:00 1 1.0 55000 55.000000 0 0 ... 0 0 0 0 0 0 0 0 0 0
4021 Atlas Obscura Hans Christian Andersen Museum http://www.atlasobscura.com/places/hans-christ... 2016-03-30T09:00:00 2 1.0 165000 82.500000 0 0 ... 0 0 0 0 0 0 0 0 0 0
4022 Atlas Obscura Club 47 http://www.atlasobscura.com/places/club-47 2015-09-22T15:00:00 438 395.0 26315000 60.079909 3 0 ... 0 0 0 0 0 0 0 0 0 0
4023 Atlas Obscura The Tree Crosses of Rosma Forest http://www.atlasobscura.com/places/the-tree-cr... 2016-04-08T11:00:00 1 1.0 595000 595.000000 0 0 ... 0 0 0 0 0 0 0 0 0 0
4024 Atlas Obscura OBSCURA SOCIETY NY: THE ROBOTIC CHURCH Perform... http://www.atlasobscura.com/events/http-www-at... 2015-09-21T16:00:00 2 2.0 140000 70.000000 0 0 ... 0 0 0 0 0 0 0 0 0 0

5 rows × 115 columns


In [56]:
df2 = df[["Audience Targeting","Lifetime Post Total Reach","Lifetime Post organic reach", "Lifetime Engaged Users"]]

In [57]:
df2["Count"] = 1
df2.head()


/Users/Mike/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
Out[57]:
Audience Targeting Lifetime Post Total Reach Lifetime Post organic reach Lifetime Engaged Users Count
Posted
2016-02-10 23:58:00 Live Entertainment, Dinosaur, Tyrannosaurus, P... 31035 31035 800 1
2016-02-10 22:27:00 49780 49780 1735 1
2016-02-10 20:56:00 Sailing, Scuba diving, Shipwreck, Ruins 41273 41273 1860 1
2016-02-10 19:23:00 Oregon, Ruins 104310 104310 6296 1
2016-02-10 17:51:00 Australia, Geology, Rock (geology), Travel, Ec... 43133 43133 1380 1

I need to resample the dataset, but to do that I think I need to set the Posted as the index?


In [39]:
df2_pivoted = pd.pivot_table(
    df2,
    ["Lifetime Post Total Reach","Lifetime Post organic reach","Lifetime Engaged Users","Count"],
    aggfunc = np.sum)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-39-e6b199e339bb> in <module>()
      2     df2,
      3     ["Lifetime Post Total Reach","Lifetime Post organic reach","Lifetime Engaged Users","Count"],
----> 4     aggfunc = np.sum)

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/tools/pivot.pyc in pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name)
    111             data = data[to_filter]
    112 
--> 113     grouped = data.groupby(keys)
    114     agged = grouped.agg(aggfunc)
    115 

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/generic.pyc in groupby(self, by, axis, level, as_index, sort, group_keys, squeeze)
   3434         axis = self._get_axis_number(axis)
   3435         return groupby(self, by=by, axis=axis, level=level, as_index=as_index,
-> 3436                        sort=sort, group_keys=group_keys, squeeze=squeeze)
   3437 
   3438     def asfreq(self, freq, method=None, how=None, normalize=False):

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/groupby.pyc in groupby(obj, by, **kwds)
   1309         raise TypeError('invalid type: %s' % type(obj))
   1310 
-> 1311     return klass(obj, by, **kwds)
   1312 
   1313 

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/groupby.pyc in __init__(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze)
    416         if grouper is None:
    417             grouper, exclusions, obj = _get_grouper(obj, keys, axis=axis,
--> 418                                                     level=level, sort=sort)
    419 
    420         self.obj = obj

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/groupby.pyc in _get_grouper(obj, key, axis, level, sort)
   2277 
   2278     if len(groupings) == 0:
-> 2279         raise ValueError('No group keys passed!')
   2280 
   2281     # create the internals grouper

ValueError: No group keys passed!

In [71]:
df2_resampled = df2.resample('W,',how='sum')

In [72]:
df2_resampled.tail()


Out[72]:
Lifetime Post Total Reach Lifetime Post organic reach Lifetime Engaged Users Count
Posted
2016-01-17 2105718 2105718 93521 46
2016-01-24 4265324 4265324 199711 125
2016-01-31 5842000 5842000 272560 138
2016-02-07 7153317 7142510 265521 127
2016-02-14 4251126 4251126 167284 64

In [73]:
df2_resampled["Average Reach"] = df2_resampled["Lifetime Post Total Reach"]/df2_resampled["Count"]

In [74]:
df2_resampled.tail()


Out[74]:
Lifetime Post Total Reach Lifetime Post organic reach Lifetime Engaged Users Count Average Reach
Posted
2016-01-17 2105718 2105718 93521 46 45776.478261
2016-01-24 4265324 4265324 199711 125 34122.592000
2016-01-31 5842000 5842000 272560 138 42333.333333
2016-02-07 7153317 7142510 265521 127 56325.330709
2016-02-14 4251126 4251126 167284 64 66423.843750

In [131]:
df2_resampled.plot(y="Average Reach",kind='line',title="Mean Post Reach")
df2_resampled.plot(y="Lifetime Post Total Reach",kind='line')
df2_resampled["Average Organic Reach"] = df2_resampled["Lifetime Post organic reach"] / df2_resampled["Count"]
df2_resampled.plot(y="Average Organic Reach")


Out[131]:
<matplotlib.axes._subplots.AxesSubplot at 0x10b52ae50>

In [69]:
df2_resampled


Out[69]:
Lifetime Post Total Reach Lifetime Post organic reach Lifetime Engaged Users Count Average Reach Average Organic Reach
Posted
2015-11-01 19000 19000 572 1 19000.000000 19000.000000
2015-11-08 3020488 3015474 182866 93 32478.365591 32424.451613
2015-11-15 2543807 2543807 104623 100 25438.070000 25438.070000
2015-11-22 3368053 3368053 164476 105 32076.695238 32076.695238
2015-11-29 3135328 3135328 148977 97 32322.969072 32322.969072
2015-12-06 3437571 3437571 167019 105 32738.771429 32738.771429
2015-12-13 2918521 2918521 147482 86 33936.290698 33936.290698
2015-12-20 3478403 3478403 155559 124 28051.637097 28051.637097
2015-12-27 3062764 3062764 137932 87 35204.183908 35204.183908
2016-01-03 3455661 3455661 154793 85 40654.835294 40654.835294
2016-01-10 7310201 7310201 432257 117 62480.350427 62480.350427
2016-01-17 2105718 2105718 93521 46 45776.478261 45776.478261
2016-01-24 4265324 4265324 199711 125 34122.592000 34122.592000
2016-01-31 5842000 5842000 272560 138 42333.333333 42333.333333
2016-02-07 7153317 7142510 265521 127 56325.330709 56240.236220
2016-02-14 4251126 4251126 167284 64 66423.843750 66423.843750

In [76]:
df3_resampled = df2.resample('W,',how='median')

In [130]:
df3_resampled.plot(y="Lifetime Post Total Reach", title = "Median Post Reach")


Out[130]:
<matplotlib.axes._subplots.AxesSubplot at 0x10aa4b990>

In [111]:
df2["Targeted"] = pd.Series(df2["Audience Targeting"]) != " "


/Users/Mike/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':

In [113]:
df2.Targeted = df2.Targeted.astype(int)


/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/generic.py:2387: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value

In [115]:
df2["Targeted"].corr(df2["Lifetime Post Total Reach"])


Out[115]:
0.10632153388811802

In [117]:
df2_pivoted = pd.pivot_table(df2,values = ["Lifetime Post Total Reach","Count"],index=["Targeted"],aggfunc=np.sum)

In [118]:
df2_pivoted


Out[118]:
Count Lifetime Post Total Reach
Targeted
0 1431 54117763
1 69 5249519

In [119]:
df2_pivoted["Average Reach"] = df2_pivoted["Lifetime Post Total Reach"] / df2_pivoted["Count"]

In [120]:
df2_pivoted


Out[120]:
Count Lifetime Post Total Reach Average Reach
Targeted
0 1431 54117763 37818.143256
1 69 5249519 76079.985507

In [126]:
df2_recent = df2[:230]

In [127]:
df2_recent["Targeted"] = pd.Series(df2_recent["Audience Targeting"]) != " "
df2_recent.Targeted = df2_recent.Targeted.astype(int)
df2_pivoted = pd.pivot_table(df2_recent,values = ["Lifetime Post Total Reach","Count"],index=["Targeted"],aggfunc=np.sum)
df2_pivoted["Average Reach"] = df2_pivoted["Lifetime Post Total Reach"] / df2_pivoted["Count"]


/Users/Mike/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':

In [128]:
df2_pivoted


Out[128]:
Count Lifetime Post Total Reach Average Reach
Targeted
0 161 8641833 53675.981366
1 69 5249519 76079.985507

In [129]:
df2_pivoted.plot(y="Average Reach",kind='bar')


Out[129]:
<matplotlib.axes._subplots.AxesSubplot at 0x10aa06ed0>

In [ ]: