notebook.community

Edit and run



In [2]:

    
import pandas as pd
import numpy as np
import csv
%pylab inline
from matplotlib import pyplot









    



Populating the interactive namespace from numpy and matplotlib



In [3]:

    
df = pd.read_csv('~/Downloads/all-content.csv')



In [4]:

    
df.tail()









    Out[4]:






  
    
      
      Publisher
      Title
      Url
      Published
      Page Views
      Uniques
      Total Engaged Time
      Avg Engaged Time
      Social Actions
      Social Referrals
      ...
      Sharethrough Paid Desktop Referrals
      Amplify Paid Referrals
      Amplify Paid Mobile Referrals
      Amplify Paid Tablet Referrals
      Amplify Paid Desktop Referrals
      Gravity Paid Referrals
      Gravity Paid Mobile Referrals
      Gravity Paid Tablet Referrals
      Gravity Paid Desktop Referrals
      Nativo Paid Referrals
    
  
  
    
      4020
      Atlas Obscura
      Kunsthaus Graz
      http://www.atlasobscura.com/places/kunsthaus-g...
      2016-04-04T09:00:00
      1
      1.0
      55000
      55.000000
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      4021
      Atlas Obscura
      Hans Christian Andersen Museum
      http://www.atlasobscura.com/places/hans-christ...
      2016-03-30T09:00:00
      2
      1.0
      165000
      82.500000
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      4022
      Atlas Obscura
      Club 47
      http://www.atlasobscura.com/places/club-47
      2015-09-22T15:00:00
      438
      395.0
      26315000
      60.079909
      3
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      4023
      Atlas Obscura
      The Tree Crosses of Rosma Forest
      http://www.atlasobscura.com/places/the-tree-cr...
      2016-04-08T11:00:00
      1
      1.0
      595000
      595.000000
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      4024
      Atlas Obscura
      OBSCURA SOCIETY NY: THE ROBOTIC CHURCH Perform...
      http://www.atlasobscura.com/events/http-www-at...
      2015-09-21T16:00:00
      2
      2.0
      140000
      70.000000
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
  

5 rows × 115 columns



In [56]:

    
df2 = df[["Audience Targeting","Lifetime Post Total Reach","Lifetime Post organic reach", "Lifetime Engaged Users"]]



In [57]:

    
df2["Count"] = 1
df2.head()









    



/Users/Mike/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':






    Out[57]:






  
    
      
      Audience Targeting
      Lifetime Post Total Reach
      Lifetime Post organic reach
      Lifetime Engaged Users
      Count
    
    
      Posted
      
      
      
      
      
    
  
  
    
      2016-02-10 23:58:00
      Live Entertainment, Dinosaur, Tyrannosaurus, P...
      31035
      31035
      800
      1
    
    
      2016-02-10 22:27:00
      
      49780
      49780
      1735
      1
    
    
      2016-02-10 20:56:00
      Sailing, Scuba diving, Shipwreck, Ruins
      41273
      41273
      1860
      1
    
    
      2016-02-10 19:23:00
      Oregon, Ruins
      104310
      104310
      6296
      1
    
    
      2016-02-10 17:51:00
      Australia, Geology, Rock (geology), Travel, Ec...
      43133
      43133
      1380
      1

I need to resample the dataset, but to do that I think I need to set the Posted as the index?



In [39]:

    
df2_pivoted = pd.pivot_table(
    df2,
    ["Lifetime Post Total Reach","Lifetime Post organic reach","Lifetime Engaged Users","Count"],
    aggfunc = np.sum)









    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-39-e6b199e339bb> in <module>()
      2     df2,
      3     ["Lifetime Post Total Reach","Lifetime Post organic reach","Lifetime Engaged Users","Count"],
----> 4     aggfunc = np.sum)

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/tools/pivot.pyc in pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name)
    111             data = data[to_filter]
    112 
--> 113     grouped = data.groupby(keys)
    114     agged = grouped.agg(aggfunc)
    115 

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/generic.pyc in groupby(self, by, axis, level, as_index, sort, group_keys, squeeze)
   3434         axis = self._get_axis_number(axis)
   3435         return groupby(self, by=by, axis=axis, level=level, as_index=as_index,
-> 3436                        sort=sort, group_keys=group_keys, squeeze=squeeze)
   3437 
   3438     def asfreq(self, freq, method=None, how=None, normalize=False):

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/groupby.pyc in groupby(obj, by, **kwds)
   1309         raise TypeError('invalid type: %s' % type(obj))
   1310 
-> 1311     return klass(obj, by, **kwds)
   1312 
   1313 

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/groupby.pyc in __init__(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze)
    416         if grouper is None:
    417             grouper, exclusions, obj = _get_grouper(obj, keys, axis=axis,
--> 418                                                     level=level, sort=sort)
    419 
    420         self.obj = obj

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/groupby.pyc in _get_grouper(obj, key, axis, level, sort)
   2277 
   2278     if len(groupings) == 0:
-> 2279         raise ValueError('No group keys passed!')
   2280 
   2281     # create the internals grouper

ValueError: No group keys passed!



In [71]:

    
df2_resampled = df2.resample('W,',how='sum')



In [72]:

    
df2_resampled.tail()









    Out[72]:






  
    
      
      Lifetime Post Total Reach
      Lifetime Post organic reach
      Lifetime Engaged Users
      Count
    
    
      Posted
      
      
      
      
    
  
  
    
      2016-01-17
      2105718
      2105718
      93521
      46
    
    
      2016-01-24
      4265324
      4265324
      199711
      125
    
    
      2016-01-31
      5842000
      5842000
      272560
      138
    
    
      2016-02-07
      7153317
      7142510
      265521
      127
    
    
      2016-02-14
      4251126
      4251126
      167284
      64



In [73]:

    
df2_resampled["Average Reach"] = df2_resampled["Lifetime Post Total Reach"]/df2_resampled["Count"]



In [74]:

    
df2_resampled.tail()









    Out[74]:






  
    
      
      Lifetime Post Total Reach
      Lifetime Post organic reach
      Lifetime Engaged Users
      Count
      Average Reach
    
    
      Posted
      
      
      
      
      
    
  
  
    
      2016-01-17
      2105718
      2105718
      93521
      46
      45776.478261
    
    
      2016-01-24
      4265324
      4265324
      199711
      125
      34122.592000
    
    
      2016-01-31
      5842000
      5842000
      272560
      138
      42333.333333
    
    
      2016-02-07
      7153317
      7142510
      265521
      127
      56325.330709
    
    
      2016-02-14
      4251126
      4251126
      167284
      64
      66423.843750



In [131]:

    
df2_resampled.plot(y="Average Reach",kind='line',title="Mean Post Reach")
df2_resampled.plot(y="Lifetime Post Total Reach",kind='line')
df2_resampled["Average Organic Reach"] = df2_resampled["Lifetime Post organic reach"] / df2_resampled["Count"]
df2_resampled.plot(y="Average Organic Reach")









    Out[131]:





<matplotlib.axes._subplots.AxesSubplot at 0x10b52ae50>



In [69]:

    
df2_resampled









    Out[69]:






  
    
      
      Lifetime Post Total Reach
      Lifetime Post organic reach
      Lifetime Engaged Users
      Count
      Average Reach
      Average Organic Reach
    
    
      Posted
      
      
      
      
      
      
    
  
  
    
      2015-11-01
      19000
      19000
      572
      1
      19000.000000
      19000.000000
    
    
      2015-11-08
      3020488
      3015474
      182866
      93
      32478.365591
      32424.451613
    
    
      2015-11-15
      2543807
      2543807
      104623
      100
      25438.070000
      25438.070000
    
    
      2015-11-22
      3368053
      3368053
      164476
      105
      32076.695238
      32076.695238
    
    
      2015-11-29
      3135328
      3135328
      148977
      97
      32322.969072
      32322.969072
    
    
      2015-12-06
      3437571
      3437571
      167019
      105
      32738.771429
      32738.771429
    
    
      2015-12-13
      2918521
      2918521
      147482
      86
      33936.290698
      33936.290698
    
    
      2015-12-20
      3478403
      3478403
      155559
      124
      28051.637097
      28051.637097
    
    
      2015-12-27
      3062764
      3062764
      137932
      87
      35204.183908
      35204.183908
    
    
      2016-01-03
      3455661
      3455661
      154793
      85
      40654.835294
      40654.835294
    
    
      2016-01-10
      7310201
      7310201
      432257
      117
      62480.350427
      62480.350427
    
    
      2016-01-17
      2105718
      2105718
      93521
      46
      45776.478261
      45776.478261
    
    
      2016-01-24
      4265324
      4265324
      199711
      125
      34122.592000
      34122.592000
    
    
      2016-01-31
      5842000
      5842000
      272560
      138
      42333.333333
      42333.333333
    
    
      2016-02-07
      7153317
      7142510
      265521
      127
      56325.330709
      56240.236220
    
    
      2016-02-14
      4251126
      4251126
      167284
      64
      66423.843750
      66423.843750



In [76]:

    
df3_resampled = df2.resample('W,',how='median')



In [130]:

    
df3_resampled.plot(y="Lifetime Post Total Reach", title = "Median Post Reach")









    Out[130]:





<matplotlib.axes._subplots.AxesSubplot at 0x10aa4b990>



In [111]:

    
df2["Targeted"] = pd.Series(df2["Audience Targeting"]) != " "









    



/Users/Mike/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':



In [113]:

    
df2.Targeted = df2.Targeted.astype(int)









    



/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/generic.py:2387: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value



In [115]:

    
df2["Targeted"].corr(df2["Lifetime Post Total Reach"])









    Out[115]:





0.10632153388811802



In [117]:

    
df2_pivoted = pd.pivot_table(df2,values = ["Lifetime Post Total Reach","Count"],index=["Targeted"],aggfunc=np.sum)



In [118]:

    
df2_pivoted









    Out[118]:






  
    
      
      Count
      Lifetime Post Total Reach
    
    
      Targeted
      
      
    
  
  
    
      0
      1431
      54117763
    
    
      1
      69
      5249519



In [119]:

    
df2_pivoted["Average Reach"] = df2_pivoted["Lifetime Post Total Reach"] / df2_pivoted["Count"]



In [120]:

    
df2_pivoted









    Out[120]:






  
    
      
      Count
      Lifetime Post Total Reach
      Average Reach
    
    
      Targeted
      
      
      
    
  
  
    
      0
      1431
      54117763
      37818.143256
    
    
      1
      69
      5249519
      76079.985507



In [126]:

    
df2_recent = df2[:230]



In [127]:

    
df2_recent["Targeted"] = pd.Series(df2_recent["Audience Targeting"]) != " "
df2_recent.Targeted = df2_recent.Targeted.astype(int)
df2_pivoted = pd.pivot_table(df2_recent,values = ["Lifetime Post Total Reach","Count"],index=["Targeted"],aggfunc=np.sum)
df2_pivoted["Average Reach"] = df2_pivoted["Lifetime Post Total Reach"] / df2_pivoted["Count"]









    



/Users/Mike/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':



In [128]:

    
df2_pivoted









    Out[128]:






  
    
      
      Count
      Lifetime Post Total Reach
      Average Reach
    
    
      Targeted
      
      
      
    
  
  
    
      0
      161
      8641833
      53675.981366
    
    
      1
      69
      5249519
      76079.985507



In [129]:

    
df2_pivoted.plot(y="Average Reach",kind='bar')









    Out[129]:





<matplotlib.axes._subplots.AxesSubplot at 0x10aa06ed0>



In [ ]:

	Publisher	Title	Url	Published	Page Views	Uniques	Total Engaged Time	Avg Engaged Time	Social Actions	...
4020	Atlas Obscura	Kunsthaus Graz	http://www.atlasobscura.com/places/kunsthaus-g...	2016-04-04T09:00:00	1	1.0	55000	55.000000	0	...
4021	Atlas Obscura	Hans Christian Andersen Museum	http://www.atlasobscura.com/places/hans-christ...	2016-03-30T09:00:00	2	1.0	165000	82.500000	0	...
4022	Atlas Obscura	Club 47	http://www.atlasobscura.com/places/club-47	2015-09-22T15:00:00	438	395.0	26315000	60.079909	3	...
4023	Atlas Obscura	The Tree Crosses of Rosma Forest	http://www.atlasobscura.com/places/the-tree-cr...	2016-04-08T11:00:00	1	1.0	595000	595.000000	0	...
4024	Atlas Obscura	OBSCURA SOCIETY NY: THE ROBOTIC CHURCH Perform...	http://www.atlasobscura.com/events/http-www-at...	2015-09-21T16:00:00	2	2.0	140000	70.000000	0	...

	Audience Targeting	Lifetime Post Total Reach	Lifetime Post organic reach	Lifetime Engaged Users	Count
Posted
2016-02-10 23:58:00	Live Entertainment, Dinosaur, Tyrannosaurus, P...	31035	31035	800	1
2016-02-10 22:27:00		49780	49780	1735	1
2016-02-10 20:56:00	Sailing, Scuba diving, Shipwreck, Ruins	41273	41273	1860	1
2016-02-10 19:23:00	Oregon, Ruins	104310	104310	6296	1
2016-02-10 17:51:00	Australia, Geology, Rock (geology), Travel, Ec...	43133	43133	1380	1

	Lifetime Post Total Reach	Lifetime Post organic reach	Lifetime Engaged Users	Count
Posted
2016-01-17	2105718	2105718	93521	46
2016-01-24	4265324	4265324	199711	125
2016-01-31	5842000	5842000	272560	138
2016-02-07	7153317	7142510	265521	127
2016-02-14	4251126	4251126	167284	64

	Lifetime Post Total Reach	Lifetime Post organic reach	Lifetime Engaged Users	Count	Average Reach	Average Organic Reach
Posted
2015-11-01	19000	19000	572	1	19000.000000	19000.000000
2015-11-08	3020488	3015474	182866	93	32478.365591	32424.451613
2015-11-15	2543807	2543807	104623	100	25438.070000	25438.070000
2015-11-22	3368053	3368053	164476	105	32076.695238	32076.695238
2015-11-29	3135328	3135328	148977	97	32322.969072	32322.969072
2015-12-06	3437571	3437571	167019	105	32738.771429	32738.771429
2015-12-13	2918521	2918521	147482	86	33936.290698	33936.290698
2015-12-20	3478403	3478403	155559	124	28051.637097	28051.637097
2015-12-27	3062764	3062764	137932	87	35204.183908	35204.183908
2016-01-03	3455661	3455661	154793	85	40654.835294	40654.835294
2016-01-10	7310201	7310201	432257	117	62480.350427	62480.350427
2016-01-17	2105718	2105718	93521	46	45776.478261	45776.478261
2016-01-24	4265324	4265324	199711	125	34122.592000	34122.592000
2016-01-31	5842000	5842000	272560	138	42333.333333	42333.333333
2016-02-07	7153317	7142510	265521	127	56325.330709	56240.236220
2016-02-14	4251126	4251126	167284	64	66423.843750	66423.843750