In [22]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns
sns.set_style("darkgrid")
import sklearn.linear_model
import datetime

%matplotlib inline

In [2]:
df = pd.read_csv('~/Downloads/all-content.csv')

In [3]:
df.head()


Out[3]:
Publisher Title Url Published Page Views Uniques Total Engaged Time Avg Engaged Time Social Actions Social Referrals ... Sharethrough Paid Desktop Referrals Amplify Paid Referrals Amplify Paid Mobile Referrals Amplify Paid Tablet Referrals Amplify Paid Desktop Referrals Gravity Paid Referrals Gravity Paid Mobile Referrals Gravity Paid Tablet Referrals Gravity Paid Desktop Referrals Nativo Paid Referrals
0 Atlas Obscura The Famous Photo of Chernobyl's Most Dange... http://www.atlasobscura.com/articles/the-famou... 2016-01-25T00:30:00 678116 593158.0 155172300000 228.828549 26930 476363 ... 0 0 0 0 0 0 0 0 0 0
1 Atlas Obscura How Capicola Became Gabagool: The Italian New ... http://www.atlasobscura.com/articles/how-capic... 2015-11-05T16:40:00 667305 545155.0 190769270000 285.880175 201909 443247 ... 0 0 0 0 0 0 0 0 0 0
2 Atlas Obscura Fascinating Photos from the Secret Trash Colle... http://www.atlasobscura.com/articles/fascinati... 2016-03-17T15:25:00 380902 349447.0 50527410000 132.651994 59595 287620 ... 0 0 0 0 0 0 0 0 0 0
3 Atlas Obscura C.S. Lewis Greatest Fiction: Convincing Ameri... http://www.atlasobscura.com/articles/cs-lewis-... 2015-12-03T12:47:00 344742 314081.0 61664380000 178.871098 97963 261005 ... 0 0 0 0 0 0 0 0 0 0
4 Atlas Obscura How Miracle Max Nearly Shut Down 'The Prin... http://www.atlasobscura.com/articles/how-mirac... 2015-09-25T15:00:00 344233 314316.0 51442570000 149.441134 14467 204406 ... 0 0 0 0 0 0 0 0 0 0

5 rows × 115 columns


In [4]:
places = df[df['Url'].str.contains('/places/')==True]

In [10]:
places_traffic = places[['Title','Published','Page Views','FaceBook Referrals','Pinterest Referrals','Search Referrals']]

In [11]:
places_traffic.head()


Out[11]:
Title Published Page Views FaceBook Referrals Pinterest Referrals Search Referrals
5 Kentucky Bend 2015-09-16T15:00:00 257638 201893 9 550
17 Mystery Soda Machine 2015-12-23T15:00:00 133317 92055 21 8414
45 Long Lines Building, 33 Thomas Street 2015-09-01T15:00:00 56221 39888 5 734
66 Inaccessible Island 2016-04-13T09:00:00 42090 33775 1 38
136 Fuggerei 2015-08-20T15:00:00 32408 15815 12 1991

In [14]:
articles = df[df['Url'].str.contains('/articles/')==True]

In [15]:
articles_traffic = articles[['Title','Published','Page Views','FaceBook Referrals','Pinterest Referrals','Search Referrals']]

In [17]:
articles_traffic = articles_traffic.set_index('Published')
places_traffic = places_traffic.set_index('Published')

In [28]:
datetime.date.today()


Out[28]:
datetime.date(2016, 5, 25)

In [29]:
sum(places['FaceBook Referrals'])/len(places.Title)


Out[29]:
883

In [34]:
places_traffic = places_traffic[places_traffic['Search Referrals']>10]
places_traffic.describe()


Out[34]:
Page Views FaceBook Referrals Pinterest Referrals Search Referrals
count 1041.000000 1041.000000 1041.000000 1041.000000
mean 4007.038425 1226.681076 1.512968 100.198847
std 11344.133195 7182.029749 4.161416 381.319089
min 209.000000 0.000000 0.000000 11.000000
25% 1289.000000 60.000000 0.000000 24.000000
50% 2118.000000 359.000000 0.000000 41.000000
75% 3954.000000 1003.000000 1.000000 73.000000
max 257638.000000 201893.000000 60.000000 8414.000000

In [35]:
articles_traffic = articles_traffic[articles_traffic['Search Referrals']>10]
articles_traffic.describe()


Out[35]:
Page Views FaceBook Referrals Pinterest Referrals Search Referrals
count 1885.000000 1885.000000 1885.000000 1885.000000
mean 13109.864721 5163.321485 3.811671 301.507692
std 32724.973584 20117.220306 22.895221 1237.795723
min 265.000000 0.000000 0.000000 11.000000
25% 2797.000000 467.000000 0.000000 46.000000
50% 5743.000000 1264.000000 0.000000 101.000000
75% 11851.000000 3697.000000 2.000000 233.000000
max 678116.000000 454110.000000 732.000000 27668.000000

In [62]:
fig, axes = plt.subplots(nrows=2,ncols=2,sharey='row',figsize=(10,10))
axes[0,0].set_title('Places')
axes[0,1].set_title('Articles')
places_traffic.boxplot('FaceBook Referrals',showfliers=False,ax=axes[0,0])
places_traffic.boxplot('Search Referrals',showfliers=False,ax=axes[1,0])
articles_traffic.boxplot('FaceBook Referrals',showfliers=False,ax=axes[0,1])
articles_traffic.boxplot('Search Referrals',showfliers=False,ax=axes[1,1])


/Users/Mike/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:4: FutureWarning: 
The default value for 'return_type' will change to 'axes' in a future release.
 To use the future behavior now, set return_type='axes'.
 To keep the previous behavior and silence this warning, set return_type='dict'.
/Users/Mike/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:5: FutureWarning: 
The default value for 'return_type' will change to 'axes' in a future release.
 To use the future behavior now, set return_type='axes'.
 To keep the previous behavior and silence this warning, set return_type='dict'.
/Users/Mike/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:6: FutureWarning: 
The default value for 'return_type' will change to 'axes' in a future release.
 To use the future behavior now, set return_type='axes'.
 To keep the previous behavior and silence this warning, set return_type='dict'.
/Users/Mike/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:7: FutureWarning: 
The default value for 'return_type' will change to 'axes' in a future release.
 To use the future behavior now, set return_type='axes'.
 To keep the previous behavior and silence this warning, set return_type='dict'.
Out[62]:
{'boxes': [<matplotlib.lines.Line2D at 0x129f0a590>],
 'caps': [<matplotlib.lines.Line2D at 0x1285a1710>,
  <matplotlib.lines.Line2D at 0x1285a1d50>],
 'fliers': [],
 'means': [],
 'medians': [<matplotlib.lines.Line2D at 0x129e493d0>],
 'whiskers': [<matplotlib.lines.Line2D at 0x127f20590>,
  <matplotlib.lines.Line2D at 0x127f34990>]}

In [ ]: