notebook.community

Edit and run



In [106]:

    
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline



In [60]:

    
df = pd.read_csv('dietbet.csv')
df.dtypes
def fix_dates(x):
    return x.split(' ')[0]
df['Posted'] = df.Posted.apply(fix_dates)



In [61]:

    
df.drop(['Post ID','Permalink'],inplace=True,axis=1)



In [62]:

    
df.drop(['Countries','Languages'],inplace=True,axis=1)



In [63]:

    
engagement = pd.Series(df['Lifetime Engaged Users']/df['Lifetime Post Total Impressions'])
df.insert(5,'Engagement Rate',engagement)



In [64]:

    
def multiply_ten(x):
    return x*100
df['Engagement Rate'] = df['Engagement Rate'].apply(multiply_ten)



In [65]:

    
df.drop(['Lifetime Average time video viewed':],axis=1,inplace=True)









    



  File "<ipython-input-65-a08d4e9d7b63>", line 1
    df.drop(['Lifetime Average time video viewed':],axis=1,inplace=True)
                                                 ^
SyntaxError: invalid syntax



In [66]:

    
df.drop(df.columns[21:],inplace=True,axis=1)



In [68]:

    
length_msg = pd.Series([len(x) for x in df['Post Message']])
df.insert(0,'Message Length',length_msg)



In [70]:

    
df.to_csv('dietbet2.csv')
df2 = pd.read_csv('dietbet2.csv',parse_dates=True)



In [ ]:

    
df3 = pd.read_csv('dietbet2.csv',parse_dates=True)



In [113]:

    
ax = df3.boxplot(column='Lifetime Post Total Reach',by='Type')



In [74]:

    
df2['Posted'] = pd.to_datetime(df['Posted'])



In [76]:

    
df.dtypes









    Out[76]:





Unnamed: 0                                                                      int64
Message Length                                                                  int64
Post Message                                                                   object
Type                                                                           object
Posted                                                                 datetime64[ns]
Audience Targeting                                                             object
Lifetime Post Total Reach                                                       int64
Engagement Rate                                                               float64
Lifetime Post organic reach                                                     int64
Lifetime Post Paid Reach                                                        int64
Lifetime Post Total Impressions                                                 int64
Lifetime Post Organic Impressions                                               int64
Lifetime Post Paid Impressions                                                  int64
Lifetime Engaged Users                                                          int64
Lifetime Post Consumers                                                         int64
Lifetime Post Consumptions                                                      int64
Lifetime Negative feedback                                                      int64
Lifetime Negative Feedback from Users                                           int64
Lifetime Post Impressions by people who have liked your Page                    int64
Lifetime Post reach by people who like your Page                                int64
Lifetime Post Paid Impressions by people who have liked your Page               int64
Lifetime Paid reach of a post by people who like your Page                      int64
Lifetime People who have liked your Page and engaged with your post             int64
dtype: object



In [82]:

    
df_freq = df2[['Posted','Lifetime Post Total Reach','Engagement Rate','Lifetime Post Consumptions']]



In [83]:

    
df_freq['count'] = 1









    



/Users/Mike/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':



In [84]:

    
df_freq.drop('Engagement Rate',inplace=True,axis=1)









    



/Users/Mike/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':



In [86]:

    
df_freq.set_index('Posted',inplace=True)
df_resample = df_freq.resample('D',how='sum')









    



/Users/Mike/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:2: FutureWarning: how in .resample() is deprecated
the new syntax is .resample(...).sum()
  from ipykernel import kernelapp as app



In [88]:

    
df_resample.dropna(inplace=True)



In [90]:

    
df_resample['Mean Reach'] = df_resample['Lifetime Post Total Reach'] / df_resample['count']
df_resample['Mean Consumption'] = df_resample['Lifetime Post Consumptions'] / df_resample['count']



In [94]:

    
df_resample
df_resample['Consumption Rate'] = df_resample['Mean Consumption'] / df_resample['Mean Reach']



In [97]:

    
df_resample.corr()









    Out[97]:






  
    
      
      Lifetime Post Total Reach
      Lifetime Post Consumptions
      count
      Mean Reach
      Mean Consumption
      Consumption Rate
    
  
  
    
      Lifetime Post Total Reach
      1.000000
      0.822745
      0.242287
      0.863281
      0.753697
      0.333010
    
    
      Lifetime Post Consumptions
      0.822745
      1.000000
      0.010129
      0.851056
      0.975008
      0.663392
    
    
      count
      0.242287
      0.010129
      1.000000
      -0.211647
      -0.161383
      -0.017229
    
    
      Mean Reach
      0.863281
      0.851056
      -0.211647
      1.000000
      0.884081
      0.387112
    
    
      Mean Consumption
      0.753697
      0.975008
      -0.161383
      0.884081
      1.000000
      0.618451
    
    
      Consumption Rate
      0.333010
      0.663392
      -0.017229
      0.387112
      0.618451
      1.000000



In [98]:

    
df_resample.describe()









    Out[98]:






  
    
      
      Lifetime Post Total Reach
      Lifetime Post Consumptions
      count
      Mean Reach
      Mean Consumption
      Consumption Rate
    
  
  
    
      count
      75.000000
      75.000000
      75.000000
      75.000000
      75.000000
      75.000000
    
    
      mean
      3343.013333
      117.706667
      1.426667
      2611.907556
      97.564667
      0.029437
    
    
      std
      2632.497712
      180.568230
      0.756474
      2426.448961
      178.371113
      0.022312
    
    
      min
      86.000000
      0.000000
      1.000000
      86.000000
      0.000000
      0.000000
    
    
      25%
      1646.500000
      30.000000
      1.000000
      1317.500000
      23.500000
      0.014901
    
    
      50%
      2593.000000
      59.000000
      1.000000
      1801.000000
      39.000000
      0.024898
    
    
      75%
      4431.000000
      129.500000
      2.000000
      2651.000000
      95.250000
      0.034212
    
    
      max
      14526.000000
      1049.000000
      5.000000
      14526.000000
      1049.000000
      0.135101



In [100]:

    
df_resample[(df_resample['count'] > 4)]









    Out[100]:






  
    
      
      Lifetime Post Total Reach
      Lifetime Post Consumptions
      count
      Mean Reach
      Mean Consumption
      Consumption Rate
    
    
      Posted
      
      
      
      
      
      
    
  
  
    
      2016-01-06
      3812.0
      123.0
      5.0
      762.4
      24.6
      0.032267



In [107]:

    
_ = df_resample.plot(kind='scatter',x='count',y='Lifetime Post Consumptions')



In [ ]:

	Lifetime Post Total Reach	Lifetime Post Consumptions	count	Mean Reach	Mean Consumption	Consumption Rate
Lifetime Post Total Reach	1.000000	0.822745	0.242287	0.863281	0.753697	0.333010
Lifetime Post Consumptions	0.822745	1.000000	0.010129	0.851056	0.975008	0.663392
count	0.242287	0.010129	1.000000	-0.211647	-0.161383	-0.017229
Mean Reach	0.863281	0.851056	-0.211647	1.000000	0.884081	0.387112
Mean Consumption	0.753697	0.975008	-0.161383	0.884081	1.000000	0.618451
Consumption Rate	0.333010	0.663392	-0.017229	0.387112	0.618451	1.000000

	Lifetime Post Total Reach	Lifetime Post Consumptions	count	Mean Reach	Mean Consumption	Consumption Rate
count	75.000000	75.000000	75.000000	75.000000	75.000000	75.000000
mean	3343.013333	117.706667	1.426667	2611.907556	97.564667	0.029437
std	2632.497712	180.568230	0.756474	2426.448961	178.371113	0.022312
min	86.000000	0.000000	1.000000	86.000000	0.000000	0.000000
25%	1646.500000	30.000000	1.000000	1317.500000	23.500000	0.014901
50%	2593.000000	59.000000	1.000000	1801.000000	39.000000	0.024898
75%	4431.000000	129.500000	2.000000	2651.000000	95.250000	0.034212
max	14526.000000	1049.000000	5.000000	14526.000000	1049.000000	0.135101