In [106]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [60]:
# Load the raw export from 'dietbet.csv' into the working DataFrame `df`.
df = pd.read_csv('dietbet.csv')
# Bare expression: it evaluates the column dtypes, but because more statements
# follow in this cell the value is not displayed.
df.dtypes
def fix_dates(x):
    """Return the part of a timestamp string that precedes the first space.

    Used to strip the time-of-day from values such as ``'<date> <time>'`` so
    that only the date portion remains.

    Parameters
    ----------
    x : str or any
        A cell value from the ``Posted`` column.

    Returns
    -------
    str or any
        Everything before the first ``' '`` when ``x`` is a string. Non-string
        values (for example a NaN from an empty cell) are returned unchanged
        instead of raising ``AttributeError`` on ``.split``.
    """
    if not isinstance(x, str):
        # Missing cells arrive as float NaN; pass them through untouched.
        return x
    return x.split(' ')[0]
# Replace every 'Posted' value with the result of fix_dates applied to it.
df['Posted'] = df['Posted'].map(fix_dates)
In [61]:
# Remove the 'Post ID' and 'Permalink' columns from df in place.
df.drop(labels=['Post ID', 'Permalink'], axis='columns', inplace=True)
In [62]:
# Remove the 'Countries' and 'Languages' columns from df in place.
df.drop(labels=['Countries', 'Languages'], axis='columns', inplace=True)
In [63]:
# Per-post ratio of engaged users to total impressions.
engagement = pd.Series(
    df['Lifetime Engaged Users'].div(df['Lifetime Post Total Impressions'])
)
# Place the new column at position 5 of the frame.
df.insert(loc=5, column='Engagement Rate', value=engagement)
In [64]:
def multiply_ten(x):
    """Scale ``x`` by 100 (turn a fraction into a percentage).

    NOTE: despite the name, the factor is 100, not 10. The name is kept so
    existing calls continue to work.

    Parameters
    ----------
    x : number
        Value to scale.

    Returns
    -------
    number
        ``x * 100``.
    """
    return x * 100
# Rescale every 'Engagement Rate' value through multiply_ten.
df['Engagement Rate'] = df['Engagement Rate'].map(multiply_ten)
In [65]:
# Drop the 'Lifetime Average time video viewed' column from df in place.
# The original list literal had a stray ':' after the label, which is a
# SyntaxError, so the statement could never execute.
# NOTE(review): the ':' may have been an attempt at a label slice ("this column
# and every column after it") — confirm the intent against the positional
# drop in the next cell.
df.drop(['Lifetime Average time video viewed'], axis=1, inplace=True)
In [66]:
# Keep only the first 21 columns (positions 0-20); everything from position
# 21 onward is removed in place.
df.drop(labels=df.columns[21:], axis='columns', inplace=True)
In [68]:
# Character count of each post's message.
# `.str.len()` keeps df's own index (so the insert below aligns row-for-row)
# and yields NaN for a missing message, whereas calling len() on a NaN float
# raises TypeError.
length_msg = df['Post Message'].str.len()
# Put the new column first in the frame.
df.insert(0,'Message Length',length_msg)
In [70]:
# Persist the cleaned frame, then read it back as df2.
# NOTE(review): to_csv writes the row index as an unnamed first column, which
# comes back as an extra column on read — confirm whether that is wanted.
df.to_csv('dietbet2.csv')
# parse_dates=True only attempts to parse the index, and no index_col is given,
# so it converted nothing. Name the date column explicitly instead.
df2 = pd.read_csv('dietbet2.csv', parse_dates=['Posted'])
In [ ]:
# Second copy of the cleaned data, used for plotting.
# parse_dates=True only attempts to parse the index (none is specified here),
# so the 'Posted' column is named explicitly to get real datetimes.
df3 = pd.read_csv('dietbet2.csv', parse_dates=['Posted'])
In [113]:
# Box plot of 'Lifetime Post Total Reach', one box per value of 'Type'.
ax = df3.boxplot(
    column='Lifetime Post Total Reach',
    by='Type',
)
In [74]:
# Convert df2's own 'Posted' column to datetimes. The original read the values
# from df and relied on both frames sharing the same row index to line up.
df2['Posted'] = pd.to_datetime(df2['Posted'])
In [76]:
# Show the dtype of every column in df.
# NOTE(review): the datetime conversion in the preceding cell is assigned to
# df2, not df — this was presumably meant to inspect df2; confirm.
df.dtypes
Out[76]:
In [82]:
# Subset of df2 used for the per-day aggregation. An explicit copy is taken
# because df_freq is modified afterwards (new column, dropped column, new
# index); without it pandas emits SettingWithCopyWarning on those writes.
df_freq = df2[['Posted','Lifetime Post Total Reach','Engagement Rate','Lifetime Post Consumptions']].copy()
In [83]:
# Add a constant column of 1s; summing it per day in the resample step gives
# the number of posts made on that day.
df_freq['count'] = 1
In [84]:
# Remove the 'Engagement Rate' column from df_freq in place.
df_freq.drop(labels='Engagement Rate', axis='columns', inplace=True)
In [86]:
# Index the rows by posting date so they can be grouped by calendar day.
df_freq.set_index('Posted',inplace=True)
# Daily totals of every column. The `how=` keyword is no longer accepted by
# resample(); the aggregation is now a method call on the resampler.
# min_count=1 leaves days with no posts as NaN rather than 0, so the
# subsequent dropna() step still removes them.
df_resample = df_freq.resample('D').sum(min_count=1)
In [88]:
# Discard, in place, every row that has a missing value in any column.
df_resample.dropna(axis='index', how='any', inplace=True)
In [90]:
# Per-row averages: each total divided by that row's 'count'.
df_resample['Mean Reach'] = df_resample['Lifetime Post Total Reach'].div(df_resample['count'])
df_resample['Mean Consumption'] = df_resample['Lifetime Post Consumptions'].div(df_resample['count'])
In [94]:
# Bare expression; another statement follows in this cell, so it produces no
# output.
df_resample
# Mean consumption relative to mean reach, row by row.
df_resample['Consumption Rate'] = df_resample['Mean Consumption'].div(df_resample['Mean Reach'])
In [97]:
# Pairwise correlation matrix of df_resample's columns.
df_resample.corr()
Out[97]:
In [98]:
# Summary statistics for each column of df_resample.
df_resample.describe()
Out[98]:
In [100]:
# Rows whose 'count' is greater than 4.
df_resample.loc[df_resample['count'].gt(4)]
Out[100]:
In [107]:
# Scatter plot of 'Lifetime Post Consumptions' against 'count'; the returned
# axes object is bound to `_`.
_ = df_resample.plot.scatter(
    x='count',
    y='Lifetime Post Consumptions',
)
In [ ]: