In [1]:
import ga_utils as g
import pandas as pd
import auth as auth
import matplotlib
import config as config
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.formula.api as smf
%matplotlib inline
In [2]:
# Launch the authentication flow provided by the project-local `auth` module.
# This runs before any of the `g.*` data requests in the following cell.
# NOTE(review): presumably this sets up the credentials those requests need —
# confirm against auth.py.
auth.main()
Out[2]:
In [3]:
# All four requests target the same view ID and the same reporting window,
# so both are spelled out once and reused.
ga_view_id = "94275425"
date_window = dict(start_date='2015-05-01', end_date='2016-05-20')

# NOTE(review): `pageviews` is filled by get_all_pages and `pages` by
# get_all_pageviews — the names look crossed. `pages` is the frame that the
# concat cell below actually consumes, so the names are kept as they are.
pageviews = g.get_all_pages(ga_view_id, **date_window)
pages = g.get_all_pageviews(ga_view_id, **date_window)
sessions = g.get_sessions(ga_view_id, **date_window)
transactions = g.get_transactions_by_day(ga_view_id, **date_window)
In [29]:
# Assemble the modelling frame: everything in `pages` plus the transactions
# and sessions columns, each cast to int and placed side by side (axis=1).
metric_frames = [
    pages.astype(int),
    transactions['ga:transactions'].astype(int),
    sessions['ga:sessions'].astype(int),
]
df = pd.concat(metric_frames, axis=1)
In [34]:
# Strip the 'ga:' prefix from every column name so the columns can be used
# via attribute access (df.pageviews) and in regression formulas.
df = df.rename(columns=lambda name: name.replace('ga:', ''))

# Remove the rows whose pageview count equals one of these three values.
# NOTE(review): presumably these are outlier days — confirm why they are excluded.
excluded_pageviews = [87293, 122848, 73522]
df = df[~df.pageviews.isin(excluded_pageviews)]

# Show the remaining rows from highest to lowest pageviews. The sorted frame
# is only displayed; `df` itself keeps its original row order.
df.sort_values(by='pageviews', ascending=False)
Out[34]:
In [35]:
# Two scatter panels side by side with a shared y axis: transactions against
# pageviews on the left and against sessions on the right.
fig, axs = plt.subplots(1, 2, sharey=True)
pageviews_ax, sessions_ax = axs
df.plot.scatter(x="pageviews", y='transactions', ax=pageviews_ax, figsize=(16, 8))
df.plot.scatter(x="sessions", y='transactions', ax=sessions_ax, figsize=(16, 8))
Out[35]:
In [36]:
# Ordinary least squares with a single predictor: transactions explained by
# pageviews. The model is specified first, then fitted; `lm` holds the fit.
simple_ols = smf.ols(formula='transactions ~ pageviews', data=df)
lm = simple_ols.fit()
In [37]:
# Display the fitted coefficients of the `transactions ~ pageviews` model.
lm.params
Out[37]:
In [38]:
# Hand-computed prediction for 50 pageviews: intercept + slope * 50.
# The coefficients are looked up by label rather than with integer keys:
# `lm.params` is a label-indexed Series, and pandas has deprecated treating
# integer keys as positions on such a Series.
lm.params['Intercept'] + lm.params['pageviews'] * 50
Out[38]:
In [39]:
# Two-row frame holding the smallest and the largest observed pageview count,
# used as the x values at which the fitted model is evaluated.
pageviews_range = [df['pageviews'].min(), df['pageviews'].max()]
X_new = pd.DataFrame({'pageviews': pageviews_range})
X_new.head()
Out[39]:
In [40]:
# Evaluate the fitted model at the pageview values in X_new and display
# the resulting predictions.
preds = lm.predict(X_new)
preds
Out[40]:
In [41]:
# Scatter of the observed data, then the model's predictions at the X_new
# pageview values drawn as a line on the same axes.
scatter_ax = df.plot.scatter(x="pageviews", y='transactions', color=g.colours['blue'])
scatter_ax.plot(X_new, preds, linewidth=2, color=g.colours['red'])
Out[41]:
In [42]:
# Display the confidence intervals of the fitted coefficients, at whatever
# level conf_int() uses by default (no level is passed here).
lm.conf_int()
Out[42]:
In [43]:
# Display the p-values reported for the fitted coefficients.
lm.pvalues
Out[43]:
In [44]:
# Display the R-squared of the single-predictor model.
lm.rsquared
Out[44]:
In [45]:
# Multiple regression: transactions explained by sessions and pageviews
# together. The model is specified, fitted, and its coefficients displayed.
multi_ols = smf.ols(formula='transactions ~ sessions + pageviews', data=df)
lm_multi = multi_ols.fit()
lm_multi.params
Out[45]: