In [ ]:


In [ ]:


In [ ]:

iPython notebook - Daryna analysis - Conversions by website pages

1. Import libraries


In [9]:
%matplotlib inline 

import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd

# pandas display options for this notebook:
# - allow up to 500 characters per line before wrapping the text repr
# - show up to 100 columns before truncating
# - render DataFrames as HTML tables in notebook output
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.notebook_repr_html', True)
import seaborn as sns #sets up styles and gives us more plotting options
  2. Settings

In [10]:
# Reporting period: 24 Jan - 24 Apr 2017 (an arbitrary window).

# API credentials (presumably Google service-account identifiers)
# Email address: 705762800217-compute@developer.gserviceaccount.com
# Key ID: 948ee8e2a420ef14a5d5a29bd35104fe2f1e6ed4
# NOTE(review): consider keeping these identifiers out of the notebook and
# loading them from the environment; never paste the private key itself here.

In [11]:
# Data source: the input file was requested via the API explorer using the
# request parameters listed below.

# Account:    TMRW Tech Hub
# Property:   TMRW
# View:       All Web Site Data
# ids:        ga:123303369
# start-date: 2017-01-24
# end-date:   2017-04-24

# metrics:
#   ga:sessions
#   ga:sessionsWithEvent

# dimensions:
#   ga:pagePath

# sort (descending by sessions with event):
#   -ga:sessionsWithEvent

# filter (only pages with more than 10 sessions):
#   ga:sessions>10

In [14]:
# Load the exported page-level data.
# The export provides ga:pagePath, ga:sessions and ga:sessionsWithEvent; the
# file also carries a fourth column, `cr`, holding the per-page rate
# "sessions with event" / "sessions" (it is not computed in this notebook).

TMRW_events = pd.read_csv(filepath_or_buffer="files/TMRW_events.csv")
TMRW_events


Out[14]:
ga:pagePath ga:sessions ga:sessionsWithEvent cr
0 '/ 4436 82 0.018485
1 '/TMRW_FAQs.php 100 26 0.260000
2 '/TMRW_Byte_Cafe.php 218 23 0.105505
3 '/TMRW_the_team.php 99 10 0.101010
4 '/trainstrikes.php 13 0 0.000000
5 '/voteforbyte.php 31 0 0.000000

In [17]:
# Swap the export headers for the short column names used by the cells below.
TMRW_events.columns = [
    "page",
    "sessions",
    "events",
    "rate",
]
TMRW_events


Out[17]:
page sessions events rate
0 '/ 4436 82 0.018485
1 '/TMRW_FAQs.php 100 26 0.260000
2 '/TMRW_Byte_Cafe.php 218 23 0.105505
3 '/TMRW_the_team.php 99 10 0.101010
4 '/trainstrikes.php 13 0 0.000000
5 '/voteforbyte.php 31 0 0.000000

In [18]:
# Keep only the pages whose rate is strictly positive.
TMRW_events_filter = TMRW_events.loc[TMRW_events["rate"] > 0]
TMRW_events_filter


Out[18]:
page sessions events rate
0 '/ 4436 82 0.018485
1 '/TMRW_FAQs.php 100 26 0.260000
2 '/TMRW_Byte_Cafe.php 218 23 0.105505
3 '/TMRW_the_team.php 99 10 0.101010

In [19]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric
# columns for the pages kept by the rate > 0 filter.
TMRW_events_filter.describe()


Out[19]:
sessions events rate
count 4.00000 4.000000 4.000000
mean 1213.25000 35.250000 0.121250
std 2149.22612 31.930915 0.100780
min 99.00000 10.000000 0.018485
25% 99.75000 19.750000 0.080379
50% 159.00000 24.500000 0.103257
75% 1272.50000 40.000000 0.144128
max 4436.00000 82.000000 0.260000

In [20]:
# Bar chart of the rate for every page kept by the rate > 0 filter.
#
# `bokeh.charts` (the old `Bar` helper) was removed from Bokeh, so importing it
# fails on current releases; the chart is built with the supported
# `bokeh.plotting` API instead.
from bokeh.io import output_notebook, show
from bokeh.plotting import figure

output_notebook()

# The old `Bar` chart summed `values` per label; do the same explicitly so a
# page that appears more than once still yields exactly one bar (a categorical
# x_range must not contain duplicate factors).
rate_by_page = TMRW_events_filter.groupby('page', sort=False)['rate'].sum()
pages = list(rate_by_page.index)

p = figure(x_range=pages, title="Events per page")
p.vbar(x=pages, top=list(rate_by_page.values), width=0.8)
p.xaxis.axis_label = "page"
p.yaxis.axis_label = "rate"
p.y_range.start = 0  # anchor the bars on the x axis
show(p)


Loading BokehJS ...

In [65]:
# Average the numeric columns per page; the result is indexed by `page`.
TMRW_events_data = TMRW_events_filter.groupby("page").mean()
TMRW_events_data


Out[65]:
rate
page
'/ 0.0185
'/TMRW_Byte_Cafe.php 0.1055
'/TMRW_FAQs.php 0.2600
'/TMRW_the_team.php 0.1010

In [67]:
# Pull out the per-page rate as a Series indexed by page.
selected = TMRW_events_data["rate"]
selected


Out[67]:
page
'/                      0.0185
'/TMRW_Byte_Cafe.php    0.1055
'/TMRW_FAQs.php         0.2600
'/TMRW_the_team.php     0.1010
Name: rate, dtype: float64

In [68]:
# Pie chart of the per-page rate.
#
# Labels and wedge sizes are both taken from `selected` so that every wedge is
# paired with its own page. `selected` follows the groupby index order, while
# TMRW_events_filter['rate'] keeps the original row order, so mixing the two
# attaches the wrong page name to some wedges.
labels = selected.index
sizes = selected.values
colors = ['green', 'yellow', 'red', 'lightskyblue']  # matplotlib cycles through these if there are more wedges
explode = (0,) * len(sizes)  # no wedge pulled out; length must equal the number of wedges

# With autopct set, plt.pie returns (patches, texts, autotexts); the patches
# are the handles that plt.legend needs.
patches, texts, autotexts = plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct='%1.1f%%', shadow=False, startangle=90)
plt.legend(patches, labels, loc="best")
plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.title('Conversions by pages                                    ')
plt.tight_layout()
plt.show()



In [ ]:


In [ ]: