In [2]:
%matplotlib inline
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd
# Notebook-wide pandas display settings: wide text output, up to 100 columns
# shown, and HTML table rendering enabled for DataFrames.
pd.options.display.width = 500
pd.options.display.max_columns = 100
pd.options.display.notebook_repr_html = True
import seaborn as sns #sets up styles and gives us more plotting options
In [ ]:
# Website supplied by the user. No other cell visible in this notebook reads it.
URL = "tmrw.co" # User-entered website
In [20]:
# Time period: 1 Jan - 30 Apr (chosen arbitrarily)
# API credentials: do NOT keep them in the notebook; load them at runtime
# from environment variables or a secrets store.
# Service-account email: <redacted>
# Key ID: <redacted>
In [21]:
# Open file. The data was requested via the API explorer with these request parameters:
#   Account:    TMRW Tech Hub
#   Property:   TMRW
#   View:       All Web Site Data
#   ids:        ga:123303369
#   start-date: 2017-01-01
#   end-date:   2017-04-30
#   metrics:
#     ga:sessions
#     ga:bounceRate
#     ga:goal1ConversionRate
#     ga:goal1Completions
#   dimensions:
#     ga:city
#     ga:userAgeBracket
#   sort:
#     ga:goal1ConversionRate
Skipped steps: check statistical validity; filter out spam traffic and our own dev/marketing IPs.
Algorithm of actions:
Bounce Rate
Avg. Session Duration
Goal 1 Completions
Goal 1 Conversion Rate
Pages / Session
Take Key_metrics and check the volume of traffic and conversions. Which segment is the most extreme? Conversion bucket = new array: `TMRW_users_city_max = TMRW_users_city.max()`
Define key metrics: Key_metrics = location/age. Location/gender or age/gender are also possible, but they require a separate API call.
Open file
Visualise
In [102]:
# Load the exported report from disk.
input_users = pd.read_csv("files/TMRW_users.csv")

# Replace the exported headers with short names. The assignment is positional,
# so the file must contain exactly these six columns in this order (pandas
# raises ValueError on a length mismatch).
input_users.columns = ['City', 'Age', 'Sessions', 'BR', 'Conversions', 'CR']

# Minimum number of sessions a row needs to be kept for the rate comparisons.
# NOTE(review): the notebook does not explain why 80 was chosen; presumably it
# removes low-traffic rows whose rates are too noisy - confirm.
MIN_SESSIONS = 80

# Keep only rows with strictly more than MIN_SESSIONS sessions.
input_users_filter = input_users[input_users['Sessions'] > MIN_SESSIONS]
input_users_filter
Out[102]:
In [104]:
# Find the City / Age combination with the highest conversion rate (CR).
#
# The row is located once via idxmax() instead of re-indexing the frame by CR
# and looking the maximum up again: a lookup on a CR index returns a Series
# (not a scalar) whenever several rows share the maximum, which would corrupt
# the label built below. idxmax() always yields a single row (the first one
# holding the maximum) and ignores NaN values.
if input_users_filter.empty:
    raise ValueError("input_users_filter is empty; cannot determine the best-converting segment")

best_segment = input_users_filter.loc[input_users_filter['CR'].idxmax()]

max_CR = best_segment['CR']
max_CR_City = best_segment['City']
max_CR_Age = best_segment['Age']

# Human-readable label "<City> <Age>"; %s formatting also copes with a
# non-string Age value.
max_CR_City_Age = "%s %s" % (max_CR_City, max_CR_Age)
max_CR_City_Age
Out[104]:
In [ ]:
In [116]:
# Bucket the filtered rows into sections, here by conversion rate (CR).

# The five rows with the highest CR.
input_users_CR = input_users_filter.nlargest(5, 'CR')

# Average only the numeric columns. Selecting them explicitly keeps the string
# column 'City' out of the per-Age mean; newer pandas releases no longer drop
# non-numeric columns silently and raise instead.
numeric_columns = ['Sessions', 'BR', 'Conversions', 'CR']

# Mean metrics per (City, Age) pair and per Age bracket within the top five.
input_users_CR_City = input_users_CR.groupby(['City', 'Age'])[numeric_columns].mean()
input_users_CR_Age = input_users_CR.groupby(['Age'])[numeric_columns].mean()
In [106]:
# Bokeh imports for the bar chart. The import order is kept so that `show`
# ends up bound to the bokeh.plotting version, as before.
# NOTE(review): bokeh.charts appears to have been moved out of Bokeh into the
# separate `bkcharts` package in later releases - confirm the pinned version.
from bokeh.io import output_notebook
from bokeh.charts import Bar, Line, show
from bokeh.plotting import figure, output_file, show

# Send Bokeh output to the notebook.
output_notebook()

# Bar chart of CR by City, stacked by Age and aggregated with the mean; the
# title names the best-converting City / Age combination.
chart_title = "%s is most converting Demographic category" % max_CR_City_Age
p = Bar(
    input_users_filter,
    label='City',
    values='CR',
    stack='Age',
    title=chart_title,
    agg="mean",
    legend="top_right",
)
show(p)
#print("Conversion rate for homepage is %s" % homepage_CR + "%")
#print("Put an additional button \"Contact Us\" on the main page. (or adding the contact form link to the menu bar at the top of the homepage")
In [107]:
# Three rows with the highest bounce rate (BR), taken from the unfiltered frame.
# TODO: if the largest traffic source is 'not_set', output an error instead.
input_users_BR = input_users.nlargest(n=3, columns='BR')
input_users_BR
Out[107]:
In [113]:
# Per-city totals of the numeric columns. The columns are selected explicitly
# so the string column 'Age' is never aggregated; newer pandas releases no
# longer drop non-numeric columns by default and would concatenate the strings.
# Note: the summed BR and CR values are sums of rates, not meaningful rates;
# only the count columns (Sessions, Conversions) are true totals.
numeric_columns = ['Sessions', 'BR', 'Conversions', 'CR']
input_users_city_sum = input_users.groupby(["City"])[numeric_columns].sum()
input_users_city_sum
Out[113]:
In [114]:
# Series of total conversions per city (indexed by City), used for the pie chart.
output_chart_city = input_users_city_sum['Conversions']
output_chart_city
Out[114]:
In [118]:
# Pie chart of total conversions per city.
labels = output_chart_city.index
sizes = output_chart_city
# colours are taken from http://tools.medialab.sciences-po.fr/iwanthue/
colors = ['#1f394d','#2a7585', '#163c45', '#004a6e']
# One zero offset per slice. This was a hard-coded 4-tuple, which made
# plt.pie raise whenever the data did not contain exactly four cities.
explode = [0] * len(sizes)
plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct='%1.1f%%', shadow=False, startangle=90)
# Equal aspect ratio so the pie is drawn as a circle.
plt.axis('equal')
plt.show()
# Conversion traffic
# Generate text
print("%s segment converts best" % max_CR_City)
In [ ]: