In [146]:
# Google Analytics Reporting API v4 batchGet endpoint (kept for reference; the
# name `request` is immediately rebound to the request body below).
request = "POST https://analyticsreporting.googleapis.com/v4/reports:batchGet?fields=reports(columnHeader%2Cdata(rows%2Ctotals))&key={YOUR_API_KEY}"

# Request body: sessions / session duration / goal-1 completions / bounce rate,
# broken down by city and user age bracket, for Jan-Apr 2017.
request = {
    "reportRequests": [
        {
            "viewId": "123303369",
            "dateRanges": [
                {"startDate": "2017-01-01", "endDate": "2017-04-30"},
            ],
            "metrics": [
                {"expression": "ga:sessions"},
                {"expression": "ga:sessionDuration"},
                {"expression": "ga:goal1Completions"},
                {"expression": "ga:bounceRate"},
            ],
            "dimensions": [
                {"name": "ga:city"},
                {"name": "ga:userAgeBracket"},
            ],
        }
    ]
}
In [147]:
import json

# Load a saved Google Analytics Reporting API v4 response from disk.
with open('data/TMRW_user_groups.json') as file:
    input_ugroups = json.load(file)
#input_ugroups

# Define dimensions list
# Presumably ['ga:city', 'ga:userAgeBracket'], matching the request above — TODO confirm.
input_ugroups_dimensions = input_ugroups['reports'][0]['columnHeader']['dimensions']
dimension_count = len(input_ugroups_dimensions)  # NOTE(review): computed but not used later in this notebook

# Define metrics list
# Raw metric header entries (list of dicts with a 'name' key).
input_ugroups_metrics = input_ugroups['reports'][0]['columnHeader']['metricHeader']['metricHeaderEntries']
def create_metric_list(raw_data):
    """Extract the 'name' field from each metric header entry.

    raw_data: iterable of dicts, each containing a 'name' key.
    Returns the names as a list, preserving input order.
    """
    return [entry['name'] for entry in raw_data]
# Flatten the metric header entries into a plain list of metric names.
input_ugroups_metrics = create_metric_list(input_ugroups_metrics)

# Create input data
# Each row carries 'dimensions' (city, age bracket) and 'metrics' value lists.
input_ugroups_data = input_ugroups['reports'][0]['data']['rows']
input_ugroups_data
Out[147]:
In [148]:
# Reshape each GA row IN PLACE: replace the 'dimensions'/'metrics' pair with a
# single "City, AgeBracket" key whose value is the metrics dict, and collect
# the numeric value lists in values_list.
values_list = []
for group in input_ugroups_data:
    # e.g. "London, 25-34" — joins the two dimension values into one label.
    new_dim_name = group['dimensions'][0] + ", " + group['dimensions'][1]
    group[new_dim_name] = group['metrics'][0]
    del group['dimensions']
    del group['metrics']
    # Metric values arrive as strings; convert them all to floats.
    group[new_dim_name]['values'] = list(map(float, group[new_dim_name]['values']))
    #conv_rate = round(float(int(group[new_dim_name]['values'][2])/int(group[new_dim_name]['values'][0])*100),2)
    values_list.append(group[new_dim_name]['values'])
    # NOTE: values_list holds a reference to the same list object, so this
    # insert also places the label at index 0 of the entry just appended.
    group[new_dim_name]['values'].insert(0, new_dim_name)
    #print(group[new_dim_name]['values'])
    #group[new_dim_name]['values'].append(conv_rate)
#values_list
input_ugroups_data
Out[148]:
In [149]:
# Define each metric dict
ugroups_data = {}
for ugroup in input_ugroups_data:
#print (ugroup)
for gr in ugroup:
ugroups_data[gr] = {'sessions':0,
'bounce_rate':0,
'conversions':0,
'conversion_rate':0}
ugroups_data[gr]['sessions'] = round(float(ugroup[gr]['values'][1]),2)
ugroups_data[gr]['conversions'] = round(float(ugroup[gr]['values'][2]),2)
ugroups_data[gr]['bounce_rate'] = round(float(ugroup[gr]['values'][3]),2)
ugroups_data[gr]['conversion_rate'] = round(float(ugroup[gr]['values'][4]),2)
ugroups_data
Out[149]:
In [150]:
# One row label per "City, AgeBracket" group.
rows = list(ugroups_data.keys())
rows
Out[150]:
In [151]:
import collections
from collections import OrderedDict

# Build the column header list: 'city_age' followed by the metric names.
# 'city_age' is appended once per metric, so OrderedDict.fromkeys() is used to
# de-duplicate while preserving first-seen order.
columns = []
for u in ugroups_data:
    #print (u)
    for metric in ugroups_data[u]:
        columns.append('city_age')
        columns.append(metric)
columns = list(OrderedDict.fromkeys(columns))
columns
Out[151]:
In [152]:
import pandas as pd
In [153]:
# Each values_list entry is [label, metric values...]; `columns` supplies the
# matching header names ('city_age' plus the metric keys).
df = pd.DataFrame(values_list,
                  columns = columns)
df.to_json(orient='split')  # NOTE(review): return value discarded — kept as in the original notebook
# Round-trip through JSON ('split' orient), presumably mimicking how a
# dashboard/API would serve the table — TODO confirm intent.
table = pd.read_json(df.to_json(orient='split'), orient='split')
table.conversion_rate = round(table.conversion_rate, 2)
table.bounce_rate = round(table.bounce_rate, 2)
table
Out[153]:
In [154]:
# Hand-labelled cluster assignments for four city/age groups, used to try a
# merge against the analytics table.
cluster_labels = [
    ['Croydon, 18-24', 0],
    ['Croydon, 25-34', 1],
    ['London, 18-24', 0],
    ['Croydon, 45-54', 1],
]
table2 = pd.DataFrame(cluster_labels, columns=['city_age', 'cluster'])
table2
Out[154]:
In [155]:
# Inner join on the group label; only groups present in both frames survive.
data = table2.merge(table, on='city_age')
data
Out[155]:
In [156]:
# Wrap each session count in its own one-element list — the k-means code below
# expects SAMPLES to be a list of 1-D points.
samples1=[]
for i in range(0, len(table)):
    a = [table.sessions[i]]
    #print(a)
    samples1.append(a)
samples1
#return samples1
#print(samples1)
Out[156]:
In [157]:
# Mean sessions per group (total sessions over number of sample points).
mv = sum(table.sessions)/len(samples1)
mv
Out[157]:
In [158]:
import random
import math

# k-means configuration: 3 clusters seeded from the lowest, highest and an
# arbitrary "middle" sample point.
NUM_CLUSTERS = 3
TOTAL_DATA = len(samples1)
LOWEST_SAMPLE_POINT = samples1.index(min(samples1)) #element 9 of SAMPLES.
HIGHEST_SAMPLE_POINT = samples1.index(max(samples1)) #element 6 of SAMPLES.
# NOTE(review): the "element 9"/"element 6" comments above look copied from a
# tutorial and may not match this data.
Middle_SAMPLE_POINT = 2  # hard-coded third seed index
BIG_NUMBER = math.pow(10, 10)  # sentinel "infinity" for the nearest-centroid search
SAMPLES = samples1
data1 = []      # DataPoint objects, filled by initialize_datapoints()
centroids = []  # Centroid objects, filled by initialize_centroids()
class DataPoint:
    """A 1-D sample point together with its current cluster assignment."""

    def __init__(self, x, clusterNumber=None):
        # Fix: initialise clusterNumber so get_cluster() cannot raise
        # AttributeError when called before set_cluster(). None means
        # "not yet assigned" (the rest of the notebook already uses None
        # for unassigned points).
        self.x = x
        self.clusterNumber = clusterNumber

    def set_x(self, x):
        self.x = x

    def get_x(self):
        return self.x

    def set_cluster(self, clusterNumber):
        self.clusterNumber = clusterNumber

    def get_cluster(self):
        return self.clusterNumber
class Centroid:
    """A movable 1-D cluster centre with simple accessor methods."""

    def __init__(self, x):
        # Current coordinate of this centroid.
        self.x = x

    def set_x(self, x):
        """Move the centroid to a new coordinate."""
        self.x = x

    def get_x(self):
        """Return the centroid's current coordinate."""
        return self.x
In [ ]:
In [159]:
def initialize_centroids():
    """Seed the three global centroids from the lowest, highest and middle samples.

    Appends one Centroid per seed index to the module-level `centroids` list
    and prints the three starting coordinates.
    """
    for seed in (LOWEST_SAMPLE_POINT, HIGHEST_SAMPLE_POINT, Middle_SAMPLE_POINT):
        centroids.append(Centroid(SAMPLES[seed][0]))
    print("Centroids initialized at:")
    print("(", centroids[0].get_x(), ")")
    print("(", centroids[1].get_x(), ")")
    print("(", centroids[2].get_x(), ")")
    print()
    return
#print(initialize_centroids())
#print(centroids.append(Centroid(SAMPLES[HIGHEST_SAMPLE_POINT][0])))
In [160]:
def initialize_datapoints():
    # DataPoint objects' x and y values are taken from the SAMPLE array.
    # The DataPoints associated with LOWEST_SAMPLE_POINT and HIGHEST_SAMPLE_POINT are initially
    # assigned to the clusters matching the LOWEST_SAMPLE_POINT and HIGHEST_SAMPLE_POINT centroids.
    # All other points start unassigned (cluster None) and are placed by
    # update_clusters() during the first k-means pass.
    for i in range(TOTAL_DATA):
        newPoint = DataPoint(SAMPLES[i][0])
        if(i == LOWEST_SAMPLE_POINT):
            newPoint.set_cluster(0)
        elif(i == HIGHEST_SAMPLE_POINT):
            newPoint.set_cluster(1)
        elif(i == Middle_SAMPLE_POINT):
            newPoint.set_cluster(2)
        else:
            newPoint.set_cluster(None)
        data1.append(newPoint)  # accumulate into the module-level data1 list
    return
In [ ]:
In [161]:
def get_distance(dataPointX, centroidX):
    """Return the Euclidean (1-D, i.e. absolute) distance between a point and a centroid."""
    # sqrt(pow(d, 2)) is just |d|; math.fabs keeps the float return type.
    return math.fabs(centroidX - dataPointX)
In [162]:
def recalculate_centroids():
    """Move each centroid to the mean x of the points currently in its cluster.

    Clusters with no members keep their previous position. Operates on the
    module-level `data1` and `centroids` lists.
    """
    for j in range(NUM_CLUSTERS):
        # Fix: reset the accumulators for EVERY cluster. The original reset
        # them only once before the loop, so cluster j's "mean" wrongly
        # included the points of clusters 0..j-1 as well.
        totalX = 0
        totalInCluster = 0
        for k in range(len(data1)):
            if(data1[k].get_cluster() == j):
                totalX += data1[k].get_x()
                totalInCluster += 1
        if(totalInCluster > 0):
            centroids[j].set_x(totalX / totalInCluster)
    return
print(recalculate_centroids())  # NOTE(review): prints None — kept from the original notebook
In [163]:
def update_clusters():
    """Reassign every point to its nearest centroid.

    Returns 1 if any point changed cluster (so k-means must keep iterating),
    otherwise 0. Operates on the module-level `data1` and `centroids` lists.
    """
    isStillMoving = 0
    for i in range(TOTAL_DATA):
        bestMinimum = BIG_NUMBER
        currentCluster = 0
        for j in range(NUM_CLUSTERS):
            distance = get_distance(data1[i].get_x(), centroids[j].get_x())
            if(distance < bestMinimum):
                bestMinimum = distance
                currentCluster = j
        # Fix: remember the previous assignment BEFORE overwriting it. The
        # original called set_cluster(currentCluster) first, so the change
        # check below could never fire and the k-means loop stopped after a
        # single pass.
        previousCluster = data1[i].get_cluster()
        data1[i].set_cluster(currentCluster)
        if(previousCluster is None or previousCluster != currentCluster):
            isStillMoving = 1
    return isStillMoving
In [164]:
def perform_kmeans():
    # Standard Lloyd iteration: seed centroids, seed point assignments, then
    # alternate centroid recalculation and reassignment until no point moves.
    isStillMoving = 1
    initialize_centroids()
    initialize_datapoints()
    while(isStillMoving):
        recalculate_centroids()
        isStillMoving = update_clusters()
    return
perform_kmeans()
In [165]:
def print_results():
    # Print the members of each cluster, one cluster per section.
    for i in range(NUM_CLUSTERS):
        print("Cluster ", i, " includes:")
        for j in range(TOTAL_DATA):
            if(data1[j].get_cluster() == i):
                s = [data1[j].get_x()]  # show each point as a one-element list
                #print("(", data1[j].get_x(), ")")
                print(s)
        print()  # blank line between clusters (indentation reconstructed — TODO confirm)
    return
print_results()
#print(data1[j].get_x())
In [166]:
# Groups ranked by conversion rate (ascending); returns a new sorted frame.
table.sort_values(by='conversion_rate')
Out[166]:
In [167]:
# Mean conversion rate — NOTE(review): the divisor 10 is hard-coded; presumably
# the row count, in which case len(table) would be safer. TODO confirm.
sum(table.conversion_rate)/10
Out[167]:
In [168]:
# Spot-check: conversions for the group at index label 5.
table.conversions[5]
Out[168]:
In [169]:
# Rebuild the k-means input, this time clustering on conversion_rate instead of
# sessions (overwrites the earlier samples1).
samples1=[]
for i in range(0, len(table)):
    a = [table.conversion_rate[i]]
    #print(a)
    samples1.append(a)
samples1
#return samples1
#print(samples1)
Out[169]:
In [170]:
# NOTE(review): hard-coded copy of the conversion-rate samples — this clobbers
# the values computed from `table` in the previous cell and will silently go
# stale if the underlying data changes.
samples1=[[3.9603960396039604],
          [5.0898203592814371],
          [3.1390134529147984],
          [2.2222222222222223],
          [3.125],
          [4.7904191616766472],
          [2.8503562945368173],
          [2.904564315352697],
          [1.4634146341463417]]
In [171]:
# Spot-check the fifth sample point.
samples1[4]
Out[171]:
In [172]:
# NOTE(review): near-duplicate of the earlier configuration cell — k-means is
# re-run here on conversion rates; only the middle seed index differs.
import random
import math
NUM_CLUSTERS = 3
TOTAL_DATA = len(samples1)
LOWEST_SAMPLE_POINT = samples1.index(min(samples1)) #element 9 of SAMPLES.
HIGHEST_SAMPLE_POINT = samples1.index(max(samples1)) #element 6 of SAMPLES.
Middle_SAMPLE_POINT = 4  # third seed index (differs from the sessions run)
BIG_NUMBER = math.pow(10, 10)  # sentinel "infinity" for the nearest-centroid search
SAMPLES = samples1
data1 = []      # reset: DataPoint objects for this run
centroids = []  # reset: Centroid objects for this run
class DataPoint:
    """A 1-D sample point together with its current cluster assignment.

    (Duplicate definition from the notebook's second k-means run; shadows the
    earlier one.)
    """

    def __init__(self, x, clusterNumber=None):
        # Fix: initialise clusterNumber so get_cluster() cannot raise
        # AttributeError when called before set_cluster(). None means
        # "not yet assigned".
        self.x = x
        self.clusterNumber = clusterNumber

    def set_x(self, x):
        self.x = x

    def get_x(self):
        return self.x

    def set_cluster(self, clusterNumber):
        self.clusterNumber = clusterNumber

    def get_cluster(self):
        return self.clusterNumber
class Centroid:
    # A 1-D cluster centre (duplicate definition; shadows the earlier one).
    def __init__(self, x):
        self.x = x
    def set_x(self, x):
        self.x = x
    def get_x(self):
        return self.x
In [173]:
def initialize_centroids():
    # Seed the three global centroids from the lowest, highest and middle
    # sample points (duplicate definition; shadows the earlier one).
    # Set the centoid coordinates to match the data points furthest from each other.
    # NOTE(review): the example values below look copied from a tutorial and do
    # not match this data.
    # In this example, [31, 51.613, 1, 3.2260000000000004] and [758, 59.234999999999999, 22, 2.9019999999999997]
    centroids.append(Centroid(SAMPLES[LOWEST_SAMPLE_POINT][0]))
    centroids.append(Centroid(SAMPLES[HIGHEST_SAMPLE_POINT][0]))
    centroids.append(Centroid(SAMPLES[Middle_SAMPLE_POINT][0]))
    print("Centroids initialized at:")
    print("(", centroids[0].get_x(), ")")
    print("(", centroids[1].get_x(), ")")
    print("(", centroids[2].get_x(), ")")
    print()
    return
#print(initialize_centroids())
#print(centroids.append(Centroid(SAMPLES[HIGHEST_SAMPLE_POINT][0])))
In [174]:
def initialize_datapoints():
    # DataPoint objects' x and y values are taken from the SAMPLE array.
    # The DataPoints associated with LOWEST_SAMPLE_POINT and HIGHEST_SAMPLE_POINT are initially
    # assigned to the clusters matching the LOWEST_SAMPLE_POINT and HIGHEST_SAMPLE_POINT centroids.
    # All other points start unassigned (cluster None). (Duplicate definition;
    # shadows the earlier one.)
    for i in range(TOTAL_DATA):
        newPoint = DataPoint(SAMPLES[i][0])
        if(i == LOWEST_SAMPLE_POINT):
            newPoint.set_cluster(0)
        elif(i == HIGHEST_SAMPLE_POINT):
            newPoint.set_cluster(1)
        elif(i == Middle_SAMPLE_POINT):
            newPoint.set_cluster(2)
        else:
            newPoint.set_cluster(None)
        data1.append(newPoint)  # accumulate into the module-level data1 list
    return
In [175]:
def get_distance(dataPointX, centroidX):
    """Return the Euclidean (1-D, i.e. absolute) distance between a point and a centroid.

    (Duplicate definition; shadows the earlier one.)
    """
    # sqrt(pow(d, 2)) is just |d|; math.fabs keeps the float return type.
    return math.fabs(centroidX - dataPointX)
In [176]:
def recalculate_centroids():
    """Move each centroid to the mean x of the points currently in its cluster.

    Clusters with no members keep their previous position. (Duplicate
    definition; shadows the earlier one.)
    """
    for j in range(NUM_CLUSTERS):
        # Fix: reset the accumulators for EVERY cluster. The original reset
        # them only once before the loop, so cluster j's "mean" wrongly
        # included the points of clusters 0..j-1 as well.
        totalX = 0
        totalInCluster = 0
        for k in range(len(data1)):
            if(data1[k].get_cluster() == j):
                totalX += data1[k].get_x()
                totalInCluster += 1
        if(totalInCluster > 0):
            centroids[j].set_x(totalX / totalInCluster)
    return
print(recalculate_centroids())  # NOTE(review): prints None — kept from the original notebook
In [177]:
def update_clusters():
    """Reassign every point to its nearest centroid.

    Returns 1 if any point changed cluster, otherwise 0. (Duplicate
    definition; shadows the earlier one.)
    """
    isStillMoving = 0
    for i in range(TOTAL_DATA):
        bestMinimum = BIG_NUMBER
        currentCluster = 0
        for j in range(NUM_CLUSTERS):
            distance = get_distance(data1[i].get_x(), centroids[j].get_x())
            if(distance < bestMinimum):
                bestMinimum = distance
                currentCluster = j
        # Fix: remember the previous assignment BEFORE overwriting it. The
        # original called set_cluster(currentCluster) first, so the change
        # check below could never fire and the k-means loop stopped after a
        # single pass.
        previousCluster = data1[i].get_cluster()
        data1[i].set_cluster(currentCluster)
        if(previousCluster is None or previousCluster != currentCluster):
            isStillMoving = 1
    return isStillMoving
In [178]:
def perform_kmeans():
    # Lloyd iteration on the conversion-rate samples (duplicate definition;
    # shadows the earlier one).
    isStillMoving = 1
    initialize_centroids()
    initialize_datapoints()
    while(isStillMoving):
        recalculate_centroids()
        isStillMoving = update_clusters()
    return
perform_kmeans()
In [179]:
def print_results():
    # Print the members of each cluster, one cluster per section.
    for i in range(NUM_CLUSTERS):
        print("Cluster ", i, " includes:")
        for j in range(TOTAL_DATA):
            if(data1[j].get_cluster() == i):
                print("(", data1[j].get_x(), ")")
                #print(data1[j].get_x())
        print()  # blank line between clusters (indentation reconstructed — TODO confirm)
    return
print_results()
In [222]:
def print_results():
    # Redefinition of print_results (shadows the printing version): instead of
    # printing, collect [cluster, x] pairs so the assignments can be reused,
    # e.g. fed back into a DataFrame.
    result_list = []
    for i in range(NUM_CLUSTERS):
        #print(i)
        for j in range(TOTAL_DATA):
            if(data1[j].get_cluster() == i):
                result_list.append([data1[j].get_cluster(), data1[j].get_x()])
                #result_list.append(result)
    return result_list
print_results()
Out[222]:
In [ ]:
In [183]:
# Fix: DataFrame.sort() was deprecated in pandas 0.17 and removed in 0.20;
# sort_values() is the supported equivalent (ascending sort on the column).
table.sort_values('sessions')
Out[183]:
In [184]:
# Fix: DataFrame.sort() was deprecated in pandas 0.17 and removed in 0.20;
# sort_values() is the supported equivalent (ascending sort on the column).
table.sort_values('conversion_rate')
Out[184]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: