In [19]:
request = "POST https://analyticsreporting.googleapis.com/v4/reports:batchGet?fields=reports(columnHeader%2Cdata(rows%2Ctotals))&key={YOUR_API_KEY}"
request = {
"reportRequests": [
{
"viewId": "123303369",
"dateRanges": [
{
"startDate": "2017-01-01",
"endDate": "2017-04-30"
}
],
"metrics": [
{
"expression": "ga:sessions"
},
{
"expression": "ga:sessionDuration"
},
{
"expression": "ga:goal1Completions"
},
{
"expression": "ga:bounceRate"
}
],
"dimensions": [
{
"name": "ga:city"
},
{
"name": "ga:userAgeBracket"
}
]
}
]
}
In [20]:
import json
with open('data/TMRW_user_groups.json') as file:
input_ugroups = json.load(file)
#input_ugroups
# Define dimensions list
input_ugroups_dimensions = input_ugroups['reports'][0]['columnHeader']['dimensions']
dimension_count = len(input_ugroups_dimensions)
# Define metrics list
input_ugroups_metrics = input_ugroups['reports'][0]['columnHeader']['metricHeader']['metricHeaderEntries']
def create_metric_list(raw_data):
lst = []
for item in raw_data:
lst.append(item['name'])
return lst
input_ugroups_metrics = create_metric_list(input_ugroups_metrics)
# Create input data
input_ugroups_data = input_ugroups['reports'][0]['data']['rows']
input_ugroups_data
Out[20]:
In [21]:
values_list = []
for group in input_ugroups_data:
new_dim_name = group['dimensions'][0] + ", " + group['dimensions'][1]
group[new_dim_name] = group['metrics'][0]
del group['dimensions']
del group['metrics']
#conv_rate = round(float(int(group[new_dim_name]['values'][2])/int(group[new_dim_name]['values'][0])*100),2)
values_list.append(group[new_dim_name]['values'])
#group[new_dim_name]['values'].append(conv_rate)
#values_list
input_ugroups_data
Out[21]:
In [22]:
# Define each metric dict
ugroups_data = {}
for ugroup in input_ugroups_data:
#print (ugroup)
for gr in ugroup:
ugroups_data[gr] = {'sessions':0,
'bounce_rate':0,
'conversions':0,
'conversion_rate':0}
ugroups_data[gr]['sessions'] = round(float(ugroup[gr]['values'][0]),2)
ugroups_data[gr]['conversions'] = round(float(ugroup[gr]['values'][1]),2)
ugroups_data[gr]['bounce_rate'] = round(float(ugroup[gr]['values'][2]),2)
ugroups_data[gr]['conversion_rate'] = round(float(ugroup[gr]['values'][3]),2)
#ugroups_data
In [23]:
rows = list(ugroups_data.keys())
rows
Out[23]:
In [24]:
import collections
from collections import OrderedDict
columns = []
for u in ugroups_data:
#print (test[r])
for metric in ugroups_data[u]:
columns.append(metric)
columns = list(OrderedDict.fromkeys(columns))
columns
Out[24]:
In [25]:
import pandas as pd
In [26]:
df = pd.DataFrame(values_list,
index = rows,
columns = columns)
df.to_json(orient='split')
table = pd.read_json(df.to_json(orient='split'), orient='split')
table
Out[26]:
In [116]:
In [27]:
samples1=[]
for i in range(0,len(table)):
a = [table.sessions[i]]
#print(a)
samples1.append(a)
samples1
#return samples1
#print(samples1)
Out[27]:
In [28]:
mv = sum(table.sessions)/len(samples1)
mv
Out[28]:
In [29]:
import random
import math
NUM_CLUSTERS = 3
TOTAL_DATA = len(samples1)
LOWEST_SAMPLE_POINT = samples1.index(min(samples1)) #element 9 of SAMPLES.
HIGHEST_SAMPLE_POINT = samples1.index(max(samples1)) #element 6 of SAMPLES.
Middle_SAMPLE_POINT = 2
BIG_NUMBER = math.pow(10, 10)
SAMPLES = samples1
data1 = []
centroids = []
class DataPoint:
def __init__(self, x):
self.x = x
def set_x(self, x):
self.x = x
def get_x(self):
return self.x
def set_cluster(self, clusterNumber):
self.clusterNumber = clusterNumber
def get_cluster(self):
return self.clusterNumber
class Centroid:
def __init__(self, x):
self.x = x
def set_x(self, x):
self.x = x
def get_x(self):
return self.x
In [ ]:
In [30]:
def initialize_centroids():
# Set the centoid coordinates to match the data points furthest from each other.
# In this example, [31, 51.613, 1, 3.2260000000000004] and [758, 59.234999999999999, 22, 2.9019999999999997]
centroids.append(Centroid(SAMPLES[LOWEST_SAMPLE_POINT][0]))
centroids.append(Centroid(SAMPLES[HIGHEST_SAMPLE_POINT][0]))
centroids.append(Centroid(SAMPLES[Middle_SAMPLE_POINT][0]))
print("Centroids initialized at:")
print("(", centroids[0].get_x(),")")
print("(", centroids[1].get_x(),")")
print("(", centroids[2].get_x(),")")
print()
return
#print(initialize_centroids())
#print(centroids.append(Centroid(SAMPLES[HIGHEST_SAMPLE_POINT][0])))
In [31]:
def initialize_datapoints():
# DataPoint objects' x and y values are taken from the SAMPLE array.
# The DataPoints associated with LOWEST_SAMPLE_POINT and HIGHEST_SAMPLE_POINT are initially
# assigned to the clusters matching the LOWEST_SAMPLE_POINT and HIGHEST_SAMPLE_POINT centroids.
for i in range(TOTAL_DATA):
newPoint = DataPoint(SAMPLES[i][0])
if(i == LOWEST_SAMPLE_POINT):
newPoint.set_cluster(0)
elif(i == HIGHEST_SAMPLE_POINT):
newPoint.set_cluster(1)
elif(i == Middle_SAMPLE_POINT):
newPoint.set_cluster(2)
else:
newPoint.set_cluster(None)
data1.append(newPoint)
return
In [ ]:
In [32]:
def get_distance(dataPointX, centroidX):
# Calculate Euclidean distance.
return math.sqrt(math.pow((centroidX - dataPointX), 2))
In [33]:
def recalculate_centroids():
totalX = 0
totalInCluster = 0
for j in range(NUM_CLUSTERS):
for k in range(len(data1)):
if(data1[k].get_cluster() == j):
totalX += data1[k].get_x()
totalInCluster += 1
if(totalInCluster > 0):
centroids[j].set_x(totalX / totalInCluster)
return
print(recalculate_centroids())
In [34]:
def update_clusters():
isStillMoving = 0
for i in range(TOTAL_DATA):
bestMinimum = BIG_NUMBER
currentCluster = 0
for j in range(NUM_CLUSTERS):
distance = get_distance(data1[i].get_x(), centroids[j].get_x())
if(distance < bestMinimum):
bestMinimum = distance
currentCluster = j
data1[i].set_cluster(currentCluster)
if(data1[i].get_cluster() is None or data1[i].get_cluster() != currentCluster):
data1[i].set_cluster(currentCluster)
isStillMoving = 1
return isStillMoving
In [35]:
def perform_kmeans():
isStillMoving = 1
initialize_centroids()
initialize_datapoints()
while(isStillMoving):
recalculate_centroids()
isStillMoving = update_clusters()
return
perform_kmeans()
In [36]:
def print_results():
for i in range(NUM_CLUSTERS):
print("Cluster ", i, " includes:")
for j in range(TOTAL_DATA):
if(data1[j].get_cluster() == i):
s = [data1[j].get_x()]
#print("(", data1[j].get_x(), ")")
print(s)
print()
return
print_results()
#print(data1[j].get_x())
In [37]:
table.sort('conversion_rate')
Out[37]:
In [38]:
sum(table.conversion_rate)/10
Out[38]:
In [39]:
table.conversions[5]
Out[39]:
In [40]:
samples1=[]
for i in range(0,len(table)):
a = [table.conversion_rate[i]]
#print(a)
samples1.append(a)
samples1
#return samples1
#print(samples1)
Out[40]:
In [41]:
samples1=[[3.9603960396039604],
[5.0898203592814371],
[3.1390134529147984],
[2.2222222222222223],
[3.125],
[4.7904191616766472],
[2.8503562945368173],
[2.904564315352697],
[1.4634146341463417]]
In [42]:
samples1[4]
Out[42]:
In [43]:
import random
import math
NUM_CLUSTERS = 3
TOTAL_DATA = len(samples1)
LOWEST_SAMPLE_POINT = samples1.index(min(samples1)) #element 9 of SAMPLES.
HIGHEST_SAMPLE_POINT = samples1.index(max(samples1)) #element 6 of SAMPLES.
Middle_SAMPLE_POINT = 4
BIG_NUMBER = math.pow(10, 10)
SAMPLES = samples1
data1 = []
centroids = []
class DataPoint:
def __init__(self, x):
self.x = x
def set_x(self, x):
self.x = x
def get_x(self):
return self.x
def set_cluster(self, clusterNumber):
self.clusterNumber = clusterNumber
def get_cluster(self):
return self.clusterNumber
class Centroid:
def __init__(self, x):
self.x = x
def set_x(self, x):
self.x = x
def get_x(self):
return self.x
In [44]:
def initialize_centroids():
# Set the centoid coordinates to match the data points furthest from each other.
# In this example, [31, 51.613, 1, 3.2260000000000004] and [758, 59.234999999999999, 22, 2.9019999999999997]
centroids.append(Centroid(SAMPLES[LOWEST_SAMPLE_POINT][0]))
centroids.append(Centroid(SAMPLES[HIGHEST_SAMPLE_POINT][0]))
centroids.append(Centroid(SAMPLES[Middle_SAMPLE_POINT][0]))
print("Centroids initialized at:")
print("(", centroids[0].get_x(),")")
print("(", centroids[1].get_x(),")")
print("(", centroids[2].get_x(),")")
print()
return
#print(initialize_centroids())
#print(centroids.append(Centroid(SAMPLES[HIGHEST_SAMPLE_POINT][0])))
In [45]:
def initialize_datapoints():
# DataPoint objects' x and y values are taken from the SAMPLE array.
# The DataPoints associated with LOWEST_SAMPLE_POINT and HIGHEST_SAMPLE_POINT are initially
# assigned to the clusters matching the LOWEST_SAMPLE_POINT and HIGHEST_SAMPLE_POINT centroids.
for i in range(TOTAL_DATA):
newPoint = DataPoint(SAMPLES[i][0])
if(i == LOWEST_SAMPLE_POINT):
newPoint.set_cluster(0)
elif(i == HIGHEST_SAMPLE_POINT):
newPoint.set_cluster(1)
elif(i == Middle_SAMPLE_POINT):
newPoint.set_cluster(2)
else:
newPoint.set_cluster(None)
data1.append(newPoint)
return
In [46]:
def get_distance(dataPointX, centroidX):
# Calculate Euclidean distance.
return math.sqrt(math.pow((centroidX - dataPointX), 2))
In [47]:
def recalculate_centroids():
totalX = 0
totalInCluster = 0
for j in range(NUM_CLUSTERS):
for k in range(len(data1)):
if(data1[k].get_cluster() == j):
totalX += data1[k].get_x()
totalInCluster += 1
if(totalInCluster > 0):
centroids[j].set_x(totalX / totalInCluster)
return
print(recalculate_centroids())
In [48]:
def update_clusters():
isStillMoving = 0
for i in range(TOTAL_DATA):
bestMinimum = BIG_NUMBER
currentCluster = 0
for j in range(NUM_CLUSTERS):
distance = get_distance(data1[i].get_x(), centroids[j].get_x())
if(distance < bestMinimum):
bestMinimum = distance
currentCluster = j
data1[i].set_cluster(currentCluster)
if(data1[i].get_cluster() is None or data1[i].get_cluster() != currentCluster):
data1[i].set_cluster(currentCluster)
isStillMoving = 1
return isStillMoving
In [49]:
def perform_kmeans():
isStillMoving = 1
initialize_centroids()
initialize_datapoints()
while(isStillMoving):
recalculate_centroids()
isStillMoving = update_clusters()
return
perform_kmeans()
In [54]:
def print_results():
for i in range(NUM_CLUSTERS):
print("Cluster ", i, " includes:")
for j in range(TOTAL_DATA):
if(data1[j].get_cluster() == i):
print("(", data1[j].get_x(), ")")
#print(data1[j].get_x())
print()
return
print_results()
In [108]:
def print_results():
result_list = []
for i in range(NUM_CLUSTERS):
#print(i)
for j in range(TOTAL_DATA):
if(data1[j].get_cluster() == i):
result = {}
#print (data1[j].get_cluster())
result[data1[j].get_cluster()] = data1[j].get_x()
#result["Cluster %s" % i] = []
#data.append(data1[0].get_x())
#data[0] = 1
#data.append(data1[j].get_x())
#result["Cluster %s" % i] = data
#print(j)
# (result)
#print(data1[j].get_x())
#print()
#print(result)
result_list.append(result)
return result_list
print_results()
Out[108]:
In [113]:
for n in print_results():
#print(n)
for p in n:
print(n[p])
In [ ]:
sessions
Centroids initialized at:
( 32 )
( 842 )
( 223 )
Cluster 0 includes:
( 101 )
( 90 )
( 32 )
( 167 )
( 37 )
Cluster 1 includes:
( 842 )
( 482 )
Cluster 2 includes:
( 334 )
( 223 )
( 205 )
In [197]:
table.sort('sessions')
Out[197]:
In [221]:
table.sort('conversion_rate')
Out[221]:
In [ ]: