User groups


In [38]:
request = "POST https://analyticsreporting.googleapis.com/v4/reports:batchGet?fields=reports(columnHeader%2Cdata(rows%2Ctotals))&key={YOUR_API_KEY}"
request = {
 "reportRequests": [
  {
   "viewId": "123303369",
   "dateRanges": [
    {
     "startDate": "2017-01-01",
     "endDate": "2017-04-30"
    }
   ],
   "metrics": [
    {
     "expression": "ga:sessions"
    },
    {
     "expression": "ga:sessionDuration"
    },
    {
     "expression": "ga:goal1Completions"
    },
    {
     "expression": "ga:bounceRate"
    }
   ],
   "dimensions": [
    {
     "name": "ga:city"
    },
    {
     "name": "ga:userAgeBracket"
    }
   ]
  }
 ]
}

In [39]:
import json

with open('data/TMRW_user_groups.json') as file:
    input_ugroups = json.load(file)
    
#input_ugroups

# Define dimensions list
input_ugroups_dimensions = input_ugroups['reports'][0]['columnHeader']['dimensions']

dimension_count = len(input_ugroups_dimensions)

# Define metrics list
input_ugroups_metrics = input_ugroups['reports'][0]['columnHeader']['metricHeader']['metricHeaderEntries']

def create_metric_list(raw_data):
    lst = []
    for item in raw_data:
        lst.append(item['name'])
    return lst

input_ugroups_metrics = create_metric_list(input_ugroups_metrics)


# Create input data

input_ugroups_data = input_ugroups['reports'][0]['data']['rows']

input_ugroups_data


Out[39]:
[{'dimensions': ['Croydon', '18-24'],
  'metrics': [{'values': ['101',
     '41.584158415841586',
     '4',
     '3.9603960396039604']}]},
 {'dimensions': ['Croydon', '25-34'],
  'metrics': [{'values': ['334',
     '47.90419161676647',
     '17',
     '5.089820359281437']}]},
 {'dimensions': ['Croydon', '35-44'],
  'metrics': [{'values': ['223',
     '43.04932735426009',
     '7',
     '3.1390134529147984']}]},
 {'dimensions': ['Croydon', '45-54'],
  'metrics': [{'values': ['90',
     '45.55555555555556',
     '2',
     '2.2222222222222223']}]},
 {'dimensions': ['Croydon', '55-64'],
  'metrics': [{'values': ['32', '53.125', '1', '3.125']}]},
 {'dimensions': ['London', '18-24'],
  'metrics': [{'values': ['167',
     '49.700598802395206',
     '8',
     '4.790419161676647']}]},
 {'dimensions': ['London', '25-34'],
  'metrics': [{'values': ['842',
     '59.14489311163895',
     '24',
     '2.8503562945368173']}]},
 {'dimensions': ['London', '35-44'],
  'metrics': [{'values': ['482',
     '54.77178423236515',
     '14',
     '2.904564315352697']}]},
 {'dimensions': ['London', '45-54'],
  'metrics': [{'values': ['205',
     '55.60975609756098',
     '3',
     '1.4634146341463417']}]},
 {'dimensions': ['London', '55-64'],
  'metrics': [{'values': ['37', '48.64864864864865', '0', '0.0']}]}]

In [40]:
values_list = []
for group in input_ugroups_data:
    new_dim_name = group['dimensions'][0] + ", " + group['dimensions'][1]
    group[new_dim_name] = group['metrics'][0]
    del group['dimensions']
    del group['metrics']
    
    #conv_rate = round(float(int(group[new_dim_name]['values'][2])/int(group[new_dim_name]['values'][0])*100),2)
    group[new_dim_name]['values'] = list(map(float,group[new_dim_name]['values']))
    group[new_dim_name]['values'].append(new_dim_name)
    values_list.append(group[new_dim_name]['values'])
    #list(map(float,group[new_dim_name]['values'])).append(new_dim_name)
    #values_list = values_list.apply(lambda x: x[0][0][1])

    #group[new_dim_name]['values'].append(conv_rate)
    #print(group[new_dim_name])
    
values_list    
#input_ugroups_data


Out[40]:
[[101.0, 41.584158415841586, 4.0, 3.9603960396039604, 'Croydon, 18-24'],
 [334.0, 47.90419161676647, 17.0, 5.089820359281437, 'Croydon, 25-34'],
 [223.0, 43.04932735426009, 7.0, 3.1390134529147984, 'Croydon, 35-44'],
 [90.0, 45.55555555555556, 2.0, 2.2222222222222223, 'Croydon, 45-54'],
 [32.0, 53.125, 1.0, 3.125, 'Croydon, 55-64'],
 [167.0, 49.700598802395206, 8.0, 4.790419161676647, 'London, 18-24'],
 [842.0, 59.14489311163895, 24.0, 2.8503562945368173, 'London, 25-34'],
 [482.0, 54.77178423236515, 14.0, 2.904564315352697, 'London, 35-44'],
 [205.0, 55.60975609756098, 3.0, 1.4634146341463417, 'London, 45-54'],
 [37.0, 48.64864864864865, 0.0, 0.0, 'London, 55-64']]

In [41]:
# Define each metric dict

ugroups_data = {}

for ugroup in input_ugroups_data:
    #print (ugroup)
    
    for gr in ugroup:
        ugroups_data[gr] = {'sessions':0,
                            'bounce_rate':0,
                            'conversions':0,
                            'conversion_rate':0,
                            'city_age':'',}
        
        ugroups_data[gr]['sessions'] = round(float(ugroup[gr]['values'][0]),2)
        ugroups_data[gr]['conversions'] = round(float(ugroup[gr]['values'][1]),2)
        ugroups_data[gr]['bounce_rate'] = round(float(ugroup[gr]['values'][2]),2)
        ugroups_data[gr]['conversion_rate'] = round(float(ugroup[gr]['values'][3]),2)
        ugroups_data[gr]['city_age'] = list(ugroups_data.keys())
        
#ugroups_data

In [42]:
import collections
from collections import OrderedDict

columns = []
for u in ugroups_data:
    #print (test[r])
    for metric in ugroups_data[u]:
        columns.append(metric)
    
columns = list(OrderedDict.fromkeys(columns))    
columns


Out[42]:
['sessions', 'bounce_rate', 'conversions', 'conversion_rate', 'city_age']

In [43]:
import pandas as pd

In [44]:
df = pd.DataFrame(values_list, columns = columns)

df.to_json(orient='split')
table_data = pd.read_json(df.to_json(orient='split'), orient='split')
table_data.conversion_rate = round(table_data.conversion_rate,2)
table_data = table_data[table_data.conversions > 0]
table_data


Out[44]:
sessions bounce_rate conversions conversion_rate city_age
0 101 41.584158 4 3.96 Croydon, 18-24
1 334 47.904192 17 5.09 Croydon, 25-34
2 223 43.049327 7 3.14 Croydon, 35-44
3 90 45.555556 2 2.22 Croydon, 45-54
4 32 53.125000 1 3.12 Croydon, 55-64
5 167 49.700599 8 4.79 London, 18-24
6 842 59.144893 24 2.85 London, 25-34
7 482 54.771784 14 2.90 London, 35-44
8 205 55.609756 3 1.46 London, 45-54

In [45]:
samples_sessions=[]
for i in range(0,len(table_data)):
    a = [table_data.sessions[i]]
    samples_sessions.append(a)
    
samples_sessions = sorted(samples_sessions, key=lambda x: x[0])
samples_sessions


Out[45]:
[[32], [90], [101], [167], [205], [223], [334], [482], [842]]

In [46]:
mediana_number = int(len(samples_sessions)/2)
mediana_number


Out[46]:
4

In [47]:
import random
import math

NUM_CLUSTERS = 3
TOTAL_DATA = len(samples_sessions)
LOWEST_SAMPLE_POINT = samples_sessions.index(min(samples_sessions)) #element 9 of SAMPLES.
Middle_SAMPLE_POINT = mediana_number
HIGHEST_SAMPLE_POINT = samples_sessions.index(max(samples_sessions)) #element 6 of SAMPLES.
BIG_NUMBER = math.pow(10, 10)


SAMPLES = samples_sessions
data1 = []
centroids = []

class DataPoint:
    def __init__(self, x):
        self.x = x
          
    def set_x(self, x):
        self.x = x
    
    def get_x(self):
        return self.x
      
    def set_cluster(self, clusterNumber):
        self.clusterNumber = clusterNumber
    
    def get_cluster(self):
        return self.clusterNumber

class Centroid:
    def __init__(self, x):
        self.x = x
    
    def set_x(self, x):
        self.x = x
    
    def get_x(self):
        return self.x

In [48]:
def initialize_centroids():
    # Set the centoid coordinates to match the data points furthest from each other.
    # In this example, [31, 51.613, 1, 3.2260000000000004] and [758, 59.234999999999999, 22, 2.9019999999999997]
    centroids.append(Centroid(SAMPLES[LOWEST_SAMPLE_POINT][0]))
    centroids.append(Centroid(SAMPLES[Middle_SAMPLE_POINT][0]))
    centroids.append(Centroid(SAMPLES[HIGHEST_SAMPLE_POINT][0]))
    
    print("Centroids initialized at:")
    print("(", centroids[0].get_x(),")")
    print("(", centroids[1].get_x(),")")
    print("(", centroids[2].get_x(),")")
    print()
    return
#print(initialize_centroids())
#print(centroids.append(Centroid(SAMPLES[HIGHEST_SAMPLE_POINT][0])))

In [49]:
def initialize_datapoints():
    # DataPoint objects' x and y values are taken from the SAMPLE array.
    # The DataPoints associated with LOWEST_SAMPLE_POINT and HIGHEST_SAMPLE_POINT are initially
    # assigned to the clusters matching the LOWEST_SAMPLE_POINT and HIGHEST_SAMPLE_POINT centroids.
    for i in range(TOTAL_DATA):
        newPoint = DataPoint(SAMPLES[i][0])
        
        if(i == LOWEST_SAMPLE_POINT):
            newPoint.set_cluster(0)
        elif(i == Middle_SAMPLE_POINT):
            newPoint.set_cluster(1)
        elif(i == HIGHEST_SAMPLE_POINT):
            newPoint.set_cluster(2)
        else:
            newPoint.set_cluster(None)
            
        data1.append(newPoint)
    
    return

In [50]:
def get_distance(dataPointX, centroidX):
    # Calculate Euclidean distance.
    return math.sqrt(math.pow((centroidX - dataPointX), 2))

In [51]:
def recalculate_centroids():
    totalX = 0
    totalInCluster = 0
    
    for j in range(NUM_CLUSTERS):
        for k in range(len(data1)):
            if(data1[k].get_cluster() == j):
                totalX += data1[k].get_x()
                totalInCluster += 1
        
        if(totalInCluster > 0):
            centroids[j].set_x(totalX / totalInCluster)
               
    return

print(recalculate_centroids())


None

In [52]:
def update_clusters():
    isStillMoving = 0
    
    for i in range(TOTAL_DATA):
        bestMinimum = BIG_NUMBER
        currentCluster = 0
        
        for j in range(NUM_CLUSTERS):
            distance = get_distance(data1[i].get_x(), centroids[j].get_x())
            if(distance < bestMinimum):
                bestMinimum = distance
                currentCluster = j
        
        data1[i].set_cluster(currentCluster)
        
        if(data1[i].get_cluster() is None or data1[i].get_cluster() != currentCluster):
            data1[i].set_cluster(currentCluster)
            isStillMoving = 1
    
    return isStillMoving

In [53]:
def perform_kmeans():
    isStillMoving = 1
    
    initialize_centroids()
    
    initialize_datapoints()
    
    while(isStillMoving):
        recalculate_centroids()
        isStillMoving = update_clusters()
    
    return

perform_kmeans()


Centroids initialized at:
( 32 )
( 205 )
( 842 )


In [54]:
def print_results():
    result_list = []
    for i in range(NUM_CLUSTERS):
        
        for j in range(TOTAL_DATA):            
            
            if(data1[j].get_cluster() == i):
                result = []               
                result.append(data1[j].get_cluster())
                result.append(data1[j].get_x()) 
                result_list.append(result)
                               
    return result_list

print_results()


Out[54]:
[[0, 32],
 [1, 90],
 [1, 101],
 [1, 167],
 [1, 205],
 [1, 223],
 [2, 334],
 [2, 482],
 [2, 842]]

In [55]:
cluster_sessions = pd.DataFrame(print_results(),columns = ['#cluster_for_sess','sessions'])
cluster_sessions


Out[55]:
#cluster_for_sess sessions
0 0 32
1 1 90
2 1 101
3 1 167
4 1 205
5 1 223
6 2 334
7 2 482
8 2 842

In [56]:
result_sessions = (table_data.sort('sessions')).merge(cluster_sessions, on = 'sessions')
result_sessions


C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\__main__.py:1: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  if __name__ == '__main__':
Out[56]:
sessions bounce_rate conversions conversion_rate city_age #cluster_for_sess
0 32 53.125000 1 3.12 Croydon, 55-64 0
1 90 45.555556 2 2.22 Croydon, 45-54 1
2 101 41.584158 4 3.96 Croydon, 18-24 1
3 167 49.700599 8 4.79 London, 18-24 1
4 205 55.609756 3 1.46 London, 45-54 1
5 223 43.049327 7 3.14 Croydon, 35-44 1
6 334 47.904192 17 5.09 Croydon, 25-34 2
7 482 54.771784 14 2.90 London, 35-44 2
8 842 59.144893 24 2.85 London, 25-34 2

In [ ]:


In [57]:
samples_cr=[]
for i in range(0,len(table_data)):
    a = [table_data.conversion_rate[i]]
    samples_cr.append(a)
    
samples_cr = sorted(samples_cr, key=lambda x: x[0])
samples_cr


Out[57]:
[[1.46],
 [2.2200000000000002],
 [2.8500000000000001],
 [2.8999999999999999],
 [3.1200000000000001],
 [3.1400000000000001],
 [3.96],
 [4.79],
 [5.0899999999999999]]

In [58]:
import random
import math

NUM_CLUSTERS = 3
TOTAL_DATA = len(samples_cr)
LOWEST_SAMPLE_POINT = samples_cr.index(min(samples_cr)) #element 9 of SAMPLES.
Middle_SAMPLE_POINT = mediana_number
HIGHEST_SAMPLE_POINT = samples_cr.index(max(samples_cr)) #element 6 of SAMPLES.
BIG_NUMBER = math.pow(10, 10)


SAMPLES = samples_cr
data1 = []
centroids = []

class DataPoint:
    def __init__(self, x):
        self.x = x
          
    def set_x(self, x):
        self.x = x
    
    def get_x(self):
        return self.x
      
    def set_cluster(self, clusterNumber):
        self.clusterNumber = clusterNumber
    
    def get_cluster(self):
        return self.clusterNumber

class Centroid:
    def __init__(self, x):
        self.x = x
    
    def set_x(self, x):
        self.x = x
    
    def get_x(self):
        return self.x

In [59]:
def initialize_centroids():
    # Set the centoid coordinates to match the data points furthest from each other.
    # In this example, [31, 51.613, 1, 3.2260000000000004] and [758, 59.234999999999999, 22, 2.9019999999999997]
    centroids.append(Centroid(SAMPLES[LOWEST_SAMPLE_POINT][0]))
    centroids.append(Centroid(SAMPLES[Middle_SAMPLE_POINT][0]))
    centroids.append(Centroid(SAMPLES[HIGHEST_SAMPLE_POINT][0]))
        
    print("Centroids initialized at:")
    print("(", centroids[0].get_x(),")")
    print("(", centroids[1].get_x(),")")
    print("(", centroids[2].get_x(),")")
    print()
    return
#print(initialize_centroids())
#print(centroids.append(Centroid(SAMPLES[HIGHEST_SAMPLE_POINT][0])))

In [60]:
def initialize_datapoints():
    # DataPoint objects' x and y values are taken from the SAMPLE array.
    # The DataPoints associated with LOWEST_SAMPLE_POINT and HIGHEST_SAMPLE_POINT are initially
    # assigned to the clusters matching the LOWEST_SAMPLE_POINT and HIGHEST_SAMPLE_POINT centroids.
    for i in range(TOTAL_DATA):
        newPoint = DataPoint(SAMPLES[i][0])
        
        if(i == LOWEST_SAMPLE_POINT):
            newPoint.set_cluster(0)
        elif(i == Middle_SAMPLE_POINT):
            newPoint.set_cluster(1)
        elif(i == HIGHEST_SAMPLE_POINT):
            newPoint.set_cluster(2)
        else:
            newPoint.set_cluster(None)
            
        data1.append(newPoint)
    
    return

In [61]:
def get_distance(dataPointX, centroidX):
    # Calculate Euclidean distance.
    return math.sqrt(math.pow((centroidX - dataPointX), 2))

In [62]:
def recalculate_centroids():
    totalX = 0
    totalInCluster = 0
    
    for j in range(NUM_CLUSTERS):
        for k in range(len(data1)):
            if(data1[k].get_cluster() == j):
                totalX += data1[k].get_x()
                totalInCluster += 1
        
        if(totalInCluster > 0):
            centroids[j].set_x(totalX / totalInCluster)
               
    return

print(recalculate_centroids())


None

In [72]:
def update_clusters():
    isStillMoving = 0
    
    for i in range(TOTAL_DATA):
        bestMinimum = BIG_NUMBER
        currentCluster = 0
        
        for j in range(NUM_CLUSTERS):
            distance = get_distance(data1[i].get_x(), centroids[j].get_x())
            if(distance < bestMinimum):
                bestMinimum = distance
                currentCluster = j
        
        data1[i].set_cluster(currentCluster)
        
        if(data1[i].get_cluster() is None or data1[i].get_cluster() != currentCluster):
            data1[i].set_cluster(currentCluster)
            isStillMoving = 1
    
    return isStillMoving

In [87]:
def perform_kmeans():
    isStillMoving = 1
    
    initialize_centroids()
    
    initialize_datapoints()
    
    while(isStillMoving):
        recalculate_centroids()
        isStillMoving = update_clusters()
    
    return
perform_kmeans()


Centroids initialized at:
( 1.46 )
( 2.27043478261 )
( 3.23026666667 )


In [29]:
def print_results():
    result_list = []
    for i in range(NUM_CLUSTERS):
        
        for j in range(TOTAL_DATA):            
            
            if(data1[j].get_cluster() == i):
                result = []               
                result.append(data1[j].get_cluster())
                result.append(data1[j].get_x()) 
                result_list.append(result)
               
    return result_list

print_results()


Out[29]:
[[0, 1.46],
 [1, 2.2200000000000002],
 [2, 2.8500000000000001],
 [2, 2.8999999999999999],
 [2, 3.1200000000000001],
 [2, 3.1400000000000001],
 [2, 3.96],
 [2, 4.79],
 [2, 5.0899999999999999]]

In [ ]:


In [88]:
samples_cr=[]
for i in range(0,len(table_data)):
    a = [table_data.conversion_rate[i]]
    samples_cr.append(a)
    
samples_cr = sorted(samples_cr, key=lambda x: x[0])
samples_cr


Out[88]:
[[1.46],
 [2.2200000000000002],
 [2.8500000000000001],
 [2.8999999999999999],
 [3.1200000000000001],
 [3.1400000000000001],
 [3.96],
 [4.79],
 [5.0899999999999999]]

In [89]:
len(samples_cr)


Out[89]:
9

In [ ]:
import random
import math

NUM_CLUSTERS = 3
TOTAL_DATA = 9
LOWEST_SAMPLE_POINT = 0 #element 0 of SAMPLES.
Middle_SAMPLE_POINT = 4
HIGHEST_SAMPLE_POINT = 8 #element 3 of SAMPLES.
BIG_NUMBER = math.pow(10, 10)

SAMPLES = samples_cr

data = []
centroids = []

class DataPoint:
    def __init__(self, x):
        self.x = x
       
    
    def set_x(self, x):
        self.x = x
    
    def get_x(self):
        return self.x
    
    def set_cluster(self, clusterNumber):
        self.clusterNumber = clusterNumber
    
    def get_cluster(self):
        return self.clusterNumber

class Centroid:
    def __init__(self, x):
        self.x = x
         
    def set_x(self, x):
        self.x = x
    
    def get_x(self):
        return self.x
    
def initialize_centroids():
    # Set the centoid coordinates to match the data points furthest from each other.
    # In this example, (1.0, 1.0) and (5.0, 7.0)
    centroids.append(Centroid(SAMPLES[LOWEST_SAMPLE_POINT][0]))
    centroids.append(Centroid(SAMPLES[Middle_SAMPLE_POINT][0]))
    centroids.append(Centroid(SAMPLES[HIGHEST_SAMPLE_POINT][0]))
    
    print("Centroids initialized at:")
    print("(", centroids[0].get_x(), ")")
    print("(", centroids[1].get_x(), ")")
    print("(", centroids[2].get_x(), ")")
    print()
    return

def initialize_datapoints():
    # DataPoint objects' x and y values are taken from the SAMPLE array.
    # The DataPoints associated with LOWEST_SAMPLE_POINT and HIGHEST_SAMPLE_POINT are initially
    # assigned to the clusters matching the LOWEST_SAMPLE_POINT and HIGHEST_SAMPLE_POINT centroids.
    for i in range(TOTAL_DATA):
        newPoint = DataPoint(SAMPLES[i][0])
        
        if(i == LOWEST_SAMPLE_POINT):
            newPoint.set_cluster(0)
        elif(i == Middle_SAMPLE_POINT):
            newPoint.set_cluster(1)
        elif(i == HIGHEST_SAMPLE_POINT):
            newPoint.set_cluster(2)
        else:
            newPoint.set_cluster(None)
            
        data.append(newPoint)
    
    return

def get_distance(dataPointX, centroidX, ):
    # Calculate Euclidean distance.
    return math.sqrt(math.pow((centroidX - dataPointX), 2))

def recalculate_centroids():
    totalX = 0
    totalY = 0
    totalInCluster = 0
    
    for j in range(NUM_CLUSTERS):
        for k in range(len(data)):
            if(data[k].get_cluster() == j):
                totalX += data[k].get_x()
                totalInCluster += 1
        
        if(totalInCluster > 0):
            centroids[j].set_x(totalX / totalInCluster)
           
    
    return

def update_clusters():
    isStillMoving = 0
    
    for i in range(TOTAL_DATA):
        bestMinimum = BIG_NUMBER
        currentCluster = 0
        
        for j in range(NUM_CLUSTERS):
            distance = get_distance(data[i].get_x(), centroids[j].get_x())
            if(distance < bestMinimum):
                bestMinimum = distance
                currentCluster = j
        
        data[i].set_cluster(currentCluster)
        
        if(data[i].get_cluster() is None or data[i].get_cluster() != currentCluster):
            data[i].set_cluster(currentCluster)
            isStillMoving = 1
    
    return isStillMoving

def perform_kmeans():
    isStillMoving = 1
    
    initialize_centroids()
    
    initialize_datapoints()
    
    while(isStillMoving):
        recalculate_centroids()
        isStillMoving = update_clusters()
    
    return
perform_kmeans()
def print_results():
    for i in range(NUM_CLUSTERS):
        print("Cluster ", i, " includes:")
        for j in range(TOTAL_DATA):
            if(data[j].get_cluster() == i):
                print("(", data[j].get_x(), ")")
        print()
    
    return

perform_kmeans()
print_results()

In [109]:
def print_results():
    for i in range(NUM_CLUSTERS):
        print("Cluster ", i, " includes:")
        for j in range(TOTAL_DATA):
            if(data[j].get_cluster() == i):
                print("(", data[j].get_x(), ")")
        print()
    
    return

perform_kmeans()
print_results()


Centroids initialized at:
( 1.46 )
( 2.29 )
( 3.22333333333 )

Cluster  0  includes:
( 1.46 )

Cluster  1  includes:
( 2.22 )

Cluster  2  includes:
( 2.85 )
( 2.9 )
( 3.12 )
( 3.14 )
( 3.96 )
( 4.79 )
( 5.09 )


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [32]:
cluster_cr = pd.DataFrame(print_results(),columns = ['#cluster_for_cr','conversion_rate'])
cluster_cr


Out[32]:
#cluster_for_cr conversion_rate
0 0 1.46
1 1 2.22
2 2 2.85
3 2 2.90
4 2 3.12
5 2 3.14
6 2 3.96
7 2 4.79
8 2 5.09

In [33]:
result_cr = (table_data.sort('conversion_rate')).merge(cluster_cr, on = 'conversion_rate')
result_cr


C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\__main__.py:1: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  if __name__ == '__main__':
Out[33]:
sessions bounce_rate conversions conversion_rate city_age #cluster_for_cr
0 205 55.609756 3 1.46 London, 45-54 0
1 90 45.555556 2 2.22 Croydon, 45-54 1
2 842 59.144893 24 2.85 London, 25-34 2
3 482 54.771784 14 2.90 London, 35-44 2
4 32 53.125000 1 3.12 Croydon, 55-64 2
5 223 43.049327 7 3.14 Croydon, 35-44 2
6 101 41.584158 4 3.96 Croydon, 18-24 2
7 167 49.700599 8 4.79 London, 18-24 2
8 334 47.904192 17 5.09 Croydon, 25-34 2

In [34]:
finish = result_sessions.merge(result_cr, on = "city_age")
finish


Out[34]:
sessions_x bounce_rate_x conversions_x conversion_rate_x city_age #cluster_for_sess sessions_y bounce_rate_y conversions_y conversion_rate_y #cluster_for_cr
0 32 53.125000 1 3.12 Croydon, 55-64 0 32 53.125000 1 3.12 2
1 90 45.555556 2 2.22 Croydon, 45-54 1 90 45.555556 2 2.22 1
2 101 41.584158 4 3.96 Croydon, 18-24 1 101 41.584158 4 3.96 2
3 167 49.700599 8 4.79 London, 18-24 1 167 49.700599 8 4.79 2
4 205 55.609756 3 1.46 London, 45-54 1 205 55.609756 3 1.46 0
5 223 43.049327 7 3.14 Croydon, 35-44 1 223 43.049327 7 3.14 2
6 334 47.904192 17 5.09 Croydon, 25-34 2 334 47.904192 17 5.09 2
7 482 54.771784 14 2.90 London, 35-44 2 482 54.771784 14 2.90 2
8 842 59.144893 24 2.85 London, 25-34 2 842 59.144893 24 2.85 2

In [36]:
finish = finish[['city_age','#cluster_for_sess','#cluster_for_cr','sessions_x','conversion_rate_x',]]
finish


Out[36]:
city_age #cluster_for_sess #cluster_for_cr sessions_x conversion_rate_x
0 Croydon, 55-64 0 2 32 3.12
1 Croydon, 45-54 1 1 90 2.22
2 Croydon, 18-24 1 2 101 3.96
3 London, 18-24 1 2 167 4.79
4 London, 45-54 1 0 205 1.46
5 Croydon, 35-44 1 2 223 3.14
6 Croydon, 25-34 2 2 334 5.09
7 London, 35-44 2 2 482 2.90
8 London, 25-34 2 2 842 2.85

In [ ]: