User groups


In [336]:
request = "POST https://analyticsreporting.googleapis.com/v4/reports:batchGet?fields=reports(columnHeader%2Cdata(rows%2Ctotals))&key={YOUR_API_KEY}"
request = {
 "reportRequests": [
  {
   "viewId": "123303369",
   "dateRanges": [
    {
     "startDate": "2017-01-01",
     "endDate": "2017-04-30"
    }
   ],
   "metrics": [
    {
     "expression": "ga:sessions"
    },
    {
     "expression": "ga:sessionDuration"
    },
    {
     "expression": "ga:goal1Completions"#instead of "ga:goal1Completions" use "goal_to_use_in_request" variable from tracking-tags code
    },
    {
     "expression": "ga:bounceRate"
    }
   ],
   "dimensions": [
    {
     "name": "ga:city"
    },
    {
     "name": "ga:userAgeBracket"
    }
   ]
  }
 ]
}

In [337]:
import json

with open('data/TMRW_user_groups.json') as file:
    input_ugroups = json.load(file)
    
#input_ugroups

# Define dimensions list
input_ugroups_dimensions = input_ugroups['reports'][0]['columnHeader']['dimensions']

dimension_count = len(input_ugroups_dimensions)

# Define metrics list
input_ugroups_metrics = input_ugroups['reports'][0]['columnHeader']['metricHeader']['metricHeaderEntries']

def create_metric_list(raw_data):
    lst = []
    for item in raw_data:
        lst.append(item['name'])
    return lst

input_ugroups_metrics = create_metric_list(input_ugroups_metrics)


# Create input data

input_ugroups_data = input_ugroups['reports'][0]['data']['rows']

input_ugroups_data


Out[337]:
[{'dimensions': ['Croydon', '18-24'],
  'metrics': [{'values': ['101',
     '41.584158415841586',
     '4',
     '3.9603960396039604']}]},
 {'dimensions': ['Croydon', '25-34'],
  'metrics': [{'values': ['334',
     '47.90419161676647',
     '17',
     '5.089820359281437']}]},
 {'dimensions': ['Croydon', '35-44'],
  'metrics': [{'values': ['223',
     '43.04932735426009',
     '7',
     '3.1390134529147984']}]},
 {'dimensions': ['Croydon', '45-54'],
  'metrics': [{'values': ['90',
     '45.55555555555556',
     '2',
     '2.2222222222222223']}]},
 {'dimensions': ['Croydon', '55-64'],
  'metrics': [{'values': ['32', '53.125', '1', '3.125']}]},
 {'dimensions': ['London', '18-24'],
  'metrics': [{'values': ['167',
     '49.700598802395206',
     '8',
     '4.790419161676647']}]},
 {'dimensions': ['London', '25-34'],
  'metrics': [{'values': ['842',
     '59.14489311163895',
     '24',
     '2.8503562945368173']}]},
 {'dimensions': ['London', '35-44'],
  'metrics': [{'values': ['482',
     '54.77178423236515',
     '14',
     '2.904564315352697']}]},
 {'dimensions': ['London', '45-54'],
  'metrics': [{'values': ['205',
     '55.60975609756098',
     '3',
     '1.4634146341463417']}]},
 {'dimensions': ['London', '55-64'],
  'metrics': [{'values': ['37', '48.64864864864865', '0', '0.0']}]}]

In [338]:
values_list = []
for group in input_ugroups_data:
    new_dim_name = group['dimensions'][0] + ", " + group['dimensions'][1]
    group[new_dim_name] = group['metrics'][0]
    del group['dimensions']
    del group['metrics']
    
    #conv_rate = round(float(int(group[new_dim_name]['values'][2])/int(group[new_dim_name]['values'][0])*100),2)
    group[new_dim_name]['values'] = list(map(float,group[new_dim_name]['values']))
    #group[new_dim_name]['values'].append(new_dim_name)
    values_list.append(group[new_dim_name]['values'])
    group[new_dim_name]['values'].insert(0,new_dim_name)
    #list(map(float,group[new_dim_name]['values'])).append(new_dim_name)
    #values_list = values_list.apply(lambda x: x[0][0][1])

    #group[new_dim_name]['values'].append(conv_rate)
    #print(group[new_dim_name])
    
values_list    
#input_ugroups_data


Out[338]:
[['Croydon, 18-24', 101.0, 41.584158415841586, 4.0, 3.9603960396039604],
 ['Croydon, 25-34', 334.0, 47.90419161676647, 17.0, 5.089820359281437],
 ['Croydon, 35-44', 223.0, 43.04932735426009, 7.0, 3.1390134529147984],
 ['Croydon, 45-54', 90.0, 45.55555555555556, 2.0, 2.2222222222222223],
 ['Croydon, 55-64', 32.0, 53.125, 1.0, 3.125],
 ['London, 18-24', 167.0, 49.700598802395206, 8.0, 4.790419161676647],
 ['London, 25-34', 842.0, 59.14489311163895, 24.0, 2.8503562945368173],
 ['London, 35-44', 482.0, 54.77178423236515, 14.0, 2.904564315352697],
 ['London, 45-54', 205.0, 55.60975609756098, 3.0, 1.4634146341463417],
 ['London, 55-64', 37.0, 48.64864864864865, 0.0, 0.0]]

In [339]:
# Define each metric dict

ugroups_data = {}

for ugroup in input_ugroups_data:
    #print (ugroup)
    
    for gr in ugroup:
        ugroups_data[gr] = {'sessions':0,
                       'bounce_rate':0,
                       'conversions':0,
                       'conversion_rate':0}
        
        ugroups_data[gr]['sessions'] = round(float(ugroup[gr]['values'][1]),2)
        ugroups_data[gr]['conversions'] = round(float(ugroup[gr]['values'][2]),2)
        ugroups_data[gr]['bounce_rate'] = round(float(ugroup[gr]['values'][3]),2)
        ugroups_data[gr]['conversion_rate'] = round(float(ugroup[gr]['values'][4]),2)
        
ugroups_data


Out[339]:
{'Croydon, 18-24': {'bounce_rate': 4.0,
  'conversion_rate': 3.96,
  'conversions': 41.58,
  'sessions': 101.0},
 'Croydon, 25-34': {'bounce_rate': 17.0,
  'conversion_rate': 5.09,
  'conversions': 47.9,
  'sessions': 334.0},
 'Croydon, 35-44': {'bounce_rate': 7.0,
  'conversion_rate': 3.14,
  'conversions': 43.05,
  'sessions': 223.0},
 'Croydon, 45-54': {'bounce_rate': 2.0,
  'conversion_rate': 2.22,
  'conversions': 45.56,
  'sessions': 90.0},
 'Croydon, 55-64': {'bounce_rate': 1.0,
  'conversion_rate': 3.12,
  'conversions': 53.12,
  'sessions': 32.0},
 'London, 18-24': {'bounce_rate': 8.0,
  'conversion_rate': 4.79,
  'conversions': 49.7,
  'sessions': 167.0},
 'London, 25-34': {'bounce_rate': 24.0,
  'conversion_rate': 2.85,
  'conversions': 59.14,
  'sessions': 842.0},
 'London, 35-44': {'bounce_rate': 14.0,
  'conversion_rate': 2.9,
  'conversions': 54.77,
  'sessions': 482.0},
 'London, 45-54': {'bounce_rate': 3.0,
  'conversion_rate': 1.46,
  'conversions': 55.61,
  'sessions': 205.0},
 'London, 55-64': {'bounce_rate': 0.0,
  'conversion_rate': 0.0,
  'conversions': 48.65,
  'sessions': 37.0}}

In [340]:
import collections
from collections import OrderedDict

columns = []
for u in ugroups_data:
    #print (u)
    for metric in ugroups_data[u]:
        columns.append('city_age')
        columns.append(metric)
        
    
columns = list(OrderedDict.fromkeys(columns))    
columns


Out[340]:
['city_age', 'sessions', 'bounce_rate', 'conversions', 'conversion_rate']

In [341]:
import pandas as pd

In [342]:
df = pd.DataFrame(values_list, columns = columns)

df.to_json(orient='split')
table_data = pd.read_json(df.to_json(orient='split'), orient='split')
table_data.conversion_rate = round(table_data.conversion_rate,2)
table_data = table_data[table_data.conversions > 0].sort('sessions')
table_data


C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\__main__.py:6: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
Out[342]:
city_age sessions bounce_rate conversions conversion_rate
4 Croydon, 55-64 32 53.125000 1 3.12
3 Croydon, 45-54 90 45.555556 2 2.22
0 Croydon, 18-24 101 41.584158 4 3.96
5 London, 18-24 167 49.700599 8 4.79
8 London, 45-54 205 55.609756 3 1.46
2 Croydon, 35-44 223 43.049327 7 3.14
1 Croydon, 25-34 334 47.904192 17 5.09
7 London, 35-44 482 54.771784 14 2.90
6 London, 25-34 842 59.144893 24 2.85

In [469]:
# create a list of sessions
samples_sessions=[]
for i in range(0,len(table_data)):
    a = table_data.sessions[i]
    samples_sessions.append(a)
    
samples_sessions = sorted(samples_sessions)
samples_sessions


Out[469]:
[32, 90, 101, 167, 205, 223, 334, 482, 842]

In [470]:
#cluster point 1(min)
cl1_point = min(samples_sessions) + 60
cl1_point


Out[470]:
92

In [471]:
#cluster point 2(middle)
mediana_number = int(len(samples_sessions)/2)
cl2_point = samples_sessions[mediana_number]+60
cl2_point


Out[471]:
265

In [472]:
#cluster point 3(max)
cl3_point = max(samples_sessions) - 240
cl3_point


Out[472]:
602

In [473]:
import math

In [474]:
# calculate distance between cl1 and each point in samples_sessions
distance_cl1 = []
for i in range(0,len(table_data)):
    d = math.sqrt(math.pow((cl1_point - samples_sessions[i]), 2))
    distance_cl1.append(d)
distance_cl1


Out[474]:
[60.0, 2.0, 9.0, 75.0, 113.0, 131.0, 242.0, 390.0, 750.0]

In [475]:
distance_cl2 = []
for i in range(0,len(table_data)):
    d = math.sqrt(math.pow((cl2_point - samples_sessions[i]), 2))
    distance_cl2.append(d)
distance_cl2


Out[475]:
[233.0, 175.0, 164.0, 98.0, 60.0, 42.0, 69.0, 217.0, 577.0]

In [476]:
distance_cl3 = []
for i in range(0,len(table_data)):
    d = math.sqrt(math.pow((cl3_point - samples_sessions[i]), 2))
    distance_cl3.append(d)
distance_cl3


Out[476]:
[570.0, 512.0, 501.0, 435.0, 397.0, 379.0, 268.0, 120.0, 240.0]

In [478]:
#create cluster1 which include the smallest distance to cl1_point

cluster1 = []
for k in range(0,len(samples_sessions)):
   
            if (distance_cl1[k] < distance_cl2[k]):
                res = distance_cl1[k]
                cluster1.append(res)
                

cluster1


Out[478]:
[60.0, 2.0, 9.0, 75.0]

In [479]:
cluster2 = []
for k in range(len(cluster1),len(samples_sessions)):
    if (distance_cl2[k] < distance_cl3[k]):
                res = distance_cl2[k]
                cluster2.append(res)
cluster2


Out[479]:
[60.0, 42.0, 69.0]

In [480]:
cluster3 = []
for k in range(len(cluster1)+len(cluster2),len(samples_sessions)):
    if (distance_cl3[k]<distance_cl2[k]):
                res = distance_cl3[k]
                cluster3.append(res)
cluster3


Out[480]:
[120.0, 240.0]

In [483]:
#mark first cluster as "low" and define a values for it  
cl1 = []
for f in range(0,len(cluster1)):
    n =[]
    n.append("low")
    n.append(samples_sessions[f])
    cl1.append(n)
cl1


Out[483]:
[['low', 32], ['low', 90], ['low', 101], ['low', 167]]

In [487]:
#create a table for cluster and value
table_cl1 = pd.DataFrame(cl1,columns = ['cluster_sess','sessions'])
table_cl1


Out[487]:
cluster_sess sessions
0 low 32
1 low 90
2 low 101
3 low 167

In [488]:
cl2 = []
for h in range(len(cluster1),len(cluster2)+len(cluster1)):
    n =[]
    n.append("mid")
    n.append(samples_sessions[h])
    cl2.append(n)
cl2


Out[488]:
[['mid', 205], ['mid', 223], ['mid', 334]]

In [489]:
table_cl2 = pd.DataFrame(cl2,index = range(len(cluster1),len(cluster2)+len(cluster1)), columns = ['cluster_sess','sessions'])
table_cl2


Out[489]:
cluster_sess sessions
4 mid 205
5 mid 223
6 mid 334

In [490]:
cl3 = []
for h in range(len(cluster2)+len(cluster1),len(samples_sessions)):
    n =[]
    n.append("high")
    n.append(samples_sessions[h])
    cl3.append(n)
cl3


Out[490]:
[['high', 482], ['high', 842]]

In [491]:
table_cl3 = pd.DataFrame(cl3,index = range(len(cluster2)+len(cluster1),len(samples_sessions)), columns = ['cluster_sess','sessions'])
table_cl3


Out[491]:
cluster_sess sessions
7 high 482
8 high 842

In [493]:
#join clusters into one table
result_clusters = pd.concat([table_cl1,table_cl2,table_cl3])
result_clusters


Out[493]:
cluster_sess sessions
0 low 32
1 low 90
2 low 101
3 low 167
4 mid 205
5 mid 223
6 mid 334
7 high 482
8 high 842

In [494]:
result_sess = result_clusters.merge(table_data, on = 'sessions')
result_sess


Out[494]:
cluster_sess sessions city_age bounce_rate conversions conversion_rate
0 low 32 Croydon, 55-64 53.125000 1 3.12
1 low 90 Croydon, 45-54 45.555556 2 2.22
2 low 101 Croydon, 18-24 41.584158 4 3.96
3 low 167 London, 18-24 49.700599 8 4.79
4 mid 205 London, 45-54 55.609756 3 1.46
5 mid 223 Croydon, 35-44 43.049327 7 3.14
6 mid 334 Croydon, 25-34 47.904192 17 5.09
7 high 482 London, 35-44 54.771784 14 2.90
8 high 842 London, 25-34 59.144893 24 2.85

Cluster analysis by conversion rate


In [495]:
samples_cr=[]
for i in range(0,len(table_data)):
    a = round(table_data.conversion_rate[i],2)
    samples_cr.append(a)
    #print(a)
samples_cr = sorted(samples_cr)
samples_cr


Out[495]:
[1.46,
 2.2200000000000002,
 2.8500000000000001,
 2.8999999999999999,
 3.1200000000000001,
 3.1400000000000001,
 3.96,
 4.79,
 5.0899999999999999]

In [496]:
cl1_point=min(samples_cr) + 0.6
cl1_point


Out[496]:
2.0600000000000001

In [498]:
mediana_number = int(len(samples_cr)/2)
cl2_point = samples_cr[mediana_number] - 0.6
cl2_point


Out[498]:
2.52

In [499]:
cl3_point=max(samples_cr)-1
cl3_point


Out[499]:
4.0899999999999999

In [500]:
distance_cl1 = []
for i in range(0,len(table_data)):
    d = math.sqrt(math.pow((cl1_point - samples_cr[i]), 2))
    distance_cl1.append(d)
distance_cl1


Out[500]:
[0.6000000000000001,
 0.16000000000000014,
 0.79,
 0.8399999999999999,
 1.06,
 1.08,
 1.9,
 2.73,
 3.03]

In [501]:
distance_cl2 = []
for i in range(0,len(table_data)):
    d = math.sqrt(math.pow((cl2_point - samples_cr[i]), 2))
    distance_cl2.append(d)
distance_cl2


Out[501]:
[1.06,
 0.2999999999999998,
 0.33000000000000007,
 0.3799999999999999,
 0.6000000000000001,
 0.6200000000000001,
 1.44,
 2.27,
 2.57]

In [502]:
distance_cl3 = []
for i in range(0,len(table_data)):
    d = math.sqrt(math.pow((cl3_point - samples_cr[i]), 2))
    distance_cl3.append(d)
distance_cl3


Out[502]:
[2.63,
 1.8699999999999997,
 1.2399999999999998,
 1.19,
 0.9699999999999998,
 0.9499999999999997,
 0.1299999999999999,
 0.7000000000000002,
 1.0]

In [503]:
cluster1 = []
for k in range(0,len(samples_cr)):
    
    if (distance_cl1[k] < distance_cl2[k]):
        res = distance_cl1[k]
        cluster1.append(res)                

cluster1


Out[503]:
[0.6000000000000001, 0.16000000000000014]

In [504]:
cluster2 = []
for k in range(len(cluster1),len(samples_cr)):
    if (distance_cl2[k] < distance_cl3[k]):
                res = distance_cl2[k]
                cluster2.append(res)
cluster2


Out[504]:
[0.33000000000000007,
 0.3799999999999999,
 0.6000000000000001,
 0.6200000000000001]

In [505]:
cluster3 = []
for k in range(len(cluster1)+len(cluster2),len(samples_cr)):
    if (distance_cl3[k]<distance_cl2[k]):
                res = distance_cl3[k]
                cluster3.append(res)
cluster3


Out[505]:
[0.1299999999999999, 0.7000000000000002, 1.0]

In [506]:
cl1 = []
for f in range(0,len(cluster1)):
    n =[]
    n.append("low")
    n.append(samples_cr[f])
    cl1.append(n)
cl1


Out[506]:
[['low', 1.46], ['low', 2.2200000000000002]]

In [507]:
table_cl1 = pd.DataFrame(cl1,columns = ['cluster_cr','conversion_rate'])
table_cl1


Out[507]:
cluster_cr conversion_rate
0 low 1.46
1 low 2.22

In [508]:
cl2 = []
for h in range(len(cluster1),len(cluster2)+len(cluster1)):
    n =[]
    n.append("mid")
    n.append(samples_cr[h])
    cl2.append(n)
cl2


Out[508]:
[['mid', 2.8500000000000001],
 ['mid', 2.8999999999999999],
 ['mid', 3.1200000000000001],
 ['mid', 3.1400000000000001]]

In [509]:
table_cl2 = pd.DataFrame(cl2,index = range(len(cluster1),len(cluster2)+len(cluster1)), columns = ['cluster_cr','conversion_rate'])
table_cl2


Out[509]:
cluster_cr conversion_rate
2 mid 2.85
3 mid 2.90
4 mid 3.12
5 mid 3.14

In [510]:
cl3 = []
for h in range(len(cluster2)+len(cluster1),len(samples_cr)):
    n =[]
    n.append("high")
    n.append(samples_cr[h])
    cl3.append(n)
cl3


Out[510]:
[['high', 3.96], ['high', 4.79], ['high', 5.0899999999999999]]

In [511]:
table_cl3 = pd.DataFrame(cl3,index = range(len(cluster2)+len(cluster1),len(samples_sessions)), columns = ['cluster_cr','conversion_rate'])
table_cl3


Out[511]:
cluster_cr conversion_rate
6 high 3.96
7 high 4.79
8 high 5.09

In [512]:
result_clusters = pd.concat([table_cl1,table_cl2,table_cl3])
result_clusters


Out[512]:
cluster_cr conversion_rate
0 low 1.46
1 low 2.22
2 mid 2.85
3 mid 2.90
4 mid 3.12
5 mid 3.14
6 high 3.96
7 high 4.79
8 high 5.09

In [513]:
result_cr = result_clusters.merge(table_data, on = 'conversion_rate')
result_cr


Out[513]:
cluster_cr conversion_rate city_age sessions bounce_rate conversions
0 low 1.46 London, 45-54 205 55.609756 3
1 low 2.22 Croydon, 45-54 90 45.555556 2
2 mid 2.85 London, 25-34 842 59.144893 24
3 mid 2.90 London, 35-44 482 54.771784 14
4 mid 3.12 Croydon, 55-64 32 53.125000 1
5 mid 3.14 Croydon, 35-44 223 43.049327 7
6 high 3.96 Croydon, 18-24 101 41.584158 4
7 high 4.79 London, 18-24 167 49.700599 8
8 high 5.09 Croydon, 25-34 334 47.904192 17

In [514]:
final = result_cr.merge(result_sess, on = 'city_age')
final


Out[514]:
cluster_cr conversion_rate_x city_age sessions_x bounce_rate_x conversions_x cluster_sess sessions_y bounce_rate_y conversions_y conversion_rate_y
0 low 1.46 London, 45-54 205 55.609756 3 mid 205 55.609756 3 1.46
1 low 2.22 Croydon, 45-54 90 45.555556 2 low 90 45.555556 2 2.22
2 mid 2.85 London, 25-34 842 59.144893 24 high 842 59.144893 24 2.85
3 mid 2.90 London, 35-44 482 54.771784 14 high 482 54.771784 14 2.90
4 mid 3.12 Croydon, 55-64 32 53.125000 1 low 32 53.125000 1 3.12
5 mid 3.14 Croydon, 35-44 223 43.049327 7 mid 223 43.049327 7 3.14
6 high 3.96 Croydon, 18-24 101 41.584158 4 low 101 41.584158 4 3.96
7 high 4.79 London, 18-24 167 49.700599 8 low 167 49.700599 8 4.79
8 high 5.09 Croydon, 25-34 334 47.904192 17 mid 334 47.904192 17 5.09

In [515]:
final = final[["city_age","cluster_cr","cluster_sess","conversion_rate_x","conversions_x","sessions_x"]]
final


Out[515]:
city_age cluster_cr cluster_sess conversion_rate_x conversions_x sessions_x
0 London, 45-54 low mid 1.46 3 205
1 Croydon, 45-54 low low 2.22 2 90
2 London, 25-34 mid high 2.85 24 842
3 London, 35-44 mid high 2.90 14 482
4 Croydon, 55-64 mid low 3.12 1 32
5 Croydon, 35-44 mid mid 3.14 7 223
6 Croydon, 18-24 high low 3.96 4 101
7 London, 18-24 high low 4.79 8 167
8 Croydon, 25-34 high mid 5.09 17 334

In [516]:
import pandas as pd

In [517]:
final["group"] = final["cluster_cr"] + final["cluster_sess"]
final


C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
Out[517]:
city_age cluster_cr cluster_sess conversion_rate_x conversions_x sessions_x group
0 London, 45-54 low mid 1.46 3 205 lowmid
1 Croydon, 45-54 low low 2.22 2 90 lowlow
2 London, 25-34 mid high 2.85 24 842 midhigh
3 London, 35-44 mid high 2.90 14 482 midhigh
4 Croydon, 55-64 mid low 3.12 1 32 midlow
5 Croydon, 35-44 mid mid 3.14 7 223 midmid
6 Croydon, 18-24 high low 3.96 4 101 highlow
7 London, 18-24 high low 4.79 8 167 highlow
8 Croydon, 25-34 high mid 5.09 17 334 highmid

In [518]:
import scipy
group_midhigh = []
sess_mh = []
convs_mh = []
cr_mh = []
for i in range(0,len(final)):
   
    if final.group[i] == "midhigh":
        sess_mh.append(final.sessions_x[i])
        convs_mh.append(final.conversions_x[i])
        cr_mh.append(final.conversion_rate_x[i])
        group_midhigh.append(final.city_age[i])
        sess_midhigh = sum(sess_mh)
        convs_midhigh = sum(convs_mh)
        cr_midhigh = scipy.mean(cr_mh,0)
group_midhigh = ";".join(group_midhigh)  
group1 = [group_midhigh,sess_midhigh,convs_midhigh,cr_midhigh]
group1


Out[518]:
['London, 25-34;London, 35-44', 1324, 38, 2.875]

In [519]:
group_highmid = []
sess_hm = []
convs_hm = []
cr_hm = []
for i in range(0,len(final)):
   
    if final.group[i] == "highmid":
        sess_hm.append(final.sessions_x[i])
        convs_hm.append(final.conversions_x[i])
        cr_hm.append(final.conversion_rate_x[i])
        group_highmid.append(final.city_age[i])
        sess_highmid = sum(sess_hm)
        convs_highmid = sum(convs_hm)
        cr_highmid = scipy.mean(cr_hm,0)
        
group_highmid = ";".join(group_highmid)  
group2 = [group_highmid,sess_highmid,convs_highmid,cr_highmid]
group2


Out[519]:
['Croydon, 25-34', 334, 17, 5.0899999999999999]

In [520]:
group_highlow = []
sess_hl = []
convs_hl = []
cr_hl = []
for i in range(0,len(final)):
   
    if final.group[i] == "highlow":
        sess_hl.append(final.sessions_x[i])
        convs_hl.append(final.conversions_x[i])
        cr_hl.append(final.conversion_rate_x[i])
        group_highlow.append(final.city_age[i])
        sess_highlow = sum(sess_hl)
        convs_highlow = sum(convs_hl)
        cr_highlow = scipy.mean(cr_hl,0)
        
group_highlow = ";".join(group_highlow)  
group3 = [group_highlow,sess_highlow,convs_highlow,cr_highlow]
group3


Out[520]:
['Croydon, 18-24;London, 18-24', 268, 12, 4.375]

In [521]:
group = []
sess_group = []
convs_group = []
cr_group = []
for i in range(0,len(final)):
   
    if (final.group[i] != "highlow") and (final.group[i] != "highmid") and (final.group[i] != "midhigh"):
        sess_group.append(final.sessions_x[i])
        convs_group.append(final.conversions_x[i])
        cr_group.append(final.conversion_rate_x[i])
        group.append(final.city_age[i])
        sess = sum(sess_group)
        convs = sum(convs_group)
        cr = scipy.mean(cr_group)
        
group = ";".join(group)  
group4 = [group,sess,convs,cr]
group4


Out[521]:
['London, 45-54;Croydon, 45-54;Croydon, 55-64;Croydon, 35-44',
 550,
 13,
 2.4850000000000003]

In [522]:
results = [group1,group2,group3,group4]
results


Out[522]:
[['London, 25-34;London, 35-44', 1324, 38, 2.875],
 ['Croydon, 25-34', 334, 17, 5.0899999999999999],
 ['Croydon, 18-24;London, 18-24', 268, 12, 4.375],
 ['London, 45-54;Croydon, 45-54;Croydon, 55-64;Croydon, 35-44',
  550,
  13,
  2.4850000000000003]]

In [523]:
final_table = pd.DataFrame(results, columns = ["group","sessions","conversions","conversion_rate"])
final_table


Out[523]:
group sessions conversions conversion_rate
0 London, 25-34;London, 35-44 1324 38 2.875
1 Croydon, 25-34 334 17 5.090
2 Croydon, 18-24;London, 18-24 268 12 4.375
3 London, 45-54;Croydon, 45-54;Croydon, 55-64;Cr... 550 13 2.485

In [ ]: