User groups


In [298]:
request = "POST https://analyticsreporting.googleapis.com/v4/reports:batchGet?fields=reports(columnHeader%2Cdata(rows%2Ctotals))&key={YOUR_API_KEY}"
request = {
 "reportRequests": [
  {
   "viewId": "123303369",
   "dateRanges": [
    {
     "startDate": "2017-01-01",
     "endDate": "2017-04-30"
    }
   ],
   "metrics": [
    {
     "expression": "ga:sessions"
    },
    {
     "expression": "ga:sessionDuration"
    },
    {
     "expression": "ga:goal1Completions"
    },
    {
     "expression": "ga:bounceRate"
    }
   ],
   "dimensions": [
    {
     "name": "ga:city"
    },
    {
     "name": "ga:userAgeBracket"
    }
   ]
  }
 ]
}

In [367]:
import json

with open('data/TMRW_users.json') as file:
    input_ugroups = json.load(file)
    
#input_ugroups

# Define dimensions list
input_ugroups_dimensions = input_ugroups['reports'][0]['columnHeader']['dimensions']

dimension_count = len(input_ugroups_dimensions)

# Define metrics list
input_ugroups_metrics = input_ugroups['reports'][0]['columnHeader']['metricHeader']['metricHeaderEntries']

def create_metric_list(raw_data):
    lst = []
    for item in raw_data:
        lst.append(item['name'])
    return lst

input_ugroups_metrics = create_metric_list(input_ugroups_metrics)


# Create input data

input_ugroups_data = input_ugroups['reports'][0]['data']['rows']

input_ugroups_data


Out[367]:
[{'dimensions': ['(not set)', '25-34'],
  'metrics': [{'values': ['27', '1848.0', '1', '62.96296296296296']}]},
 {'dimensions': ['(not set)', '35-44'],
  'metrics': [{'values': ['23', '1790.0', '1', '52.17391304347826']}]},
 {'dimensions': ['Croydon', '18-24'],
  'metrics': [{'values': ['101', '10047.0', '4', '41.584158415841586']}]},
 {'dimensions': ['Croydon', '25-34'],
  'metrics': [{'values': ['334', '62737.0', '17', '47.90419161676647']}]},
 {'dimensions': ['Croydon', '35-44'],
  'metrics': [{'values': ['223', '34564.0', '7', '43.04932735426009']}]},
 {'dimensions': ['Croydon', '45-54'],
  'metrics': [{'values': ['90', '14131.0', '2', '45.55555555555556']}]},
 {'dimensions': ['Croydon', '55-64'],
  'metrics': [{'values': ['32', '1932.0', '1', '53.125']}]},
 {'dimensions': ['Hove', '35-44'],
  'metrics': [{'values': ['12', '833.0', '1', '16.666666666666664']}]},
 {'dimensions': ['London', '18-24'],
  'metrics': [{'values': ['167', '29335.0', '8', '49.700598802395206']}]},
 {'dimensions': ['London', '25-34'],
  'metrics': [{'values': ['842', '84813.0', '24', '59.14489311163895']}]},
 {'dimensions': ['London', '35-44'],
  'metrics': [{'values': ['482', '56008.0', '14', '54.77178423236515']}]},
 {'dimensions': ['London', '45-54'],
  'metrics': [{'values': ['205', '26525.0', '3', '55.60975609756098']}]},
 {'dimensions': ['London', '55-64'],
  'metrics': [{'values': ['37', '3329.0', '0', '48.64864864864865']}]},
 {'dimensions': ['London', '65+'],
  'metrics': [{'values': ['15', '2224.0', '1', '66.66666666666666']}]}]

In [368]:
values_list = []
for group in input_ugroups_data:
    new_dim_name = group['dimensions'][0] + ", " + group['dimensions'][1]
    group[new_dim_name] = group['metrics'][0]
    del group['dimensions']
    del group['metrics']
    
    conv_rate = round(float(int(group[new_dim_name]['values'][2])/int(group[new_dim_name]['values'][0])*100),2)
    values_list.append(group[new_dim_name]['values'])

    group[new_dim_name]['values'].append(conv_rate)
    
    
#values_list    
input_ugroups_data


Out[368]:
[{'(not set), 25-34': {'values': ['27',
    '1848.0',
    '1',
    '62.96296296296296',
    3.7]}},
 {'(not set), 35-44': {'values': ['23',
    '1790.0',
    '1',
    '52.17391304347826',
    4.35]}},
 {'Croydon, 18-24': {'values': ['101',
    '10047.0',
    '4',
    '41.584158415841586',
    3.96]}},
 {'Croydon, 25-34': {'values': ['334',
    '62737.0',
    '17',
    '47.90419161676647',
    5.09]}},
 {'Croydon, 35-44': {'values': ['223',
    '34564.0',
    '7',
    '43.04932735426009',
    3.14]}},
 {'Croydon, 45-54': {'values': ['90',
    '14131.0',
    '2',
    '45.55555555555556',
    2.22]}},
 {'Croydon, 55-64': {'values': ['32', '1932.0', '1', '53.125', 3.12]}},
 {'Hove, 35-44': {'values': ['12', '833.0', '1', '16.666666666666664', 8.33]}},
 {'London, 18-24': {'values': ['167',
    '29335.0',
    '8',
    '49.700598802395206',
    4.79]}},
 {'London, 25-34': {'values': ['842',
    '84813.0',
    '24',
    '59.14489311163895',
    2.85]}},
 {'London, 35-44': {'values': ['482',
    '56008.0',
    '14',
    '54.77178423236515',
    2.9]}},
 {'London, 45-54': {'values': ['205',
    '26525.0',
    '3',
    '55.60975609756098',
    1.46]}},
 {'London, 55-64': {'values': ['37',
    '3329.0',
    '0',
    '48.64864864864865',
    0.0]}},
 {'London, 65+': {'values': ['15', '2224.0', '1', '66.66666666666666', 6.67]}}]

In [369]:
# Define each metric dict

ugroups_data = {}

for ugroup in input_ugroups_data:
    #print (ugroup)
    
    for gr in ugroup:
        ugroups_data[gr] = {'sessions':0,
                       'duration':0,
                       'bounce rate':0,
                       'conversions':0,
                       'conversion rate':0}
        
        ugroups_data[gr]['sessions'] = int(ugroup[gr]['values'][0])
        ugroups_data[gr]['duration'] = float(ugroup[gr]['values'][1])
        ugroups_data[gr]['conversions'] = int(ugroup[gr]['values'][2])
        ugroups_data[gr]['bounce rate'] = float(ugroup[gr]['values'][3])
        ugroups_data[gr]['conversion rate'] = float(ugroup[gr]['values'][4])
        
#ugroups_data

In [370]:
rows = list(ugroups_data.keys())
rows


Out[370]:
['(not set), 25-34',
 '(not set), 35-44',
 'Croydon, 18-24',
 'Croydon, 25-34',
 'Croydon, 35-44',
 'Croydon, 45-54',
 'Croydon, 55-64',
 'Hove, 35-44',
 'London, 18-24',
 'London, 25-34',
 'London, 35-44',
 'London, 45-54',
 'London, 55-64',
 'London, 65+']

In [371]:
import collections
from collections import OrderedDict

columns = []
for u in ugroups_data:
    #print (test[r])
    for metric in ugroups_data[u]:
        columns.append(metric)
    
columns = list(OrderedDict.fromkeys(columns))    
columns


Out[371]:
['sessions', 'duration', 'bounce rate', 'conversions', 'conversion rate']

In [233]:
import pandas as pd

In [374]:
df = pd.DataFrame(values_list,
                  index = rows,
                  columns = columns)

df.to_json(orient='split')
pd.read_json(_, orient='split')


Out[374]:
sessions duration bounce rate conversions conversion rate
(not set), 25-34 27 1848 1 62.962963 3.70
(not set), 35-44 23 1790 1 52.173913 4.35
Croydon, 18-24 101 10047 4 41.584158 3.96
Croydon, 25-34 334 62737 17 47.904192 5.09
Croydon, 35-44 223 34564 7 43.049327 3.14
Croydon, 45-54 90 14131 2 45.555556 2.22
Croydon, 55-64 32 1932 1 53.125000 3.12
Hove, 35-44 12 833 1 16.666667 8.33
London, 18-24 167 29335 8 49.700599 4.79
London, 25-34 842 84813 24 59.144893 2.85
London, 35-44 482 56008 14 54.771784 2.90
London, 45-54 205 26525 3 55.609756 1.46
London, 55-64 37 3329 0 48.648649 0.00
London, 65+ 15 2224 1 66.666667 6.67

In [ ]:


In [ ]:


In [ ]:


In [14]:
import numpy as np
 
def cluster_points(X, mu):
    clusters  = {}
    for x in X:
        bestmukey = min([(i[0], np.linalg.norm(x-mu[i[0]])) \
                    for i in enumerate(mu)], key=lambda t:t[1])[0]
        try:
            clusters[bestmukey].append(x)
        except KeyError:
            clusters[bestmukey] = [x]
    return clusters
 
def reevaluate_centers(mu, clusters):
    newmu = []
    keys = sorted(clusters.keys())
    for k in keys:
        newmu.append(np.mean(clusters[k], axis = 0))
    return newmu

def has_converged(mu, oldmu):
    return set([tuple(a) for a in mu]) == set([tuple(a) for a in oldmu])

def find_centers(X, K):
    # Initialize to K random centers
    oldmu = random.sample(X, K)
    mu = random.sample(X, K)
    while not has_converged(mu, oldmu):
        oldmu = mu
        # Assign all points in X to clusters
        clusters = cluster_points(X, mu)
        # Reevaluate centers
        mu = reevaluate_centers(oldmu, clusters)
    return(mu, clusters)

In [15]:
import random
 
def init_board(N):
    X = np.array([(random.uniform(-1, 1), random.uniform(-1, 1)) for i in range(N)])
    return X

In [16]:
def init_board_gauss(N, k):
    n = float(N)/k
    X = []
    for i in range(k):
        c = (random.uniform(-1, 1), random.uniform(-1, 1))
        s = random.uniform(0.05,0.5)
        x = []
        while len(x) < n:
            a, b = np.array([np.random.normal(c[0], s), np.random.normal(c[1], s)])
            # Continue drawing points from the distribution in the range [-1,1]
            if abs(a) < 1 and abs(b) < 1:
                x.append([a,b])
        X.extend(x)
    X = np.array(X)[:N]
    return X

In [4]:
import pybrain
dir(pybrain)


---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-4-cd3e1c99c440> in <module>()
----> 1 import pybrain
      2 dir(pybrain)

ModuleNotFoundError: No module named 'pybrain'

In [ ]: