In this notebook we create a table grouping transaction information by customers’ purchase frequency. This is done with functions from the pandas librairie such as df.groupby() and df.cut().
In [30]:
import pandas as pd
import contiamo
In [31]:
transactions = %contiamo query query:sql:48590597:411:g71GXzJjsx4Uvad11ouKjoYbQUNNPy-qRMKkBNZfyx4
customers = %contiamo query query:sql:48590597:441:MG5W2dMjXzYgsHsgdQYzmhv44dxEQX2Lodu5Uh2Hx_s
applications = %contiamo query query:sql:48590597:442:-gz3nbw1fdmtSXkD4zGNA-cVa7s6sQtRn8upCSn6uys
In [32]:
transactions = transactions.loc[(transactions['Field transaction date date'] <= 20170331) & (transactions['Field transaction date date'] >= 20170301)]
In [27]:
df = pd.DataFrame({
'customer_id' : transactions['Field customer id'],
'revenue' : pd.to_numeric(transactions['Field app price']),
'n_of_purchases' : [1]*len(transactions)
})
df.head()
Out[27]:
In [28]:
total_users = df['customer_id'].nunique()
#Grouping by customer id
pf = df.groupby('customer_id').agg({
'revenue' : 'sum',
'n_of_purchases' : 'sum'
}).reset_index()
#Selection of data to create 'No. of Purchases', 'No. of UAUs'and 'Total Revenue'columns
x = pf.groupby('n_of_purchases').agg({
'customer_id': 'size',
'revenue' : 'sum',
})
x = x.reset_index()
#Renaming columns
x.rename(columns = {'customer_id': 'n_of_users'}, inplace=True)
#Creation of '% of Total UAUs' and 'Total transations'columns
x['percentage_of_users'] = 100 * x['n_of_users'] / total_users
x['total_transactions'] = x['n_of_purchases'] * x['n_of_users']
#Creation of 'No. of Purchases' buckets
x['n_of_purchases'] = pd.cut(x['n_of_purchases'], bins=[0,1,2,3,5,8,13,21,34,55,float('inf')])
x.head()
Out[28]:
In [29]:
#Grouping data by 'No. of Purchases' buckets
y = x.groupby('n_of_purchases').agg({
'revenue': 'sum',
'n_of_users': 'sum',
'percentage_of_users' : 'sum',
'total_transactions': 'sum',
})
y = y.reset_index()
#Creation of 'Average revenue per user' and 'Average transaction price' colums
y['average_revenue_per_user'] = y['revenue'] / y['n_of_users']
y['average_transaction_price'] = y['revenue'] / y['total_transactions']
#Putting the colums in the right order
cols = ['n_of_purchases','n_of_users', 'percentage_of_users', 'revenue', 'average_revenue_per_user', 'total_transactions', 'average_transaction_price']
y = y[cols]
#Renaming the 'No. of Purchases' buckets
y['n_of_purchases']=y['n_of_purchases'].astype(str)
y = y.replace({'n_of_purchases': {
'(0, 1]': '1',
'(1, 2]' : '2',
'(2, 3]' : '3',
'(3, 5]' : '4-5',
'(5, 8]' : '6-8',
'(8, 13]' : '9-13',
'(13, 21]' : '14-21',
'(21, 34]' : '22-34',
'(34, 55]' : '35-55',
'(55, inf]' : 'Above 55',
}})
y
Out[29]:
Interpretation: The maximum revenue is generated by people buying apps only once a month (low frequency buyers). We can now identify buying behaviours according to purchase frequency.