In [1]:
import graphistry
import pandas as pd
import numpy as np
#graphistry.register(key='MY_API_KEY', server='labs.graphistry.com') #https://www.graphistry.com/api-request
In [2]:
# Load the transaction log and convert 'Date' from epoch milliseconds to timestamps.
transactions = pd.read_csv('../../data/transactions.csv').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], unit='ms')
)
print('# transactions', len(transactions))
transactions.head(3)
Out[2]:
Task: Spot the embezzling
In [23]:
# Build the graph from the transaction table: every row is an edge
# running from its 'Source' column to its 'Destination' column.
g = (
    graphistry
    .edges(transactions)
    .bind(source='Source', destination='Destination')
)
In [24]:
# Plot the transaction graph; at this point g carries only the edge table (no node table attached yet).
g.plot()
Out[24]:
In [25]:
# Per-wallet incoming summary: one row per 'Destination' wallet.
#   isTaintedWallet - 1 if the wallet's incoming isTainted values sum to more than 0, else 0
#   Amount $        - total of the incoming amounts
wallet_in = (
    transactions
    .groupby('Destination')
    # Use the string alias 'sum' rather than np.sum: passing NumPy callables
    # to groupby().agg() raises a FutureWarning on pandas >= 2.1.
    .agg({'isTainted': lambda x: 1 if x.sum() > 0 else 0, 'Amount $': 'sum'})
    .reset_index()
    .rename(columns={'Destination': 'wallet', 'isTainted': 'isTaintedWallet'})
)
# Not all wallets received money; tag the ones that did.
wallet_in['Receivables'] = True
wallet_in.head(3)
Out[25]:
In [26]:
# Per-wallet outgoing summary: one row per 'Source' wallet.
#   isTaintedWallet - 1 if the wallet's outgoing isTainted values sum to more than 0, else 0
#                     (a 0/1 flag, the same way wallet_in builds this column, instead of a raw count)
#   Amount $        - largest single outgoing amount
# NOTE(review): wallet_in aggregates 'Amount $' with a sum while this cell takes the max --
# confirm which one is intended before comparing the two columns.
wallet_out = (
    transactions
    .groupby('Source')
    # Use the string alias 'max' rather than np.max: passing NumPy callables
    # to groupby().agg() raises a FutureWarning on pandas >= 2.1.
    .agg({'isTainted': lambda x: 1 if x.sum() > 0 else 0, 'Amount $': 'max'})
    .reset_index()
    .rename(columns={'Source': 'wallet', 'isTainted': 'isTaintedWallet'})
)
# Not all wallets sent money; tag the ones that did.
wallet_out['Payables'] = True
wallet_out.head(3)
Out[26]:
In [27]:
# Combine the incoming and outgoing summaries into a node table with exactly one row per wallet.
# Join on 'wallet' only: without `on=`, pd.merge joins on every shared column
# ('wallet', 'isTaintedWallet', 'Amount $'), so a wallet that both sent and received
# would end up as two rows whenever its two summaries differ.
# Shared non-key columns are kept per side with '_in' / '_out' suffixes
# (e.g. 'Amount $_in', 'Amount $_out'); a side the wallet never appeared on is NaN.
wallets = pd.merge(wallet_in, wallet_out, on='wallet', how='outer', suffixes=('_in', '_out'))
assert wallets['wallet'].is_unique, 'expected one row per wallet'

# After the outer merge a missing side shows up as NaN; turn the tags into real booleans
# (.eq(True) maps NaN to False without the object-dtype fillna downcasting warning).
wallets['Receivables'] = wallets['Receivables'].eq(True)
wallets['Payables'] = wallets['Payables'].eq(True)

# A wallet is tainted when either side reports a positive taint value (NaN > 0 is False).
wallets['isTaintedWallet'] = (
    wallets[['isTaintedWallet_in', 'isTaintedWallet_out']].gt(0).any(axis=1).astype(int)
)
wallets = wallets.drop(columns=['isTaintedWallet_in', 'isTaintedWallet_out'])

# Exactly one of the two tags set <=> the wallet only sent or only received.
print('# Wallets only sent or only received', int((wallets['Receivables'] != wallets['Payables']).sum()))
print('# Wallets both sent and received', int((wallets['Receivables'] & wallets['Payables']).sum()))
wallets.head(3)
Out[27]:
In [28]:
# Attach the wallet table as the node table (node id = 'wallet'),
# color each node by its 'isTaintedWallet' value, and plot.
(
    g
    .nodes(wallets)
    .bind(node='wallet', point_color='isTaintedWallet')
    .plot()
)
Out[28]:
In [ ]: