Investigating large datasets becomes easier by directly visualizing cypher (BOLT) query results with Graphistry. This tutorial walks through querying Neo4j, visualizing the results, and additional configurations and queries.
This analysis is based on a blockchain data extract the Graphistry team performed around court proceedings from when Carl Force, the key DEA agent in the Silk Road bust, was sentenced for embezzling money from Ross Ulbricht (Dread Pirate Roberts). We explore to how to recreate the analysis, and determine where Carl's money went after he performed the initial embezzling.
Instructions
Further reading
Install dependencies
In [ ]:
#!pip install --user pandas
#!pip install --user graphistry[bolt]
### ADVANCED:
### If you already have the neo4j python driver, you can leave out '[bolt]':
### !pip install --user graphistry
### If you already have graphistry but not neo4j, you can reuse your existing graphistry:
### !pip install --user neo4j
Import & test
In [ ]:
import pandas as pd
import neo4j # just for testing
from neo4j import GraphDatabase # for data loader
import graphistry
print('neo4j', neo4j.__version__)
print('graphistry', graphistry.__version__)
Connect
In [ ]:
NEO4J = {
'uri': "bolt://my.site.COM:7687",
'auth': ("neo4j", "myalphapwd1")
}
graphistry.register(bolt=NEO4J)
## If not using the local Graphistry server
#GRAPHISTRY = {
# 'server': 'MY.GRAPHISTRY.COM',
# 'api': 2,
# 'key': 'MY_GRAPHISTRY_API_KEY'
#}
#graphistry.register(bolt=NEO4J, **GRAPHISTRY)
In [ ]:
DELETE_EXISTING_DATABASE=False
POPULATE_DATABASE=False
if DELETE_EXISTING_DATABASE:
driver = GraphDatabase.driver(**NEO4J)
with driver.session() as session:
# split into 2 transancations case of memory limit errors
print('Deleting existing transactions')
tx = session.begin_transaction()
tx.run("""MATCH (a:Account)-[r]->(b) DELETE r""")
tx.commit()
print('Deleting existing accounts')
tx = session.begin_transaction()
tx.run("""MATCH (a:Account) DELETE a""")
tx.commit()
print('Delete successful')
if POPULATE_DATABASE:
edges = pd.read_csv('https://www.dropbox.com/s/q1daa707y99ind9/edges.csv?dl=1')
edges = edges.rename(columns={'Amount $': "USD", 'Transaction ID': 'Transaction'})[['USD', 'Date', 'Source', 'Destination', 'Transaction']]
id_len = len(edges['Source'][0].split('...')[0]) #truncate IDs (dirty data)
edges = edges.assign(
Source=edges['Source'].apply(lambda id: id[:id_len]),
Destination=edges['Destination'].apply(lambda id: id[:id_len]))
ROSS_FULL='2a37b3bdca935152335c2097e5da367db24209cc'
ROSS = ROSS_FULL[:32]
CARL_FULL = 'b2233dd22ade4c9978ec1fd1fbb36eb7f9b4609e'
CARL = CARL_FULL[:32]
CARL_NICK = 'Carl Force (DEA)'
ROSS_NICK = 'Ross Ulbricht (SilkRoad)'
nodes = pd.read_csv('https://www.dropbox.com/s/nf796f1asow8tx7/nodes.csv?dl=1')
nodes = nodes.rename(columns={'Balance $': 'USD', 'Balance (avg) $': 'USD_avg', 'Balance (max) $': 'USD_max', 'Tainted Coins': 'Tainted_Coins'})[['Account', 'USD', 'USD_avg', 'USD_max', 'Tainted_Coins']]
nodes['Account'] = nodes['Account'].apply(lambda id: id[:id_len])
nodes['Account'] = nodes['Account'].apply(lambda id: CARL_NICK if id == CARL else ROSS_NICK if id == ROSS else id)
driver = GraphDatabase.driver(**NEO4J)
with driver.session() as session:
tx = session.begin_transaction()
print('Loading', len(nodes), 'accounts')
for index, row in nodes.iterrows():
if index % 2000 == 0:
print('Committing', index - 2000, '...', index)
tx.commit()
tx = session.begin_transaction()
tx.run("""
CREATE (a:Account {
Account: $Account,
USD: $USD, USD_avg: $USD_avg, USD_max: $USD_max, Tainted_Coins: $Tainted_Coins
})
RETURN id(a)
""", **row)
if index % 2000 == 0:
print(index)
print('Committing rest')
tx.commit()
tx = session.begin_transaction()
print('Creating index on Account')
tx.run(""" CREATE INDEX ON :Account(Account) """)
tx.commit()
STATUS=1000
BATCH=2000
driver = GraphDatabase.driver(**NEO4J)
with driver.session() as session:
tx = session.begin_transaction()
print('Loading', len(edges), 'transactions')
for index, row in edges.iterrows():
tx.run("""MATCH (a:Account),(b:Account)
WHERE a.Account = $Source AND b.Account = $Destination
CREATE (a)-[r:PAYMENT {
Source: $Source, Destination: $Destination, USD: $USD, Date: $Date, Transaction: $Transaction
}]->(b)
""", **row)
if index % STATUS == 0:
print(index)
if index % BATCH == 0 and index > 0:
print('sending batch out')
tx.commit()
print('... done')
tx = session.begin_transaction()
tx.commit()
In [ ]:
g = graphistry.cypher("""
MATCH (a)-[r:PAYMENT]->(b) WHERE r.USD > 7000 AND r.USD < 10000 RETURN a, r, b ORDER BY r.USD DESC
""")
In [ ]:
g.plot()
Screenshot
add histogram for... on edge:USD and point:USD_MAXpoint:USD_MAX, toggle it to controling point size, and in the Scene settings, increase the point size slider
In [ ]:
g = g\
.bind(point_title='Account')\
.bind(edge_title='USD')
g.plot()
In [ ]:
g.cypher("""
match (a)-[r:PAYMENT*1..20]->(b)
where a.Account = $root and ALL(transfer IN r WHERE transfer.USD > $min_amount and transfer.USD < $max_amount )
return a, r, b
""",
{'root': "Carl Force (DEA)",
'min_amount': 999,
'max_amount': 99999}).plot()
Screenshot:
In [ ]:
g.cypher("""
match (a)-[r:PAYMENT*1..10]->(b)
where b.Account=$sink and ALL(transfer IN r WHERE transfer.USD > $min_amount and transfer.USD < $max_amount )
return r, a, b
""",
{'sink': "Carl Force (DEA)",
'min_amount': 1999,
'max_amount': 99999}).plot()
Screenshot:
In [ ]:
g.cypher("match (a)-[r:PAYMENT*1..10]->(b) where a.Account=$silk and b.Account=$dea return r, a, b",
{'dea': "Carl Force (DEA)", "silk": "Ross Ulbricht (SilkRoad)"}).plot()