In [1]:
#!pip install graphistry -q
In [2]:
import pandas as pd
import graphistry
#graphistry.register(key='MY_KEY', server='labs.graphistry.com')
Use a file by uploading it or via URL.
Run help(pd.read_csv) for more options.
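To upload a file in Jupyter: reconnect if needed via `kernel -> reconnect`, open the file browser (`/tree`) by clicking the Jupyter logo, then use the upload button on the top right. In environments with a Files tab, click UPLOAD; the uploaded file appears under `/content`. Run `help(pd.read_csv)` for more options.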
In [3]:
# Read the events CSV into `df`, report how many rows it has, and preview
# up to three randomly chosen rows (fewer when the file is smaller than that).
file_path = './events-1551346702.csv'
df = pd.read_csv(file_path)
print('# rows', df.shape[0])
df.sample(n=min(df.shape[0], 3))
Out[3]:
In [4]:
# Display the column labels of the loaded frame.
df.columns
Out[4]:
In [8]:
# Tabulate the number of distinct values in every column, sorted so the
# least-varied columns come first.
hits = pd.DataFrame({
    'col': list(df.columns),
    'num_uniq': [len(df[name].unique()) for name in df.columns],
}).sort_values('num_uniq')
# Show only the columns whose distinct-value count lies strictly between 10 and 9288.
hits.query('num_uniq > 10 & num_uniq < 9288')
Out[8]:
In [10]:
# Columns excluded from the node list even when their distinct-value count qualifies.
skip_nodes = ['icij_notes', 'determined_cause', 'action_classification', 'icij_notes', 'country', 'status', 'source']
# Candidate node columns: same distinct-value window as the `hits` preview,
# minus anything listed in skip_nodes.
nodes = [
    name
    for name in hits.query('num_uniq > 10 & num_uniq < 9288')['col'].tolist()
    if name not in skip_nodes
]
nodes
Out[10]:
In [12]:
# Keep only the rows whose country is "USA".
# The filter is applied to `df` (the frame loaded from the CSV) because no cell
# in this notebook defines `df_orig`; this also matches the consolidated cell
# further down. Re-running the cell is harmless: filtering an already-filtered
# frame by the same condition returns the same rows.
# NOTE(review): assumes the CSV has a `country` column - confirm against the data.
df = df.query('country == "USA"')
Set mode and the corresponding values:
In [13]:
# Plot configuration. `mode` selects which branch of the plotting cell runs:
#   'A' - graphistry.edges(...) bound to my_src_col / my_dest_col
#   'B' - graphistry.hypergraph(...) over node_cols
#   'C' - graphistry.hypergraph(..., direct=True) using the `edges` mapping
#Pick 'A', 'B', or 'C'
mode = 'B'
# Upper bound on the number of rows sampled from df before plotting.
max_rows = 50000
### 'A' == mode
# Column names passed as source= / destination= when binding the edge list.
my_src_col = 'attackerIP'
my_dest_col = 'victimIP'
### 'B' == mode
# Columns handed to the hypergraph call; `nodes` comes from the earlier cell.
node_cols = nodes
# Passed to the hypergraph call as opts['CATEGORIES'].
# All entries below are commented out, so this dict is empty.
categories = { #optional
#'date': [ 'create_date', 'date_initiated_by_firm', 'date_posted', 'date_terminated', 'updated_at' ]
#'ip': ['attackerIP', 'victimIP']
#, 'user': ['owner', 'seller'],
}
### 'C' == mode
# Passed to the hypergraph call as opts['EDGES']: source column -> list of destination columns.
edges = {
'attackerIP': [ 'victimIP', 'victimPort', 'vulnName'],
'victimIP': [ 'victimPort'],
'vulnName': [ 'victimIP' ]
}
# NOTE: this second assignment replaces the `categories` dict from the 'B'
# section above, so every mode (including 'B') sees this value.
categories = { #optional
'ip': ['attackerIP', 'victimIP']
#, 'user': ['owner', 'seller'], ...
}
In [14]:
# Build and render the graph for the selected `mode`:
#   'A' - edge list bound with source=my_src_col, destination=my_dest_col
#   'B' - hypergraph over node_cols
#   'C' - hypergraph with direct=True, using the column pairs listed in `edges`
# Any other value raises ValueError instead of failing later on `None`.
g = None
hg = None
# Cap the sample size at the frame length; sample() rejects n > len(df).
num_rows = min(max_rows, len(df))
if mode == 'A':
    g = graphistry.edges(df.sample(num_rows)).bind(source=my_src_col, destination=my_dest_col)
elif mode == 'B':
    hg = graphistry.hypergraph(df.sample(num_rows), node_cols, opts={'CATEGORIES': categories})
    g = hg['graph']
elif mode == 'C':
    # Every column named in `edges` (as a source key or as a destination),
    # de-duplicated in first-seen order so the column order is the same on
    # every run. Held in its own name so the `nodes` / `node_cols` values
    # that mode 'B' relies on are not overwritten by running this branch.
    edge_node_cols = list(dict.fromkeys(
        list(edges.keys()) + [dest for dests in edges.values() for dest in dests]
    ))
    hg = graphistry.hypergraph(df.sample(num_rows), edge_node_cols, direct=True, opts={'CATEGORIES': categories, 'EDGES': edges})
    g = hg['graph']
else:
    raise ValueError("mode must be 'A', 'B', or 'C', got {!r}".format(mode))
#hg
# Report the number of edge rows in the graph before rendering it.
print(len(g._edges))
g.plot()
Out[14]:
In [15]:
#!pip install graphistry -q
import pandas as pd
import graphistry
#graphistry.register(key='MY_KEY', server='labs.graphistry.com')
##########
#1. Load
# Read the events CSV, then print its column labels, its row count, and a
# random preview of up to three rows.
file_path = './events-1551346702.csv'
df = pd.read_csv(file_path)
print(df.columns)
print('rows:', df.shape[0])
print(df.sample(n=min(df.shape[0], 3)))
In [16]:
##########
#2. Clean
#df = df.rename(columns={'attackerIP': 'src_ip', 'victimIP': 'dest_ip', 'victimPort': 'protocol'})
# Distinct-value count per column, least-varied columns first.
hits = pd.DataFrame({
    'col': list(df.columns),
    'num_uniq': [len(df[name].unique()) for name in df.columns],
}).sort_values('num_uniq')
# Columns excluded from the node list even when their distinct-value count qualifies.
skip_nodes = ['icij_notes', 'determined_cause', 'action_classification', 'icij_notes', 'country', 'status', 'source']
# Node columns: distinct-value count strictly between 10 and 9288, minus skip_nodes.
nodes = [
    name
    for name in hits.query('num_uniq > 10 & num_uniq < 9288')['col'].tolist()
    if name not in skip_nodes
]
# Restrict the frame to USA rows; this happens after `nodes` is computed,
# so the counts above are taken over the unfiltered data.
df = df.query('country == "USA"')
##########
#3. Config - Pick 'A', 'B', or 'C'
# `mode` selects which branch of the plot section below runs.
mode = 'B'
# Upper bound on the number of rows sampled from df before plotting.
max_rows = 50000
### 'A' == mode
# Column names passed as source= / destination= when binding the edge list.
my_src_col = 'attackerIP'
my_dest_col = 'victimIP'
### 'B' == mode
# Columns handed to the hypergraph call; `nodes` is computed in the clean section.
node_cols = nodes
# Passed to the hypergraph call as opts['CATEGORIES'].
# All entries are commented out, so this dict is empty.
categories = { #optional
#'ip': ['src_ip', 'dest_ip']
#, 'user': ['owner', 'seller'],
}
### 'C' == mode
# Passed to the hypergraph call as opts['EDGES']: source column -> list of destination columns.
edges = {
'attackerIP': [ 'victimIP', 'victimPort', 'vulnName'],
'victimIP': [ 'victimPort' ],
'vulnName': ['victimIP' ]
}
# NOTE: this second assignment replaces the `categories` dict from the 'B'
# section above; both are currently empty, so every mode sees an empty dict.
categories = { #optional
#'ip': ['attackerIP', 'victimIP']
#, 'user': ['owner', 'seller'], ...
}
##########
#4. Plot
# Build and render the graph for the selected `mode`:
#   'A' - edge list bound with source=my_src_col, destination=my_dest_col
#   'B' - hypergraph over node_cols
#   'C' - hypergraph with direct=True, using the column pairs listed in `edges`
# Any other value raises ValueError instead of failing later on `None`.
g = None
hg = None
# Cap the sample size at the frame length; sample() rejects n > len(df).
num_rows = min(max_rows, len(df))
if mode == 'A':
    g = graphistry.edges(df.sample(num_rows)).bind(source=my_src_col, destination=my_dest_col)
elif mode == 'B':
    hg = graphistry.hypergraph(df.sample(num_rows), node_cols, opts={'CATEGORIES': categories})
    g = hg['graph']
elif mode == 'C':
    # Every column named in `edges` (as a source key or as a destination),
    # de-duplicated in first-seen order so the column order is the same on
    # every run. Held in its own name so the `nodes` / `node_cols` values
    # that mode 'B' relies on are not overwritten by running this branch.
    edge_node_cols = list(dict.fromkeys(
        list(edges.keys()) + [dest for dests in edges.values() for dest in dests]
    ))
    hg = graphistry.hypergraph(df.sample(num_rows), edge_node_cols, direct=True, opts={'CATEGORIES': categories, 'EDGES': edges})
    g = hg['graph']
else:
    raise ValueError("mode must be 'A', 'B', or 'C', got {!r}".format(mode))
g.plot()
Out[16]:
In [ ]: