In [1]:
#!pip install graphistry -q
In [3]:
import pandas as pd
import graphistry
#graphistry.register(key='MY_KEY', server='labs.graphistry.com')
Use a file by uploading it or via URL.
Run help(pd.read_csv) for more options.
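File uploading:
- If the notebook is disconnected, click `kernel` -> `reconnect`.
- Go to the file directory (`/tree`) by clicking the Jupyter logo, then press the `upload` button on the top right.
- Where a `Files` tab is available, open it and click `UPLOAD`; uploaded files are placed under `/content`.
- Run `help(pd.read_csv)` for more options.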
In [4]:
# Read the CSV into the working DataFrame, report how many rows it has,
# and show a small random preview as the cell output.
file_path = './data/honeypot.csv'
df = pd.read_csv(filepath_or_buffer=file_path)
print('# rows', df.shape[0])
# Never ask for more preview rows than the frame holds.
df.sample(n=min(3, len(df)))
Out[4]:
In [5]:
# Optional column renaming: uncomment entries to map the CSV's column names
# onto new ones. With the mapping left empty, the frame's columns are unchanged.
df = df.rename(columns={
    # 'attackerIP': 'src_ip',
    # 'victimIP': 'dest_ip'
})
# Cap the preview at the row count (as the load cell does) so files with
# fewer than 3 rows do not make sample() fail.
df.sample(min(len(df), 3))
Out[5]:
Set mode and the corresponding values:
In [6]:
# Pick 'A', 'B', or 'C'. The plotting cell reads only the settings that
# belong to the chosen mode.
mode = 'B'
max_rows = 1000  # upper bound on how many rows are sampled for the plot

### 'A' == mode: passed to graphistry.edges(...).bind(source=..., destination=...)
my_src_col = 'attackerIP'
my_dest_col = 'victimIP'

### 'B' == mode: columns handed to graphistry.hypergraph as the node columns
node_cols = ['attackerIP', 'victimIP', 'vulnName']

### 'C' == mode: source column -> destination columns, passed as the 'EDGES' option
edges = {
    'attackerIP': ['victimIP', 'victimPort', 'vulnName'],
    'victimIP': ['victimPort'],
    'vulnName': ['victimIP'],
}

### 'B' or 'C' == mode (optional): passed as the hypergraph 'CATEGORIES' option.
# Defined once: a second assignment would silently replace the first, so a
# per-mode copy only invites the two drifting apart.
categories = {
    'ip': ['attackerIP', 'victimIP'],
    # 'user': ['owner', 'seller'], ...
}
In [75]:
# Build the graph for the selected mode from a random sample of at most
# max_rows rows, print its edge count, and render it.
g = None
hg = None
num_rows = min(max_rows, len(df))
sampled_df = df.sample(num_rows)  # sample once; every mode uses the same rows
if mode == 'A':
    # Plain graph: one edge per row between the two configured columns.
    g = graphistry.edges(sampled_df).bind(source=my_src_col, destination=my_dest_col)
elif mode == 'B':
    hg = graphistry.hypergraph(sampled_df, node_cols, opts={'CATEGORIES': categories})
    g = hg['graph']
elif mode == 'C':
    # Node columns are every column named in `edges`, as a source or a
    # destination. dict.fromkeys drops duplicates while keeping first-seen
    # order (Python 3.7+), so the column order is stable between runs.
    # Note: this rebinds the `node_cols` set in the config cell.
    nodes = list(edges) + [dest for dests in edges.values() for dest in dests]
    node_cols = list(dict.fromkeys(nodes))
    hg = graphistry.hypergraph(sampled_df, node_cols, direct=True, opts={'CATEGORIES': categories, 'EDGES': edges})
    g = hg['graph']
else:
    # Without this, an unrecognised mode leaves g as None and the failure
    # surfaces later as an AttributeError on None.
    raise ValueError("mode must be 'A', 'B', or 'C', got {!r}".format(mode))
#hg
print(len(g._edges))
g.plot()
Out[75]:
In [59]:
#!pip install graphistry -q
import pandas as pd
import graphistry
#graphistry.register(key='MY_KEY', server='labs.graphistry.com')
##########
#1. Load
# Read the CSV, then print its column index, its row count, and a small
# random preview (capped at the number of rows available).
file_path = './data/honeypot.csv'
df = pd.read_csv(filepath_or_buffer=file_path)
print(df.columns)
print('rows:', df.shape[0])
print(df.sample(n=min(3, len(df))))
In [79]:
##########
#2. Clean
#df = df.rename(columns={'attackerIP': 'src_ip', 'victimIP': 'dest_ip', 'victimPort': 'protocol'})
##########
#3. Config - Pick 'A', 'B', or 'C'
# The plotting step reads only the settings that belong to the chosen mode.
mode = 'C'
max_rows = 1000  # upper bound on how many rows are sampled for the plot

### 'A' == mode: passed to graphistry.edges(...).bind(source=..., destination=...)
my_src_col = 'attackerIP'
my_dest_col = 'victimIP'

### 'B' == mode: columns handed to graphistry.hypergraph as the node columns
node_cols = ['attackerIP', 'victimIP', 'victimPort', 'vulnName']

### 'C' == mode: source column -> destination columns, passed as the 'EDGES' option
edges = {
    'attackerIP': ['victimIP', 'victimPort', 'vulnName'],
    'victimIP': ['victimPort'],
    'vulnName': ['victimIP'],
}

### 'B' or 'C' == mode (optional): passed as the hypergraph 'CATEGORIES' option.
# Defined once, using the CSV's own column names. If the rename in the
# "Clean" step is enabled, update these names (and the ones above) to match.
categories = {
    'ip': ['attackerIP', 'victimIP'],
    # 'user': ['owner', 'seller'], ...
}
##########
#4. Plot
# Build the graph for the selected mode from a random sample of at most
# max_rows rows, then render it.
g = None
hg = None
num_rows = min(max_rows, len(df))
sampled_df = df.sample(num_rows)  # sample once; every mode uses the same rows
if mode == 'A':
    # Plain graph: one edge per row between the two configured columns.
    g = graphistry.edges(sampled_df).bind(source=my_src_col, destination=my_dest_col)
elif mode == 'B':
    hg = graphistry.hypergraph(sampled_df, node_cols, opts={'CATEGORIES': categories})
    g = hg['graph']
elif mode == 'C':
    # Node columns are every column named in `edges`, as a source or a
    # destination. dict.fromkeys drops duplicates while keeping first-seen
    # order (Python 3.7+), so the column order is stable between runs.
    # Note: this rebinds the `node_cols` set in the config step.
    nodes = list(edges) + [dest for dests in edges.values() for dest in dests]
    node_cols = list(dict.fromkeys(nodes))
    hg = graphistry.hypergraph(sampled_df, node_cols, direct=True, opts={'CATEGORIES': categories, 'EDGES': edges})
    g = hg['graph']
else:
    # Without this, an unrecognised mode leaves g as None and g.plot()
    # fails with an AttributeError on None.
    raise ValueError("mode must be 'A', 'B', or 'C', got {!r}".format(mode))
g.plot()
Out[79]:
In [ ]: