In [1]:
import logging
# Configure the root logger so that INFO-and-above records are emitted.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Emit the confirmation at INFO: a DEBUG record would be filtered out by the
# INFO threshold set above and the message would never be shown.
logging.info("Logger active")
In [2]:
from pyvast import VAST
In [10]:
import pyarrow
In [11]:
# Create the VAST client used by every later cell.
# NOTE(review): binary="vast" presumably names the executable to invoke and is
# resolved via PATH — confirm against the pyvast documentation.
vast = VAST(binary="vast")
In [12]:
# Check the connection before running any queries. This is a top-level
# `await`, so it relies on the notebook kernel allowing await at cell level.
await vast.test_connection()
Out[12]:
In [13]:
async def query(q):
    """Run the VAST export query `q` and return the full result.

    The export is requested in Arrow format; the captured standard output is
    opened as an Arrow IPC stream and read to the end. Whatever the command
    wrote to standard error is not used.

    Args:
        q: Query expression passed to the VAST export.

    Returns:
        The object produced by reading the whole Arrow stream.
    """
    arrow_bytes, _unused_stderr = await vast.export().arrow(q).exec()
    return pyarrow.ipc.open_stream(arrow_bytes).read_all()
In [14]:
# Select all events of type zeek.conn
# NOTE(review): the text between the slashes looks like an unanchored regular
# expression, so "." may match any character and longer type names may match
# too — confirm against the VAST query language.
data = await query("#type ~ /zeek.conn/")
In [15]:
# Show the dimensions of the query result.
data.shape
Out[15]:
In [16]:
# Show the schema (field names and types) of the query result.
data.schema
Out[16]:
In [17]:
import pandas
# Convert the Arrow query result into a pandas DataFrame for the analysis below.
df = data.to_pandas()
In [18]:
# Preview the first rows of the converted data.
df.head()
Out[18]:
In [19]:
# Summary statistics of the converted data.
df.describe()
Out[19]:
In [20]:
import ipaddress
def unpack_ip(buffer):
    """Decode a packed, big-endian IP address into an ``ipaddress`` object.

    Args:
        buffer: Bytes-like object holding the address in network byte order.
            Normally a 16-byte (128-bit) IPv6 address; a buffer of four bytes
            or fewer is treated as a plain IPv4 address.

    Returns:
        An ``ipaddress.IPv4Address`` for short (<= 4 byte) buffers and for
        IPv4-mapped IPv6 addresses (``::ffff:a.b.c.d``); otherwise an
        ``ipaddress.IPv6Address``.

    Raises:
        ipaddress.AddressValueError: If the decoded integer does not fit the
            selected address family (e.g. the buffer is longer than 16 bytes).
    """
    num = int.from_bytes(buffer, byteorder='big')
    if len(buffer) <= 4:
        return ipaddress.IPv4Address(num)
    # Convert IPv4-mapped addresses back to regular IPv4.
    # https://tools.ietf.org/html/rfc4291#section-2.5.5.2
    if (num >> 32) == 0xFFFF:
        return ipaddress.IPv4Address(num & 0xFFFFFFFF)
    # Choose the class explicitly: ipaddress.ip_address() treats any integer
    # below 2**32 as IPv4, which would turn e.g. ::1 into 0.0.0.1.
    return ipaddress.IPv6Address(num)
In [21]:
import networkx as nx
import matplotlib.pyplot as plt
# Default figure size (width, height in inches) for every plot drawn below.
plt.rcParams['figure.figsize'] = [15, 15]
In [22]:
# Column names holding the two endpoints of each connection.
source = 'id.orig_h'
dest = 'id.resp_h'
# One row per (source, dest) pair, with 'weight' counting the rows in that
# pair; both endpoint columns are then decoded with unpack_ip.
edgelist = (
    df.groupby([source, dest])
    .size()
    .reset_index(name='weight')
    .assign(**{
        source: lambda frame: frame[source].apply(unpack_ip),
        dest: lambda frame: frame[dest].apply(unpack_ip),
    })
)
In [23]:
def draw(edgelist, with_labels=False, weight_scale=100):
    """Draw the connection graph described by `edgelist`.

    The graph is built from the module-level `source` and `dest` column names
    and the 'weight' column, laid out with the Kamada-Kawai algorithm (edge
    weights are ignored for the layout), and rendered with networkx.

    Args:
        edgelist: DataFrame with the `source`, `dest` and 'weight' columns.
        with_labels: When true, also draw a label on every node.
        weight_scale: Divisor applied to each edge weight to obtain its line
            width. Defaults to 100.

    Returns:
        None. Nothing is drawn when `edgelist` yields a graph without edges.
    """
    G = nx.from_pandas_edgelist(edgelist, source=source, target=dest, edge_attr='weight')
    # An edge-less graph has nothing to render; return early instead of
    # failing later when the edge/weight pairs are split apart.
    if G.number_of_edges() == 0:
        return
    pos = nx.kamada_kawai_layout(G, weight=None)
    edge_weights = nx.get_edge_attributes(G, 'weight')
    edges = list(edge_weights)
    widths = [weight / weight_scale for weight in edge_weights.values()]
    nx.draw_networkx_nodes(G, pos=pos)
    nx.draw_networkx_edges(G, pos=pos, edgelist=edges, width=widths)
    if with_labels:
        nx.draw_networkx_labels(G, pos=pos)
In [24]:
# Draw the complete graph, without node labels.
draw(edgelist)
In [25]:
# Keep only the edges whose weight exceeds 8, then draw them with labels.
pruned = edgelist.loc[edgelist['weight'].gt(8)]
draw(pruned, with_labels=True)
In [26]:
# Keep only the edges whose weight exceeds 16, then draw them with labels.
pruned = edgelist.loc[edgelist['weight'].gt(16)]
draw(pruned, with_labels=True)
In [27]:
# Keep only the edges whose weight exceeds 24, then draw them with labels.
pruned = edgelist.loc[edgelist['weight'].gt(24)]
draw(pruned, with_labels=True)
In [ ]:
In [ ]: