Spigo System Architecture


In [8]:
import pandas as pd
import graphistry
# Uncomment the next line and supply your own API key before plotting
# (request a key at https://www.graphistry.com/api-request):
# graphistry.register('PUT API KEY HERE', server='labs.graphistry.com')

ETL: JSON URL -> Pandas


In [9]:
import json
import urllib2

# Download the Spigo cassandra9 JSON dump and split the records under its
# u'graph' key into a node table and an edge table.
SPIGO_JSON_URL = 'https://raw.githubusercontent.com/adrianco/spigo/master/json/cassandra9.json'

# The urllib2 response object cannot be used in a `with` statement, so close
# it explicitly -- even when JSON parsing raises -- to avoid leaking the socket.
response = urllib2.urlopen(SPIGO_JSON_URL)
try:
    data = json.load(response)[u'graph']
finally:
    response.close()

# Records carrying a u'node' key go to `nodes`; records carrying a u'edge'
# key go to `edges`.
nodes = pd.DataFrame([x for x in data if u'node' in x])
edges = pd.DataFrame([x for x in data if u'edge' in x])

# Preview the first three raw records.
data[:3]


Out[9]:
[{u'metadata': u'IP/54.198.0.1',
  u'node': u'cassandra.us-east-1.zoneA.cassandra00',
  u'package': u'priamCassandra',
  u'timestamp': u'2016-04-17T13:38:11.415643297-07:00'},
 {u'metadata': u'IP/54.221.0.1',
  u'node': u'cassandra.us-east-1.zoneB.cassandra01',
  u'package': u'priamCassandra',
  u'timestamp': u'2016-04-17T13:38:11.415835924-07:00'},
 {u'edge': u'e1',
  u'source': u'cassandra.us-east-1.zoneA.cassandra00',
  u'target': u'cassandra.us-east-1.zoneB.cassandra01',
  u'timestamp': u'2016-04-17T13:38:11.415878501-07:00'}]

In [10]:
# Preview the first three rows of the node table.
nodes.head(3)


Out[10]:
metadata node package timestamp
0 IP/54.198.0.1 cassandra.us-east-1.zoneA.cassandra00 priamCassandra 2016-04-17T13:38:11.415643297-07:00
1 IP/54.221.0.1 cassandra.us-east-1.zoneB.cassandra01 priamCassandra 2016-04-17T13:38:11.415835924-07:00
2 IP/50.19.0.1 cassandra.us-east-1.zoneC.cassandra02 priamCassandra 2016-04-17T13:38:11.416041115-07:00

In [11]:
# Preview the first three rows of the edge table.
edges.head(3)


Out[11]:
edge source target timestamp
0 e1 cassandra.us-east-1.zoneA.cassandra00 cassandra.us-east-1.zoneB.cassandra01 2016-04-17T13:38:11.415878501-07:00
1 e2 cassandra.us-east-1.zoneB.cassandra01 cassandra.us-east-1.zoneA.cassandra00 2016-04-17T13:38:11.415984352-07:00
2 e3 cassandra.us-east-1.zoneC.cassandra02 cassandra.us-east-1.zoneB.cassandra01 2016-04-17T13:38:11.416201946-07:00

Plot!


In [12]:
# Edges run from the 'source' column to the 'target' column; the 'node'
# column identifies each row of the node table.
g = graphistry.bind(source='source', destination='target', node='node')

# Attach both tables to the bound plotter, then render it.
bound_plotter = g.nodes(nodes).edges(edges)
bound_plotter.plot()


Out[12]:

In [13]:
# Stack node and edge records into a single table with a fresh 0..n-1 index;
# columns missing from one kind of record are left empty by the concat.
events = pd.concat([nodes, edges], ignore_index=True)

# Columns passed to the hypergraph builder as its entity-type list.
entity_columns = ['metadata', 'node', 'package', 'source', 'target']

# Puts 'node', 'source' and 'target' under one 'service' category.
# NOTE(review): presumably this makes equal values across those columns map
# to the same entity -- confirm against the graphistry hypergraph docs.
hypergraph_opts = {
    'CATEGORIES': {
        'service': ['node', 'source', 'target']
    }
}

hg = graphistry.hypergraph(
    events,
    entity_columns,
    direct=True,
    opts=hypergraph_opts)
hg['graph'].plot()


('# links', 12627)
('# events', 11893)
('# attrib entities', 740)
Out[13]:

In [ ]: