In [ ]:
# Keyword arguments intended for graphistry.register(**GRAPHISTRY_CONFIG).
# Replace 'MY_API_KEY' and the server name with your own values.
GRAPHISTRY_CONFIG = dict(
    key='MY_API_KEY',
    server='labs.graphistry.com',
    protocol='https',
    api=2,
)

# 'fqdn' is used as the URL prefix (scheme, host and port) for every request
# sent to the Tiger server.
TIGER_CONFIG = dict(fqdn='http://MY_TIGER_SERVER:9000')
In [ ]:
#!pip install graphistry
In [ ]:
import pandas as pd
import requests
In [ ]:
### COMMON ISSUES: wrong server, wrong key, wrong protocol, or a firewall blocking notebook -> graphistry traffic
import graphistry
# If registration fails, edit GRAPHISTRY_CONFIG in the configuration cell and retry.
# Uncomment the next line to register with the Graphistry server:
#graphistry.register(**GRAPHISTRY_CONFIG)
# Display the version of the imported graphistry package.
graphistry.__version__
In [ ]:
### EXPECTED RESULT: Visualization of a curved triangle
### COMMON ISSUES: Blank box as HTTPS not configured on Graphistry server so browser disallows iframe. Try plot(render=False)
# Three edges (0->1, 1->2, 2->0) with one attribute column 'a'.
g = (
    graphistry
    .edges(pd.DataFrame({'s': [0, 1, 2], 'd': [1, 2, 0], 'a': ['quick', 'brown', 'fox']}))
    .bind(source='s', destination='d')
)
g.plot()  # g.plot(render=False)
In [ ]:
### EXPECTED RESULT: {'GET /statistics': ...}
### COMMON ISSUES: returns '{}' (may need to run a few times); wrong fqdn; firewall issues; ...
# The timeout makes a wrong fqdn or a blocked port raise promptly instead of
# leaving the cell hanging indefinitely.
requests.get(TIGER_CONFIG['fqdn'] + '/statistics?seconds=60', timeout=10).json()
In [ ]:
# string -> dict
def query_raw(query_string, timeout=(10, None)):
    """Send a GET request for one query and return the decoded JSON body.

    Args:
        query_string: Text appended after ``/query/`` on
            ``TIGER_CONFIG['fqdn']`` (query name plus any ``?key=value``
            parameters).
        timeout: Forwarded to ``requests.get``. The default limits only the
            connection phase to 10 seconds, so an unreachable host or a
            firewalled port raises instead of hanging, while the response
            itself may take as long as the query needs.

    Returns:
        The response body parsed as JSON.
    """
    url = TIGER_CONFIG['fqdn'] + "/query/" + query_string
    r = requests.get(url, timeout=timeout)
    return r.json()
def flatten(lst_of_lst):
    """Flatten one level of nesting when the first element is a list.

    Only the first element is inspected: if it is a list, every element is
    iterated and its items are concatenated; otherwise the input is returned
    unchanged. Empty input is returned unchanged without any message.

    This is a best-effort helper: when the input cannot be measured, indexed
    or iterated, it prints ``fail`` followed by the input and returns the
    input unchanged instead of raising.

    Args:
        lst_of_lst: A list of lists, or an already flat list.

    Returns:
        A new flat list, or ``lst_of_lst`` itself when nothing was flattened.
    """
    try:
        # Empty input has no first element to inspect; it is already flat.
        if len(lst_of_lst) == 0:
            return lst_of_lst
        if isinstance(lst_of_lst[0], list):
            return [item for sublist in lst_of_lst for item in sublist]
        return lst_of_lst
    except (TypeError, IndexError, KeyError):
        print('fail', lst_of_lst)
        return lst_of_lst
#str * dict -> dict
def named_edge_to_record(name, edge):
    """Flatten one edge dict into a single-level record tagged with ``name``.

    Top-level values that are not dicts are copied first, then ``'type'`` is
    set to ``name``. Dict-valued fields are merged in afterwards:

    * exactly one nested dict: its keys are copied without a prefix (and so
      overwrite any record key of the same name);
    * several nested dicts: each key is stored as ``<field>_<key>`` so that
      keys from different nested dicts cannot collide.

    Args:
        name: Value stored under the ``'type'`` key of the record.
        edge: Dict describing one edge; values may themselves be dicts.

    Returns:
        A new flat dict; ``edge`` is not modified.
    """
    record = {k: v for k, v in edge.items() if not isinstance(v, dict)}
    record['type'] = name
    nested = [k for k, v in edge.items() if isinstance(v, dict)]
    if len(nested) == 1:
        record.update(edge[nested[0]])
    else:
        for prefix in nested:
            # `prefix` is already the field name, so index `edge` with it
            # directly (`nested` is a list and cannot be indexed by a string).
            for k, v in edge[prefix].items():
                record[prefix + "_" + k] = v
    return record
def query(query_string):
    """Run a query and return its list-valued results as one DataFrame.

    For every object in the response's ``'results'`` list, each key whose
    value is a list is kept (a later object with the same key replaces the
    earlier one) and flattened by one level. Every item is then converted
    with ``named_edge_to_record`` using its key as the record type, and all
    records are gathered into a single DataFrame. The record count is
    printed before returning.
    """
    payload = query_raw(query_string)['results']
    by_name = {}
    for entry in payload:
        for name, value in entry.items():
            if type(value) is list:
                by_name[name] = flatten(value)
    records = flatten([
        [named_edge_to_record(name, item) for item in items]
        for name, items in by_name.items()
    ])
    print('# results', len(records))
    return pd.DataFrame(records)
def graph_edges(edges):
    """Bind ``from_id``/``to_id`` as source/destination and attach ``edges``."""
    binding = graphistry.bind(source='from_id', destination='to_id')
    return binding.edges(edges)
In [ ]:
# Run the connection_mining query and load the resulting rows into a DataFrame.
df = query("connection_mining?A=1&B=10&k=1000")
print('rows: ', len(df))
# Preview up to three random rows. Capping at len(df) avoids the ValueError
# that sample(3) raises when the query returns fewer than three rows.
df.sample(min(3, len(df)))
In [ ]:
### EXPECTED RESULT: GRAPH VISUALIZATION
### COMMON ISSUES: try inspecting query_raw('connection_mining?A=1&B=10&k=2')
# Fetch the query results, bind from_id/to_id as source/destination, and plot.
graph_edges(query("connection_mining?A=1&B=10&k=1000")).plot()
Open Visual guide in a separate tab
- point:degree histogram on the bottom right: press each button and see what it does.
- Scene settings -> Point size slider.
- Add a histogram for point:_title.
- Categorical vs. gradient: what is the difference?
- edge:from_type
- Cull isolated nodes to remove noisy nodes with no edges left.
- Filters -> edge:e_type -> edge:e_type ilike "%phone%"
- Toggle visual clustering on, and then off when stabilized.
- 135 area code

If you have a CSV and not a graph, hypergraphs are a quick way to analyze the data as a graph. They turn each entity into a node and link entities together if they appear in the same row of the CSV, e.g., linking together a phone and an address. The linking is indirect: a node is created for the row, and the row node is connected to each entity mentioned in it.
In [ ]:
# Load the sample transactions CSV from the pygraphistry repository on GitHub.
df = pd.read_csv('https://github.com/graphistry/pygraphistry/raw/master/demos/data/transactions.csv')
# Preview ten randomly chosen rows.
df.sample(10)
In [ ]:
# Build a hypergraph from the first 1000 rows, passing three columns as entity_types.
hg = graphistry.hypergraph(df[:1000], entity_types=['Source', 'Destination', 'Transaction ID'])
# The result is keyed by part name; list which parts are available.
print('Hypergraph parts', hg.keys())
# Plot the part stored under 'graph'.
hg['graph'].plot()
In [ ]:
# Print the built-in documentation for graphistry.hypergraph.
help(graphistry.hypergraph)
In [ ]:
# Run two queries (B=10 and B=12) and tag each result with the query it came from.
df1 = query("connection_mining?A=1&B=10&k=1000").assign(data_source='query1')
df2 = query("connection_mining?A=1&B=12&k=1000").assign(data_source='query2')
# Stack both results into one edge table with a fresh consecutive index.
edges2 = pd.concat([df1, df2], ignore_index=True)
graph_edges(edges2).plot()
In [ ]:
conn = query("connection_mining?A=1&B=10&k=1000")
# Extract each edge's endpoints as (id, node_type) rows: first the source
# side (from_id/from_type), then the destination side (to_id/to_type).
froms = conn.rename(columns={'from_id': 'id', 'from_type': 'node_type'})[['id', 'node_type']]
tos = conn.rename(columns={'to_id': 'id', 'to_type': 'node_type'})[['id', 'node_type']]
# Keep one row per distinct (id, node_type) pair and drop rows with missing values.
nodes = pd.concat([froms, tos], ignore_index=True).drop_duplicates().dropna()
# NOTE(review): sample(3) raises ValueError if fewer than three nodes exist.
nodes.sample(3)
In [ ]:
# List the distinct node types present in the node table.
nodes['node_type'].unique()
In [ ]:
# Palette reference: https://labs.graphistry.com/docs/docs/palette.html
# Integer color code per node type (presumably indexes into the palette linked above).
type2color = {
'phone_call': 0,
'citizen': 1,
'bank_account': 2,
'phone_number': 3,
'bank_transfer_event': 4,
'hotel_room_event': 5
}
# Look up each node's color from its type; a type missing from type2color raises KeyError.
nodes['color'] = nodes['node_type'].apply(lambda type_str: type2color[type_str])
# NOTE(review): sample(3) raises ValueError if fewer than three nodes exist.
nodes.sample(3)
In [ ]:
# Start from the edge table, with from_id/to_id as source/destination.
g = graphistry.bind(source='from_id', destination='to_id').edges(conn)
# Attach the node table: 'id' identifies each node and 'color' sets its point color.
g = g.bind(node='id', point_color='color').nodes(nodes)
# Pass a workbook name as a URL parameter so the session can be saved.
g = g.settings(url_params={'workbook': 'my_workbook1'})
g.plot()
In [ ]: