In [63]:
seperatingLine = "\n########################################################################################################\n"
A single record in a cursor as result of a cypher-query execution contains the informations about a single (or a list of) node(s)/relationship(s), depending on which type the result objects of the query have.
For example the following query:
MATCH (v:VEHICLE:MITSUBISHI) RETURN *
In this case ist the result data (Cursor) a set of node objects. Each row in this cursor has records. A record contains in the top level a key/value map with a key, that called with the same variable-name, such that used in the query for the object (here "v" as variable-name of a vehicle-object). The value of this item represents the node informations and is from Type Node:
('v': (ac742de:AUTO:MITSUBISHI:STROM:VEHICLE {fuelType:"Strom",kw:35,modelDetails:"ELEKTRO 35kW Automatik 4-Sitzer",modelName:"i-Miev",ownershipType:"Langzeitmiete",registrationPlate:"F-R 8009",vehicleID:148221,vin:"JMBLDHA3WBU000341"}))
By an access to the value of the top level map, we access to an object of type Node:
sub = record["v"]
(ac742de:AUTO:MITSUBISHI:STROM:VEHICLE {fuelType:"Strom",kw:35,modelDetails:"ELEKTRO 35kW Automatik 4-Sitzer",modelName:"i-Miev",ownershipType:"Langzeitmiete",registrationPlate:"F-R 8009",vehicleID:148221,vin:"JMBLDHA3WBU000341"})
Because of this object type we can also access to the labels of the node:
record["v"].labels()
SetView({'AUTO', 'MITSUBISHI', 'VEHICLE', 'STROM'})
With a conversion of the node to a dictionary it is possible to select the attributes of the node;
dict(record["v"])
{'ownershipType': 'Langzeitmiete', 'modelName': 'i-Miev', 'modelDetails': 'ELEKTRO 35kW Automatik 4-Sitzer', 'fuelType': 'Strom', 'vin': 'JMBLDHA3WBU000341', 'registrationPlate': 'F-R 8009', 'vehicleID': 148221, 'kw': 35}
Alternatively we can access to the attributes with the following way:
print(record["v"]["modelName"])
i-Miev
For the query MATCH (v:VEHICLE:MITSUBISHI)-[r:WAS_BOOKED_IN]->(s:STATION) RETURN v, r, s it will be returned three objects and one from type Relationship (variable r).
By a relationship its possible to access following informations over the API:
Type of relationship
relationship.type()
-> WAS_BOOKED_IN
All nodes
relationship.nodes()
((a34e56f:AUTO:MITSUBISHI:STROM:VEHICLE {bordComputerType:"Invers BCSA 2006 GPRS",fuelType:"Strom",kw:35,modelDetails:"ELEKTRO 35kW NAVI Automatik 4-Sitzer",modelName:"i-Miev",ownershipType:"Langzeitmiete",registrationPlate:"F-R 8011",vehicleID:148261,vin:"JMBLDHA3WBU000344"}), (f55f604:INACTIVE:STATION:STATIONBASED {city:"Stuttgart",code:"STG",latitude:48.780357360839844,longtitude:9.186469078063965,name:"Parkgarage Staatsgalerie",poiAirport:"Nein",poiLongDistanceTrains:"Nein",poiSuburbanTrains:"Nein",poiUnderground:"Nein",rentalZoneID:401727,type:"stationbased"}))
Start or end node
relationship.start_node()
(a34e56f:AUTO:MITSUBISHI:STROM:VEHICLE {bordComputerType:"Invers BCSA 2006 GPRS",fuelType:"Strom",kw:35,modelDetails:"ELEKTRO 35kW NAVI Automatik 4-Sitzer",modelName:"i-Miev",ownershipType:"Langzeitmiete",registrationPlate:"F-R 8011",vehicleID:148261,vin:"JMBLDHA3WBU000344"})
relationship.end_node()
(f55f604:INACTIVE:STATION:STATIONBASED {city:"Stuttgart",code:"STG",latitude:48.780357360839844,longtitude:9.186469078063965,name:"Parkgarage Staatsgalerie",poiAirport:"Nein",poiLongDistanceTrains:"Nein",poiSuburbanTrains:"Nein",poiUnderground:"Nein",rentalZoneID:401727,type:"stationbased"})
Attributes of the relationship
relationship[times]
28
In [70]:
def printOutNodeInformations(node, singleAttributeName):
print('keys of the node')
print(node.keys())
print('labels of the node')
print(node.labels())
print('single attribute access')
print(node[singleAttributeName])
In [92]:
def printOutRelationshipInformations(relationship, singleAttributeName):
print('type of the relationship')
print(relationship.type())
print('single attribute access')
print(relationship[singleAttributeName])
print('all nodes of relationship')
print(relationship.nodes())
print('start node of relationship')
print(relationship.start_node())
print('end node of relationship')
print(relationship.end_node())
In [388]:
from py2neo import Graph, Path, Subgraph, Node, PropertyDict, Relationship, Walkable, walk
graph = Graph("http://neo4j:neo4jj@localhost:7474/db/data")
query = """
MATCH (v:VEHICLE:MITSUBISHI) RETURN *
"""
cursor = graph.run(query)
for record in cursor:
print('raw view of a record:')
print(record)
print('value in the records top level map:')
print(record["v"])
print('dictionary representation of the node attributes')
print(dict(record["v"]))
print('dictionary keys (node attribute names)')
print('%s Node informations (VEHICLE) %s' %(seperatingLine, seperatingLine))
node = record["v"]
printOutNodeInformations(node, "modelName")
In [389]:
query = """
MATCH (v:VEHICLE:MITSUBISHI)-[r:WAS_BOOKED_IN]->(s:STATION) RETURN v, r, s
"""
cursor = graph.run(query)
# print(cursor.data())
for record in cursor:
print('%s Node informations (VEHICLE) %s' %(seperatingLine, seperatingLine))
vehicle = record["v"]
printOutNodeInformations(vehicle, "modelName")
print('%s Node informations (STATION) %s' %(seperatingLine, seperatingLine))
station = record["s"]
printOutNodeInformations(station, "name")
print('%s Relationship informations (WAS_BOOKED_IN) %s' %(seperatingLine, seperatingLine))
station = record["r"]
printOutRelationshipInformations(station, "times")
In the library networkX is every graph a set of edges, where each one of them connects two nodes. Every edge or node can also have attributes. In the following we prepare the data in the dataframe - as result of a trivial query - for this view about the rental-zone/vehicle data:
dfn['VEHICLE_ID'] = dfn.apply({'v' : lambda x: x["vehicleID"]})
dfn['RENTALZONE_ID'] = dfn.apply({'s' : lambda x: x["rentalZoneID"]})
dfn['vModelName'] = dfn.apply({'v' : lambda x: x["modelName"]})
dfn['sName'] = dfn.apply({'s' : lambda x: x["name"]})
dfn['TIMES'] = dfn.apply({'r' : lambda x: x["times"]})
Alternatively we can also access to the needed informations in this way:
dfn["v"]["vehicleID"]
dfn["s"]["rentalZoneID"]
dfn["r"]["times"]
NetworkX provides an easy way to import the data in a dataframe as edges and nodes:
G2=nx.from_pandas_dataframe(dfn, 'VEHICLE_ID', 'RENTALZONE_ID', ['TIMES'])
The disadvantage of this approach is, that we can't import node attributes over this interface. This will also don't work, if we try to import the nodes over a explicit function,
def addVRZNodesToGraph(row, graph):
graph.add_node(row["RENTALZONE_ID"],code=str(row["s"]["code"]))
graph.add_node(row["VEHICLE_ID"],vin=str(row["v"]["vin"]))
return graph
...
... and import on this basis the data to the graph:
G2=nx.Graph()
dfn.apply(addVRZNodesToGraph, axis=1, graph=G2)
print (G2.nodes(data=True))
G2=nx.from_pandas_dataframe(dfn, 'VEHICLE_ID', 'RENTALZONE_ID', ['TIMES'])
print (G.edges(data=True))
print (G.nodes(data=True))
If we look at the result of the above code, we find out, that the nodes were overridden within the import operation:
########################################################################################################
Node informations in the graph before import
########################################################################################################
[(403352, {'code': 'WIL-ELEKTRO'}), (148221, {'vin': 'JMBLDHA3WBU000341'}), (404993, {'code': 'BF-Elektro'}), (148261, {'vin': 'JMBLDHA3WBU000344'}), (401727, {'code': 'STG'})]
########################################################################################################
Node informations in the graph after import
########################################################################################################
[(148221, {}), (403352, {}), (148261, {}), (404993, {}), (401727, {})]
########################################################################################################
Edge informations in the graph after import
########################################################################################################
[(148221, 403352, {'TIMES': '791'}), (148261, 404993, {'TIMES': '245'}), (148261, 401727, {'TIMES': '28'})]
Because of this side effect we use only the way to add nodes and edges manually to the graph:
def addVRZEdgesToGraph(row, graph, relationshipType):
graph.add_edge(row["VEHICLE_ID"], row["RENTALZONE_ID"],{'type': relationshipType, 'times': row["TIMES"]})
return graph
...
G=nx.Graph()
dfn.apply(addVRZNodesToGraph, axis=1, graph=G)
dfn.apply(addVRZEdgesToGraph, axis=1, graph=G, relationshipType='WAS_BOOKED_IN')
...
print('%s Node informations in the graph after import %s' %(seperatingLine, seperatingLine))
print (G.nodes(data=True))
print('%s Edge informations in the graph after import %s' %(seperatingLine, seperatingLine))
print (G.edges(data=True))
The output of the code above is:
########################################################################################################
Node informations in the graph after manual import
########################################################################################################
[(403352, {'code': 'WIL-ELEKTRO'}), (148221, {'vin': 'JMBLDHA3WBU000341'}), (404993, {'code': 'BF-Elektro'}), (148261, {'vin': 'JMBLDHA3WBU000344'}), (401727, {'code': 'STG'})]
########################################################################################################
Edge informations in the graph after manual import
########################################################################################################
[(403352, 148221, {'type': 'WAS_BOOKED_IN', 'times': '791'}), (404993, 148261, {'type': 'WAS_BOOKED_IN', 'times': '245'}), (148261, 401727, {'type': 'WAS_BOOKED_IN', 'times': '28'})]
In our case we'll draw also labels for the nodes and edges. NetworkX provides in the functionality a way to pass the labels of nodes and edges as parameter. For the node labels most them exist as dict and for edges as simple list.
Therefore we prepare the labels on the basis of the existing data as follows.
We've two different types of nodes: Vehicle and rental zone. For this reason we must collect different informations as values for keys (IDs) from both entities. The id-namespace of both entities aren't overlapping in our case. We bring the IDs of both data in the first step together:
dfnnl = dfn.drop(['r', 's', 'v', 'TIMES', 'vModelName', 'sName'], axis=1).copy(True)
dfnnl = dfnnl.reset_index()
dfnnl = dfnnl.drop("index", axis=1)
dfnnl = dfnnl.stack()
dfnnl = dfnnl.reset_index()
dfnnl = dfnnl.rename_axis({"level_0": "levelName", "level_1": "columnName", 0: "ID"}, axis="columns")
dfnnl["columnName"] = dfnnl["columnName"].astype(str)
In the second step we append a label column to the data frame with the specific label-information for each entity (vehicle or rental zone):
def produceLabelInformation(row, orgData):
label = " "
if str(row["columnName"]) == 'VEHICLE_ID':
label = orgData.loc[(orgData['VEHICLE_ID'] == row["ID"])]
.drop_duplicates(subset=["vModelName"],keep="first")["vModelName"].values[0]
else:
label = orgData.loc[(orgData['RENTALZONE_ID'] == row["ID"])]
.drop_duplicates(subset=["sName"],keep="first")["sName"].values[0]
return label
...
dfnnl["LABEL"] = dfnnl.apply(produceLabelInformation, axis=1, orgData=dfn)
print('%s IDs from vehicles and rental zones with label informations %s' %(seperatingLine, seperatingLine))
print(dfnnl)
The code above produces following output:
########################################################################################################
IDs from vehicles and rental zones with label informations
########################################################################################################
levelName columnName ID LABEL
0 0 VEHICLE_ID 148221 i-Miev
1 0 RENTALZONE_ID 403352 Wilhelmstraße-ELEKTRO
2 1 VEHICLE_ID 148261 i-Miev
3 1 RENTALZONE_ID 404993 Bahnhof-ELEKTRO
5 2 RENTALZONE_ID 401727 Parkgarage Staatsgalerie
Because of the uniformity in the relationships between the nodes, we need only a constant label for each edge. Therefore it's enough to produce a list containing string members for a single label:
def prepareEdgeLabelsForGraph(dfn):
dfnel = dfn.apply({'r' : lambda x: 'WAS_BOOKED_IN'})
print('%s Edge labels %s' %(seperatingLine, seperatingLine))
dfnel = dfnel.reset_index()
dfnel = dfnel.drop("index", axis=1)
dfnel = dfnel.rename_axis({"r": "edgeLabel"}, axis="columns")
print(dfnel)
return dfnel
...
dfnel = prepareEdgeLabelsForGraph(dfn)
The code above produces following output:
########################################################################################################
Edge labels
########################################################################################################
edgeLabel
0 WAS_BOOKED_IN
1 WAS_BOOKED_IN
2 WAS_BOOKED_IN
We can now pass all informations together (graph and labels) to a simple function, that draws a graph as image:
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
%matplotlib inline
def draw_graph(graph, layout, edgeLabels, nodeLabels, name):
edge_labels = dict(zip(graph.edges(), edgeLabels))
G = graph
graph_pos = layout
plt.figure(3,figsize=(30,30))
# draw nodes, edges and labels
nx.draw_networkx_nodes(G, graph_pos, node_size=15000, node_color='blue', alpha=0.3)
# we can now added edge thickness and edge color
nx.draw_networkx_edges(G, graph_pos, width=5, alpha=0.3, edge_color='green')
nx.draw_networkx_labels(G, graph_pos, nodeLabels, font_size=16, font_family='sans-serif')
nx.draw_networkx_edge_labels(G, graph_pos, font_size=16, edge_labels=edge_labels)
plt.savefig("graph_" + name + ".png",
dpi=100,
facecolor='w',
edgecolor='w',
orientation='portrait',
papertype=None,
format=None,
transparent=False,
bbox_inches=None,
pad_inches=0.1)
plt.show()
...
draw_graph(G, nx.spring_layout(G, 2, 1), edgeLabels, nodeLabels, "spring")
In [390]:
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
%matplotlib inline
def draw_graph(graph, layout, edgeLabels, nodeLabels, name):
edge_labels = dict(zip(graph.edges(), edgeLabels))
G = graph
graph_pos = layout
plt.figure(3,figsize=(30,30))
# draw nodes, edges and labels
nx.draw_networkx_nodes(G, graph_pos, node_size=15000, node_color='blue', alpha=0.3)
# we can now added edge thickness and edge color
nx.draw_networkx_edges(G, graph_pos, width=5, alpha=0.3, edge_color='green')
nx.draw_networkx_labels(G, graph_pos, nodeLabels, font_size=16, font_family='sans-serif')
nx.draw_networkx_edge_labels(G, graph_pos, font_size=16, edge_labels=edge_labels)
plt.savefig("graph_" + name + ".png",
dpi=100,
facecolor='w',
edgecolor='w',
orientation='portrait',
papertype=None,
format=None,
transparent=False,
bbox_inches=None,
pad_inches=0.1)
plt.show()
In [391]:
def addVRZNodesToGraph(row, graph):
graph.add_node(row["RENTALZONE_ID"],code=str(row["s"]["code"]))
graph.add_node(row["VEHICLE_ID"],vin=str(row["v"]["vin"]))
return graph
In [392]:
def addVRZEdgesToGraph(row, graph, relationshipType):
graph.add_edge(row["VEHICLE_ID"], row["RENTALZONE_ID"],{'type': relationshipType, 'times': row["TIMES"]})
return graph
In [401]:
def produceLabelInformation(row, orgData):
label = " "
if str(row["columnName"]) == 'VEHICLE_ID':
label = orgData.loc[(orgData['VEHICLE_ID'] == row["ID"])]\
.drop_duplicates(subset=["vModelName"],keep="first")["vModelName"].values[0]
else:
label = orgData.loc[(orgData['RENTALZONE_ID'] == row["ID"])]\
.drop_duplicates(subset=["sName"],keep="first")["sName"].values[0]
return label
In [402]:
def prepareDataForGraph(df):
dfn = df.reset_index()
dfn = dfn.drop("index", axis=1)
print('%s Original dataframe without index %s' %(seperatingLine, seperatingLine))
print(dfn.head())
dfn['VEHICLE_ID'] = dfn.apply({'v' : lambda x: x["vehicleID"]})
dfn['RENTALZONE_ID'] = dfn.apply({'s' : lambda x: x["rentalZoneID"]})
dfn['vModelName'] = dfn.apply({'v' : lambda x: x["modelName"]})
dfn['sName'] = dfn.apply({'s' : lambda x: x["name"]})
dfn['TIMES'] = dfn.apply({'r' : lambda x: x["times"]})
print('%s Extended dataframe %s' %(seperatingLine, seperatingLine))
print(dfn)
return dfn
In [403]:
def prepareNodeLabelsForGraph(dfn):
dfnnl = dfn.drop(['r', 's', 'v', 'TIMES', 'vModelName', 'sName'], axis=1).copy(True)
dfnnl = dfnnl.reset_index()
dfnnl = dfnnl.drop("index", axis=1)
dfnnl = dfnnl.stack()
dfnnl = dfnnl.reset_index()
dfnnl = dfnnl.rename_axis({"level_0": "levelName", "level_1": "columnName", 0: "ID"}, axis="columns")
dfnnl["columnName"] = dfnnl["columnName"].astype(str)
dfnnl = dfnnl.drop_duplicates(subset=["ID"],keep="first")
# dfnnl.append(dfn.get(['RENTALZONE_ID']).copy(True), ignore_index=True)
print('%s Stacked ids from vehicles and rental zones %s' %(seperatingLine, seperatingLine))
print(dfnnl)
dfnnl["LABEL"] = dfnnl.apply(produceLabelInformation, axis=1, orgData=dfn)
print('%s IDs from vehicles and rental zones with label informations %s' %(seperatingLine, seperatingLine))
print(dfnnl)
return dfnnl
In [404]:
def prepareEdgeLabelsForGraph(dfn):
dfnel = dfn.apply({'r' : lambda x: 'WAS_BOOKED_IN'})
print('%s Edge labels %s' %(seperatingLine, seperatingLine))
dfnel = dfnel.reset_index()
dfnel = dfnel.drop("index", axis=1)
dfnel = dfnel.rename_axis({"r": "edgeLabel"}, axis="columns")
print(dfnel)
return dfnel
In [405]:
#
# Not recommend way to hold data to a graph
#
def importDataToGraph(dfn):
G2=nx.Graph()
dfn.apply(addVRZNodesToGraph, axis=1, graph=G2)
print('%s Node informations in the graph before import %s' %(seperatingLine, seperatingLine))
print (G2.nodes(data=True))
G2=nx.from_pandas_dataframe(dfn, 'VEHICLE_ID', 'RENTALZONE_ID', ['TIMES'])
print('%s Node informations in the graph after import %s' %(seperatingLine, seperatingLine))
print (G2.nodes(data=True))
print('%s Edge informations in the graph after import %s' %(seperatingLine, seperatingLine))
print (G2.edges(data=True))
return G2
In [406]:
#
# Recommend way to hold data to a graph
#
def addDataToGraph(dfn):
G=nx.Graph()
dfn.apply(addVRZNodesToGraph, axis=1, graph=G)
dfn.apply(addVRZEdgesToGraph, axis=1, graph=G, relationshipType='WAS_BOOKED_IN')
print('%s Node informations in the graph after manual import %s' %(seperatingLine, seperatingLine))
print (G.nodes(data=True))
print('%s Edge informations in the graph after manual import %s' %(seperatingLine, seperatingLine))
print (G.edges(data=True))
return G
In [407]:
data = graph.data(query)
df = pd.DataFrame(data)
dfn = prepareDataForGraph(df)
dfnnl = prepareNodeLabelsForGraph(dfn)
dfnel = prepareEdgeLabelsForGraph(dfn)
#
# Recommend way to hold data to a graph
#
G = addDataToGraph(dfn)
#
# Not recommend way to hold data to a graph
#
G2 = importDataToGraph(dfn)
In [408]:
nodeLabels = dict(zip(dfnnl["ID"], dfnnl["LABEL"]))
edgeLabels = dfnel["edgeLabel"].astype(str)
print(nodeLabels)
print(edgeLabels)
draw_graph(G, nx.spring_layout(G, 2, 1), edgeLabels, nodeLabels, "spring")
In [ ]: