In [1]:
import json
import os

from regraph import NXGraph, TypedNeo4jGraph, Rule
from regraph import plot_rule
from regraph.attribute_sets import IntegerSet, RegexSet
In [2]:
%matplotlib inline
In [3]:
# Schema graph: the node types available to the data and the edge types
# allowed between them, each with the attribute values it admits.
schema_nodes = [
    ("Person", dict(age=IntegerSet.universal())),
    ("Organization", dict(location=RegexSet.universal())),
]
schema_edges = [
    ("Person", "Person", dict(type={"friend", "parent", "colleague"})),
    # Organization-to-organization edges are declared without attributes.
    ("Organization", "Organization"),
    ("Person", "Organization", dict(type={"works_in", "studies_in"})),
]
# Data graph: four people and three organizations, plus the relations
# between them.
data_nodes = [
    ("Alice", dict(age=15)),
    "Bob",  # bare node id: Bob has no attributes (in particular, no age)
    ("Eric", dict(age=45)),
    ("Sandra", dict(age=47)),
    ("ENS Lyon", dict(location="Lyon")),
    ("UN", dict(location="Geneva")),
    ("INTERPOL", dict(location="Lyon")),
]
data_edges = [
    ("Alice", "Bob", dict(type="friend")),
    ("Bob", "Alice", dict(type="friend")),
    ("Sandra", "Eric", dict(type="friend")),
    ("Sandra", "Bob", dict(type="parent")),
    ("Eric", "Alice", dict(type="parent")),
    ("Eric", "UN"),  # edge without attributes
    ("Eric", "Sandra", dict(type="colleague")),
]
# Typing of the data by the schema: every data node is mapped to the schema
# node that types it (people first, then organizations).
data_typing = {
    **dict.fromkeys(("Alice", "Bob", "Eric", "Sandra"), "Person"),
    **dict.fromkeys(("ENS Lyon", "UN", "INTERPOL"), "Organization"),
}
In [4]:
# Create a schema-aware PG (clear the db if already exists).
# Connection settings are read from the environment so that real credentials
# never have to be written into the notebook; the fallbacks are the local
# defaults this tutorial was written against.
graph = TypedNeo4jGraph(
    uri=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    user=os.environ.get("NEO4J_USER", "neo4j"),
    password=os.environ.get("NEO4J_PASSWORD", "admin"),
    data_graph={"nodes": data_nodes, "edges": data_edges},
    schema_graph={"nodes": schema_nodes, "edges": schema_edges},
    typing=data_typing,
    clear=True)
In [5]:
# Dump the three parts of the typed graph: the schema, the data, and the
# typing that maps data nodes to schema nodes.
schema_obj = graph.get_schema()
print("Schema object: ", type(schema_obj))
schema_node_list = graph.get_schema_nodes()
print("Schema nodes: ", schema_node_list)
schema_edge_list = graph.get_schema_edges()
print("Schema edges: ", schema_edge_list)

data_obj = graph.get_data()
print("\nData object: ", type(data_obj))
data_node_list = graph.get_data_nodes()
print("Data nodes: ", data_node_list)
data_edge_list = graph.get_data_edges()
print("Data edges: ", data_edge_list)

typing_json = json.dumps(graph.get_data_typing(), indent=" ")
print("\nData typing:", typing_json)
ReGraph implements the rewriting technique called sesqui-pushout rewriting,
which transforms graphs by applying rules through their instances (matchings). Rewriting the data or the schema may require a corresponding update to the other graph; such updates are called propagation and come in two types: backward and forward propagation.
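Backward propagation briefly: a change to the schema is propagated to the data typed by it. For example, when a schema node is cloned, each of its instances in the data is either assigned to one of the clones or is cloned itself.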
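Forward propagation briefly: a change to the data is propagated to the schema. For example, when a node that is not typed by the schema is added to the data, a new schema node is created to type it.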
For more details, please see here.
ReGraph allows schema-aware PGs and their schemas to be rewritten using the methods rewrite_data
and rewrite_schema
of TypedNeo4jGraph
. The rewriting can be done in two modes:
Strict rewriting: rewriting that does not allow propagation.
Non-strict rewriting: rewriting that allows propagation.
TypedNeo4jGraph
implements a set of methods that perform transformations of both data and schema that do not require propagation. Consider the following examples.
In [6]:
# Extend the schema with a new node type and an edge type leading to it.
country_attrs = {"location": RegexSet.universal()}
graph.add_schema_node("Country", country_attrs)

located_in_attrs = {"type": {"located_in"}}
graph.add_schema_edge("Organization", "Country", located_in_attrs)
In [7]:
# Show the schema after the additions above.
schema_node_list = graph.get_schema_nodes()
print("Schema nodes: ", schema_node_list)
schema_edge_list = graph.get_schema_edges()
print("Schema edges: ", schema_edge_list)
In [8]:
# Add a data node typed by the schema node "Country", then link an existing
# organization to it.
france_attrs = {"location": "Europe"}
graph.add_data_node("France", typing="Country", attrs=france_attrs)

located_in_edge = {"type": "located_in"}
graph.add_data_edge("INTERPOL", "France", located_in_edge)
In [9]:
# Show the data graph after the additions above.
data_node_list = graph.get_data_nodes()
print("Data nodes: ", data_node_list)
data_edge_list = graph.get_data_edges()
print("Data edges: ", data_edge_list)
We will now create a rule that is applied to the schema and clones the node Organization
into two nodes.
In [10]:
# Left-hand side of the rule: a graph containing only the node to be cloned.
lhs = NXGraph()
lhs.add_nodes_from(["Organization"])
# Start from a rule built on lhs, then inject the cloning of "Organization".
rule = Rule.from_transform(lhs)
# inject_clone_node returns a pair; the second element is kept as rhs_clone
# and is used later as a key into the instance returned by rewrite_schema.
_, rhs_clone = rule.inject_clone_node("Organization")
plot_rule(rule)
In [11]:
# Identity instance: the rule's single LHS node is matched to the schema node
# of the same name.
instance = {node: node for node in ["Organization"]}
We try to apply the created rule to the graph T
in the strict mode.
In [12]:
# Strict mode does not allow propagation, so this attempt is expected to fail;
# the exception is caught and displayed instead of aborting the notebook.
try:
    rhs_instance = graph.rewrite_schema(rule, strict=True)
except Exception as error:
    print("Error message: ", error)
    print("Type: ", type(error))
We have failed to rewrite the schema, because we have not specified typing for instances of Organization
in $p$. Let us try again, but this time we will provide such typing.
In [13]:
# Typing for the instances of Organization: ENS Lyon goes to the new clone,
# while UN and INTERPOL stay with the node named "Organization".
data_typing = {
    "ENS Lyon": rhs_clone,
    **dict.fromkeys(("UN", "INTERPOL"), "Organization"),
}
In [14]:
# Second attempt in strict mode, this time passing the data typing explicitly.
rhs_instance = graph.rewrite_schema(rule, data_typing=data_typing, strict=True)
In [15]:
# The rule was applied to the schema, which this notebook's prose calls T,
# so the label names T as the graph the instance lives in.
print("Instance of the RHS in T", rhs_instance)
Let us relabel nodes in T
.
In [16]:
# Give the two schema nodes produced by the cloning human-readable names.
# Each pair is (node in the rule's RHS, new label for its schema image).
relabelling = [
    (rhs_clone, 'University'),
    ("Organization", "International_Organization"),
]
for rhs_node, new_label in relabelling:
    graph.relabel_schema_node(rhs_instance[rhs_node], new_label)
In [17]:
# Pretty-print the current typing of the data by the schema.
typing_json = json.dumps(graph.get_data_typing(), indent=" ")
print(typing_json)
Let us first consider a small example of forward propagation. We will create a rule that performs some additions of new nodes not typed by the schema.
In [18]:
# Pattern to match: a has a "colleague" edge to b and an edge to c.
pattern = NXGraph()
pattern.add_nodes_from(["a", "b", "c"])
pattern_edges = [
    ("a", "b", {"type": "colleague"}),
    ("a", "c"),
]
pattern.add_edges_from(pattern_edges)

rule = Rule.from_transform(pattern)
# Replace the direct a -> c edge by a route through a newly added node.
rule.inject_remove_edge("a", "c")
rule.inject_add_node("Crime_Division")
rule.inject_add_edge("Crime_Division", "c", {"type": "part_of"})
for member in ("a", "b"):
    rule.inject_add_edge(member, "Crime_Division")
plot_rule(rule)
We have created a rule that removes the edge from a to c, adds a new node Crime_Division with a part_of edge to c, and adds edges from both a and b to Crime_Division.
In [19]:
# Restrict the matching by schema type: a and b must be persons, c an
# international organization.
pattern_typing = {
    **dict.fromkeys(("a", "b"), "Person"),
    "c": "International_Organization",
}
instances = graph.find_data_matching(
    pattern, pattern_typing=pattern_typing)

print("Instances:")
for instance in instances:
    print(instance)
We rewrite the data graph using the first of the instances found above.
In [20]:
# Apply the rule to the data at the first instance returned by find_data_matching.
# NOTE(review): instances[0] fails if no instance was found — confirm the matching above is non-empty.
rhs_instance = graph.rewrite_data(rule, instance=instances[0])
In [21]:
# Show the result of rewrite_data; it is indexed by RHS node name further below.
print("RHS instance: ", rhs_instance)
To type the new node 'Crime_Division', we have created a new node in the schema.
In [22]:
# Look up the schema node that types the data node created for
# "Crime_Division", then give that schema node a readable name.
crime_division_node = rhs_instance["Crime_Division"]
schema_node = graph.get_node_type(crime_division_node)
graph.relabel_schema_node(schema_node, "Division")
In [23]:
# Show the schema after the data rewrite and the relabelling.
schema_node_list = graph.get_schema_nodes()
print("Schema nodes: ", schema_node_list)
schema_edge_list = graph.get_schema_edges()
print("Schema edges: ", schema_edge_list)
Now, let us consider an example of backward propagation. We will clone the node Person
in the schema into a Child
and Adult
. We will determine which instances of Person
are typed by Child
or Adult
by looking at the age attribute.
In [24]:
# Left-hand side: the schema node Person together with its self-loop.
pattern = NXGraph()
pattern.add_nodes_from(["Person"])
pattern.add_edges_from([
    ("Person", "Person", {"type": {"friend", "parent", "colleague"}}),
])

# Interface graph: Person split into two clones, with the relation types
# allowed between each ordered pair of clones listed explicitly.
interface = NXGraph()
interface.add_nodes_from(["Adult", "Child"])
interface_edges = [
    ("Adult", "Adult", {"type": {"friend", "parent", "colleague"}}),
    ("Child", "Child", {"type": {"friend"}}),
    ("Adult", "Child", {"type": {"friend", "parent"}}),
    ("Child", "Adult", {"type": {"friend"}}),
]
interface.add_edges_from(interface_edges)

# Both clones map back to the single Person node of the left-hand side.
clone_origin = dict.fromkeys(("Adult", "Child"), "Person")
rule = Rule(lhs=pattern, p=interface, p_lhs=clone_origin)
Let us determine which instances of Person
are typed by Child
or Adult
as follows.
In [25]:
# Assign every person with a known age to the Adult or Child clone.
# People without an "age" attribute are left out of the typing.
ADULT_MIN_AGE = 18  # ages at or above this value count as adult

data_typing = {}
persons = graph.get_instances("Person")
for p in persons:
    p_attrs = graph.get_data_node(p)
    if "age" not in p_attrs:
        continue
    # The attribute value is iterable; its first element is taken as the age.
    age = list(p_attrs["age"])[0]
    # Inclusive bound: an 18-year-old is typed as Adult, not Child.
    data_typing[p] = "Adult" if age >= ADULT_MIN_AGE else "Child"
print("Data typing: ", data_typing)
In [26]:
# No strict flag is passed here, unlike the earlier rewrite_schema calls.
# Persons missing from data_typing are presumably handled by propagation — TODO confirm.
rhs_instance = graph.rewrite_schema(rule, data_typing=data_typing)
print(rhs_instance)
In [27]:
# Name each schema node produced by the rewrite after the rule node it
# is the image of.
for clone_name in ("Adult", "Child"):
    graph.relabel_schema_node(rhs_instance[clone_name], clone_name)
In [28]:
# List the schema nodes, then the schema edges one per line.
schema_node_list = graph.get_schema_nodes()
print("Schema nodes: ", schema_node_list)
print("Schema edges: ")
for source, target in graph.get_schema_edges():
    print("\t", source, "->", target)
In [29]:
# List the data nodes, then the data edges with their attributes, one per line.
data_node_list = graph.get_data_nodes()
print("Data nodes: ", data_node_list)
print("Data edges: ")
for source, target, edge_attrs in graph.get_data_edges(data=True):
    print("\t", source, "->", target, edge_attrs)
Observe that we have cloned the node Bob
into two nodes Bob
and Bob1
, one being an instance of Adult
and another of Child
.
In [30]:
# Pretty-print the typing of the data by the schema after the rewrite.
typing_json = json.dumps(graph.get_data_typing(), indent=" ")
print(typing_json)