In [1]:
import getpass
import os

from neo4j.v1 import GraphDatabase, basic_auth
In [2]:
# Connect to the local Neo4j server over Bolt and open the session that the
# cells below share.
# The password is deliberately not stored in the notebook: it is read from the
# NEO4J_PASSWORD environment variable, with an interactive prompt as fallback,
# so the credential is not committed along with the file.
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")
driver = GraphDatabase.driver("bolt://localhost",
                              auth=basic_auth(neo4j_user, neo4j_password))
session = driver.session()
In [3]:
# Smoke-test the connection: create one Person node, read it back and print
# "<title> <name>".
# The session is closed in a `finally` block so it is released even when one
# of the queries raises.
try:
    session.run("CREATE (a:Person {name:'Arthur', title:'King'})")
    result = session.run("MATCH (a:Person) WHERE a.name = 'Arthur' RETURN a.name AS name, a.title AS title")
    for record in result:
        print("%s %s" % (record["title"], record["name"]))
finally:
    session.close()
In [ ]:
# Open a new session on the existing driver; the session used by the smoke
# test in the previous cell was closed there.
session = driver.session()
In [ ]:
# Preview the first rows of the local tab-separated network file.
# A Cypher query may not end on the LOAD CSV clause itself, so the rows are
# limited and returned explicitly.
# Python turns the \t escape into a real tab character before the query is
# sent, which is what FIELDTERMINATOR receives.
# NOTE(review): the file:// URL is an absolute, machine-specific path.
command = """
LOAD CSV WITH HEADERS FROM 'file:///Users/janet/Neo4j_meta4/data/network_broken.tsv'
AS row FIELDTERMINATOR '\t'
WITH row LIMIT 4
RETURN row
"""
result = session.run(command.rstrip())
In [ ]:
# Ask for the first four rows of a header-less CSV hosted in a gist.
# `result` is not consumed in this cell; the next cell iterates over it.
command = """
LOAD CSV FROM "https://gist.githubusercontent.com/jexp/d788e117129c3730a042/raw/a147631215456d86a77edc7ec08c128b9ef05e3b/people_leading_empty.csv"
AS line
WITH line LIMIT 4
RETURN line
"""
# Drop the trailing newline of the triple-quoted string before sending it.
preview_query = command.rstrip()
result = session.run(preview_query)
In [ ]:
# Print every row held by `result`, which the previous cell produced.
for row in result:
    print(row)
http://neo4j.com/docs/developer-manual/current/cypher/#query-load-csv
Note that the CSV URL they give is no longer valid.
LOAD CSV FROM 'http://neo4j.com/docs/3.0.7/csv/artists-fieldterminator.csv' AS line FIELDTERMINATOR
';'
CREATE (:Artist { name: line[1], year: toInt(line[2])})
Data we are loading:
PersonName "Company Name" year
Kenny Bastani "Neo Technology" 2013
Michael Hunger "Neo Technology" 2010
James Ward "Heroku" 2011
Someone
John "Doe.com" "ninetynine"
In [ ]:
# LOAD CSV syntax reference:
# http://neo4j.com/docs/developer-manual/current/cypher/#query-load-csv
#
# Create one property-less :Artist node for every data row of the
# tab-separated gist. Python converts the \t escape to a real tab character
# before the query text is sent.
command = """
LOAD CSV WITH HEADERS FROM "https://gist.githubusercontent.com/jexp/d788e117129c3730a042/raw/1bd8c19bf8b49d9eb7149918cc11a34faf996dd8/people.tsv"
AS line
FIELDTERMINATOR '\t'
CREATE (:Artist)
"""
# Earlier attempt that addressed columns by index, kept for reference:
#   CREATE (:Artist {company:line[1], year: toInt(line[2])})
artist_query = command.rstrip()
result = session.run(artist_query)
# The query has no RETURN clause, so no records are expected here.
for row in result:
    print(row)
In [ ]:
# LOAD CSV syntax reference:
# http://neo4j.com/docs/developer-manual/current/cypher/#query-load-csv
#
# Same import as the previous cell, but each :Artist node now carries the
# 'Company Name' and 'year' columns, addressed by header name.
command = """
LOAD CSV WITH HEADERS FROM "https://gist.githubusercontent.com/jexp/d788e117129c3730a042/raw/1bd8c19bf8b49d9eb7149918cc11a34faf996dd8/people.tsv"
AS line
FIELDTERMINATOR '\t'
CREATE (:Artist {company:line['Company Name'], year:line.year})
"""
# Index-based variant tried earlier, kept for reference:
#   CREATE (:Artist {company:line[1], year: toInt(line[2])})
artist_query = command.rstrip()
result = session.run(artist_query)
# The query has no RETURN clause, so no records are expected here.
for row in result:
    print(row)
In [ ]:
# Dump every node currently stored in the database, one record per line.
result = session.run(" match(n) return n ")
for node_record in result:
    print(node_record)
In [ ]:
# LOAD CSV syntax reference:
# http://neo4j.com/docs/developer-manual/current/cypher/#query-load-csv
#
# Start from an empty database, import the artists, then list every node.

# wipe everything first:
session.run(""" MATCH (n) DETACH DELETE n """)

# make new stuff:
command = """
LOAD CSV WITH HEADERS FROM "https://gist.githubusercontent.com/jexp/d788e117129c3730a042/raw/1bd8c19bf8b49d9eb7149918cc11a34faf996dd8/people.tsv"
AS line
FIELDTERMINATOR '\t'
CREATE (:Artist {company:line['Company Name'], year:line.year})
"""
session.run(command.rstrip())

# Print the nodes once. The original cell looped over `result` a second time,
# but the first loop had already consumed it, so that loop printed nothing
# and has been removed.
result = session.run(""" match(n) return n """)
for record in result:
    print(record)
In [ ]:
# Show the first lines of the local TSV with the shell's `head`.
# NOTE(review): absolute, machine-specific path — this cell only works where
# that file exists.
! head /Users/janet/Neo4j_meta4/data/network_broken.tsv
In [ ]:
# NOTE(review): the string below has the form of a server configuration
# setting (key=value), not a Cypher statement, so the server will presumably
# reject it with a syntax error. The setting most likely belongs in the
# server's neo4j.conf instead — confirm against the Neo4j configuration docs.
session.run('dbms.security.allow_csv_import_from_file_urls=true')
In [ ]:
# TRY ON OUR DATA
# Empty the database, preview the first four rows of the project's TSV from
# GitHub, then list whatever nodes exist afterwards.

# wipe everything first:
session.run(" MATCH (n) DETACH DELETE n ")

# make new stuff (Python converts \t to a real tab before the query is sent):
command = """
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken.tsv'
AS line
FIELDTERMINATOR '\t'
WITH line LIMIT 4
RETURN line
"""
preview_query = command.rstrip()
result = session.run(preview_query)
for row in result:
    print(row)

print('----------------------')

# The preview query only returns rows; it does not create anything.
result = session.run(" match(n) return n ")
for node_record in result:
    print(node_record)
In [ ]:
# Show that I can load the 20-line CSV.
# Empty the database, return every row of the small comma-separated file,
# then list whatever nodes exist afterwards.

# wipe everything first:
session.run(" MATCH (n) DETACH DELETE n ")

# make new stuff:
command = """
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'
AS line
RETURN line
"""
rows_query = command.rstrip()
result = session.run(rows_query)
for row in result:
    print(row)

print('----------------------')

# The query above only returns rows; it does not create anything.
result = session.run(" match(n) return n ")
for node_record in result:
    print(node_record)
LOAD CSV FROM 'file:///home/nate/Downloads/file.csv' AS line
MERGE (n:A {number : line[0]})
WITH line, n
MERGE (m:B {ID : line[1]})
WITH m,n
MERGE (n)-[:LIKES]->(m);
In [ ]:
# Build :A and :B nodes joined by :LIKES from the CSV, addressing columns by
# index. "WITH HEADERS" is commented out inside the Cypher text (after //),
# so every line of the file is read as a plain list.

# wipe everything first:
session.run(" MATCH (n) DETACH DELETE n ")

# make new stuff:
command = """
LOAD CSV FROM //WITH HEADERS FROM
'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'
AS line
MERGE (n:A {col1 : line[0], col2: line[1], col3: line[2]})
WITH line, n
MERGE (m:B {ID : line[1]})
WITH m,n
MERGE (n)-[:LIKES]->(m);
"""
result = session.run(command.rstrip())
# Count and print whatever the building query hands back.
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of results from building: %s' % num_results)
#print('----------------------')

# Count and print every node now in the database.
result = session.run(" match(n) return n ")
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of results from querying: %s' % num_results)
Go through and define all the nodes first, then add edges by iterating over each line?
In [6]:
# Makes duplicate nodes:
# CREATE adds a new :Gene node for every CSV row, so a source value that
# appears on several rows ends up as several nodes.

# wipe everything first:
session.run(" MATCH (n) DETACH DELETE n ")

# make new stuff:
command = """
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'
AS line
CREATE (:Gene {source:line.source})
"""
result = session.run(command.rstrip())

# See how many nodes I got.
result = session.run(" match(n) return n ")
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of nodes added: %s' % num_results)
# ------------------------------------------------------------------------
In [8]:
# Makes duplicate nodes:
# One :Gene node is CREATEd per CSV row, with no check for an existing node
# carrying the same source value.

# wipe everything first:
session.run(" MATCH (n) DETACH DELETE n ")

# make new stuff:
command = """
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'
AS line
CREATE (:Gene {source:line.source})
"""
result = session.run(command.rstrip())

# See how many nodes I got.
result = session.run(" match(n) return n ")
gene_records = list(result)
for record in gene_records:
    print(record)
num_results = len(gene_records)
print('number of nodes added: %s' % num_results)
# ------------------------------------------------------------------------
http://blog.comperiosearch.com/blog/2015/02/04/csv-import-tricks-neo4j/
USING PERIODIC COMMIT 1000
LOAD CSV WITH HEADERS FROM "file://blabla.csv" AS csvLine
MATCH (uf :USER { twitter_name: toInt(csvLine.follower_name) })
MATCH (u :USER { twitter_name: toInt(csvLine.twitter_name) })
MERGE uf -[:FOLLOWS]-> u
In [26]:
# Makes 0 nodes, but it was really close.
# The database is wiped immediately before the import and this query only
# MATCHes existing :Gene nodes (it never creates any), so there is nothing
# for the MERGE of the relationship to attach to.

# wipe everything first:
session.run(" MATCH (n) DETACH DELETE n ")

# make new stuff:
command = """
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'
AS line
MATCH (g:Gene {source:line.source})
MATCH (g2:Gene {target:line.target})
MERGE (g) -[:x]-> (g2) // Parentheses are required to identify nodes in patterns, i.e. (g)
"""
result = session.run(command.rstrip())

# See how many nodes I got.
result = session.run(" match(n) return n ")
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of nodes added: %s' % num_results)
# ------------------------------------------------------------------------
http://blog.comperiosearch.com/blog/2015/02/04/csv-import-tricks-neo4j/
USING PERIODIC COMMIT 1000
LOAD CSV WITH HEADERS FROM "file://blabla.csv" AS csvLine
WITH toInt(csvLine.twitterid) as twitterid, csvLine
WHERE twitterid IS NOT NULL
MERGE (u :USER { twitterid: twitterid }) ON CREATE SET
u.seat=toInt(csvLine.seat), u.firstname=csvLine.first name, u.lastname=csvLine.lastname, u.party=csvLine.party, u.region=csvLine.region, u.type = csvLine.type
In [14]:
# Makes 4 nodes. This is a good start!!
# MERGE keys each :Gene on its `gene` property, so a source value repeated
# across rows maps to a single node; `organism` is only set when the node is
# first created.

# wipe everything first:
session.run(" MATCH (n) DETACH DELETE n ")

# make new stuff:
command = """
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'
AS line
MERGE (g:Gene {gene:line.source}) ON CREATE SET
g.organism=line.source_organism
"""
result = session.run(command.rstrip())

# See how many nodes I got.
result = session.run(" match(n) return n ")
gene_records = list(result)
for record in gene_records:
    print(record)
num_results = len(gene_records)
print('number of nodes added: %s' % num_results)
# ------------------------------------------------------------------------
In [18]:
# Aim to make all the nodes.
# Two passes over the same CSV: the first MERGEs a :Gene per distinct source
# value, the second does the same for the target column. After each pass all
# nodes are printed and counted.

# wipe everything first:
session.run(" MATCH (n) DETACH DELETE n ")

# add the source nodes, if they don't already exist:
command = """
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'
AS line
MERGE (g:Gene {gene:line.source}) ON CREATE SET
g.organism=line.source_organism
"""
result = session.run(command.rstrip())

# See how many nodes I got.
result = session.run(" match(n) return n ")
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of nodes added from source column: %s' % num_results)
print('-----------------------')
# ------------------------------------------------------------------------

# add the sink nodes, if they don't already exist:
command = """
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'
AS line
MERGE (g:Gene {gene:line.target}) ON CREATE SET
g.organism=line.target_organism
"""
result = session.run(command.rstrip())

# See how many nodes I got (the total now includes the source nodes too).
result = session.run(" match(n) return n ")
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of nodes after adding dest column: %s' % num_results)
In [22]:
# WORKS!
# Three passes over the same CSV: MERGE the source genes, MERGE the target
# genes, then MATCH both ends of every row and MERGE an :ABCD relationship
# between them. Node counts are printed after the first two passes and the
# relationship count after the third.

# wipe everything first:
session.run(""" MATCH (n) DETACH DELETE n """)

# add the source nodes, if they don't already exist:
command = """
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'
AS line
MERGE (g:Gene {gene:line.source}) ON CREATE SET
g.organism=line.source_organism
"""
result = session.run(command.rstrip())

# See how many nodes I got.
result = session.run(""" match(n) return n """)
num_results = 0
for record in result:
    num_results += 1
    print(record)
print('number of nodes added from source column: {}'.format(num_results))
# ------------------------------------------------------------------------

# add the sink nodes, if they don't already exist:
command = """
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'
AS line
MERGE (g:Gene {gene:line.target}) ON CREATE SET
g.organism=line.target_organism
"""
result = session.run(command.rstrip())

# See how many nodes I got.
result = session.run(""" match(n) return n """)
num_results = 0
for record in result:
    num_results += 1
    print(record)
print('number of nodes after adding dest column: {}'.format(num_results))
# ------------------------------------------------------------------------

# add the edges:
command = """
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'
AS line
MATCH (g1:Gene {gene:line.source})
MATCH (g2:Gene {gene:line.target})
MERGE (g1) -[:ABCD]-> (g2);
"""
result = session.run(command.rstrip())

# See how many edges I got. This query returns relationships, not nodes, so
# the summary line is labelled accordingly (it previously claimed to count
# nodes from the dest column).
result = session.run(""" match(n) -[e]->(m) return e """)
num_results = 0
for record in result:
    num_results += 1
    print(record)
print('number of edges added: {}'.format(num_results))
In [25]:
# Do it in one Cypher call.
# For every CSV row: MERGE the source gene, MERGE the target gene, then MERGE
# the :ABCD relationship between the two, all inside a single query.

# wipe everything first:
session.run(" MATCH (n) DETACH DELETE n ")

# add the source nodes, sink nodes and edges, if they don't already exist:
command = """
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'
AS line
MERGE (g1:Gene {gene:line.source}) ON CREATE SET
g1.organism=line.source_organism
MERGE (g2:Gene {gene:line.target}) ON CREATE SET
g2.organism=line.target_organism
MERGE (g1) -[:ABCD]-> (g2);
"""
result = session.run(command.rstrip())

# See how many nodes I got.
result = session.run(" match(n) return n ")
gene_records = list(result)
for record in gene_records:
    print(record)
num_results = len(gene_records)
print('number of nodes added from source and dest column: %s' % num_results)
# ------------------------------------------------------------------------
What is different about this call relative to the one that didn't work before?
In [39]:
# Fill in more attributes. You can put the attributes in the dict-like thing.
# The source gene is MERGEd on all three properties given in its pattern;
# the target gene is MERGEd on org_gene alone and gets its other properties
# through ON CREATE SET.

# wipe everything first:
session.run(" MATCH (n) DETACH DELETE n ")

# add the nodes and edges, if they don't already exist:
command = """
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'
AS line
MERGE (g1:Gene {org_gene:line.source,
organism:line.source_organism,
gene:line.source_gene})
MERGE (g2:Gene {org_gene:line.target}) ON CREATE SET
g2.organism=line.target_organism, g2.gene=line.target_gene
MERGE (g1) -[:ABCD]-> (g2);
"""
result = session.run(command.rstrip())

# See how many nodes I got.
result = session.run(" match(n) return n ")
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of nodes added from source and dest column: %s' % num_results)
# ------------------------------------------------------------------------
In [49]:
# Fill in more attributes. Add in edges with attributes from the line.
# After the import, two follow-up queries attach an extra label to the nodes
# of each organism, and finally every node is printed and counted.

# wipe everything first:
session.run(" MATCH (n) DETACH DELETE n ")

# add the nodes and the attributed :X edges, if they don't already exist:
command = """
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'
AS line
MERGE (g1:Gene {org_gene:line.source,
organism:line.source_organism,
gene:line.source_gene})
MERGE (g2:Gene {org_gene:line.target,
organism:line.target_organism,
gene:line.target_gene})
MERGE (g1) -[:X {weight:line.weight, association:line.association}]-> (g2);
"""
result = session.run(command.rstrip())

# There might be better ways to do this...
# http://stackoverflow.com/questions/29922140/labels-on-nodes-and-relationships-from-a-csv-file
# One labelling query per organism id, run in order.
label_commands = (
    """
match (n {organism:'Ga0081607'})
set n :organism_A
return n
""",
    """
match (n {organism:'Ga0081629'})
set n :organism_B
return n
""",
)
for command in label_commands:
    result = session.run(command.rstrip())

# See how many nodes I got.
result = session.run(" match(n) return n ")
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of nodes added from source and dest column: %s' % num_results)
# ------------------------------------------------------------------------
In [ ]:
# Fill in more attributes. Add in edges with attributes from the line.
# Both genes are MERGEd on all three of their properties and the :X edge
# carries the row's weight and association values.

# wipe everything first:
session.run(" MATCH (n) DETACH DELETE n ")

# add the nodes and the attributed :X edges, if they don't already exist:
command = """
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'
AS line
MERGE (g1:Gene {org_gene:line.source,
organism:line.source_organism,
gene:line.source_gene})
MERGE (g2:Gene {org_gene:line.target,
organism:line.target_organism,
gene:line.target_gene})
MERGE (g1) -[:X {weight:line.weight, association:line.association}]-> (g2);
"""
result = session.run(command.rstrip())

# See how many nodes I got.
result = session.run(" match(n) return n ")
gene_records = list(result)
for record in gene_records:
    print(record)
num_results = len(gene_records)
print('number of nodes added from source and dest column: %s' % num_results)
# -----------------------------------------------------------------------