In [1]:
import os
from getpass import getpass

from neo4j.v1 import GraphDatabase, basic_auth

In [2]:
# Connect to the local Neo4j server over Bolt and open a session.
# The password is taken from the NEO4J_PASSWORD environment variable, falling
# back to an interactive prompt, so no credential is stored in the notebook.
neo4j_password = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")
driver = GraphDatabase.driver("bolt://localhost",
                              auth=basic_auth("neo4j", neo4j_password))
session = driver.session()

In [3]:
# Smoke test: store a King Arthur node, read it back and print "<title> <name>".
# MERGE (rather than CREATE) keeps the cell idempotent: re-running it does not
# add another Arthur node, so the line is not printed once per previous run.
try:
    session.run("MERGE (a:Person {name:'Arthur', title:'King'})")

    result = session.run("MATCH (a:Person) WHERE a.name = 'Arthur' RETURN a.name AS name, a.title AS title")
    for record in result:
        print("%s %s" % (record["title"], record["name"]))
finally:
    # Release the session even if one of the queries above fails.
    session.close()


King Arthur

In [ ]:
# Open a new session on the existing driver for the LOAD CSV experiments below.
session = driver.session()

In [ ]:
# Preview the first rows of the local tab-separated network file.
# A Cypher query cannot end with LOAD CSV, so the rows are passed on and
# returned (limited to 4, like the other preview queries in this notebook).
# NOTE(review): absolute local path; file:// imports may also be restricted by
# the server configuration — confirm.
command = """
    LOAD CSV WITH HEADERS FROM 'file:///Users/janet/Neo4j_meta4/data/network_broken.tsv' 
    AS row FIELDTERMINATOR '\t'
    WITH row LIMIT 4
    RETURN row
    """
result = session.run(command)

In [ ]:
# Ask for the first four rows of the headerless example CSV hosted in a gist.
csv_preview_query = """
    LOAD CSV FROM "https://gist.githubusercontent.com/jexp/d788e117129c3730a042/raw/a147631215456d86a77edc7ec08c128b9ef05e3b/people_leading_empty.csv"
      AS line
    WITH line LIMIT 4
    RETURN line
    """.rstrip()
# `result` is left unconsumed here; the following cell prints its records.
result = session.run(csv_preview_query)

In [ ]:
# Print each record of the most recent query result, one per line.
for row in result:
    print(row)

http://neo4j.com/docs/developer-manual/current/cypher/#query-load-csv

Note that the CSV URL they give is no longer valid.

LOAD CSV FROM 'http://neo4j.com/docs/3.0.7/csv/artists-fieldterminator.csv' AS line FIELDTERMINATOR ';' CREATE (:Artist { name: line[1], year: toInt(line[2])})

Data we are loading:

PersonName "Company Name" year Kenny Bastani "Neo Technology" 2013 Michael Hunger "Neo Technology" 2010 James Ward "Heroku" 2011 Someone John "Doe.com" "ninetynine"


In [ ]:
# http://neo4j.com/docs/developer-manual/current/cypher/#query-load-csv
# Read the tab-separated example file and create one property-less :Artist
# node per row.  Positional alternative kept for reference:
#   CREATE (:Artist {company:line[1], year: toInt(line[2])})
create_artists_query = """
    LOAD CSV WITH HEADERS FROM "https://gist.githubusercontent.com/jexp/d788e117129c3730a042/raw/1bd8c19bf8b49d9eb7149918cc11a34faf996dd8/people.tsv"
    AS line 
    FIELDTERMINATOR '\t'
    CREATE (:Artist)
    """.rstrip()
result = session.run(create_artists_query)

# Echo whatever records come back (the query has no RETURN clause).
for row in result:
    print(row)

In [ ]:
# http://neo4j.com/docs/developer-manual/current/cypher/#query-load-csv
# Same import as before, but each :Artist now carries `company` and `year`
# taken from the header-named columns of the row.
# Positional alternative kept for reference:
#   CREATE (:Artist {company:line[1], year: toInt(line[2])})
artists_with_props_query = """
    LOAD CSV WITH HEADERS FROM "https://gist.githubusercontent.com/jexp/d788e117129c3730a042/raw/1bd8c19bf8b49d9eb7149918cc11a34faf996dd8/people.tsv"
    AS line 
    FIELDTERMINATOR '\t'
    CREATE (:Artist {company:line['Company Name'], year:line.year})
    """.rstrip()
result = session.run(artists_with_props_query)

# Echo whatever records come back (the query has no RETURN clause).
for row in result:
    print(row)

In [ ]:
# Show every node currently stored in the graph.
all_nodes = session.run(""" match(n) return n """)
for node_record in all_nodes:
    print(node_record)

In [ ]:
# http://neo4j.com/docs/developer-manual/current/cypher/#query-load-csv

# Wipe everything first so the cell always starts from an empty graph.
session.run(""" MATCH (n) DETACH DELETE n """)

# Create one :Artist node (company, year) per row of the example TSV.
command = """
    LOAD CSV WITH HEADERS FROM "https://gist.githubusercontent.com/jexp/d788e117129c3730a042/raw/1bd8c19bf8b49d9eb7149918cc11a34faf996dd8/people.tsv"
    AS line 
    FIELDTERMINATOR '\t'
    CREATE (:Artist {company:line['Company Name'], year:line.year})
    """
session.run(command.rstrip())

# List the nodes that now exist.  The result is consumed as it is iterated,
# so it is printed exactly once (a second loop over the same result object
# would have nothing left to print).
result = session.run(""" match(n) return n """)
for record in result:
    print(record)

In [ ]:
# Shell escape: show the first lines of the local TSV file.
# NOTE(review): hard-coded absolute path — this only works where that file exists.
! head /Users/janet/Neo4j_meta4/data/network_broken.tsv

In [ ]:
# NOTE(review): the string sent here has the key=value form of a server
# configuration setting rather than a Cypher statement — presumably it belongs
# in the Neo4j configuration file instead of session.run(); confirm.
session.run('dbms.security.allow_csv_import_from_file_urls=true')

In [ ]:
#  TRY ON OUR DATA

# Begin with an empty graph.
session.run(""" MATCH (n) DETACH DELETE n """)

# Read our own TSV from GitHub and show only its first four rows.
tsv_preview_query = """
    LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken.tsv' 
    AS line 
    FIELDTERMINATOR '\t'
    WITH line LIMIT 4
    RETURN line
    """.rstrip()
for row in session.run(tsv_preview_query):
    print(row)

print('----------------------')

# The preview only reads rows, so this lists whatever nodes exist afterwards.
for node_record in session.run(""" match(n) return n """):
    print(node_record)

TRY ON OUR DATA

Start with the simplest: a csv with only 20 rows.

Next progress to the csv with all the rows:

Finally, try the .tsv with all the rows. We will want tsv because

Last, see if I can do it from local file, not GitHub file.


In [ ]:
# Demonstrate that the 20-line CSV can be read: return every row as-is.

# Begin with an empty graph.
session.run(""" MATCH (n) DETACH DELETE n """)

# Read the small CSV from GitHub and return each row.
csv20_query = """
    LOAD CSV WITH HEADERS FROM 
        'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'  
    AS line
    RETURN line
    """.rstrip()
for row in session.run(csv20_query):
    print(row)

print('----------------------')

# The query above only reads rows, so this lists whatever nodes exist afterwards.
for node_record in session.run(""" match(n) return n """):
    print(node_record)

http://stackoverflow.com/questions/25090255/how-to-create-unique-nodes-and-relationships-by-csv-file-imported-in-neo4j

LOAD CSV FROM 'file:///home/nate/Downloads/file.csv' AS line MERGE (n:A {number : line[0]}) WITH line, n MERGE (m:B {ID : line[1]}) WITH m,n MERGE (n)-[:LIKES]->(m);


In [ ]:
# Begin with an empty graph.
session.run(""" MATCH (n) DETACH DELETE n """)

# Build :A and :B nodes linked by :LIKES, following the Stack Overflow recipe.
# The "//" comments out the rest of that Cypher line, so the file is read
# without headers and columns are addressed by position (line[0], line[1], ...).
build_query = """
    LOAD CSV FROM //WITH HEADERS FROM 
        'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'  
    AS line
    MERGE (n:A {col1 : line[0], col2: line[1], col3: line[2]})
    WITH line, n
    MERGE (m:B {ID : line[1]})
    WITH m,n
    MERGE (n)-[:LIKES]->(m);
    """.rstrip()
result = session.run(build_query)

# Print and count the records returned by the build query itself.
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of results from building: {}'.format(num_results))

#print('----------------------')

# Print and count every node that exists after the build.
result = session.run(""" match(n) return n """)
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of results from querying: {}'.format(num_results))

Go through and define all the nodes first, then add edges by iterating over each line?


In [6]:
# Makes duplicate nodes: CREATE adds a :Gene node for every CSV row, even when
# the same `source` value appears on several rows.

# Begin with an empty graph.
session.run(""" MATCH (n) DETACH DELETE n """)

# One :Gene node per row, keyed only by the row's `source` column.
create_genes_query = """
    LOAD CSV WITH HEADERS FROM 
        'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'  
    AS line
    CREATE (:Gene {source:line.source})
    """.rstrip()
result = session.run(create_genes_query)

# Print and count every node now in the graph.
result = session.run(""" match(n) return n """)
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of nodes added: {}'.format(num_results))

# ------------------------------------------------------------------------


<Record n=<Node id=36 labels={'Gene'} properties={'source': 'Ga0081607_11219'}>>
<Record n=<Node id=37 labels={'Gene'} properties={'source': 'Ga0081607_11219'}>>
<Record n=<Node id=38 labels={'Gene'} properties={'source': 'Ga0081607_11219'}>>
<Record n=<Node id=39 labels={'Gene'} properties={'source': 'Ga0081607_11219'}>>
<Record n=<Node id=40 labels={'Gene'} properties={'source': 'Ga0081607_108214'}>>
<Record n=<Node id=41 labels={'Gene'} properties={'source': 'Ga0081607_108214'}>>
<Record n=<Node id=42 labels={'Gene'} properties={'source': 'Ga0081607_108214'}>>
<Record n=<Node id=43 labels={'Gene'} properties={'source': 'Ga0081607_108214'}>>
<Record n=<Node id=44 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=45 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=46 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=47 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=48 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=49 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=50 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=51 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=52 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=53 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=54 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=55 labels={'Gene'} properties={'source': 'Ga0081629_12073'}>>
number of nodes added: 20

In [8]:
# Makes duplicate nodes (re-run of the CREATE experiment): every CSV row
# becomes its own :Gene node regardless of repeated `source` values.

# Begin with an empty graph.
session.run(""" MATCH (n) DETACH DELETE n """)

# One :Gene node per row, keyed only by the row's `source` column.
create_genes_query = """
    LOAD CSV WITH HEADERS FROM 
        'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'  
    AS line
    CREATE (:Gene {source:line.source})
    """.rstrip()
result = session.run(create_genes_query)

# Print and count every node now in the graph.
result = session.run(""" match(n) return n """)
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of nodes added: {}'.format(num_results))

# ------------------------------------------------------------------------


<Record n=<Node id=56 labels={'Gene'} properties={'source': 'Ga0081607_11219'}>>
<Record n=<Node id=57 labels={'Gene'} properties={'source': 'Ga0081607_11219'}>>
<Record n=<Node id=58 labels={'Gene'} properties={'source': 'Ga0081607_11219'}>>
<Record n=<Node id=59 labels={'Gene'} properties={'source': 'Ga0081607_11219'}>>
<Record n=<Node id=60 labels={'Gene'} properties={'source': 'Ga0081607_108214'}>>
<Record n=<Node id=61 labels={'Gene'} properties={'source': 'Ga0081607_108214'}>>
<Record n=<Node id=62 labels={'Gene'} properties={'source': 'Ga0081607_108214'}>>
<Record n=<Node id=63 labels={'Gene'} properties={'source': 'Ga0081607_108214'}>>
<Record n=<Node id=64 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=65 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=66 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=67 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=68 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=69 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=70 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=71 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=72 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=73 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=74 labels={'Gene'} properties={'source': 'Ga0081607_115213'}>>
<Record n=<Node id=75 labels={'Gene'} properties={'source': 'Ga0081629_12073'}>>
number of nodes added: 20

http://blog.comperiosearch.com/blog/2015/02/04/csv-import-tricks-neo4j/

USING PERIODIC COMMIT 1000 LOAD CSV WITH HEADERS FROM “file://blabla.csv” AS csvLine MATCH (uf :USER { twitter_name: toInt(csvLine.follower_name) }) MATCH (u :USER { twitter_name: toInt(csvLine.twitter_name) }) MERGE uf -[:FOLLOWS]-> u


In [26]:
# Makes 0 nodes, but it was really close.
# The graph is wiped right before the load and the query only MATCHes :Gene
# nodes before merging an edge between them, so nothing is found and nothing
# gets created.  (The second MATCH also filters on a `target` property.)

# Begin with an empty graph.
session.run(""" MATCH (n) DETACH DELETE n """)

# Try to connect already-existing genes row by row.
match_merge_query = """
    LOAD CSV WITH HEADERS FROM 
        'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'  
    AS line
    MATCH (g:Gene {source:line.source})
    MATCH (g2:Gene {target:line.target})
    MERGE (g) -[:x]-> (g2) // Parentheses are required to identify nodes in patterns, i.e. (g) 
    """.rstrip()
result = session.run(match_merge_query)

# Print and count every node now in the graph.
result = session.run(""" match(n) return n """)
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of nodes added: {}'.format(num_results))

# ------------------------------------------------------------------------


number of nodes added: 0

http://blog.comperiosearch.com/blog/2015/02/04/csv-import-tricks-neo4j/

USING PERIODIC COMMIT 1000 LOAD CSV WITH HEADERS FROM “file://blabla.csv” AS csvLine WITH toInt(csvLine.twitterid) as twitterid, csvLine WHERE twitterid IS NOT NULL MERGE (u :USER { twitterid: twitterid }) ON CREATE SET u.seat=toInt(csvLine.seat), u.firstname=csvLine.first name, u.lastname=csvLine.lastname, u.party=csvLine.party, u.region=csvLine.region, u.type = csvLine.type


In [14]:
# Makes 4 nodes.  This is a good start!!
# MERGE keys each :Gene on the `source` value, so repeated rows reuse the same
# node; `organism` is only filled in when the node is first created.

# Begin with an empty graph.
session.run(""" MATCH (n) DETACH DELETE n """)

merge_sources_query = """
    LOAD CSV WITH HEADERS FROM 
        'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'  
    AS line
    MERGE (g:Gene {gene:line.source}) ON CREATE SET
    g.organism=line.source_organism 
    """.rstrip()
result = session.run(merge_sources_query)

# Print and count every node now in the graph.
result = session.run(""" match(n) return n """)
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of nodes added: {}'.format(num_results))

# ------------------------------------------------------------------------


<Record n=<Node id=84 labels={'Gene'} properties={'gene': 'Ga0081607_11219', 'organism': 'Ga0081607'}>>
<Record n=<Node id=85 labels={'Gene'} properties={'gene': 'Ga0081607_108214', 'organism': 'Ga0081607'}>>
<Record n=<Node id=86 labels={'Gene'} properties={'gene': 'Ga0081607_115213', 'organism': 'Ga0081607'}>>
<Record n=<Node id=87 labels={'Gene'} properties={'gene': 'Ga0081629_12073', 'organism': 'Ga0081629'}>>
number of nodes added: 4

In [18]:
# Aim to make all the nodes: one pass over the CSV for the source genes,
# then a second pass for the target genes.

# Begin with an empty graph.
session.run(""" MATCH (n) DETACH DELETE n """)

# Pass 1: a :Gene per distinct `source`, with its organism set on creation.
merge_sources_query = """
    LOAD CSV WITH HEADERS FROM 
        'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'  
    AS line
    MERGE (g:Gene {gene:line.source}) ON CREATE SET
    g.organism=line.source_organism 
    """.rstrip()
result = session.run(merge_sources_query)

# Print and count the nodes after pass 1.
result = session.run(""" match(n) return n """)
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of nodes added from source column: {}'.format(num_results))
print('-----------------------')

# ------------------------------------------------------------------------

# Pass 2: a :Gene per distinct `target`, reusing nodes that already exist.
merge_targets_query = """
    LOAD CSV WITH HEADERS FROM 
        'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'  
    AS line
    MERGE (g:Gene {gene:line.target}) ON CREATE SET
    g.organism=line.target_organism 
    """.rstrip()
result = session.run(merge_targets_query)

# Print and count the nodes after pass 2.
result = session.run(""" match(n) return n """)
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of nodes after adding dest column: {}'.format(num_results))


<Record n=<Node id=128 labels={'Gene'} properties={'gene': 'Ga0081607_11219', 'organism': 'Ga0081607'}>>
<Record n=<Node id=129 labels={'Gene'} properties={'gene': 'Ga0081607_108214', 'organism': 'Ga0081607'}>>
<Record n=<Node id=130 labels={'Gene'} properties={'gene': 'Ga0081607_115213', 'organism': 'Ga0081607'}>>
<Record n=<Node id=131 labels={'Gene'} properties={'gene': 'Ga0081629_12073', 'organism': 'Ga0081629'}>>
number of nodes added from source column: 4
-----------------------
<Record n=<Node id=128 labels={'Gene'} properties={'gene': 'Ga0081607_11219', 'organism': 'Ga0081607'}>>
<Record n=<Node id=129 labels={'Gene'} properties={'gene': 'Ga0081607_108214', 'organism': 'Ga0081607'}>>
<Record n=<Node id=130 labels={'Gene'} properties={'gene': 'Ga0081607_115213', 'organism': 'Ga0081607'}>>
<Record n=<Node id=131 labels={'Gene'} properties={'gene': 'Ga0081629_12073', 'organism': 'Ga0081629'}>>
<Record n=<Node id=132 labels={'Gene'} properties={'gene': 'Ga0081607_115212', 'organism': 'Ga0081607'}>>
<Record n=<Node id=133 labels={'Gene'} properties={'gene': 'Ga0081607_116221', 'organism': 'Ga0081607'}>>
<Record n=<Node id=134 labels={'Gene'} properties={'gene': 'Ga0081607_107914', 'organism': 'Ga0081607'}>>
<Record n=<Node id=135 labels={'Gene'} properties={'gene': 'Ga0081607_11235', 'organism': 'Ga0081607'}>>
<Record n=<Node id=136 labels={'Gene'} properties={'gene': 'Ga0081607_10433', 'organism': 'Ga0081607'}>>
<Record n=<Node id=137 labels={'Gene'} properties={'gene': 'Ga0081607_12364', 'organism': 'Ga0081607'}>>
<Record n=<Node id=138 labels={'Gene'} properties={'gene': 'Ga0081629_10591', 'organism': 'Ga0081629'}>>
<Record n=<Node id=139 labels={'Gene'} properties={'gene': 'Ga0081607_113519', 'organism': 'Ga0081607'}>>
<Record n=<Node id=140 labels={'Gene'} properties={'gene': 'Ga0081607_12485', 'organism': 'Ga0081607'}>>
<Record n=<Node id=141 labels={'Gene'} properties={'gene': 'Ga0081607_11288', 'organism': 'Ga0081607'}>>
<Record n=<Node id=142 labels={'Gene'} properties={'gene': 'Ga0081607_118613', 'organism': 'Ga0081607'}>>
<Record n=<Node id=143 labels={'Gene'} properties={'gene': 'Ga0081607_12308', 'organism': 'Ga0081607'}>>
<Record n=<Node id=144 labels={'Gene'} properties={'gene': 'Ga0081607_115211', 'organism': 'Ga0081607'}>>
<Record n=<Node id=145 labels={'Gene'} properties={'gene': 'Ga0081607_11521', 'organism': 'Ga0081607'}>>
<Record n=<Node id=146 labels={'Gene'} properties={'gene': 'Ga0081607_104311', 'organism': 'Ga0081607'}>>
<Record n=<Node id=147 labels={'Gene'} properties={'gene': 'Ga0081607_11876', 'organism': 'Ga0081607'}>>
number of nodes after adding dest column: 20

In [22]:
# WORKS!
# Three passes over the CSV: source genes, target genes, then the edges
# between them.  After each pass the graph is printed and counted.

# wipe everything first:
session.run(""" MATCH (n) DETACH DELETE n """)

# add the source nodes, if they don't already exist:
command = """
    LOAD CSV WITH HEADERS FROM 
        'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'  
    AS line
    MERGE (g:Gene {gene:line.source}) ON CREATE SET
    g.organism=line.source_organism 
    """
result = session.run(command.rstrip())

# See how many nodes I got.
result = session.run(""" match(n) return n """)
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of nodes added from source column: {}'.format(num_results))

# ------------------------------------------------------------------------

# add the sink nodes, if they don't already exist:
command = """
    LOAD CSV WITH HEADERS FROM 
        'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'  
    AS line
    MERGE (g:Gene {gene:line.target}) ON CREATE SET
    g.organism=line.target_organism 
    """
result = session.run(command.rstrip())

# See how many nodes I got.
result = session.run(""" match(n) return n """)
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of nodes after adding dest column: {}'.format(num_results))

# ------------------------------------------------------------------------

# add the edges: both endpoints exist by now, so MATCH finds them and MERGE
# creates at most one :ABCD relationship per (source, target) pair.
command = """
    LOAD CSV WITH HEADERS FROM 
        'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'  
    AS line
    MATCH (g1:Gene {gene:line.source})
    MATCH (g2:Gene {gene:line.target})
    MERGE (g1) -[:ABCD]-> (g2); 
    """
result = session.run(command.rstrip())

# See how many edges I got.  This query returns relationships, not nodes, so
# the summary line reports edges (the old message wrongly said "nodes").
result = session.run(""" match(n) -[e]->(m) return e """)
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of edges added: {}'.format(num_results))


<Record n=<Node id=193 labels={'Gene'} properties={'gene': 'Ga0081607_11219', 'organism': 'Ga0081607'}>>
<Record n=<Node id=194 labels={'Gene'} properties={'gene': 'Ga0081607_108214', 'organism': 'Ga0081607'}>>
<Record n=<Node id=195 labels={'Gene'} properties={'gene': 'Ga0081607_115213', 'organism': 'Ga0081607'}>>
<Record n=<Node id=196 labels={'Gene'} properties={'gene': 'Ga0081629_12073', 'organism': 'Ga0081629'}>>
number of nodes added from source column: 4
<Record n=<Node id=193 labels={'Gene'} properties={'gene': 'Ga0081607_11219', 'organism': 'Ga0081607'}>>
<Record n=<Node id=194 labels={'Gene'} properties={'gene': 'Ga0081607_108214', 'organism': 'Ga0081607'}>>
<Record n=<Node id=195 labels={'Gene'} properties={'gene': 'Ga0081607_115213', 'organism': 'Ga0081607'}>>
<Record n=<Node id=196 labels={'Gene'} properties={'gene': 'Ga0081629_12073', 'organism': 'Ga0081629'}>>
<Record n=<Node id=197 labels={'Gene'} properties={'gene': 'Ga0081607_115212', 'organism': 'Ga0081607'}>>
<Record n=<Node id=198 labels={'Gene'} properties={'gene': 'Ga0081607_116221', 'organism': 'Ga0081607'}>>
<Record n=<Node id=199 labels={'Gene'} properties={'gene': 'Ga0081607_107914', 'organism': 'Ga0081607'}>>
<Record n=<Node id=200 labels={'Gene'} properties={'gene': 'Ga0081607_11235', 'organism': 'Ga0081607'}>>
<Record n=<Node id=201 labels={'Gene'} properties={'gene': 'Ga0081607_10433', 'organism': 'Ga0081607'}>>
<Record n=<Node id=202 labels={'Gene'} properties={'gene': 'Ga0081607_12364', 'organism': 'Ga0081607'}>>
<Record n=<Node id=203 labels={'Gene'} properties={'gene': 'Ga0081629_10591', 'organism': 'Ga0081629'}>>
<Record n=<Node id=204 labels={'Gene'} properties={'gene': 'Ga0081607_113519', 'organism': 'Ga0081607'}>>
<Record n=<Node id=205 labels={'Gene'} properties={'gene': 'Ga0081607_12485', 'organism': 'Ga0081607'}>>
<Record n=<Node id=206 labels={'Gene'} properties={'gene': 'Ga0081607_11288', 'organism': 'Ga0081607'}>>
<Record n=<Node id=207 labels={'Gene'} properties={'gene': 'Ga0081607_118613', 'organism': 'Ga0081607'}>>
<Record n=<Node id=208 labels={'Gene'} properties={'gene': 'Ga0081607_12308', 'organism': 'Ga0081607'}>>
<Record n=<Node id=209 labels={'Gene'} properties={'gene': 'Ga0081607_115211', 'organism': 'Ga0081607'}>>
<Record n=<Node id=210 labels={'Gene'} properties={'gene': 'Ga0081607_11521', 'organism': 'Ga0081607'}>>
<Record n=<Node id=211 labels={'Gene'} properties={'gene': 'Ga0081607_104311', 'organism': 'Ga0081607'}>>
<Record n=<Node id=212 labels={'Gene'} properties={'gene': 'Ga0081607_11876', 'organism': 'Ga0081607'}>>
number of nodes after adding dest column: 20
<Record e=<Relationship id=3 start=193 end=195 type='ABCD' properties={}>>
<Record e=<Relationship id=2 start=193 end=199 type='ABCD' properties={}>>
<Record e=<Relationship id=1 start=193 end=198 type='ABCD' properties={}>>
<Record e=<Relationship id=0 start=193 end=197 type='ABCD' properties={}>>
<Record e=<Relationship id=7 start=194 end=203 type='ABCD' properties={}>>
<Record e=<Relationship id=6 start=194 end=202 type='ABCD' properties={}>>
<Record e=<Relationship id=5 start=194 end=201 type='ABCD' properties={}>>
<Record e=<Relationship id=4 start=194 end=200 type='ABCD' properties={}>>
<Record e=<Relationship id=18 start=195 end=211 type='ABCD' properties={}>>
<Record e=<Relationship id=16 start=195 end=210 type='ABCD' properties={}>>
<Record e=<Relationship id=17 start=195 end=198 type='ABCD' properties={}>>
<Record e=<Relationship id=15 start=195 end=197 type='ABCD' properties={}>>
<Record e=<Relationship id=14 start=195 end=209 type='ABCD' properties={}>>
<Record e=<Relationship id=13 start=195 end=193 type='ABCD' properties={}>>
<Record e=<Relationship id=12 start=195 end=208 type='ABCD' properties={}>>
<Record e=<Relationship id=11 start=195 end=207 type='ABCD' properties={}>>
<Record e=<Relationship id=10 start=195 end=206 type='ABCD' properties={}>>
<Record e=<Relationship id=9 start=195 end=205 type='ABCD' properties={}>>
<Record e=<Relationship id=8 start=195 end=204 type='ABCD' properties={}>>
<Record e=<Relationship id=19 start=196 end=212 type='ABCD' properties={}>>
number of nodes added from dest column: 20

In [25]:
# Do it in one Cypher call: for each row, MERGE the source gene, MERGE the
# target gene, then MERGE the :ABCD edge between the two.

# Begin with an empty graph.
session.run(""" MATCH (n) DETACH DELETE n """)

one_pass_query = """
    LOAD CSV WITH HEADERS FROM 
        'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'  
    AS line
    MERGE (g1:Gene {gene:line.source}) ON CREATE SET
    g1.organism=line.source_organism 
    MERGE (g2:Gene {gene:line.target}) ON CREATE SET
    g2.organism=line.target_organism 
    MERGE (g1) -[:ABCD]-> (g2); 
    """.rstrip()
result = session.run(one_pass_query)

# Print and count every node now in the graph.
result = session.run(""" match(n) return n """)
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of nodes added from source and dest column: {}'.format(num_results))

# ------------------------------------------------------------------------


<Record n=<Node id=233 labels={'Gene'} properties={'gene': 'Ga0081607_11219', 'organism': 'Ga0081607'}>>
<Record n=<Node id=234 labels={'Gene'} properties={'gene': 'Ga0081607_108214', 'organism': 'Ga0081607'}>>
<Record n=<Node id=235 labels={'Gene'} properties={'gene': 'Ga0081607_115213', 'organism': 'Ga0081607'}>>
<Record n=<Node id=236 labels={'Gene'} properties={'gene': 'Ga0081629_12073', 'organism': 'Ga0081629'}>>
<Record n=<Node id=237 labels={'Gene'} properties={'gene': 'Ga0081607_115212', 'organism': 'Ga0081607'}>>
<Record n=<Node id=238 labels={'Gene'} properties={'gene': 'Ga0081607_116221', 'organism': 'Ga0081607'}>>
<Record n=<Node id=239 labels={'Gene'} properties={'gene': 'Ga0081607_107914', 'organism': 'Ga0081607'}>>
<Record n=<Node id=240 labels={'Gene'} properties={'gene': 'Ga0081607_11235', 'organism': 'Ga0081607'}>>
<Record n=<Node id=241 labels={'Gene'} properties={'gene': 'Ga0081607_10433', 'organism': 'Ga0081607'}>>
<Record n=<Node id=242 labels={'Gene'} properties={'gene': 'Ga0081607_12364', 'organism': 'Ga0081607'}>>
<Record n=<Node id=243 labels={'Gene'} properties={'gene': 'Ga0081629_10591', 'organism': 'Ga0081629'}>>
<Record n=<Node id=244 labels={'Gene'} properties={'gene': 'Ga0081607_113519', 'organism': 'Ga0081607'}>>
<Record n=<Node id=245 labels={'Gene'} properties={'gene': 'Ga0081607_12485', 'organism': 'Ga0081607'}>>
<Record n=<Node id=246 labels={'Gene'} properties={'gene': 'Ga0081607_11288', 'organism': 'Ga0081607'}>>
<Record n=<Node id=247 labels={'Gene'} properties={'gene': 'Ga0081607_118613', 'organism': 'Ga0081607'}>>
<Record n=<Node id=248 labels={'Gene'} properties={'gene': 'Ga0081607_12308', 'organism': 'Ga0081607'}>>
<Record n=<Node id=249 labels={'Gene'} properties={'gene': 'Ga0081607_115211', 'organism': 'Ga0081607'}>>
<Record n=<Node id=250 labels={'Gene'} properties={'gene': 'Ga0081607_11521', 'organism': 'Ga0081607'}>>
<Record n=<Node id=251 labels={'Gene'} properties={'gene': 'Ga0081607_104311', 'organism': 'Ga0081607'}>>
<Record n=<Node id=252 labels={'Gene'} properties={'gene': 'Ga0081607_11876', 'organism': 'Ga0081607'}>>
number of nodes added from source and dest column: 20

What is different about this call relative to the one that didn't work before?


In [39]:
# Fill in more attributes.  Properties can go straight into the map of the
# MERGE pattern (used for g1) or be assigned with ON CREATE SET (used for g2).

# Begin with an empty graph.
session.run(""" MATCH (n) DETACH DELETE n """)

attributes_query = """
    LOAD CSV WITH HEADERS FROM 
        'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'  
    AS line
    MERGE (g1:Gene {org_gene:line.source, 
                    organism:line.source_organism, 
                    gene:line.source_gene}) 
    MERGE (g2:Gene {org_gene:line.target}) ON CREATE SET
    g2.organism=line.target_organism, g2.gene=line.target_gene 
    MERGE (g1) -[:ABCD]-> (g2); 
    """.rstrip()
result = session.run(attributes_query)

# Print and count every node now in the graph.
result = session.run(""" match(n) return n """)
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of nodes added from source and dest column: {}'.format(num_results))

# ------------------------------------------------------------------------


<Record n=<Node id=435 labels={'Gene'} properties={'gene': '11219', 'org_gene': 'Ga0081607_11219', 'organism': 'Ga0081607'}>>
<Record n=<Node id=436 labels={'Gene'} properties={'gene': '108214', 'org_gene': 'Ga0081607_108214', 'organism': 'Ga0081607'}>>
<Record n=<Node id=437 labels={'Gene'} properties={'gene': '115213', 'org_gene': 'Ga0081607_115213', 'organism': 'Ga0081607'}>>
<Record n=<Node id=438 labels={'Gene'} properties={'gene': '12073', 'org_gene': 'Ga0081629_12073', 'organism': 'Ga0081629'}>>
<Record n=<Node id=439 labels={'Gene'} properties={'gene': '115212', 'org_gene': 'Ga0081607_115212', 'organism': 'Ga0081607'}>>
<Record n=<Node id=440 labels={'Gene'} properties={'gene': '116221', 'org_gene': 'Ga0081607_116221', 'organism': 'Ga0081607'}>>
<Record n=<Node id=441 labels={'Gene'} properties={'gene': '107914', 'org_gene': 'Ga0081607_107914', 'organism': 'Ga0081607'}>>
<Record n=<Node id=442 labels={'Gene'} properties={'gene': '11235', 'org_gene': 'Ga0081607_11235', 'organism': 'Ga0081607'}>>
<Record n=<Node id=443 labels={'Gene'} properties={'gene': '10433', 'org_gene': 'Ga0081607_10433', 'organism': 'Ga0081607'}>>
<Record n=<Node id=444 labels={'Gene'} properties={'gene': '12364', 'org_gene': 'Ga0081607_12364', 'organism': 'Ga0081607'}>>
<Record n=<Node id=445 labels={'Gene'} properties={'gene': '10591', 'org_gene': 'Ga0081629_10591', 'organism': 'Ga0081629'}>>
<Record n=<Node id=446 labels={'Gene'} properties={'gene': '113519', 'org_gene': 'Ga0081607_113519', 'organism': 'Ga0081607'}>>
<Record n=<Node id=447 labels={'Gene'} properties={'gene': '12485', 'org_gene': 'Ga0081607_12485', 'organism': 'Ga0081607'}>>
<Record n=<Node id=448 labels={'Gene'} properties={'gene': '11288', 'org_gene': 'Ga0081607_11288', 'organism': 'Ga0081607'}>>
<Record n=<Node id=449 labels={'Gene'} properties={'gene': '118613', 'org_gene': 'Ga0081607_118613', 'organism': 'Ga0081607'}>>
<Record n=<Node id=450 labels={'Gene'} properties={'gene': '12308', 'org_gene': 'Ga0081607_12308', 'organism': 'Ga0081607'}>>
<Record n=<Node id=451 labels={'Gene'} properties={'gene': '115211', 'org_gene': 'Ga0081607_115211', 'organism': 'Ga0081607'}>>
<Record n=<Node id=452 labels={'Gene'} properties={'gene': '11521', 'org_gene': 'Ga0081607_11521', 'organism': 'Ga0081607'}>>
<Record n=<Node id=453 labels={'Gene'} properties={'gene': '104311', 'org_gene': 'Ga0081607_104311', 'organism': 'Ga0081607'}>>
<Record n=<Node id=454 labels={'Gene'} properties={'gene': '11876', 'org_gene': 'Ga0081607_11876', 'organism': 'Ga0081607'}>>
number of nodes added from source and dest column: 20

In [49]:
# Fill in more attributes: edges now carry `weight` and `association` from
# the row, and nodes get an extra label according to their organism.

# Begin with an empty graph.
session.run(""" MATCH (n) DETACH DELETE n """)

# Genes (all three properties in the MERGE map) plus attributed :X edges.
graph_query = """
    LOAD CSV WITH HEADERS FROM 
        'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'  
    AS line
    MERGE (g1:Gene {org_gene:line.source, 
                    organism:line.source_organism, 
                    gene:line.source_gene}) 
    MERGE (g2:Gene {org_gene:line.target,
                    organism:line.target_organism,
                    gene:line.target_gene}) 
    MERGE (g1) -[:X {weight:line.weight, association:line.association}]-> (g2); 
    """.rstrip()
result = session.run(graph_query)

# Label nodes by organism.  There might be better ways to do this...
# http://stackoverflow.com/questions/29922140/labels-on-nodes-and-relationships-from-a-csv-file
label_a_query = """
    match (n {organism:'Ga0081607'})
    set n :organism_A
    return n
""".rstrip()
result = session.run(label_a_query)
label_b_query = """
    match (n {organism:'Ga0081629'})
    set n :organism_B
    return n
""".rstrip()
result = session.run(label_b_query)

# Print and count every node now in the graph.
result = session.run(""" match(n) return n """)
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of nodes added from source and dest column: {}'.format(num_results))

# ------------------------------------------------------------------------


<Record n=<Node id=595 labels={'organism_A', 'Gene'} properties={'gene': '11219', 'org_gene': 'Ga0081607_11219', 'organism': 'Ga0081607'}>>
<Record n=<Node id=596 labels={'organism_A', 'Gene'} properties={'gene': '108214', 'org_gene': 'Ga0081607_108214', 'organism': 'Ga0081607'}>>
<Record n=<Node id=597 labels={'organism_A', 'Gene'} properties={'gene': '115213', 'org_gene': 'Ga0081607_115213', 'organism': 'Ga0081607'}>>
<Record n=<Node id=598 labels={'organism_B', 'Gene'} properties={'gene': '12073', 'org_gene': 'Ga0081629_12073', 'organism': 'Ga0081629'}>>
<Record n=<Node id=599 labels={'organism_A', 'Gene'} properties={'gene': '115212', 'org_gene': 'Ga0081607_115212', 'organism': 'Ga0081607'}>>
<Record n=<Node id=600 labels={'organism_A', 'Gene'} properties={'gene': '116221', 'org_gene': 'Ga0081607_116221', 'organism': 'Ga0081607'}>>
<Record n=<Node id=601 labels={'organism_A', 'Gene'} properties={'gene': '107914', 'org_gene': 'Ga0081607_107914', 'organism': 'Ga0081607'}>>
<Record n=<Node id=602 labels={'organism_A', 'Gene'} properties={'gene': '11235', 'org_gene': 'Ga0081607_11235', 'organism': 'Ga0081607'}>>
<Record n=<Node id=603 labels={'organism_A', 'Gene'} properties={'gene': '10433', 'org_gene': 'Ga0081607_10433', 'organism': 'Ga0081607'}>>
<Record n=<Node id=604 labels={'organism_A', 'Gene'} properties={'gene': '12364', 'org_gene': 'Ga0081607_12364', 'organism': 'Ga0081607'}>>
<Record n=<Node id=605 labels={'organism_B', 'Gene'} properties={'gene': '10591', 'org_gene': 'Ga0081629_10591', 'organism': 'Ga0081629'}>>
<Record n=<Node id=606 labels={'organism_A', 'Gene'} properties={'gene': '113519', 'org_gene': 'Ga0081607_113519', 'organism': 'Ga0081607'}>>
<Record n=<Node id=607 labels={'organism_A', 'Gene'} properties={'gene': '12485', 'org_gene': 'Ga0081607_12485', 'organism': 'Ga0081607'}>>
<Record n=<Node id=608 labels={'organism_A', 'Gene'} properties={'gene': '11288', 'org_gene': 'Ga0081607_11288', 'organism': 'Ga0081607'}>>
<Record n=<Node id=609 labels={'organism_A', 'Gene'} properties={'gene': '118613', 'org_gene': 'Ga0081607_118613', 'organism': 'Ga0081607'}>>
<Record n=<Node id=610 labels={'organism_A', 'Gene'} properties={'gene': '12308', 'org_gene': 'Ga0081607_12308', 'organism': 'Ga0081607'}>>
<Record n=<Node id=611 labels={'organism_A', 'Gene'} properties={'gene': '115211', 'org_gene': 'Ga0081607_115211', 'organism': 'Ga0081607'}>>
<Record n=<Node id=612 labels={'organism_A', 'Gene'} properties={'gene': '11521', 'org_gene': 'Ga0081607_11521', 'organism': 'Ga0081607'}>>
<Record n=<Node id=613 labels={'organism_A', 'Gene'} properties={'gene': '104311', 'org_gene': 'Ga0081607_104311', 'organism': 'Ga0081607'}>>
<Record n=<Node id=614 labels={'organism_A', 'Gene'} properties={'gene': '11876', 'org_gene': 'Ga0081607_11876', 'organism': 'Ga0081607'}>>
number of nodes added from source and dest column: 20

In [ ]:
# Fill in more attributes: gene nodes carry org_gene/organism/gene, and each
# :X edge carries the row's `weight` and `association`.

# Begin with an empty graph.
session.run(""" MATCH (n) DETACH DELETE n """)

# Merge both endpoint genes and the attributed edge in a single pass.
graph_query = """
    LOAD CSV WITH HEADERS FROM 
        'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/network_broken_20.csv'  
    AS line
    MERGE (g1:Gene {org_gene:line.source, 
                    organism:line.source_organism, 
                    gene:line.source_gene}) 
    MERGE (g2:Gene {org_gene:line.target,
                    organism:line.target_organism,
                    gene:line.target_gene}) 
    MERGE (g1) -[:X {weight:line.weight, association:line.association}]-> (g2); 
    """.rstrip()
result = session.run(graph_query)

# Print and count every node now in the graph.
result = session.run(""" match(n) return n """)
num_results = 0
for num_results, record in enumerate(result, start=1):
    print(record)
print('number of nodes added from source and dest column: {}'.format(num_results))

# -----------------------------------------------------------------------