In [ ]:
from neo4j.v1 import GraphDatabase, basic_auth
import pandas as pd
In [ ]:
# Open a Bolt connection to the local Neo4j server and start one session
# that the rest of the notebook reuses.
# NOTE(review): the credentials are hard-coded here; consider moving them out
# of the notebook before sharing it.
credentials = basic_auth("neo4j", "theraturkey")
driver = GraphDatabase.driver("bolt://localhost/", auth=credentials)
session = driver.session()
Useful Commands:
In [ ]:
# Relative path to the tab-separated network file that is read into pandas below.
filename = '../data/Methylobacter--Methylotenera/Methylobacter-123--Methylotenera_mobilis-49_network--some_top_genes--1268_rows.tsv'
In [ ]:
# Echo the path so it is visible in the cell output.
filename
In [ ]:
# Shell escape: list the data directory to confirm the input file is present.
! ls ../data/Methylobacter--Methylotenera
In [ ]:
# Load the tab-separated network table into a DataFrame.
df = pd.read_csv(filename, sep='\t')
In [ ]:
# Preview the first four rows.
df.head(4)
In [ ]:
# (rows, columns) of the loaded table.
df.shape
In [ ]:
# Expected node count: the number of distinct locus tags that appear as either
# the source or the target end of an edge.
source_tags = df['source_locus_tag'].drop_duplicates().tolist()
target_tags = df['target_locus_tag'].drop_duplicates().tolist()
nodes = len(set(source_tags).union(target_tags))
print("{} nodes are expected".format(nodes))
In [ ]:
# Collect every organism name that appears on either end of an edge.
# This file is expected to describe exactly two organisms.
org_names = set(df['source_organism_name'].drop_duplicates().tolist() +
                df['target_organism_name'].drop_duplicates().tolist())
count = len(org_names)
if count != 2:
    print("Expected exactly 2 organism names, but we have {}".format(count))
    # NOTE(review): assumed to belong to the mismatch branch (the export lost
    # the original indentation); it shows the offending names.
    print(org_names)
In [ ]:
# Show the set of organism names found in the table.
org_names
In [ ]:
# Preview the first two rows.
df.head(2)
In [ ]:
# Render matplotlib figures inside the notebook, then plot a histogram of the
# `weight` column.
%matplotlib inline
df.weight.hist()
In [ ]:
# Rebuild the graph from scratch:
#   1. wipe every node and relationship,
#   2. report how many nodes remain (should be 0),
#   3. LOAD CSV the network file, creating :Gene nodes and :X relationships,
#   4. report how many nodes were created.

# wipe everything first:
session.run(""" MATCH (n) DETACH DELETE n """)

# Let the server do the counting rather than streaming every node back to the
# client just to tally them.
count_query = """ MATCH (n) RETURN count(n) AS n_nodes """

num_results = 0
for record in session.run(count_query):
    num_results = record["n_nodes"]
print('number of nodes before starting: {}'.format(num_results))

# LOAD CSV hands every field over as a string, so the edge weight is converted
# with toFloat(); otherwise it would be stored as text and numeric comparisons
# on it would not behave as expected.
command = """
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/Methylobacter--Methylotenera/Methylobacter-123--Methylotenera_mobilis-49_network--some_top_genes--1268_rows.tsv'
AS line FIELDTERMINATOR '\t'
MERGE (g1:Gene {locus_tag:line.source_locus_tag,
organism:line.source_organism_name,
gene:line.source_gene,
gene_product:line.source_gene_product})
MERGE (g2:Gene {locus_tag:line.target_locus_tag,
organism:line.target_organism_name,
gene:line.target_gene,
gene_product:line.target_gene_product})
MERGE (g1) -[:X {weight:toFloat(line.weight), association:line.association}]-> (g2);
"""
result = session.run(command.rstrip())

result = session.run(count_query)
num_results = 0
for record in result:
    num_results = record["n_nodes"]
print('number of nodes after loading: {}'.format(num_results))
In [ ]:
# Sanity check: how many nodes does the graph hold right now?
node_count_query = """
MATCH (n)
RETURN count(*)
"""
for row in session.run(node_count_query.rstrip()):
    print(row)
In [ ]:
# Give each edge a relationship type that reflects its association: create a
# new :pos / :neg edge between the same endpoints, copy the old edge's
# properties onto it, then delete the old edge.
# Relationship types cannot be supplied as query parameters, so the query text
# is generated from a template.
relabel_template = """
match (n) -[e1 {{association:"{association}"}}]-> (m)
CREATE (n) -[e2:{rel_type}]-> (m)
SET e2 = e1
WITH e1
DELETE e1
"""
for association, rel_type in (("positive", "pos"), ("negative", "neg")):
    command = relabel_template.format(association=association,
                                      rel_type=rel_type)
    result = session.run(command.rstrip())
In [ ]:
# Re-count the nodes to compare against the count taken before the edges
# were rewritten.
node_count_query = """
MATCH (n)
RETURN count(*)
"""
count_rows = session.run(node_count_query.rstrip())
for row in count_rows:
    print(row)
In [ ]:
# There might be better ways to do this...
# http://stackoverflow.com/questions/29922140/labels-on-nodes-and-relationships-from-a-csv-file
# Tag every node with a label derived from its `organism` property.
label_template = """
match (n {{organism:'{organism}'}})
set n :{label}
return n
"""
organism_labels = (
    ('Methylotenera mobilis-49 (UID203)', 'Methylotenera_mobilis_49'),
    ('Methylobacter-123 (UID203)', 'Methylobacter_123'),
)
for organism, label in organism_labels:
    command = label_template.format(organism=organism, label=label)
    result = session.run(command.rstrip())
In [ ]:
# Get a sub-graph with some negative interactions for my progress report figure.
In [ ]:
# First step towards a sub-graph with negative edges: count relationships
# using an undirected pattern.
# If you don't specify a direction, each edge gets counted twice!
undirected_edge_query = """
MATCH () -- ()
RETURN count(*)
"""
for row in session.run(undirected_edge_query.rstrip()):
    print(row)
In [ ]:
# Same count with a directed pattern.
directed_edge_query = """
MATCH () --> ()
RETURN count(*)
"""
edge_rows = session.run(directed_edge_query.rstrip())
for row in edge_rows:
    print(row)
In [ ]:
# Count the relationships of type :pos.
pos_edge_query = """
MATCH () -[r:pos]-> ()
RETURN count(*)
"""
for row in session.run(pos_edge_query.rstrip()):
    print(row)
In [ ]:
# Count the relationships of type :neg.
neg_edge_query = """
MATCH () -[r:neg]-> ()
RETURN count(*)
"""
neg_rows = session.run(neg_edge_query.rstrip())
for row in neg_rows:
    print(row)
In [ ]:
# Directed relationship count again, this time with the endpoints bound to
# the variables a and b.
named_edge_query = """
MATCH (a) --> (b)
RETURN count(*)
"""
for row in session.run(named_edge_query.rstrip()):
    print(row)