In [ ]:
import os
from getpass import getpass

import pandas as pd
from neo4j.v1 import GraphDatabase, basic_auth

In [ ]:
# Open a Bolt connection to the local Neo4j server and start a session.
# The credentials are read from the environment (NEO4J_USER / NEO4J_PASSWORD)
# so the password is not stored in the notebook; when NEO4J_PASSWORD is not
# set, the user is prompted for it instead.
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")
driver = GraphDatabase.driver("bolt://localhost/",
                              auth=basic_auth(neo4j_user, neo4j_password))
session = driver.session()

Useful Commands:


In [ ]:
# Relative path of the tab-separated file that is read into pandas below.
filename = (
    '../data/Methylobacter--Methylotenera/'
    'Methylobacter-123--Methylotenera_mobilis-49_network--some_top_genes--1268_rows.tsv'
)

In [ ]:
# Echo the path so it appears in the cell output.
filename

In [ ]:
# Shell escape: list the contents of the data directory.
! ls ../data/Methylobacter--Methylotenera

In [ ]:
# Parse the tab-delimited file into a DataFrame (header row taken from the file).
df = pd.read_csv(filename, delimiter='\t')

In [ ]:
# Peek at the first four rows.
df.head(4)

In [ ]:
# (rows, columns) of the loaded table.
df.shape

In [ ]:
# Expected node count: the number of distinct locus tags that occur in
# either the source or the target column.
locus_tags = set()
for column in ('source_locus_tag', 'target_locus_tag'):
    locus_tags.update(df[column].unique())
nodes = len(locus_tags)
print("{} nodes are expected".format(nodes))

In [ ]:
# Collect every organism name that appears on either end of a row.
# The rest of the notebook works with two organisms, so report (without
# stopping the notebook) when the file contains a different number of names.
org_names = (set(df['source_organism_name'].unique()) |
             set(df['target_organism_name'].unique()))
count = len(org_names)
if count != 2:
    print("Expected exactly 2 organism names, but we have {}".format(count))
    print(org_names)

In [ ]:
# Show the distinct organism names collected above.
org_names

In [ ]:
# Show the first two rows of the table again for reference.
df.head(2)

In [ ]:
%matplotlib inline
df.weight.hist()

In [ ]:
# Rebuild the graph from scratch: delete everything, then load the TSV.

# wipe everything first:
session.run(""" MATCH (n) DETACH DELETE n """)

# Let the server count the nodes instead of streaming every node back over
# Bolt just to count the records in Python.
count_nodes = """ MATCH (n) RETURN count(n) AS num_nodes """

num_results = 0
for record in session.run(count_nodes):
    num_results = record["num_nodes"]
print('number of nodes before starting: {}'.format(num_results))

# One :Gene node is merged per distinct (locus_tag, organism, gene,
# gene_product) combination, and one :X relationship per source/target pair.
# LOAD CSV hands every field over as a string, so the weight is converted with
# toFloat() to store it as a number; `association` stays a string because the
# later cells match on "positive" / "negative".
# Python turns the '\t' below into a literal tab before Cypher sees it.
# NOTE(review): this loads the GitHub copy of the TSV, while pandas read the
# local copy in `filename` -- confirm both are the same revision.
command = """
    LOAD CSV WITH HEADERS FROM
        'https://raw.githubusercontent.com/JanetMatsen/Neo4j_meta4/master/data/Methylobacter--Methylotenera/Methylobacter-123--Methylotenera_mobilis-49_network--some_top_genes--1268_rows.tsv'
    AS line FIELDTERMINATOR '\t'
    MERGE (g1:Gene {locus_tag:line.source_locus_tag,
                    organism:line.source_organism_name,
                    gene:line.source_gene,
                    gene_product:line.source_gene_product})
    MERGE (g2:Gene {locus_tag:line.target_locus_tag,
                    organism:line.target_organism_name,
                    gene:line.target_gene,
                    gene_product:line.target_gene_product})
    MERGE (g1) -[:X {weight:toFloat(line.weight), association:line.association}]-> (g2);
    """
result = session.run(command.rstrip())

# Running the next statement on the same session forces the load to finish.
result = session.run(count_nodes)
for record in result:
    num_results = record["num_nodes"]
print('number of nodes after loading: {}'.format(num_results))

In [ ]:
# Count all nodes in the graph; count(*) comes back as a single record.
command = """
MATCH (n)
RETURN count(*)
"""
# rstrip() drops the trailing newline of the triple-quoted query.
result = session.run(command.rstrip())

for record in result:
    print(record)

In [ ]:
# Give every relationship a type that reflects its `association` property.
# A relationship's type cannot be changed in place, so for each match a new
# relationship of the wanted type is created, the properties of the old one
# are copied onto it, and the old one is deleted.
retype_query = """
    match (n) -[e1 {association:"%s"}]-> (m)
    CREATE (n) -[e2:%s]-> (m)
    SET e2 = e1
    WITH e1
    DELETE e1
"""
for association, rel_type in (("positive", "pos"), ("negative", "neg")):
    command = retype_query % (association, rel_type)
    result = session.run(command.rstrip())

In [ ]:
# Count all nodes again; the preceding cell only replaces relationships,
# so this shows whether the node total is unaffected.
command = """
MATCH (n)
RETURN count(*)
"""
result = session.run(command.rstrip())

for record in result:
    print(record)

In [ ]:
# Add a per-organism label to every node, chosen by its `organism` property.
# There might be better ways to do this...
# http://stackoverflow.com/questions/29922140/labels-on-nodes-and-relationships-from-a-csv-file
# Organism names in the data:
# 'Methylobacter-123 (UID203)', 'Methylotenera mobilis-49 (UID203)'
label_query = """
    match (n {organism:'%s'})
    set n :%s
    return n
"""
organism_labels = (
    ('Methylotenera mobilis-49 (UID203)', 'Methylotenera_mobilis_49'),
    ('Methylobacter-123 (UID203)', 'Methylobacter_123'),
)
for organism, label in organism_labels:
    command = label_query % (organism, label)
    result = session.run(command.rstrip())

In [ ]:
# Get a sub-graph with some negative interactions for my progress report figure.

In [ ]:
# First step toward finding negative edges: count all relationships.
# The `--` pattern has no direction, so each relationship is matched once per
# orientation and therefore gets counted twice!
command = """
MATCH () -- ()
RETURN count(*)
"""
result = session.run(command.rstrip())
for record in result:
    print(record)

In [ ]:
# Directed pattern: each relationship is matched exactly once.
command = """
MATCH () --> ()
RETURN count(*)
"""
result = session.run(command.rstrip())
for record in result:
    print(record)

In [ ]:
# Count only the relationships of type :pos.
command = """
MATCH () -[r:pos]-> ()
RETURN count(*)
"""
result = session.run(command.rstrip())
for record in result:
    print(record)

In [ ]:
# Count only the relationships of type :neg.
command = """
MATCH () -[r:neg]-> ()
RETURN count(*)
"""
result = session.run(command.rstrip())
for record in result:
    print(record)

In [ ]:
# Directed relationship count once more, this time with the end nodes bound
# to the variables a and b.
command = """
MATCH (a) --> (b)
RETURN count(*)
"""
result = session.run(command.rstrip())
for record in result:
    print(record)