Build Adjacency Matrix



In [17]:

    
import sqlite3
import json



In [18]:

    
# Path of the SQLite database file passed to sqlite3.connect() below.
# It is a relative path, so it is resolved against the kernel's working directory.
DATABASE = "data.sqlite"



In [19]:

    
# Open the SQLite database named by DATABASE and create the cursor that the
# cells below use to run their queries.
# NOTE(review): no conn.close() is visible in this notebook - confirm the
# connection is released elsewhere or close it once the export is done.
conn = sqlite3.connect(DATABASE)
cursor = conn.cursor()

Queries



In [20]:

    
# Selects the largest `id` in `interactions`: rows are ordered by id descending
# and only the first one is returned.
QUERY_MAX_ID = "SELECT id FROM interactions ORDER BY id DESC LIMIT 1"

# Selects the id/mention columns for both sides of a single interaction row.
# The `{}` placeholder is filled in with a row id via str.format() by the loop
# cell below before the statement is executed.
QUERY_INTERACTION = "SELECT geneids1, mention1, geneids2, mention2 FROM interactions WHERE id = {}"



In [21]:

    
# Largest interaction id; the loop cell below uses it as the upper bound of its id scan.
# NOTE(review): fetchone() returns None when the query yields no row, so this
# line raises TypeError if `interactions` is empty.
max_id = cursor.execute(QUERY_MAX_ID).fetchone()[0]

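Step through every interaction row, from id 1 up to the maximum id, skipping ids that have no row.

For each side of the interaction whose id starts with 'C':
If its mention is not in typeahead - insert it as [].
If the id is not in typeahead[mention] - append it.
Finally, convert typeahead into a list of {"symbol", "values"} records.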



In [22]:

    
def _starts_with_c(identifier):
    """Return True when `identifier` is a string whose first character is 'C'.

    Non-string values (e.g. None from a NULL column) and empty strings yield
    False instead of raising, so malformed rows are simply ignored.
    """
    return isinstance(identifier, str) and identifier.startswith("C")


# mention -> list of the distinct 'C'-prefixed ids seen with it, in first-seen order.
# (A later cell writes the result to chemical_id.json, so these are presumably
# chemical identifiers - confirm against the data source.)
typeahead = {}

# Never populated in this cell; kept defined because a later cell prints it.
distribution = {}

# Visit ids 1..max_id inclusive. Ids are looked up one at a time, so gaps in
# the id sequence come back as None and are skipped.
for row_id in range(1, max_id + 1):
    # row_id is an int produced by range(), so formatting it into the SQL text
    # cannot inject anything.
    row = cursor.execute(QUERY_INTERACTION.format(row_id)).fetchone()
    if row is None:
        continue

    id1, symbol1, id2, symbol2 = row

    # Either side of the interaction may carry a 'C'-prefixed id; sides without
    # one contribute nothing, so rows with no such id are skipped implicitly.
    for identifier, symbol in ((id1, symbol1), (id2, symbol2)):
        if not _starts_with_c(identifier):
            continue
        known_ids = typeahead.setdefault(symbol, [])
        if identifier not in known_ids:
            known_ids.append(identifier)

# Flatten the mapping into the list-of-records shape that is serialised to JSON.
final = [{"symbol": symbol, "values": ids} for symbol, ids in typeahead.items()]



In [23]:

    
# Serialise the collected symbol/ids records straight onto the output file,
# replacing any previous contents of chemical_id.json.
with open("chemical_id.json", "w+") as handle:
    json.dump(final, handle)



In [24]:

    
# `distribution` is initialised to {} in the loop cell and never filled in,
# so this always prints an empty dict.
print(distribution)

{}



In [ ]: