Build Chemical ID Typeahead Index
In [17]:
import sqlite3
import json
In [18]:
# Path of the SQLite database file that the connection below is opened on.
DATABASE = "data.sqlite"
In [19]:
# Open the SQLite database and create the cursor that the later cells run
# their queries through.
# NOTE(review): no conn.close() is visible in this notebook — confirm the
# connection is released elsewhere or when the kernel exits.
conn = sqlite3.connect(DATABASE)
cursor = conn.cursor()
Queries
In [20]:
# Returns a single row holding the largest id in the interactions table.
QUERY_MAX_ID = "SELECT id FROM interactions ORDER BY id DESC LIMIT 1"
# Returns the id and mention text of both participants of one interaction.
# The "{}" placeholder is filled in with str.format before execution.
QUERY_INTERACTION = "SELECT geneids1, mention1, geneids2, mention2 FROM interactions WHERE id = {}"
In [21]:
# Largest id currently stored in the interactions table; the id walk in the
# next cell uses it as its upper bound.
# fetchone() returns None when the table has no rows, so fall back to 0
# instead of failing on None[0]; with 0 the walk finds nothing to process.
max_id_row = cursor.execute(QUERY_MAX_ID).fetchone()
max_id = max_id_row[0] if max_id_row is not None else 0
Step through every interaction.
In [22]:
# Build the typeahead index: for every interaction row, take each participant
# whose id starts with "C" and record that id under the participant's mention
# text. Participants with any other id prefix are ignored.
# NOTE(review): "C"-prefixed ids are presumably chemical identifiers (the
# result is written to chemical_id.json) — confirm against the data source.

# mention text -> list of distinct "C"-prefixed ids, in first-seen order
typeahead = {}
# Not populated in this cell; kept because a later cell prints it.
distribution = {}

# Filling the "{}" slot with "?" yields a parameterised statement, so sqlite3
# binds the id itself and the SQL text stays constant across all lookups.
interaction_sql = QUERY_INTERACTION.format("?")

# Probe ids 1..max_id inclusive. Gaps in the id sequence return no row and
# are skipped; nothing exists past max_id, so that id is not queried.
for row_id in range(1, max_id + 1):
    row = cursor.execute(interaction_sql, (row_id,)).fetchone()
    if row is None:
        continue

    id1, symbol1, id2, symbol2 = row

    # Each side is handled independently, so a row with no "C"-prefixed id
    # contributes nothing. startswith() also treats an empty id string as
    # "not a match" instead of raising IndexError.
    for identifier, symbol in ((id1, symbol1), (id2, symbol2)):
        if not identifier.startswith("C"):
            continue
        known_ids = typeahead.setdefault(symbol, [])
        if identifier not in known_ids:
            known_ids.append(identifier)

# JSON-ready records, one per mention text, in first-seen order.
final = [
    {"symbol": symbol, "values": ids}
    for symbol, ids in typeahead.items()
]
In [23]:
# Serialise the collected records and store them in chemical_id.json,
# replacing any previous contents of that file.
with open("chemical_id.json", "w+") as handle:
    serialized = json.dumps(final)
    handle.write(serialized)
In [24]:
# NOTE(review): distribution is initialised to an empty dict in the cell above
# and no visible code adds to it, so this appears to print {} — confirm
# whether it is a leftover from another notebook.
print(distribution)
In [ ]: