Build Adjacency Matrix


In [17]:
import sqlite3
import json

In [18]:
# Path of the SQLite database file that this notebook opens with sqlite3.connect.
DATABASE = "data.sqlite"

In [19]:
# Open the database and create a single cursor; the later cells run every
# query through this one cursor.
# NOTE(review): no visible cell closes `conn` — confirm whether it should be
# closed once the export is finished.
conn = sqlite3.connect(DATABASE)
cursor = conn.cursor()

Queries


In [20]:
# Fetches the largest id in the interactions table: rows are sorted by id,
# highest first, and only the first one is returned.
QUERY_MAX_ID = "SELECT id FROM interactions ORDER BY id DESC LIMIT 1"

# Fetches both id / mention pairs of a single interaction. The {} placeholder
# is filled in with the row id via str.format before the query is executed.
QUERY_INTERACTION = "SELECT geneids1, mention1, geneids2, mention2 FROM interactions WHERE id = {}"

In [21]:
# Highest interaction id, used as the upper bound of the scan over the table.
# The query yields no row at all when the table is empty, so fetchone()
# returns None; treat that as "nothing to scan" instead of raising TypeError.
max_id_row = cursor.execute(QUERY_MAX_ID).fetchone()
max_id = max_id_row[0] if max_id_row is not None else 0

Step through every interaction.

  1. Skip the interaction unless geneids1 or geneids2 starts with 'C'.
  2. For each side whose id starts with 'C': if its mention is not in typeahead - insert it as [].
  3. If the id is not in typeahead[mention] - append it.
  4. Convert typeahead into a list of {"symbol": mention, "values": ids} records.

In [22]:
# Maps a mention (symbol text) to the distinct 'C'-prefixed ids seen with it,
# in first-seen order.
typeahead = {}
# Printed by a later cell; nothing in this cell adds entries to it.
distribution = {}


def _is_chemical(entity_id):
    """Return True when entity_id is non-empty and starts with 'C'.

    Empty or NULL ids count as "not chemical" rather than raising.
    """
    return bool(entity_id) and entity_id.startswith("C")


# Visit ids 1..max_id inclusive. (The previous while-loop incremented before
# querying and therefore issued one extra lookup for id max_id + 1.)
for row_id in range(1, max_id + 1):
    row = cursor.execute(QUERY_INTERACTION.format(row_id)).fetchone()

    # No row stored under this id (e.g. a gap in the id sequence) — skip it.
    if row is None:
        continue

    id1, symbol1, id2, symbol2 = row

    # Only sides whose id starts with 'C' are recorded, so an interaction in
    # which neither id starts with 'C' contributes nothing.
    for entity_id, symbol in ((id1, symbol1), (id2, symbol2)):
        if not _is_chemical(entity_id):
            continue
        known_ids = typeahead.setdefault(symbol, [])
        if entity_id not in known_ids:
            known_ids.append(entity_id)

# Flatten the mapping into the list-of-records layout that gets written out.
final = [
    {"symbol": symbol, "values": known_ids}
    for symbol, known_ids in typeahead.items()
]

In [23]:
# Serialise the typeahead records straight to disk. Plain "w" is sufficient
# because the file is only written, never read back through this handle, and
# json.dump streams to the file without building the whole string first.
with open("chemical_id.json", "w") as out_file:
    json.dump(final, out_file)

In [24]:
# `distribution` is initialised to {} in the loop cell and never populated
# there, so this prints an empty dict.
print(distribution)


{}

In [ ]: