In [3]:
import cPickle as pickle
import pandas as pd
from sqlalchemy import create_engine
import db_connect
from collections import defaultdict, Counter
from sqlalchemy.sql import func, select, and_, or_, not_, desc
from db_tables import metadata, TrialPublications
In [34]:
# Load the pickled trial_scores mapping. The context manager closes the file
# handle as soon as loading finishes instead of leaving it to the garbage collector.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('trial_scores.pkl', 'rb') as scores_file:
    trial_scores = pickle.load(scores_file)
In [35]:
# Tally how many entries carry each distinct score value (shown as the cell output).
Counter(score for score in trial_scores.values())
Out[35]:
In [11]:
# Load the pickled trial-link mapping; the context manager closes the file promptly.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('pubmed_trial_links.pkl', 'rb') as links_file:
    all_trial_links = pickle.load(links_file)

# Keep only the entries whose value is not the empty string.
trial_links = {key: value for key, value in all_trial_links.items() if value != ''}
In [41]:
# Open the database engine: the URL is the MySQL/PyMySQL scheme prefix followed by
# the connection string held in db_connect.conn. Then issue the table creation for
# everything registered on `metadata`.
db_url = 'mysql+pymysql://' + db_connect.conn
engine = create_engine(db_url)
metadata.create_all(engine)
In [42]:
# Fetch every row of the TrialPublications table and copy its first six columns
# into a plain tuple, so the rows can be sliced and rebuilt locally before updating.
publication_rows = engine.execute(select([TrialPublications]))
trial_pubs_table = [tuple(row[idx] for idx in range(6)) for row in publication_rows]
In [63]:
# Set of (link value, link key) pairs taken from trial_links. A set gives
# constant-time membership tests (the previous zip() list was rescanned for every
# row) and, unlike a bare zip() on Python 3, is not exhausted by the first `in` test.
# NOTE(review): assumes the values stored in trial_links are hashable - confirm.
trial_link_lookup = {(value, key) for key, value in trial_links.items()}

# Value written to the sixth column for a pair found in trial_scores:
# score 4 -> 0.3, score 5 -> 0.6, any other score -> 0.9.
score_to_value = {4: 0.3, 5: 0.6}
default_scored_value = 0.9

# Rebuild the publication table, replacing the sixth column where a rule applies.
new_pub_table = []
for trial in trial_pubs_table:
    pair = (trial[0], trial[1])
    if pair in trial_link_lookup:
        # Pair is present in the link lookup: sixth column becomes 1.0.
        new_pub_table.append(trial[:5] + (1.0,))
    elif pair in trial_scores:
        # Pair has an entry in trial_scores: translate the score via the table above.
        new_score = score_to_value.get(trial_scores[pair], default_scored_value)
        new_pub_table.append(trial[:5] + (new_score,))
    elif trial[5] == 0.0:
        # Neither linked nor scored, and the stored value is 0.0: raise it to 0.3.
        new_pub_table.append(trial[:5] + (0.3,))
    else:
        # No rule applies: carry the row over unchanged.
        new_pub_table.append(trial)
In [64]:
import csv

# Dump the rebuilt publication rows to a tab-delimited text file, one row per record.
with open('data/new_pub_table.txt', 'w') as out_handle:
    tsv_writer = csv.writer(out_handle, dialect='excel-tab')
    for record in new_pub_table:
        tsv_writer.writerow(record)
In [ ]: