In [ ]:
import os
import cPickle as pickle
# Collect the names of the pickled chunk files to merge.
# One set of files (names containing 'trial_match_') holds the trial and
# investigator matches to publications; the second set (names containing
# 'trials_') holds a lookup dict for the trial information.
data_list_trial_match = []
data_list_trials = []
# os.listdir returns entries in arbitrary order. Sort them so the chunks are
# always merged in the same order: when two chunks share a key, the value
# that survives a dict.update() merge depends on which file is read last.
data_files = sorted(os.listdir('data/matched_trial_article/'))
for f in data_files:
    if 'trial_match_' in f:
        data_list_trial_match.append(f)
    if 'trials_' in f:
        data_list_trials.append(f)
def _merge_pickled_dicts(file_names, out_path, src_dir='data/matched_trial_article/'):
    """Merge several pickled dicts into one dict and pickle the result.

    Parameters
    ----------
    file_names : iterable of str
        Names of the pickle files inside ``src_dir``; each must unpickle to
        a dict (or something ``dict.update`` accepts).
    out_path : str
        Path the merged dict is pickled to.
    src_dir : str
        Directory that contains ``file_names``.

    Chunks are merged with ``dict.update`` in the order given, so when two
    chunks share a key the value from the later file wins. The merged dict
    is local to this function and is released as soon as it has been saved,
    which keeps only one merged dict in memory at a time.
    """
    merged = {}
    for f in file_names:
        # progress output: name of the chunk currently being loaded
        print(f)
        # NOTE(security): pickle.load can execute arbitrary code; only run
        # this against chunk files produced by a trusted pipeline.
        with open(os.path.join(src_dir, f), 'rb') as chunk_file:
            merged.update(pickle.load(chunk_file))
    # the with-block guarantees the output is flushed and closed
    with open(out_path, 'wb') as out_file:
        pickle.dump(merged, out_file)


# join the trial/investigator/publication match chunks and save them
_merge_pickled_dicts(data_list_trial_match, 'data/trial_invest_pub_match_dict.pkl')
# join the 'trials_' lookup chunks and save them
_merge_pickled_dicts(data_list_trials, 'data/pub_lookup_dict.pkl')
In [1]:
import cPickle as pickle
In [3]:
# Load the investigator dict. The with-block closes the file handle as soon
# as unpickling finishes instead of leaving it to the garbage collector.
# NOTE(security): pickle.load can execute arbitrary code; only load files
# from a trusted source.
with open('data/investigator_dict.pkl', 'rb') as investigator_file:
    investiagtor_dict = pickle.load(investigator_file)
In [9]:
# Invert investiagtor_dict into an id -> investigator lookup.
# Each value of investiagtor_dict is indexed with 'id' to get an iterable of
# ids; every one of those ids is mapped back to the investigator key it was
# found under. If the same id occurs under several investigators, the one
# iterated last wins.
id_investigator_lookup = {
    investigator_id: investigator
    for investigator, record in investiagtor_dict.items()
    for investigator_id in record['id']
}
In [18]:
# save dict; the with-block guarantees the pickle is flushed to disk and the
# handle is closed even if pickling raises
with open('data/id_investigator_lookup.pkl', 'wb') as lookup_file:
    pickle.dump(id_investigator_lookup, lookup_file)
In [15]:
# Sanity check: display ten of the (id, investigator) pairs.
# list(...) keeps the slice valid even when .items() returns a view rather
# than a list (dict views cannot be subscripted).
list(id_investigator_lookup.items())[:10]
Out[15]:
In [ ]: