In [1]:
from pandas import DataFrame
In [2]:
STITCH_TO_DRUGBANK = DataFrame.from_csv("./temp/chemical.sources.v5.0_DrugBank.tsv", sep='\t',
index_col=None, header=None)
STITCH_TO_DRUGBANK = STITCH_TO_DRUGBANK.rename(columns={0: 'stitch_stereo', 1: 'stitch_flat', 2: 'database',
3: 'mapped_id'})
In [3]:
stereo_to_drugbank = list(set(zip(STITCH_TO_DRUGBANK.stitch_stereo,
STITCH_TO_DRUGBANK.mapped_id)))
flat_to_drugbank = list(set(zip(STITCH_TO_DRUGBANK.stitch_flat,
STITCH_TO_DRUGBANK.mapped_id)))
In [4]:
len(flat_to_drugbank)
Out[4]:
In [5]:
unique_stereo_to_drugbank = list()
for element in stereo_to_drugbank:
if element[0] not in [i[0] for i in unique_stereo_to_drugbank]:
unique_stereo_to_drugbank.append(element)
In [7]:
prepare_for_insert = DataFrame(unique_stereo_to_drugbank, columns=['stitch_id', 'drugbank_id'])
prepare_for_insert = prepare_for_insert.set_index('stitch_id')
In [9]:
from sqlalchemy import create_engine
engine = create_engine('postgresql://postgres:postgres@localhost:32768/efktr')
prepare_for_insert.to_sql('stitch_to_drugbank', engine, if_exists='append')