In [1]:
from pandas import DataFrame

In [2]:
STITCH_TO_DRUGBANK = DataFrame.from_csv("./temp/chemical.sources.v5.0_DrugBank.tsv", sep='\t',
                                        index_col=None, header=None)
STITCH_TO_DRUGBANK = STITCH_TO_DRUGBANK.rename(columns={0: 'stitch_stereo', 1: 'stitch_flat', 2: 'database',
                                                 3: 'mapped_id'})

In [3]:
stereo_to_drugbank = list(set(zip(STITCH_TO_DRUGBANK.stitch_stereo,
             STITCH_TO_DRUGBANK.mapped_id)))
flat_to_drugbank = list(set(zip(STITCH_TO_DRUGBANK.stitch_flat,
             STITCH_TO_DRUGBANK.mapped_id)))

In [4]:
len(flat_to_drugbank)


Out[4]:
6923

In [5]:
unique_stereo_to_drugbank = list()

for element in stereo_to_drugbank:
    if element[0] not in [i[0] for i in unique_stereo_to_drugbank]:
        unique_stereo_to_drugbank.append(element)

In [7]:
prepare_for_insert = DataFrame(unique_stereo_to_drugbank, columns=['stitch_id', 'drugbank_id'])
prepare_for_insert = prepare_for_insert.set_index('stitch_id')

In [9]:
from sqlalchemy import create_engine

engine = create_engine('postgresql://postgres:postgres@localhost:32768/efktr')
prepare_for_insert.to_sql('stitch_to_drugbank', engine, if_exists='append')