In [1]:
from ARXIV_171202734.chembl_repository import ChemblRepository
import json
import matplotlib.pyplot as plt
from multiprocessing import Pool
import numpy as np
import os
import pandas as pd
import rdkit
import sqlite3

%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
# Create the ChEMBL repository object, handing it the absolute path of the
# 'Data' directory (resolved against the current working directory).
chembl_repository = ChemblRepository(os.path.abspath('Data'))

In [3]:
# Absolute path of the JSON memento file kept inside the 'Data' directory.
memento_file_path = os.path.join(
    os.path.abspath('Data'),
    'arXiv_171202734_Memento.json',
)

In [4]:
# Load the memento (progress record) from disk when it exists; otherwise start
# with a fresh one whose 'last_processed_compound_id' is -1.
memento = {}

if os.path.isfile(memento_file_path):
    # Read through a context manager so the file handle is always closed,
    # even if the JSON is malformed and json.load raises.
    with open(memento_file_path) as memento_file:
        memento = json.load(memento_file)
else:
    memento['last_processed_compound_id'] = -1

In [5]:
# Show the repository's total compound count as this cell's output.
chembl_repository.get_total_compound_count()


Out[5]:
2101843

In [6]:
# Show the count the repository reports for the last processed compound id held
# in the memento (-1 when no memento file was found) -- presumably the number
# of compounds still left to process; confirm against ChemblRepository.
chembl_repository.get_remaining_compound_count(memento['last_processed_compound_id'])


Out[6]:
2101843

In [7]:
# TODO: Save memento back to JSON file after each processed batch!