In [1]:
from ARXIV_171202734.chembl_repository import ChemblRepository
import json
import matplotlib.pyplot as plt
from multiprocessing import Pool
import numpy as np
import os
import pandas as pd
import rdkit
import sqlite3
%load_ext autoreload
%autoreload 2
%matplotlib inline
In [2]:
# Build the repository object, handing it the absolute path of the local
# 'Data' directory (resolved relative to the current working directory).
chembl_repository = ChemblRepository(
    os.path.abspath('Data'),
)
In [3]:
# Absolute path of the JSON file from which the memento dict is loaded
# (resolved relative to the current working directory).
memento_file_path = os.path.abspath(
    os.path.join('Data', 'arXiv_171202734_Memento.json')
)
In [4]:
# Load the memento dict from `memento_file_path` when that file exists;
# otherwise start from a fresh dict with 'last_processed_compound_id' = -1.
# NOTE(review): -1 presumably means "no compound processed yet" — confirm
# against how ChemblRepository interprets this id.
memento = {}
if os.path.isfile(memento_file_path):
    # Use a context manager so the file handle is closed deterministically
    # (the bare `json.load(open(...))` form leaves it open until GC).
    with open(memento_file_path) as memento_file:
        memento = json.load(memento_file)
else:
    memento['last_processed_compound_id'] = -1
In [5]:
# Display the value returned by the repository's get_total_compound_count()
# as this cell's output.
chembl_repository.get_total_compound_count()
Out[5]:
In [6]:
# Display the repository's remaining-compound count, computed from the
# last processed compound id stored in the memento.
chembl_repository.get_remaining_compound_count(
    memento['last_processed_compound_id']
)
Out[6]:
In [7]:
# TODO: Save memento back to JSON file after each processed batch!