notebook.community

Edit and run



In [1]:

    
from ARXIV_171202734.chembl_repository import ChemblRepository
import json
import matplotlib.pyplot as plt
from multiprocessing import Pool
import numpy as np
import os
import pandas as pd
import rdkit
import sqlite3

%load_ext autoreload
%autoreload 2
%matplotlib inline



In [2]:

    
# Build the repository object used by the cells below. It is handed the
# absolute form of the relative 'Data' directory, so the result depends on the
# kernel's current working directory.
# NOTE(review): what ChemblRepository expects to find under this directory is
# not visible in this notebook - confirm in ARXIV_171202734.chembl_repository.
chembl_repository = ChemblRepository(os.path.abspath('Data'))



In [3]:

    
# Absolute path of the JSON "memento" file inside the 'Data' directory
# (resolved against the kernel's current working directory). The next cell
# loads progress state from this file when it exists.
memento_file_path = os.path.abspath(os.path.join('Data', 'arXiv_171202734_Memento.json'))



In [4]:

    
# Load the progress checkpoint ("memento") so processing can resume across
# kernel restarts. When no memento file exists yet, start from scratch.
memento = {}

if os.path.isfile(memento_file_path):
    # Use a context manager so the file handle is closed deterministically,
    # even if json.load raises on a malformed file.
    with open(memento_file_path) as memento_file:
        memento = json.load(memento_file)
else:
    # -1 is the initial value when nothing has been checkpointed.
    # NOTE(review): presumably this is below every real compound id, so all
    # compounds count as remaining - confirm against ChemblRepository.
    memento['last_processed_compound_id'] = -1



In [5]:

    
# Display the total compound count reported by the repository (bare last
# expression, so the notebook renders the returned value as the cell output).
chembl_repository.get_total_compound_count()









    Out[5]:





2101843



In [6]:

    
# Display how many compounds are still left to process, given the last
# processed compound id stored in the memento.
# NOTE(review): presumably counts compounds whose id is greater than the
# checkpoint value - confirm in ChemblRepository.get_remaining_compound_count.
chembl_repository.get_remaining_compound_count(memento['last_processed_compound_id'])









    Out[6]:





2101843



In [7]:

    
# TODO: Save memento back to JSON file after each processed batch!