Run & test web scrapers for assessment report directories


In [ ]:
import logging
import sys

import pandas as pd
from pandas import DataFrame, Series

# Add the parent of the current working directory to the module search path.
# NOTE(review): presumably so the `chemex` imports below resolve when the
# notebook is run from a subdirectory of the repository - confirm.
sys.path.append('..')
from chemex.project import ChemexProject
from chemex import reports
from chemex.reports import dir_gsstore, gsstore_substances, dir_techstreet_gsas, hbn_format

# Raise the verbosity of the first handler attached to the 'chemex' logger
# to DEBUG for this session. This indexes handlers[0], so it relies on a
# handler already being attached (presumably done when chemex is imported
# - confirm); with no handler attached it raises IndexError.
chemex_log = logging.getLogger('chemex')
chemex_log.handlers[0].setLevel(logging.DEBUG)

In [ ]:
# Instantiate the ChemexProject that is handed to hbn_format() in later cells.
# NOTE(review): 'reports' is presumably the project name or directory -
# confirm against chemex.project.
proj = ChemexProject('reports')

In [ ]:
# Run the dir_gsstore scraper with de-duplication requested (dedup=True).
# NOTE(review): presumably fetches the GS store report directory over the
# network, so this cell may be slow - confirm in chemex.reports.
gss = dir_gsstore(dedup=True)

In [ ]:
# Bare expression: shows the dir_gsstore result as this cell's output.
gss

In [ ]:
# Pass the dir_gsstore result through hbn_format() together with the project.
# The result is indexed by column label in the next cell, so it is presumably
# a pandas DataFrame - confirm in chemex.reports.
hbn_gss = hbn_format(gss, project=proj)

In [ ]:
# Sanity check on the formatted GS store data: flag entries whose
# 'CASRN or Material ID' is null, then tally the flags
# (True = identifier missing, False = present). The tally is the cell output.
gss_id_missing = hbn_gss['CASRN or Material ID'].isnull()
gss_id_missing.value_counts()

In [ ]:
# Run gsstore_substances() and keep its result for inspection; `subst` is not
# referenced by any other cell visible in this section.
subst = gsstore_substances()

In [ ]:
# Run the dir_techstreet_gsas scraper with its default arguments.
# NOTE(review): presumably fetches the Techstreet report directory over the
# network - confirm in chemex.reports.
tech = dir_techstreet_gsas()

In [ ]:
# Bare expression: shows the dir_techstreet_gsas result as this cell's output.
tech

In [ ]:
# Pass the dir_techstreet_gsas result through hbn_format() together with the
# project, mirroring the GS store cell earlier in the notebook.
hbn_tech = hbn_format(tech, project=proj)

In [ ]:
# Bare expression: shows the hbn_format() output for the Techstreet data as
# this cell's output.
hbn_tech

In [ ]:
# Sanity check on the formatted Techstreet data: flag entries whose
# 'CASRN or Material ID' is null, then tally the flags
# (True = identifier missing, False = present). The tally is the cell output.
tech_id_missing = hbn_tech['CASRN or Material ID'].isnull()
tech_id_missing.value_counts()