In [16]:
import os
import urllib
import urllib.request
import zipfile

import pandas as pd
In [18]:
## Download and unzip databases from KGS to a temp folder
# Each archive is fetched with urlretrieve (which stores it in a temporary
# file and returns that file's path) and then unpacked into ./temp.
os.makedirs('./temp', exist_ok=True)
raspath = 'http://www.kgs.ku.edu/PRS/Ora_Archive/ks_elog_scans.zip'
laspath = 'http://www.kgs.ku.edu/PRS/Ora_Archive/ks_las_files.zip'
try:
    ras_handle, _ = urllib.request.urlretrieve(raspath)
    las_handle, _ = urllib.request.urlretrieve(laspath)
    with zipfile.ZipFile(ras_handle, 'r') as zip_ras:
        zip_ras.extractall('temp')
    with zipfile.ZipFile(las_handle, 'r') as zip_las:
        zip_las.extractall('temp')
finally:
    # Remove the temporary downloads left behind by urlretrieve, even when
    # a download or extraction step fails part-way through.
    urllib.request.urlcleanup()
Elogs is a collection of KGS TIFF files; las is a collection of KGS .las files.
In [57]:
# Read the two index tables that were unpacked into ./temp.
# NOTE(review): parse_dates=True asks pandas to parse the *index* as dates,
# and no index_col is passed here -- confirm which column is meant to carry
# the dates that later cells slice on.
elogs = pd.read_csv(filepath_or_buffer='temp/ks_elog_scans.txt', parse_dates=True)
lases = pd.read_csv(filepath_or_buffer='temp/ks_las_files.txt', parse_dates=True)
In [79]:
# True for every elog row whose KID also appears among the las KGS_IDs.
elogs_mask = elogs['KID'].isin(lases['KGS_ID'])
# Elog rows that are present in both tables.
both_elog = elogs.loc[elogs_mask]
# Keep a single row per KID.
both_elog_unique = both_elog.drop_duplicates(subset='KID')
print('How many logs fall in both and have unique KGS_ID? ', len(both_elog_unique), sep='')
# NOTE(review): this label slice is applied to the frame's index, so it
# presumably expects a date-like index -- confirm how the index is set.
both_elog_unique_new = both_elog_unique.loc[slice('2000-1-1', '2017-1-1')]
both_elog_unique_new['KID']
Out[79]:
In [59]:
# Reverse direction: start from the las table and keep the rows whose KGS_ID
# also appears among the elog KIDs.
lases_mask = lases['KGS_ID'].isin(elogs['KID'])  # mask over las rows
both_lases = lases[lases_mask]  # las rows that fall in both tables
both_lases_unique = both_lases.drop_duplicates('KGS_ID')  # one row per KGS_ID
print('Other direction -- how many logs fall in both and have unique KGS_ID? '+str(both_lases_unique.shape[0]))
# The number of shared unique IDs should not depend on which table we start from.
if both_elog_unique.shape[0] == both_lases_unique.shape[0]:
    print('Same in both directions.')
In [60]:
# Flag every elog row whose KID occurs more than once; keep=False marks all
# copies of a repeated KID, not only the second and later occurrences.
elogs_hasdup_bool = elogs.duplicated('KID', keep=False)
# `~` is the boolean-negation operator for masks.
elogs_nodup = elogs[~elogs_hasdup_bool]
# Only the last expression of a cell is displayed, so print the shape explicitly.
print(elogs_nodup.shape)  # How many logs have no duplicate?
# Sanity check: dropping duplicates again must not remove any further rows.
elogs_nodup.drop_duplicates('KID').shape == elogs_nodup.shape
Out[60]:
In [61]:
# Flag every las row whose KGS_ID occurs more than once; keep=False marks all
# copies of a repeated KGS_ID, not only the second and later occurrences.
lases_hasdup_bool = lases.duplicated('KGS_ID', keep=False)
# `~` is the boolean-negation operator for masks.
lases_nodup = lases[~lases_hasdup_bool]
# Sanity check: every remaining KGS_ID must now be unique.
assert lases_nodup['KGS_ID'].is_unique
lases_nodup.shape  # How many logs have no duplicate?
Out[61]:
In [62]:
# Same two-way comparison as before, restricted to the duplicate-free tables.

# Elog side: rows whose KID is also present among the duplicate-free las rows.
elogs_nodup_mask = elogs_nodup['KID'].isin(lases_nodup['KGS_ID'])
both_elog_nodup = elogs_nodup.loc[elogs_nodup_mask]
print('How many logs fall in both and have unique KGS_ID? ', len(both_elog_nodup), sep='')

# Las side: rows whose KGS_ID is also present among the duplicate-free elog rows.
lases_nodup_mask = lases_nodup['KGS_ID'].isin(elogs_nodup['KID'])
both_lases_nodup = lases_nodup.loc[lases_nodup_mask]
print('From other direction -- how many logs fall in both and have unique KGS_ID? ', len(both_lases_nodup), sep='')
In [70]:
# Shape of the shared, duplicate-free elog rows inside the 1980-2017 label window.
# NOTE(review): the slice is applied to the frame's index, so it presumably
# expects a date-like index -- confirm how the index is set.
both_elog_nodup.loc[slice('1980-1-1', '2017-1-1')].shape
Out[70]: