In [16]:
import os
import urllib
import urllib.request
import zipfile

import pandas as pd

In [18]:
## Download and unzip databases from KGS to a temp folder
# Both archives are fetched unconditionally, so re-running this cell downloads them again.
extract_dir = './temp'  # single definition so makedirs and extractall cannot drift apart
os.makedirs(extract_dir, exist_ok=True)
raspath = 'http://www.kgs.ku.edu/PRS/Ora_Archive/ks_elog_scans.zip'
laspath = 'http://www.kgs.ku.edu/PRS/Ora_Archive/ks_las_files.zip'

try:
    # urlretrieve with no filename argument saves each archive to a temporary file
    # and returns (local_path, headers); only the path is needed here.
    ras_handle, _ = urllib.request.urlretrieve(raspath)
    las_handle, _ = urllib.request.urlretrieve(laspath)

    with zipfile.ZipFile(ras_handle, 'r') as zip_ras:
        zip_ras.extractall(extract_dir)
    with zipfile.ZipFile(las_handle, 'r') as zip_las:
        zip_las.extractall(extract_dir)
finally:
    # Remove the temporary .zip files left behind by urlretrieve, even if a
    # download or extraction step failed part-way through.
    urllib.request.urlcleanup()

`elogs` is the collection of KGS TIFF files; `lases` is the collection of KGS .las files.


In [57]:
# Load the two comma-separated KGS tables from the temp/ folder into DataFrames.
# NOTE(review): parse_dates=True asks pandas to parse the *index* as dates, but no
# index_col is given here, so presumably no column is actually parsed as a date.
# If date filtering is intended, name the date column(s) explicitly via
# parse_dates=[...] -- TODO confirm which column holds the log date.
elogs = pd.read_csv('temp/ks_elog_scans.txt', parse_dates=True)
lases = pd.read_csv('temp/ks_las_files.txt', parse_dates=True)

In [79]:
# Keep the elog rows whose KID also occurs in the LAS table's KGS_ID column,
# then reduce to a single row per KID.
elogs_mask = elogs['KID'].isin(lases['KGS_ID'])
both_elog = elogs.loc[elogs_mask]
both_elog_unique = both_elog.drop_duplicates(subset='KID')
print('How many logs fall in both and have unique KGS_ID? ', len(both_elog_unique), sep='')

# NOTE(review): the tables are read without index_col, and the index labels shown in
# this cell's output are plain integers, so this string slice is applied to the row
# index rather than to a date. It therefore does not select logs by date. Presumably
# a date column should be compared instead -- TODO confirm which column to use.
both_elog_unique_new = both_elog_unique.loc['2000-1-1' : '2017-1-1']
both_elog_unique_new['KID']


How many logs fall in both and have unique KGS_ID? 14289
Out[79]:
200008    1044210474
200027    1044194936
200035    1044190116
200038    1044202126
200050    1044186093
200052    1044199083
200058    1044191408
200062    1044177946
200063    1044177947
200064    1044195244
200065    1044177937
200066    1044177518
200067    1044194723
200072    1044186106
200077    1044182654
200084    1044182652
200098    1044164588
200140    1006034398
200428    1006051542
200478    1006051252
200650    1044943927
200653    1044637777
200656    1044577457
200674    1044631637
200690    1044630102
200716    1044581635
200722    1044645563
200728    1044640259
200746    1044623272
200748    1044578544
             ...    
201197    1001263343
201502    1006077870
201509    1006083641
201600    1044959397
201603    1044975022
201606    1044964895
201609    1044948610
201612    1044228776
201614    1044254903
201617    1044628958
201618    1044564354
201625    1044640623
201627    1044254181
201632    1044564364
201636    1044564355
201637    1044569404
201645    1044569422
201650    1044569421
201655    1044254152
201660    1044572912
201671    1044640605
201673    1044640622
201675    1044572425
201679    1044574162
201681    1044569405
201686    1044574159
201688    1044574160
201690    1044574095
201697    1044574094
201698    1044574158
Name: KID, Length: 139, dtype: int64

In [59]:
# Reverse-direction check: start from the LAS table and keep the rows whose KGS_ID
# also occurs in the elog table's KID column, then reduce to one row per KGS_ID.
lases_mask = lases['KGS_ID'].isin(elogs['KID'])  # mask over the LAS rows
# Index with the mask defined on the line above; the previous `las_mask` name is not
# defined in this cell and raises NameError on a fresh kernel.
both_lases = lases[lases_mask]
both_lases_unique = both_lases.drop_duplicates('KGS_ID')  # remove duplicates
print('Other direction -- how many logs fall in both and have unique KGS_ID? '+str(both_lases_unique.shape[0]))
# The count of shared unique IDs should not depend on which table we start from.
if both_elog_unique.shape[0] == both_lases_unique.shape[0]:
    print('Same in both directions.')


Other direction -- how many logs fall in both and have unique KGS_ID? 14289
Same in both directions.

In [60]:
# Flag every elog row whose KID occurs more than once. keep=False marks *all* members
# of a duplicate group (not only the repeats after the first), which is what the
# earlier isin(...duplicated...) round trip computed.
elogs_hasdup_bool = elogs.duplicated('KID', keep=False)
# Use `~` (logical NOT) to invert the mask; unary `-` is arithmetic negation and is
# rejected for plain numpy boolean arrays.
elogs_nodup = elogs[~elogs_hasdup_bool]
# Sanity check: dropping duplicates from the filtered frame must not change its shape.
elogs_nodup.drop_duplicates('KID').shape == elogs_nodup.shape


Out[60]:
True

In [61]:
# Flag every LAS row whose KGS_ID occurs more than once. keep=False marks all members
# of a duplicate group, not only the repeats after the first occurrence.
lases_hasdup_bool = lases.duplicated('KGS_ID', keep=False)
# Use `~` (logical NOT) to invert the mask; unary `-` is arithmetic negation and is
# rejected for plain numpy boolean arrays.
lases_nodup = lases[~lases_hasdup_bool]
lases_nodup.shape  # How many logs have no duplicate?


Out[61]:
(13867, 12)

Trying again after filtering out any wells that have duplicate logs


In [62]:
# Intersect the two duplicate-free tables on their ID columns, once from each side.

# elog side: rows of elogs_nodup whose KID is present among lases_nodup's KGS_ID values
elogs_nodup_mask = elogs_nodup['KID'].isin(lases_nodup['KGS_ID'])
both_elog_nodup = elogs_nodup.loc[elogs_nodup_mask]
print('How many logs fall in both and have unique KGS_ID?   ', len(both_elog_nodup), sep='')

# LAS side: rows of lases_nodup whose KGS_ID is present among elogs_nodup's KID values
lases_nodup_mask = lases_nodup['KGS_ID'].isin(elogs_nodup['KID'])
both_lases_nodup = lases_nodup.loc[lases_nodup_mask]
print('From other direction -- how many logs fall in both and have unique KGS_ID?   ', len(both_lases_nodup), sep='')


How many logs fall in both and have unique KGS_ID?   1736
From other direction -- how many logs fall in both and have unique KGS_ID?   1736

Select logs dated from 1980-01-01 through 2017-01-01


In [70]:
# Shape of the slice of both_elog_nodup between the labels '1980-1-1' and '2017-1-1'.
# NOTE(review): the tables are read without index_col, so this string slice is applied
# to the row index rather than to a date column and presumably does not select logs by
# date -- TODO confirm which column holds the log date and filter on it instead.
both_elog_nodup.loc['1980-1-1' : '2017-1-1'].shape


Out[70]:
(17, 32)