Reading well headers


In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import os
env = %env

Import into pandas

We can read a spreadsheet straight into a dataframe:


In [3]:
import pandas as pd

In [4]:
filename = os.path.join(env["HOME"], "Dropbox/Agile/NS_DOE/2014-1G/data/_NovaScotia_wells.xlsx")
filename


Out[4]:
'/Users/matt/Dropbox/Agile/NS_DOE/2014-1G/data/_NovaScotia_wells.xlsx'

In [5]:
xl = pd.ExcelFile(filename)
wells = xl.parse('Wells')

In [6]:
wells.head()


Out[6]:
Well_ID Well_sequence_number Well_sequence Well_name UWI Company Basin Spud_date_notes Rig_release_date_notes Spud_year Ground_elevation_(m) Ground_elev_from_DEM_(m) KB_elevation_(m) Elevation_source Total_depth_(m) Geographic_region Northing_(m) Easting_(m) UTM_datum UTM_Zone
0 P-139 139 P ECE-13-P2_ST 200211E10A054E02 st East Coast Energy NaN 2013-11-21 NaN 2013 NaN 40.28 44.68 Geological report 1259 NaN 5045389.81 529556.93 NaN 20 ...
1 P-138 138 P ECE-13-P1 20111E10A054O01 East Coast Energy NaN 2013-11-06 2013-11-16 2013 NaN 35.49 39.89 Geological report 700 NaN 5045965.58 529996.16 NaN 20 ...
2 P-137 137 P Forent South Branch No.1 K-70-D/11-E-03 NaN Forent Energy Ltd NaN NaN NaN 2012 55.90 51.76 55.90 Logs 784 NaN NaN NaN NaN NaN ...
3 P-136 136 P Forent Alton No. 1 E-49-C 11-E-03 NaN Forent Energy Ltd. NaN NaN NaN 2012 36.11 40.24 40.24 Logs 1001 NaN 5003440.80 479104.69 NaN 20 ...
4 P-135 135 P E-38-A/11-E-5 NaN Eastrock Resources NaN 2011-12-02 2011-12-31 2011 119.00 119.00 123.00 DEM 946 NaN 505648.00 420980.00 NaN 20 ...

5 rows × 52 columns

Calculate datum-corrected UTMs for all wells

Make manageable lat-longs

First calculate a decimal lat-long for every well that has lat-long.


In [7]:
wells['lat'] = wells.Lat_Deg + wells.Lat_Min/60. + wells.Lat_Sec/3600.

In [8]:
wells.lat.head()


Out[8]:
0    45.561419
1          NaN
2    45.185752
3    45.185189
4          NaN
Name: lat, dtype: float64

In [9]:
wells['lon'] = -1 * (wells.Long_Deg + wells.Long_Min/60. + wells.Long_Sec/3600.)
wells.lon.head()


Out[9]:
0   -62.621268
1          NaN
2   -63.047873
3   -63.266834
4          NaN
Name: lon, dtype: float64

Make projections

We will need pyproj for transforming coordinates.


In [10]:
import pyproj as pp

In [11]:
ll_nad27 = pp.Proj("+proj=longlat +ellps=clrk66 +datum=NAD27 +no_defs")
ll_nad83 = pp.Proj("+proj=longlat +ellps=GRS80 +datum=NAD83 +no_defs")

In [12]:
ll_ats77 = pp.Proj("+init=EPSG:4122")  # +proj=longlat +a=6378135 +b=6356750.304921594 +no_defs

In [13]:
ll_wgs84 = pp.Proj("+init=EPSG:4326")

In [14]:
utm_nad27 = pp.Proj("+init=EPSG:26720")
utm_nad83 = pp.Proj("+init=EPSG:26920")

In [15]:
# utm_nad27 = pp.Proj("+proj=utm +zone=20T +ellps=clrk66 +north +datum=NAD27 +units=m +no_defs")
# utm_nad83 = pp.Proj("+proj=utm +zone=20T +ellps=GRS80 +north +datum=NAD83 +units=m +no_defs")

Zone should really be 20T.

We'll make a dictionary so we can look these things up easily.


In [16]:
crs = {'NAD83': ll_nad83,
       'NAD27': ll_nad27,
       'ATS77': ll_ats77,
       'WGS84': ll_wgs84,
       'UTM83': utm_nad83,
       'UTM27': utm_nad27,
       None:None}

Step over wells and calculate best-guess coordinates

I cannot figure out the elegant pandas way to do this, so will have to settle for iterating...


In [17]:
ll_datum = []
for well in wells.iterrows():
    if pd.isnull(well[1].Lat_Long_datum):
        if well[1].Spud_year > 2005:
            ll_datum.append('NAD83')
        else:
            ll_datum.append('NAD27')
    else:
        ll_datum.append(well[1].Lat_Long_datum.replace(' ',''))

wells['Lat_Long_datum'] = ll_datum
wells.Lat_Long_datum.head()


Out[17]:
0    NAD83
1    NAD83
2    NAD83
3    NAD83
4    NAD83
Name: Lat_Long_datum, dtype: object

In [18]:
utm_datum = []
for well in wells.iterrows():
    if pd.isnull(well[1].UTM_datum):
        if well[1].Spud_year > 2005:
            utm_datum.append('UTM83')
        else:
            utm_datum.append('UTM27')
    else:
        if well[1].UTM_datum.replace(' ','') == 'NAD83':
            utm_datum.append('UTM83')
        else:
            utm_datum.append('UTM27')

wells['UTM_datum'] = utm_datum
wells.UTM_datum.head()


Out[18]:
0    UTM83
1    UTM83
2    UTM83
3    UTM83
4    UTM83
Name: UTM_datum, dtype: object

If we have lat-long, use it and transform to UTM. Otherwise use the given UTMs, if any.


In [19]:
all_x = []
all_y = []

for well in wells.iterrows():
    datum = crs[well[1].Lat_Long_datum]
    if not pd.isnull(well[1].lat):
        datum = crs[well[1].Lat_Long_datum]
        if datum:
            lon = well[1].lon
            lat = well[1].lat
            x, y = pp.transform(datum, utm_nad83, lon, lat)
            all_x.append(x)
            all_y.append(y)
        else:
            all_x.append(np.NaN)
            all_y.append(np.NaN)
    elif not pd.isnull(well[1]['Northing_(m)']):
        datum = crs[well[1].UTM_datum]
        if datum:
            utm_x = well[1]['Easting_(m)']
            utm_y = well[1]['Northing_(m)']
            x, y = pp.transform(datum, utm_nad83, utm_x, utm_y)
            all_x.append(x)
            all_y.append(y)
        else:
            all_x.append(np.NaN)
            all_y.append(np.NaN)
    else:
        all_x.append(np.NaN)
        all_y.append(np.NaN)
        

wells['UTMx_NAD83'] = all_x
wells['UTMy_NAD83'] = all_y

Now we have a best-guess UTM/NAD83 for every well.


In [20]:
wells.UTMx_NAD83


Out[20]:
0     529556.923418
1     529996.160000
2     496239.104060
3     479037.286693
4     420980.000000
5     529976.000000
6     443001.136538
7     422123.398000
8     486531.000000
9     443262.731646
10    440570.807628
11    463044.110974
12    463079.263268
13    443757.372592
14    414897.620000
...
136    640456.430275
137    640456.430275
138    640456.430275
139    682348.984809
140    713856.023214
141    714249.417355
142    712176.344400
143    710153.389015
144    711553.274807
145    721768.752062
146              NaN
147    661364.172050
148    284461.777706
149              NaN
150              NaN
Name: UTMx_NAD83, Length: 151, dtype: float64

Writing a shapefile


In [24]:
from shapely.geometry import Point, mapping
import fiona
from fiona import collection, crs

schema =  {'geometry': 'Point',
           'properties': {'name': 'str',
                          'KB': 'float:9.3',
                          'TD': 'float:9.3'
                          }
           }

outfile = os.path.join(env["HOME"], "Dropbox/geotransect_test/data/wells/wells.shp")

with fiona.open(outfile, 'w', driver='ESRI Shapefile', crs=crs.from_epsg(26920), schema=schema) as output:
    for well in wells.iterrows():
        if pd.isnull(well[1].UTMx_NAD83): continue
        point = Point(well[1].UTMx_NAD83, well[1].UTMy_NAD83)
        output.write({
            'geometry': mapping(point),
            'properties': {
                'name': well[1].Well_ID,
                'KB': well[1]['KB_elevation_(m)'],
                'TD': well[1]['Total_depth_(m)']
            }
        })

In [27]:
from pprint import pprint

In [28]:
with fiona.open(outfile) as f:
    print f.driver
    print f.crs
    print f.schema
    print len(list(f)), 'points'
    pprint(list(f)[0])


ESRI Shapefile
{'init': u'epsg:26920'}
{'geometry': 'Point', 'properties': OrderedDict([(u'KB', 'float:9.3'), (u'TD', 'float:9.3'), (u'name', 'str:80')])}
148 points
{'geometry': {'coordinates': (529556.9234183785, 5045389.812278276),
              'type': 'Point'},
 'id': '0',
 'properties': OrderedDict([(u'KB', 44.68), (u'TD', 1259.0), (u'name', u'P-139')]),
 'type': 'Feature'}

Write the Petrel file format

We want to write an ASCII file for Petrel. This is the format... we will replace {data} with a CSV, essentially.


In [30]:
petrel_well_file_template = """# Petrel well head
VERSION 1
BEGIN HEADER
Well_ID
Well_sequence_number
Well_name
Surface X [m] NAD 83
Surface Y [m] NAD 83
UTM zone 
KB elevation above sea level
Total depth
Operator
Spud_year
END HEADER
{data}"""

The csv module will make things easier, but then we need a file-like.


In [31]:
from StringIO import StringIO

In [32]:
f = StringIO()
wells.to_csv(f, encoding='utf-8')

In [33]:
data = f.read()
data


Out[33]:
''

I don't understand why that doesn't work. I suspect some sort of Python 2.7 / Unicode issue.

We will have to work around it.


In [34]:
cols = ('Well_ID', 'Well_name', 'Ground_elev_from_DEM_(m)', 'Total_depth_(m)', 'Northing_(m)', 'Easting_(m)')
wells.to_csv('temp.csv', cols=cols, index=False, encoding='utf-8')

In [35]:
with open('temp.csv') as f:
    csv_data = f.read()

In [36]:
with open('Wells_for_Petrel.asc', 'w') as f:
    f.write(petrel_well_file_template.format(data=csv_data))

A pandas footnote

By the way we can do some really cool things with groups and pltting in pandas.


In [37]:
groups = wells.groupby('Basin')

In [38]:
fig, ax = plt.subplots()
for name, group in groups:
    ax.plot(group['Ground_elev_from_DEM_(m)'], label=name)
ax.legend()

plt.show()



In [39]:
wells['Spud_year'].hist()
plt.show()



In [40]:
wells['Spud_year'].hist(by=wells['Basin'])
plt.show()



In [ ]: