In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
import os
# Capture the shell environment as a dict so paths can be built from $HOME.
env = %env
We can read a spreadsheet straight into a dataframe:
In [3]:
import pandas as pd
In [4]:
# Master Nova Scotia wells spreadsheet, located relative to $HOME.
# NOTE(review): Dropbox path is machine-specific — anyone re-running this
# notebook will need the same folder layout.
filename = os.path.join(env["HOME"], "Dropbox/Agile/NS_DOE/2014-1G/data/_NovaScotia_wells.xlsx")
filename
Out[4]:
In [5]:
# Open the workbook and pull the 'Wells' sheet into a DataFrame.
xl = pd.ExcelFile(filename)
wells = xl.parse('Wells')
In [6]:
wells.head()
Out[6]:
First calculate a decimal lat-long for every well that has lat-long.
In [7]:
wells['lat'] = wells.Lat_Deg + wells.Lat_Min/60. + wells.Lat_Sec/3600.
In [8]:
wells.lat.head()
Out[8]:
In [9]:
# Decimal longitude, negated because Nova Scotia is in the western
# hemisphere and the spreadsheet stores unsigned degrees.
wells['lon'] = -1 * (wells.Long_Deg + wells.Long_Min/60. + wells.Long_Sec/3600.)
wells.lon.head()
Out[9]:
We will need pyproj for transforming coordinates.
In [10]:
import pyproj as pp
In [11]:
# Geographic (lon/lat) CRSs for the two datums used in the well database.
ll_nad27 = pp.Proj("+proj=longlat +ellps=clrk66 +datum=NAD27 +no_defs")
ll_nad83 = pp.Proj("+proj=longlat +ellps=GRS80 +datum=NAD83 +no_defs")
In [12]:
ll_ats77 = pp.Proj("+init=EPSG:4122") # +proj=longlat +a=6378135 +b=6356750.304921594 +no_defs
In [13]:
ll_wgs84 = pp.Proj("+init=EPSG:4326")
In [14]:
# Projected CRSs: UTM zone 20N on NAD27 (EPSG:26720) and NAD83 (EPSG:26920).
utm_nad27 = pp.Proj("+init=EPSG:26720")
utm_nad83 = pp.Proj("+init=EPSG:26920")
In [15]:
# utm_nad27 = pp.Proj("+proj=utm +zone=20T +ellps=clrk66 +north +datum=NAD27 +units=m +no_defs")
# utm_nad83 = pp.Proj("+proj=utm +zone=20T +ellps=GRS80 +north +datum=NAD83 +units=m +no_defs")
Zone should really be 20T.
We'll make a dictionary so we can look these things up easily.
In [16]:
# Look-up table: datum name, exactly as normalized from the spreadsheet,
# -> pyproj CRS object. The None -> None entry lets rows with no usable
# datum fall through without a KeyError.
crs = {
    'NAD83': ll_nad83,
    'NAD27': ll_nad27,
    'ATS77': ll_ats77,
    'WGS84': ll_wgs84,
    'UTM83': utm_nad83,
    'UTM27': utm_nad27,
    None: None,
}
I cannot figure out the elegant pandas way to do this, so will have to settle for iterating...
In [17]:
# Fill in missing lat/long datums: assume wells spudded after 2005 were
# located with NAD83, older ones with NAD27 (NaN spud years compare False,
# so they also fall to NAD27). Recorded datums just get their whitespace
# stripped (' NAD 27' -> 'NAD27') so they match the `crs` dict keys.
def _normalize_ll_datum(row):
    """Whitespace-free datum name for one well row, guessed from spud year if absent."""
    if pd.isnull(row.Lat_Long_datum):
        return 'NAD83' if row.Spud_year > 2005 else 'NAD27'
    return row.Lat_Long_datum.replace(' ', '')

ll_datum = [_normalize_ll_datum(row) for _, row in wells.iterrows()]
wells['Lat_Long_datum'] = ll_datum
wells.Lat_Long_datum.head()
Out[17]:
In [18]:
# Same idea for the UTM datum column, but collapse everything to one of two
# labels: 'UTM83' when the recorded datum is NAD83 (or the well spudded
# after 2005), otherwise 'UTM27'.
def _normalize_utm_datum(row):
    """Return 'UTM83' or 'UTM27' for one well row, guessed from spud year if absent."""
    if pd.isnull(row.UTM_datum):
        return 'UTM83' if row.Spud_year > 2005 else 'UTM27'
    return 'UTM83' if row.UTM_datum.replace(' ', '') == 'NAD83' else 'UTM27'

utm_datum = [_normalize_utm_datum(row) for _, row in wells.iterrows()]
wells['UTM_datum'] = utm_datum
wells.UTM_datum.head()
Out[18]:
If we have lat-long, use it and transform to UTM. Otherwise use the given UTMs, if any.
In [19]:
# Best-guess UTM/NAD83 position for every well: prefer the lat/long
# (transformed from its stated datum), fall back to the stated UTM
# coordinates, and leave NaN when neither is usable.
#
# Fix: the original looked up `crs[well.Lat_Long_datum]` unconditionally
# before the null checks — dead work whose result was immediately
# overwritten, and a latent KeyError for any unnormalized datum string
# even on rows that never take the lat/long branch.
all_x = []
all_y = []
for _, row in wells.iterrows():
    x = y = np.nan
    if not pd.isnull(row.lat):
        # Well has a decimal lat/long: project straight to UTM NAD83.
        datum = crs[row.Lat_Long_datum]
        if datum:
            x, y = pp.transform(datum, utm_nad83, row.lon, row.lat)
    elif not pd.isnull(row['Northing_(m)']):
        # No lat/long, but we do have UTM coordinates in some datum.
        datum = crs[row.UTM_datum]
        if datum:
            x, y = pp.transform(datum, utm_nad83,
                                row['Easting_(m)'], row['Northing_(m)'])
    all_x.append(x)
    all_y.append(y)
wells['UTMx_NAD83'] = all_x
wells['UTMy_NAD83'] = all_y
Now we have a best-guess UTM/NAD83 for every well.
In [20]:
wells.UTMx_NAD83
Out[20]:
In [24]:
from shapely.geometry import Point, mapping
import fiona
from fiona import collection, crs
schema = {'geometry': 'Point',
'properties': {'name': 'str',
'KB': 'float:9.3',
'TD': 'float:9.3'
}
}
outfile = os.path.join(env["HOME"], "Dropbox/geotransect_test/data/wells/wells.shp")
with fiona.open(outfile, 'w', driver='ESRI Shapefile', crs=crs.from_epsg(26920), schema=schema) as output:
for well in wells.iterrows():
if pd.isnull(well[1].UTMx_NAD83): continue
point = Point(well[1].UTMx_NAD83, well[1].UTMy_NAD83)
output.write({
'geometry': mapping(point),
'properties': {
'name': well[1].Well_ID,
'KB': well[1]['KB_elevation_(m)'],
'TD': well[1]['Total_depth_(m)']
}
})
In [27]:
from pprint import pprint
In [28]:
with fiona.open(outfile) as f:
print f.driver
print f.crs
print f.schema
print len(list(f)), 'points'
pprint(list(f)[0])
We want to write an ASCII file for Petrel. This is the format... we will replace {data} with a CSV, essentially.
In [30]:
# Petrel ASCII well-head template: a fixed header block followed by a
# {data} placeholder that we will .format() with CSV rows.
petrel_well_file_template = """# Petrel well head
VERSION 1
BEGIN HEADER
Well_ID
Well_sequence_number
Well_name
Surface X [m] NAD 83
Surface Y [m] NAD 83
UTM zone
KB elevation above sea level
Total depth
Operator
Spud_year
END HEADER
{data}"""
The csv module will make things easier, but then we need a file-like.
In [31]:
from StringIO import StringIO
In [32]:
# Write the frame to an in-memory text buffer. NB: to_csv leaves the
# buffer's cursor at the end of the stream.
f = StringIO()
wells.to_csv(f, encoding='utf-8')
In [33]:
# Fix: to_csv left the StringIO cursor at the end of the buffer, so a plain
# f.read() returned '' — the mystery noted below. getvalue() returns the
# whole buffer regardless of cursor position (f.seek(0) first would also work).
data = f.getvalue()
data
Out[33]:
That fails because `to_csv` leaves the buffer's cursor at the end of the stream, so a subsequent `read()` returns an empty string — use `getvalue()` (or `f.seek(0)` before reading) to get the contents.
Alternatively, we can work around it by writing to a temporary file on disk.
In [34]:
# Export only the columns Petrel needs.
# Fix: the keyword is `columns` — the old `cols` alias was deprecated and
# later removed from DataFrame.to_csv.
cols = ('Well_ID', 'Well_name', 'Ground_elev_from_DEM_(m)', 'Total_depth_(m)', 'Northing_(m)', 'Easting_(m)')
wells.to_csv('temp.csv', columns=cols, index=False, encoding='utf-8')
In [35]:
# Read the CSV back as plain text so we can splice it into the template.
with open('temp.csv') as f:
    csv_data = f.read()
In [36]:
# Fill the {data} placeholder and write the Petrel well-head file.
with open('Wells_for_Petrel.asc', 'w') as f:
    f.write(petrel_well_file_template.format(data=csv_data))
By the way, we can do some really cool things with groups and plotting in pandas.
In [37]:
groups = wells.groupby('Basin')
In [38]:
# One ground-elevation trace per basin, plotted against the well's row
# index. Fix: the figure had no title or axis labels, so it couldn't
# stand alone when the notebook is skimmed.
fig, ax = plt.subplots()
for name, group in groups:
    ax.plot(group['Ground_elev_from_DEM_(m)'], label=name)
ax.set_xlabel('Well index')
ax.set_ylabel('Ground elevation from DEM (m)')
ax.set_title('Ground elevation by basin')
ax.legend()
plt.show()
In [39]:
# Distribution of spud years across all wells.
wells['Spud_year'].hist()
plt.show()
In [40]:
# Same histogram, faceted into one panel per basin.
wells['Spud_year'].hist(by=wells['Basin'])
plt.show()
In [ ]: