In [1]:
import numpy as np
import pandas as pd
import netCDF4 as nc
from glob import glob
import os
import shapely.geometry as shpg
from shapely.ops import cascaded_union
import gdal
import time
from ipywidgets import *
import matplotlib.pyplot as plt
% matplotlib inline
In [2]:
def get_buffered_ext(shape, buff_dist):
"""Create a buffer around a feature and return shape+buffer.
"""
buffer = shape.buffer(buff_dist)
total = cascaded_union(buffer, shape)
return total
In [3]:
def pixelOffset2coord(rasterfn,xOffset,yOffset):
"""Change pixel coordinates to coordinates"""
raster = gdal.Open(rasterfn)
geotransform = raster.GetGeoTransform()
originX = geotransform[0]
originY = geotransform[3]
pixelWidth = geotransform[1]
pixelHeight = geotransform[5]
coordX = originX + pixelWidth * xOffset
coordY = originY + pixelHeight * yOffset
return coordX, coordY
In [4]:
def raster2array(rasterfn):
"""Convert a raster to numpy array"""
raster = gdal.Open(rasterfn)
geotransform = raster.GetGeoTransform()
extent = [geotransform[0], geotransform[1], geotransform[3], geotransform[5]]
band = raster.GetRasterBand(1)
array = band.ReadAsArray()
return array, extent
In [5]:
def getNoDataValue(rasterfn):
"""Get no data value from a raster"""
raster = gdal.Open(rasterfn)
band = raster.GetRasterBand(1)
return band.GetNoDataValue()
In [231]:
def strucinfo(rootgrp):
def walktree(top):
values = top.groups.values()
yield values
for value in top.groups.values():
for children in walktree(value):
yield children
print (rootgrp)
for children in walktree(rootgrp):
for child in children:
print (child)
In [223]:
def ncdump(nc_fid, verb=True):
'''
Adapted from Chris Slocum, modified for Python 3.5 & extended for netCDF4 (groups) by Johannes Landmann.
ncdump outputs dimensions, variables and their attribute information.
The information is similar to that of NCAR's ncdump utility.
ncdump requires a valid instance of Dataset.
Parameters
----------
nc_fid : netCDF4.Dataset
A netCDF4 dateset object
verb : Boolean
whether or not nc_attrs, nc_dims, and nc_vars are printed
Returns
-------
nc_attrs : list
A Python list of the NetCDF file global attributes
nc_dims : list
A Python list of the NetCDF file dimensions
nc_vars : list
A Python list of the NetCDF file variables
'''
def print_ncattr(key):
"""
Prints the NetCDF file attributes for a given key
Parameters
----------
key : unicode
a valid netCDF4.Dataset.variables key
"""
try:
print ("\t\ttype:", repr(nc_fid.variables[key].dtype))
for ncattr in nc_fid.variables[key].ncattrs():
print ('\t\t%s:' % ncattr,\
repr(nc_fid.variables[key].getncattr(ncattr)))
except KeyError:
print ("\t\tWARNING: %s does not contain variable attributes" % key)
# NetCDF global attributes
nc_attrs = nc_fid.ncattrs()
if verb:
print ("NetCDF Global Attributes:")
for nc_attr in nc_attrs:
print ('\t%s:' % nc_attr, repr(nc_fid.getncattr(nc_attr)))
nc_dims = [dim for dim in nc_fid.dimensions] # list of nc dimensions
# Dimension shape information.
if verb:
print ("NetCDF dimension information:")
for dim in nc_dims:
print ("\tName:", dim)
print ("\t\tsize:", len(nc_fid.dimensions[dim]))
print_ncattr(dim)
# Variable information.
nc_vars = [var for var in nc_fid.variables] # list of nc variables
if verb:
print ("NetCDF variable information:")
for var in nc_vars:
if var not in nc_dims:
print ('\tName:', var)
print ("\t\tdimensions:", nc_fid.variables[var].dimensions)
print ("\t\tsize:", nc_fid.variables[var].size)
print_ncattr(var)
return nc_attrs, nc_dims, nc_vars
In [173]:
pdf = pd.read_csv('C:\\Users\\jlandman\\Desktop\\database_Fischer_et_al._2015_The_Cryosphere.txt', sep = '\t', encoding='iso-8859-1')
pdf.ID_SGI1973 = pdf.ID_SGI1973.astype('str')
pda = pd.read_excel('C:\\Users\\jlandman\\Desktop\\__data-request_GLACIER.xlsx', encoding='iso-8859-1')
pdd = pd.read_excel('C:\\Users\\jlandman\\Desktop\\__data-request_CHANGE.xlsx', encoding='iso-8859-1')
pdd = pdd[~pd.isnull(pdd.REMARKS)]
In [7]:
def FoG_equiv(sgi1973_code):
"""Find the FoG_ID equivalent to a given SGI1973 ID"""
sgi1973_code = sgi1973_code.lstrip('0')
try:
sgi2010 = pdf[pdf.ID_SGI1973.str.contains(sgi1973_code)].Code_SGI2010.iloc[0]
except:
wgms_id = np.nan
return wgms_id
wgms_id = pdd[pdd.REMARKS.str.contains(str(sgi2010))].WGMS_ID.iloc[0]
return wgms_id
In [8]:
def make_nc4(fpath, data, extent, location):
"""Compile a netCDF4 dataset"""
with nc.Dataset(fpath, 'w', format='NETCDF4_CLASSIC') as dataset:
# get the WGMS_ID (a bit complicated) in order to be able to retrieve data from the FoG tables
sgi_73_id = os.path.basename(fpath).split('_')[1].split('.')[0]
wgms_id = FoG_equiv(sgi_73_id)
if np.isnan(wgms_id):
print("No equivalent for SGI1973 %s" % sgi_73_id)
return
# extract/calculate the data
lats_out = np.arange(extent[2] - data.shape[0] * extent[3], extent[2], extent[3])
lons_out = np.arange(extent[0] - data.shape[1] * extent[1], extent[1], extent[1])
change_out = data[...,0]
unc_out = data[...,1]
mask_out = data[...,2]
# create dimensions
dataset.createDimension('latitude', data.shape[0])
dataset.createDimension('longitude', data.shape[1])
dataset.createDimension('time', None)
# create some metadata attributes
dataset.WGMS_ID = wgms_id
try:
dataset.region = pda[pda.WGMS_ID == wgms_id].REGION.iloc[0]
except:
pass
try:
dataset.subregion = pda[pda.WGMS_ID == wgms_id].SUBREGION.iloc[0]
except:
pass
try:
dataset.reference = pdd[pdd.WGMS_ID == wgms_id].REFERENCE.iloc[0]
except:
pass
dataset.source = 'World Glacier Monitoring Service, Zurich, Switzerland'
dataset.history = 'Created ' + time.ctime(time.time())
dataset._FillValue = -9999.
# create variables
lats = dataset.createVariable('latitude', np.float32, ('latitude',), zlib=True, least_significant_digit=5)
lons = dataset.createVariable('longitude', np.float32, ('longitude',), zlib=True, least_significant_digit=5)
# Assign units attributes to coordinate var data. This attaches a
# text attribute to each of the coordinate variables, containing the
# units.
lats.units = 'degrees_north'
lons.units = 'degrees_east'
# write data to coordinate vars.
lats[:] = lats_out
lons[:] = lons_out
# create the pressure and temperature variables
change = dataset.createVariable('change', float, ('latitude','longitude'), zlib=True, least_significant_digit=3)
unc = dataset.createVariable('uncertainty', float, ('latitude','longitude'), zlib=True,least_significant_digit=3)
mask = dataset.createVariable('mask', int, ('latitude','longitude'), zlib=True, least_significant_digit=3)
# set the units attribute.
change.units = 'm'
unc.units = 'm'
mask.units = ''
# write data to variables.
change[:] = change_out
unc[:] = unc_out
mask[:] = mask_out
In [220]:
def make_nc4_groups(outpath, data_e, data_c, e_times, c_periods, extent, location):
"""Compile a netCDF4 dataset to the given outpath.
Parameters
-----------
outpath: Path for the output netCDF4 file
data_e: Elevation data as array (third dimension contains elevation and its uncertainty alternately)
data_c: Change data as array (third dimension contains change and its uncertainty alternately)
e_times: Times (in years AD) of elevation grids (format: YYYY)
c_periods: Reference periods (in years AD) of change grids (format: Y2Y2Y2Y2Y1Y1Y1Y1)
extent: Geographical extent of the data
location: Example parsable argument, here the political unit
"""
# get WGMS_ID for retrieving data from FoG
sgi_73_id = os.path.basename(outpath).split('_')[1].split('.')[0]
wgms_id = FoG_equiv(sgi_73_id)
if np.isnan(wgms_id):
print("No equivalent for SGI1973 %s" % sgi_73_id)
return
with nc.Dataset(outpath, 'w') as rootgrp:
chggrp = rootgrp.createGroup("change")
elvgrp = rootgrp.createGroup("elevation")
# extract/calculate the data
latsc_out = np.arange(extent[2] - data_c.shape[0] * extent[3], extent[2], extent[3])
lonsc_out = np.arange(extent[0] - data_c.shape[1] * extent[1], extent[1], extent[1])
latse_out = np.arange(extent[2] - data_e.shape[0] * extent[3], extent[2], extent[3])
lonse_out = np.arange(extent[0] - data_e.shape[1] * extent[1], extent[1], extent[1])
# it cannot guarantee everything, but at least it reveals if there could be missing data or uncertainties
assert data_c.shape[2] % 2 == 0
assert data_e.shape[2] % 2 == 0
# grab the data
chg_out = data_c[:,:,::2] # get every second element of the third dimension, starting at 0
chg_unc_out = data_c[:,:,1::2] # get every second element of the third dimension, starting at 1
elev_out = data_e[:,:,::2] # get every second element of the third dimension, starting at 0
elev_unc_out = data_e[:,:,1::2]# get every second element of the third dimension, starting at 1
# create dimensions
elvgrp.createDimension('latitude', data_e.shape[0])
elvgrp.createDimension('longitude', data_e.shape[1])
elvgrp.createDimension('time', None)
chggrp.createDimension('latitude', data_c.shape[0])
chggrp.createDimension('longitude', data_c.shape[1])
chggrp.createDimension('level', None)
# create some metadata attributes from FoG
rootgrp.WGMS_ID = wgms_id
gseries = pda[pda.WGMS_ID == wgms_id]
if not gseries.empty:
rootgrp.glaciername = pda[pda.WGMS_ID == wgms_id].NAME.iloc[0]
rootgrp.region = pda[pda.WGMS_ID == wgms_id]['GEO-REGION_CODE'].iloc[0]
rootgrp.subregion = pda[pda.WGMS_ID == wgms_id]['GEO-SUBREGION_CODE'].iloc[0]
rootgrp.references = pdd[pdd.WGMS_ID == wgms_id].REFERENCE.iloc[0]
rootgrp.conventions = 'CF-1.6'
rootgrp.title = 'Glacier surface height and its changes for %s (FoG ID %s )' % (pda[pda.WGMS_ID == wgms_id].NAME.iloc[0], wgms_id)
rootgrp.institution = 'World Glacier Monitoring Service (WGMS), Zurich, Switzerland'
rootgrp.source = 'Space/terrestrial remote sensing or maps'
rootgrp.comment = 'Miscellaneous data sources are documented in FoG dataset'
rootgrp.contact = 'wgms@geo.uzh.ch'
rootgrp.history = 'Created ' + time.ctime(time.time())
rootgrp._FillValue = -9999.000000
#rootgrp._FillValue = 9.96920996839e+36
# create variables
latse = elvgrp.createVariable('latitude', np.float32, ('latitude',), zlib=True, least_significant_digit=5)
lonse = elvgrp.createVariable('longitude', np.float32, ('longitude',), zlib=True, least_significant_digit=5)
timese = elvgrp.createVariable('time', np.int16, ('time',), zlib=True, least_significant_digit=0)
latsc = chggrp.createVariable('latitude', np.float32, ('latitude',), zlib=True, least_significant_digit=5)
lonsc = chggrp.createVariable('longitude', np.float32, ('longitude',), zlib=True, least_significant_digit=5)
levelsc = chggrp.createVariable('level', np.int32, ('level',), zlib=True, least_significant_digit=0)
# Assign units attributes to coordinate var data. This attaches a
# text attribute to each of the coordinate variables, containing the units
latse.units = 'degree_north'
latse.long_name = 'Latitude'
latse.standard_name = 'latitude'
lonse.units = 'degree_east'
lonse.long_name = 'Longitude'
lonse.standard_name = 'longitude'
timese.units = 'years since 0000-01-01 00:00:00'
timese.calendar = 'gregorian'
latsc.units = 'degree_north'
latsc.long_name = 'Latitude'
latsc.standard_name = 'latitude'
lonsc.units = 'degree_east'
lonsc.long_name = 'Longitude'
latse.standard_name = 'latitude'
levelsc.long_name = 'Subtraction grids' # good idea? dimension- and unitless coordinate
# write data to coordinate vars
latse[:] = latse_out
lonse[:] = lonse_out
timese[:] = e_times
latsc[:] = latsc_out
lonsc[:] = lonsc_out
# create the elevation and its uncertainty as variables
chg = chggrp.createVariable('change', float, ('level', 'latitude','longitude'), zlib=True, least_significant_digit=3)
chg_unc = chggrp.createVariable('uncertainty', float, ('level', 'latitude','longitude'), zlib=True,least_significant_digit=3)
# create the change and its uncertainty as variables
elev = elvgrp.createVariable('elevation', float, ('time','latitude','longitude'), zlib=True, least_significant_digit=3)
elev_unc = elvgrp.createVariable('uncertainty', float, ('time','latitude','longitude'), zlib=True,least_significant_digit=3)
# set the units attribute
chg.units = 'm'
chg.standard_name = 'land_surface_height_change' # new invention
chg_unc.units = 'm'
chg_unc.standard_name = 'land_surface_height_change_uncertainty' # new invention
elev.units = 'm'
elev.standard_name = 'land_ice_surface_height' # new invention
elev_unc.units = 'm'
elev.standard_name = 'land_ice_surface_height_uncertainty' # new invention
# specify the coordinate axes
chg._CoordinateAxes = "level lat lon"
chg_unc._CoordinateAxes = "level lat lon"
elev._CoordinateAxes = "time lat lon"
elev_unc._CoordinateAxes = "time lat lon"
latse._CoordinateAxisType = "Lat"
lonse._CoordinateAxisType = "Lon"
timese._CoordinateAxisType = "Times"
latsc._CoordinateAxisType = "Lat"
lonsc._CoordinateAxisType = "Lon"
levelsc._CoordinateAxisType = "Times"
# simplify change periods/time for indexing
tim_range = np.arange(len(e_times))
chg_range = np.arange(len(c_periods))
# make th data arrays fit the netcdf variables
chg_out = np.rollaxis(chg_out, -1, 0)
chg_unc_out = np.rollaxis(chg_unc_out, -1, 0)
elev_out = np.rollaxis(elev_out, -1, 0)
elev_unc_out = np.rollaxis(elev_unc_out, -1, 0)
# write data
chg[:] = chg_out
chg_unc[:] = chg_unc_out
elev[:] = elev_out
elev_unc[:] = elev_unc_out
In [ ]:
fpaths = glob("C:\\Users\\jlandman\\Desktop\\model_db\\data\\glacier_dh_rasterized_fischer et al. 2015\\dh_*.*")
e_times = [1986,2016]
c_periods = [20161986, 19861850]
for fp in fpaths:
changes, extent = raster2array(fp)
changes_uncertainty, extent = raster2array(fp)
elev, extent = raster2array(fp)
elev_uncertainty, extent = raster2array(fp)
dvi=np.dstack((changes,changes, changes))
dvi_big = np.dstack((changes, changes_uncertainty, elev, elev_uncertainty))
#make_nc4('C:\\users\\jlandman\\desktop\\testnetCDFs\\simple\\dh_'+os.path.basename(fp).split('_')[1].split('.')[0]+'.nc', dvi, extent, location='Switzerland')
make_nc4_groups('C:\\users\\jlandman\\desktop\\testnetCDFs\\groups\\dh_'+os.path.basename(fp).split('_')[1].split('.')[0]+'.nc', dvi_big, dvi_big, e_times, c_periods, extent, location='Switzerland')
In [ ]:
#Open existing dataset
src_ds = gdal.Open("C:\\Users\\jlandman\\Desktop\\glacier_dh_rasterized_fischer et al. 2015\\glacier_dh_rasterized_fischer et al. 2015\\dh_0997.asc")
dst_filename = 'C:\\Users\\jlandman\\Desktop\\test_gdal_ascii2nc.nc'
#Open output format driver, see gdal_translate --formats for list
format = "netCDF"
driver = gdal.GetDriverByName(format)
#Output to new format
dst_ds = driver.CreateCopy( dst_filename, src_ds, 0 )
#Properly close the datasets to flush to disk
dst_ds = None
src_ds = None
In [259]:
#Open existing dataset
src_ds_list = glob("C:\\Users\\jlandman\\Desktop\\testnetCDFs\\groups\\*.nc")
for path in src_ds_list[:10]:
src_ds = gdal.Open(path)
print(path)
subdata = src.GetSubDatasets()
for s in range(len(subdata)):
sds = gdal.Open(subdata[s][0])
dst_path = ("\\").join(os.path.dirname(path).split('\\')[:-1]) + \
"\\" + "asciifromnc" + "\\" + os.path.basename(path).split('.')[0] + \
"_" + ('_').join(subdata[s][0].split('//')[-1].split('/')) + '.asc'
print(dst_path)
# Open output format driver, see gdal_translate --formats for list
oformat = "AAIGrid"
driver = gdal.GetDriverByName(format)
# Output to new format
dst_ds = driver.CreateCopy( dst_path, sds, 0 )
#os.system("gdal_translate -of %s NETCDF:%s %s" % (oformat, sds, dst_path))
# "filename.nc":dem_bnds %s"
# Properly close the datasets to flush to disk
dst_ds = None
src_ds = None
In [267]:
sds.ReadAsArray().shape
Out[267]:
In [224]:
f = nc.Dataset("C:\\Users\\jlandman\\Desktop\\testnetCDFs\\groups\\dh_3071.nc")
In [225]:
change = f['change'].variables['change']
elevation = f['elevation'].variables['elevation']
In [226]:
change.shape
Out[226]:
In [227]:
ncdump(f)
Out[227]:
In [232]:
strucinfo(f)
In [198]:
f.getncattr('missing_value')
In [144]:
plt.imshow(elevation[0,:,:])
Out[144]:
In [201]:
f.close()
In [146]:
plt.imshow(dvi_big[:,:,0], interpolation='none', origin='lower')
Out[146]:
In [47]:
dvi_big[:,:,:].shape
Out[47]:
In [ ]:
In [233]:
src = gdal.Open('C:\\Users\\jlandman\\Desktop\\testnetCDFs\\groups\\dh_3071.nc')
In [253]:
('_').join(src.GetSubDatasets()[0][0].split('//')[-1].split('/'))
Out[253]:
In [247]:
src.GetMetadata_Dict()
Out[247]:
In [ ]: