In [1]:
import os
import netCDF4
import scipy.interpolate
import h5py
import numpy as np
import matplotlib.pyplot as plt
Let's read some radar data. Preferably these would be stored by another model or radar measurement station on an opendap site. Then we could query the catalog for the latest file. For now, we'll read it from my home directory

In [2]:
radardir = '/home/fedor/Downloads/radar'

In [3]:
# Let's see what we have
root, dirs, files = os.walk(radardir).next()
filenames = [os.path.join(root, filename) for filename in sorted(files)]
Let's inspect the files. Open the files, read the precipitation field and show it on a map, rescaled, because it doesn't rain that much in may. I haven't found the units yet....

In [4]:
# create all the subplots
fig, axes = plt.subplots(1, len(filenames), figsize=(4*len(filenames),3))
for i, (ax, filename) in enumerate(zip(axes, filenames)):
    # open the file
    h5 = h5py.File(filename)
    # look up the variable
    precipitation = h5['precipitation']
    # read it
    arr = precipitation[:,:]
    # mask it and scale it
    A = np.log10(1+np.ma.masked_array(arr, mask=arr==h5.attrs['fill_value']))
    # the date is in the title
    ax.set_title(os.path.split(filename)[-1].split('_')[-1][:8])
    # plot and colorbar
    # the images are 0 top, negative pixelsize for y
    im = ax.imshow(A, cmap=matplotlib.cm.Blues) 
    plt.colorbar(im, ax=ax)


-c:11: RuntimeWarning: invalid value encountered in log10

In [5]:
# lookup the grid 
h5['overview'].attrs.keys()
dict(h5['geographic']['map_projection'].attrs)
# It seems that this is the thing we need, not sure
# which standard is used, not H5 EOS it seems....
h5.attrs['grid_extent']
dict(h5.attrs)
# Let's extract
x0, x1, y0, y1 = h5.attrs['grid_extent']
# Assumption....
x = np.linspace(x0,x1, num=precipitation.shape[1], endpoint=False)
y = np.linspace(y0,y1, num=precipitation.shape[0], endpoint=False)

In [6]:
# Let's create X and Y array, so we generalize it a bit
X, Y = meshgrid(x,y)
# Plot this one in detail
fig, ax = plt.subplots(figsize=(24,16))
# Coordinates seem correct and are in EPSG:28992 it seems
im = ax.pcolormesh(X,Y, A, cmap='Blues')
plt.colorbar(im, ax=ax)


Out[6]:
<matplotlib.colorbar.Colorbar instance at 0x718acb0>

In [7]:
# Let's define our interpolation function.
# Very many choices here, let's do it the fastest way....
I = scipy.interpolate.NearestNDInterpolator(np.c_[X.ravel(), Y.ravel()], A.ravel())

In [8]:
# Get the coordinates of the cells that we want to put rain on
# Assuming regular grid for now
# But can be any point based value
# xi = subgrid.get_nd('xc')
# yi = subgrid.get_nd('yc')
xi = np.linspace(-100000, 400000, num=3500)
yi = np.linspace(200000, 700000, num =3000)

Xi, Yi = np.meshgrid(xi, yi)
Ai = I(Xi,Yi)

In [9]:
fig, axes = plt.subplots(1,2, figsize=(10,4) )
im1 = axes[0].pcolorfast(x, y, A, cmap=matplotlib.cm.Greens, alpha=0.3)
axes[0].set_title('original')
im2 = axes[1].pcolorfast(xi[::10], yi[::10], Ai[::10,::10], cmap=matplotlib.cm.Oranges, alpha=0.3)
axes[1].set_title('interpolated')
cb = plt.colorbar(im1, ax=axes[0])
cb = plt.colorbar(im2, ax=axes[1])



In [10]:
%%prun

# Now store the variable in the model
ds = netCDF4.Dataset("diskless.nc", mode="w", diskless=True, persist=False)

ds.createDimension("nx", Ai.shape[1])
ds.createDimension("ny", Ai.shape[0])

var = ds.createVariable("x", datatype="double", dimensions=("nx",))
var[:] = xi
var.standard_name = 'projected_x_coordinate'
var.units = 'm'

var = ds.createVariable("y", datatype="double", dimensions=("ny", ))
var[:] = yi
var.standard_name = 'projected_y_coordinate'
var.units = 'm'

var = ds.createVariable("precipitation", datatype="double", dimensions=("nx", "ny"), fill_value=h5.attrs['fill_value'])
var[:,:] = Ai.filled(h5.attrs['fill_value'])
var.standard_name = 'precipitation'
var.units = 'mm3/s'

# Don't close the dataset before the model has done the timestep
#subgrid.update()
# Clears the memory
ds.close()

# Total file size [81Mb]
# 70ms copying memory
# diskless=False -> 4.61s/loop [USB]
# diskless=False -> 482ms/loop [SSD] [250Mb/s] 
# diskless=True -> 201ms/loop [memory]




In [ ]: