Note:

sampler.py requirements:

- sklearn (kd-tree)
- cartopy (plotting)
- matplotlib 

In [15]:
import os 
import sampler
import numpy as np

%matplotlib inline

Retrieve and parse a text product containing observations:

Retrieve and parse a text product from the Australian Bureau of Meterology.

ftp://ftp2.bom.gov.au/anon/gen/fwo/IDY03021.txt

In [16]:
obs = sampler.dataframe_observations()
print(obs.head())


        station    lat     lon  day  hour  vis  cld wind_dir wind_mag  temp  \
2  Albury AP    -36.07  146.95    2   100   10  NaN       NE      003     7   
3  Albury AP    -36.07  146.95    2   100  NaN  NaN       NE      003     7   
5  Armidale     -30.52  151.67    1  2300   50  NaN       SW      006     2   
6  Armidale A   -30.53  151.62    2   100   10  NaN        S      008     6   
7  Armidale A   -30.53  151.62    2   100  NaN  NaN        S      008     6   

   rh     bar rain_mm rain_hr weather  tx  tn  
2  74  1030.0     0.0      9>     NaN NaN NaN  
3  74  1030.3     NaN     NaN     NaN NaN NaN  
5  67     NaN     0.0      24    Clr   17  -1  
6  45  1023.6     0.0      9>     NaN NaN NaN  
7  45  1023.4     NaN     NaN     NaN NaN NaN  

In [17]:
sampler.plot_data(obs.lat, obs.lon, obs.temp, overlay=False, S=50)


Out[17]:
<matplotlib.axes.GeoAxesSubplot at 0x112fb0950>

Form a fast nearest neighbour sampler (kd-tree)


In [18]:
tree = sampler.sample_tree(obs.lat.values, obs.lon.values)

Form a high-resolution lat/lon grid


In [19]:
lons, lats = np.meshgrid(np.linspace(105.5, 160.4375, 1000), 
                         np.linspace(-49.5, -4.0625, 1000))

Estimate the (whole) temperature grid


In [20]:
def estimate(tree, obs, coords):
    
    distance, obs_values, _ = sampler.nearest_neighbour(
                                        tree, 
                                        obs.temp.values, 
                                        coords[0], 
                                        coords[1], 
                                        K=10)
    
    weights = 1. / distance**2
    
    temp_vector = (np.nansum((weights * obs_values), axis=1) / 
                   np.nansum(weights, axis=1))
    
    return temp_vector, obs_values

In [21]:
%%time 

temp_vector, _values = estimate(tree, obs, (lats, lons))


CPU times: user 3.61 s, sys: 297 ms, total: 3.9 s
Wall time: 4.09 s

In [22]:
ax = sampler.plot_data(lats, lons, np.reshape(temp_vector, lons.shape))

sampler.plot_data(obs.lat, obs.lon, obs.temp, S=50, ax=ax, cbar=False)


Out[22]:
<matplotlib.axes.GeoAxesSubplot at 0x10d9d2c50>

Chunk the input grid


In [23]:
N = 20

lat_chunks = np.array_split(lats, N)
lon_chunks = np.array_split(lons, N)

In [24]:
%timeit estimate(tree, obs, (lat_chunks[0], lon_chunks[1]))


1 loops, best of 3: 221 ms per loop

In [25]:
from IPython.parallel import Client

c = Client(profile='default')

direct = c.direct_view()

balanced = c.load_balanced_view()

Make a remote function


In [26]:
@balanced.remote()
def parallel_estimate(tree, obs, coords):
    
    import sys; sys.path.append(notebook_path)
    import sampler
    import numpy as np
    
    distance, obs_values, _ = sampler.nearest_neighbour(
                                        tree, 
                                        obs.temp.values, 
                                        coords[0], 
                                        coords[1],
                                        K=10)
    weights = 1. / distance**2
    
    temp_vector = (np.nansum((weights * obs_values), axis=1) / 
                   np.nansum(weights, axis=1))
    
    return temp_vector

Push shared state and process chunks


In [27]:
%%time

direct.push({'notebook_path': os.getcwd()})

parallel_result = [parallel_estimate(tree, obs, x) for x in 
                       zip(lat_chunks, lon_chunks)]

temp_vector = np.c_[[x.result for x in parallel_result]]


CPU times: user 766 ms, sys: 129 ms, total: 895 ms
Wall time: 2.22 s

In [28]:
ax = sampler.plot_data(lats, lons, np.reshape(temp_vector, lons.shape))

sampler.plot_data(obs.lat, obs.lon, obs.temp, S=50, ax=ax, cbar=False)


Out[28]:
<matplotlib.axes.GeoAxesSubplot at 0x10c2b9650>

In [28]: