In [1]:
import json
import urllib2
import matplotlib

# Use the bmh matplotlib style throughout the notebook
s = json.load(urllib2.urlopen("https://github.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/raw/master/styles/bmh_matplotlibrc.json"))
matplotlib.rcParams.update(s)
figsize(11, 9)  # figsize, np and plt are provided by the pylab notebook mode

Inverting CHRIS/PROBA data over Barrax

This notebook contains my working notes; most of the code will be stored somewhere else. The aim of this demonstration is to use our GP emulator to invert observed reflectance and estimate a number of land surface parameters from the data.

The approach uses Gaussian Processes in two different ways. First, the GP is used as a pure regressor, giving a "first pass" estimate of the parameters. This estimate is then refined using a DA approach, in which the GP is used as an RT model emulator within a Bayesian framework, solving for a posterior that combines the spectral information with a spatial regularisation term.
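Schematically, the cost function being minimised in that second stage looks something like the following (a sketch under stated assumptions: the exact observation covariance and regularisation operator are implementation details not fixed here):

$$J(\mathbf{x}) = \frac{1}{2}\left[\mathbf{R} - H(\mathbf{x})\right]^{\top}\mathbf{C}_{obs}^{-1}\left[\mathbf{R} - H(\mathbf{x})\right] + \frac{\gamma}{2}\left\|\Delta\mathbf{x}\right\|^{2},$$

where $H$ is the GP emulator of the RT model, $\mathbf{R}$ the observed reflectances, $\mathbf{C}_{obs}$ the observational uncertainty, $\Delta$ a spatial difference operator and $\gamma$ the regularisation weight.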

Building look up tables

Perhaps the first stage here is to build up some LUTs. I will be using random sampling, and may revisit this later on with Latin hypercubes or similar, but we'll see. For the Barrax image acquired on 12.07.2003, the following acquisition geometries (in degrees) are available:

  • SZA = 22.4
  • SAA = 134.7
  • VZA = 19.4, 38.78, 39.15, 55.99, 56.24
  • VAA = 102.4, 37.98, 165.44, 26.11, 177.06

First, let's set up CHRIS' spectral sampling.


In [2]:
bands_txt = """410.501      405.633      415.244      9.61120
      441.286      436.253      446.572      10.3190
      451.155      446.572      455.890      9.31790
      460.776      455.890      465.796      9.90570
      470.956      465.796      476.259      10.4620
      481.712      476.259      487.298      11.0390
      491.899      487.298      496.596      9.29790
      501.417      496.596      506.357      9.76010
      511.431      506.357      516.657      10.3000
      522.043      516.657      527.573      10.9150
      531.848      527.573      536.238      8.66460
      542.221      536.238      548.447      12.2090
      553.251      548.447      558.182      9.73490
      563.245      558.182      568.430      10.2470
      573.769      568.430      579.258      10.8280
      582.975      579.258      586.800      7.54180
      592.636      586.800      598.645      11.8440
      604.822      598.645      611.168      12.5230
      615.512      611.168      619.917      8.74920
      624.419      619.917      628.969      9.05170
      633.636      628.969      638.395      9.42590
      643.218      638.395      648.135      9.73960
      653.172      648.135      658.276      10.1410
      663.488      658.276      668.815      10.5380
      674.242      668.815      679.748      10.9320
      682.559      679.748      685.324      5.57530
      688.174      685.324      691.070      5.74610
      693.963      691.070      696.883      5.81340
      699.860      696.883      702.841      5.95720
      705.839      702.841      708.888      6.04680
      711.954      708.888      715.058      6.17070
      718.192      715.058      721.320      6.26160
      724.529      721.320      727.724      6.40440
      730.932      727.724      734.266      6.54210
      737.563      734.266      740.901      6.63450
      744.231      740.901      747.686      6.78460
      751.100      747.686      754.582      6.89580
      758.096      754.582      761.634      7.05270
      765.172      761.634      768.737      7.10220
      772.388      768.737      776.076      7.33970
      779.735      776.076      783.491      7.41460
      787.287      783.491      791.046      7.55520
      794.949      791.046      798.750      7.70350
      802.702      798.750      806.614      7.86430
      810.582      806.614      814.631      8.01700
      831.065      822.750      839.468      16.7170
      843.759      839.468      848.038      8.57030
      852.458      848.038      856.874      8.83580
      861.241      856.874      865.688      8.81430
      870.174      865.688      874.757      9.06900
      879.316      874.757      883.899      9.14150
      888.505      883.899      893.240      9.34100
      898.019      893.240      902.751      9.51130
      907.533      902.751      912.337      9.58560
      917.255      912.337      922.147      9.81040
      927.173      922.147      932.027      9.88020
      942.121      932.027      952.277      20.2490
      957.371      952.277      962.585      10.3070
      967.744      962.585      972.983      10.3980
      978.331      972.983      983.534      10.5500
      988.911      983.534      994.152      10.6180
      999.543      994.152      1004.96      10.8060"""
# Columns: band centre, band minimum, band maximum, bandwidth (all in nm)
bands = np.fromstring(bands_txt, sep="\n").reshape((62, 4))
proba_min = bands[:, 1]
proba_max = bands[:, 2]
bw = bands[:, 3]
wv = np.arange(400, 2501)  # 1 nm grid, 400-2500 nm
# Boolean mask selecting the 1 nm samples that fall within each band
band_pass = np.zeros((62, 2101), dtype=np.bool)
for i in xrange(62):
    band_pass[i, :] = np.logical_and(wv >= proba_min[i],
                                     wv <= proba_max[i])
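As a quick sanity check of the band-pass matrix (a minimal sketch; the 0.2 tolerance is just an illustrative bound), integrating a flat unit spectrum through each band should give values close to 1, since each band sums the 1 nm samples within [proba_min, proba_max] and divides by the bandwidth:

# A flat spectrum of ones should integrate to approximately 1 per band
flat = np.ones(2101)
for i in xrange(62):
    val = flat[band_pass[i, :]].sum() / bw[i]
    assert abs(val - 1.) < 0.2, "band %d integrates to %g" % (i, val)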

The following snippet generates the LUTs. BRFs are stored in train_brf, an array with the five cameras' band sets concatenated along the second axis, and the parameters in train_params, a dictionary indexed by parameter name.


In [3]:
from lut_tools import lut, limits, samples, brdfModel, lhs

# Acquisition geometry for the five CHRIS/PROBA cameras
sza = 22.4
saa = 134.7
vza = np.array([19.4, 38.78, 39.15, 55.99, 56.24])
vaa = np.array([102.4, 37.98, 165.44, 26.11, 177.06])
cams = ["3598", "3599", "359A", "359B", "359C"]
raa = vaa - saa  # relative azimuth angle
n_train = 200
n_bands = 62
param_names = ['bsoil', 'cbrown', 'hspot', 'n', 'psoil', 'xala',
               'xcab', 'xcar', 'xcm', 'xcw', 'xlai']
train_brf = np.zeros((n_train, n_bands * len(cams)))
train_params = {}
(pmin, pmax) = limits()
s = samples(n=n_train)  # random sampling of parameter space
rho_b = np.zeros((n_train, n_bands))
for i, camera in enumerate(cams):
    a, trans_p, rho = brdfModel(s, vza[i], sza, raa[i])
    # Convolve the 1 nm simulated spectra with the CHRIS band passes
    for j in xrange(n_bands):
        rho_b[:, j] = rho[:, band_pass[j, :]].sum(axis=1) / bw[j]
    if i == 0:
        train_params.update(trans_p)
    train_brf[:, n_bands * i:n_bands * (i + 1)] = rho_b

We can also use Latin hypercube sampling to generate the LUT...


In [150]:
import numpy
from numpy.random import uniform
from numpy.linalg import cholesky, inv
import scipy.stats as stats
def lhs(dist, parms, siz=100, noCorrRestr=False, corrmat=None):
    '''
    Latin Hypercube sampling of any distribution.
    dist is is a scipy.stats random number generator 
    such as stats.norm, stats.beta, etc
    parms is a tuple with the parameters needed for 
    the specified distribution.

    :Parameters:
        - `dist`: random number generator from scipy.stats module or a list of them.
        - `parms`: tuple of parameters as required for dist, or a list of them.
        - `siz` :number or shape tuple for the output sample
        - `noCorrRestr`: if true, does not enforce correlation structure on the sample.
        - `corrmat`: Correlation matrix
    '''
    if not isinstance(dist,(list,tuple)):
        dists = [dist]
        parms = [parms]
    else:
        assert len(dist) == len(parms)
        dists = dist
    indices=rank_restr(nvars=len(dists), smp=siz, noCorrRestr=noCorrRestr, Corrmat=corrmat)
    smplist = []
    for j,d in enumerate(dists):
        if not isinstance(d, (stats.rv_discrete,stats.rv_continuous)):
            raise TypeError('dist is not a scipy.stats distribution object')
        n=siz
        if isinstance(siz,(tuple,list)):
            n=numpy.product(siz)
        #force type to float for sage compatibility
        pars = tuple([float(k) for k in parms[j]])
        step = 1. / n
        perc = numpy.arange(0, 1, step)  # stratum (class) boundaries
        # draw one sample uniformly within each stratum
        s_pos = [uniform(i, i + step) for i in perc]
        v = d(*pars).ppf(s_pos)
        index=map(int,indices[j]-1)
        v = v[index]
        if isinstance(siz,(tuple,list)):
            v.shape = siz
        smplist.append(v)
    if len(dists) == 1:
        return smplist[0]
    return smplist
def rank_restr(nvars=4, smp=100, noCorrRestr=False, Corrmat=None):
    """
    Returns the indices for sampling variables with  
    the desired correlation structure.
    
    :Parameters:
        - `nvars`: number of variables
        - `smp`: number of samples
        - `noCorrRestr`: No correlation restriction if True
        - `Corrmat`: Correlation matrix. If None, assure uncorrelated samples.
    """
    if isinstance(smp,(tuple,list)):
            smp=numpy.product(smp)
    def shuf(s):
        s1=[]
        for i in xrange(nvars):
            numpy.random.shuffle(s)
            s1.append(s.copy())
        return s1
    if noCorrRestr or nvars ==1:
        x = [stats.randint.rvs(0,smp+0,size=smp) for i in xrange(nvars)]
    else:
        if Corrmat is None:
            C=numpy.core.numeric.identity(nvars)
        else:
            if Corrmat.shape[0] != nvars:
                raise TypeError('Correlation matrix must be of rank %s'%nvars)
            C=numpy.matrix(Corrmat)
        s0=numpy.arange(1.,smp+1)/(smp+1.)
        s=stats.norm().ppf(s0)
        s1 = shuf(s)
        S=numpy.matrix(s1)
        P=cholesky(C)
        Q=cholesky(numpy.corrcoef(S))

        Final=S.transpose()*inv(Q).transpose()*P.transpose()
        x = [stats.stats.rankdata(Final.transpose()[i,]) for i in xrange(nvars)]
    return x
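For reference, a minimal usage example of lhs on its own (illustrative values only): ten stratified samples of a single uniform distribution, one per decile.

# Ten Latin hypercube samples of U(0, 1): one drawn per decile stratum
samples_u = lhs(stats.uniform, (0., 1.), siz=10)
print samples_u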

In [162]:
from lut_tools import invtransform  # invtransform is assumed to live in lut_tools

dist = [stats.uniform for k in param_names]
pams = [(pmin[k], pmax[k] - pmin[k]) for k in param_names]
lhs_params = lhs(dist, pams, siz=200)
s = dict(zip(param_names, lhs_params))
s = invtransform(s)
train_brf = np.zeros((n_train, n_bands * len(cams)))
train_params = {}
rho_b = np.zeros((n_train, n_bands))
for i, camera in enumerate(cams):
    a, trans_p, rho = brdfModel(s, vza[i], sza, raa[i])
    for j in xrange(n_bands):
        rho_b[:, j] = rho[:, band_pass[j, :]].sum(axis=1) / bw[j]
    if i == 0:
        train_params.update(trans_p)
    train_brf[:, n_bands * i:n_bands * (i + 1)] = rho_b

The training samples are now available, and we can use them to train the "inverse GPs", in the UCL parlance. Note that these LUTs span the whole of parameter space, i.e., we aren't just selecting a few parameters: less familiar ones such as dry matter are paraded around too. The other interesting thing is that the parameters are in transformed space.
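For reference, a minimal sketch of what the transformed space looks like for LAI (an assumption on my part: the exponential form below is inferred from the $-2\ln$ back-transform applied to the regressed xlai further down; the actual transforms live in lut_tools):

# Hypothetical sketch of the LAI transform (assumed, not the lut_tools
# implementation): xlai = exp(-lai / 2), so the regressed xlai is
# mapped back to LAI with lai = -2 * log(xlai)
def transform_lai(lai):
    return np.exp(-lai / 2.)

def invtransform_lai(xlai):
    return -2. * np.log(xlai)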

Let's just build some inverse emulators. For this, we need GaussianProcess.


In [58]:
from GaussianProcess import GaussianProcess
from lut_tools import unpack

parameters, param_names = unpack(p)  # p: LUT parameter structure (defined elsewhere)

# Train one GP regressor per parameter, mapping BRFs -> parameter value
gp_regressor = {}
for param_name in param_names:
    print "\t\t", param_name,
    gp = GaussianProcess(train_brf, train_params[param_name])
    gp_regressor[param_name] = gp
    gp_regressor[param_name].learn_hyperparameters(n_tries=2)
    print "Done!"


		bsoil
(Training interrupted by hand with a KeyboardInterrupt: the traceback shows the time being spent inverting the covariance matrix in GaussianProcess._prepare_likelihood, called from learn_hyperparameters via scipy's fmin_l_bfgs_b.)

In [56]:
#print gp_regressor['3598']['xlai'].predict ( np.atleast_2d(train_brf[camera][10,:]))[:2]
#print train_params['3598']['xlai'][10]



Opening the data and playing around with it

So now we have a regressor that maps from $\rho^{\lambda_{i}}(\Omega, \Omega'_{j})$ to the parameters of interest (LAI, etc.). Let's first plot an RGB composite using bands 18, 14 and 5:


In [102]:
import os
import gdal

proba_dir = "/data/netapp_4/ucfajlg/BARRAX/SPARC/" + \
    "SPARC_DB/SPARC2003/PROBA/corregidasAtm/corregidasAtm_120703"
g = gdal.Open(os.path.join(proba_dir, "refl_030712_3599.tif"))
# Reflectances are stored as int16, scaled by 10000
R = g.GetRasterBand(18).ReadAsArray() / 10000.
G = g.GetRasterBand(14).ReadAsArray() / 10000.
B = g.GetRasterBand(5).ReadAsArray() / 10000.
RGB = np.dstack((R, G, B))
plt.imshow(RGB, interpolation='nearest')
# Mark the two pixels we will invert below
plt.plot(150, 350, 'rx')
plt.plot(250, 150, 'rx')


Out[102]:
[<matplotlib.lines.Line2D at 0x2b88a5900fd0>]

In [107]:
import cPickle

# Load the pre-trained GP regressors and associated band selections
fp = open("proba_regressors.pkl", 'rb')
f = cPickle.load(fp)
fp.close()

gp_regressor = f['gp_regressor']
sel_bands = f['sel_bands']

In [142]:
# First pixel (marked above near 150, 350)
rho = []
for camera in cams:
    g = gdal.Open(os.path.join(proba_dir, "refl_030712_%s.tif" % camera))
    buf = g.ReadRaster(151, 351, 1, 1)  # all bands of a single pixel
    rho.append(np.frombuffer(buf, dtype=np.int16).T / 10000.)
    plt.plot(np.frombuffer(buf, dtype=np.int16).T / 10000.)

rho = np.array(rho)
# Stack the five cameras' spectra and keep only the bands selected for xlai
rho = np.atleast_2d(np.r_[rho[0, :], rho[1, :], rho[2, :],
                          rho[3, :], rho[4, :]])[:, sel_bands['xlai']]

lai, lai_unc, lai_der = gp_regressor['xlai'].predict(rho)
# xlai is in transformed space; -2 log(xlai) maps it back to LAI
print "Posterior mean: %g" % (-2. * np.log(lai))
print "Posterior 5%% CI: %g" % (-2. * np.log(lai - 1.96 * lai_unc))
print "Posterior 95%% CI: %g" % (-2. * np.log(lai + 1.96 * lai_unc))


# Second pixel (marked above near 250, 150)
rho = []
for camera in cams:
    g = gdal.Open(os.path.join(proba_dir, "refl_030712_%s.tif" % camera))
    buf = g.ReadRaster(250, 150, 1, 1)
    rho.append(np.frombuffer(buf, dtype=np.int16).T / 10000.)
    plt.plot(np.frombuffer(buf, dtype=np.int16).T / 10000.)
rho = np.array(rho)
rho = np.atleast_2d(np.r_[rho[0, :], rho[1, :], rho[2, :],
                          rho[3, :], rho[4, :]])[:, sel_bands['xlai']]
lai, lai_unc, lai_der = gp_regressor['xlai'].predict(rho)
if lai < 0:
    # the regressor can overshoot below zero; clamp so the log is defined
    lai = 1
print lai
print "Posterior mean: %g" % (-2. * np.log(lai))
print "Posterior 5%% CI: %g" % (-2. * np.log(lai - 1.96 * lai_unc))
print "Posterior 95%% CI: %g" % (-2. * np.log(lai + 1.96 * lai_unc))


Posterior mean: 2.21388
Posterior 5% CI: 2.30154
Posterior 95% CI: 2.12989
1
Posterior mean: -0
Posterior 5% CI: 0.258211
Posterior 95% CI: -0.228654

In [131]:
print -2*np.log(lai)


[ 1.91030607]

In [101]:
rho = []



In [ ]: