In [1]:
import json
import urllib2

import numpy as np
import matplotlib
import matplotlib.pyplot as plt

# Fetch the BMH matplotlib style used throughout these notes
s = json.load(urllib2.urlopen("https://github.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/raw/master/styles/bmh_matplotlibrc.json"))
matplotlib.rcParams.update(s)
matplotlib.rcParams["figure.figsize"] = (11, 9)
So this notebook contains the working notes; most of the code will be stored somewhere else. The aim of this demonstration is to use our GP emulator to invert observed reflectance and estimate a number of land surface parameters from the data.
The approach taken uses Gaussian Processes in two different ways. First, the GP is used as a pure regressor, which yields a "first pass" estimate of the parameters. This estimate is then refined using a data assimilation (DA) approach, in which the GP is used as an RT model emulator within a Bayesian framework, solving for a posterior that combines the spectral information with a spatial regularisation term.
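To make the second (DA) stage concrete, here is a minimal sketch of the per-pixel cost function such a scheme would minimise, assuming a Gaussian observation model and a Gaussian prior standing in for the spatial regularisation term. The names gp_emulator, obs_unc, x_prior and prior_inv_cov are hypothetical stand-ins, not part of the code used below.

import numpy as np

def da_cost(x, rho_obs, obs_unc, gp_emulator, x_prior, prior_inv_cov):
    # Data (spectral) term: GP emulator prediction vs. observed reflectance
    rho_model = gp_emulator.predict(np.atleast_2d(x))[0]
    resid = (rho_obs - rho_model.squeeze()) / obs_unc
    data_term = 0.5 * np.dot(resid, resid)
    # Prior term: stands in for the spatial regularisation
    dx = x - x_prior
    prior_term = 0.5 * np.dot(dx, np.dot(prior_inv_cov, dx))
    return data_term + prior_term

Minimising this over x (e.g. with a gradient-based optimiser, using the GP's analytic derivatives) gives the refined, second-stage estimate.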
The first stage here is to build up some LUTs. I will start with random sampling, and revisit this later on with Latin hypercubes. For the Barrax image acquired on 12.07.2003, five acquisition geometries (one per CHRIS camera) are available; they are listed in the LUT-generation cell below.
First, let's set up CHRIS' spectral sampling.
In [2]:
bands_txt = """410.501 405.633 415.244 9.61120
441.286 436.253 446.572 10.3190
451.155 446.572 455.890 9.31790
460.776 455.890 465.796 9.90570
470.956 465.796 476.259 10.4620
481.712 476.259 487.298 11.0390
491.899 487.298 496.596 9.29790
501.417 496.596 506.357 9.76010
511.431 506.357 516.657 10.3000
522.043 516.657 527.573 10.9150
531.848 527.573 536.238 8.66460
542.221 536.238 548.447 12.2090
553.251 548.447 558.182 9.73490
563.245 558.182 568.430 10.2470
573.769 568.430 579.258 10.8280
582.975 579.258 586.800 7.54180
592.636 586.800 598.645 11.8440
604.822 598.645 611.168 12.5230
615.512 611.168 619.917 8.74920
624.419 619.917 628.969 9.05170
633.636 628.969 638.395 9.42590
643.218 638.395 648.135 9.73960
653.172 648.135 658.276 10.1410
663.488 658.276 668.815 10.5380
674.242 668.815 679.748 10.9320
682.559 679.748 685.324 5.57530
688.174 685.324 691.070 5.74610
693.963 691.070 696.883 5.81340
699.860 696.883 702.841 5.95720
705.839 702.841 708.888 6.04680
711.954 708.888 715.058 6.17070
718.192 715.058 721.320 6.26160
724.529 721.320 727.724 6.40440
730.932 727.724 734.266 6.54210
737.563 734.266 740.901 6.63450
744.231 740.901 747.686 6.78460
751.100 747.686 754.582 6.89580
758.096 754.582 761.634 7.05270
765.172 761.634 768.737 7.10220
772.388 768.737 776.076 7.33970
779.735 776.076 783.491 7.41460
787.287 783.491 791.046 7.55520
794.949 791.046 798.750 7.70350
802.702 798.750 806.614 7.86430
810.582 806.614 814.631 8.01700
831.065 822.750 839.468 16.7170
843.759 839.468 848.038 8.57030
852.458 848.038 856.874 8.83580
861.241 856.874 865.688 8.81430
870.174 865.688 874.757 9.06900
879.316 874.757 883.899 9.14150
888.505 883.899 893.240 9.34100
898.019 893.240 902.751 9.51130
907.533 902.751 912.337 9.58560
917.255 912.337 922.147 9.81040
927.173 922.147 932.027 9.88020
942.121 932.027 952.277 20.2490
957.371 952.277 962.585 10.3070
967.744 962.585 972.983 10.3980
978.331 972.983 983.534 10.5500
988.911 983.534 994.152 10.6180
999.543 994.152 1004.96 10.8060"""
# Columns: band centre, band minimum, band maximum, bandwidth (all in nm)
bands = np.fromstring(bands_txt, sep=" ").reshape((62, 4))
proba_min = bands[:, 1]
proba_max = bands[:, 2]
bw = bands[:, 3]
# Boolean band-pass masks on a 1 nm grid from 400 to 2500 nm
band_pass = np.zeros((62, 2101), dtype=np.bool)
wv = np.arange(400, 2501)
for i in xrange(62):
    band_pass[i, :] = np.logical_and(wv >= proba_min[i],
                                     wv <= proba_max[i])
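As a quick, optional sanity check, the number of 1 nm grid points inside each band pass should be close to the nominal bandwidth (agreement to within a nanometre or so is what I'd expect here):

n_samples = band_pass.sum(axis=1)
print np.abs(n_samples - bw).max()  # worst-case disagreement, in nm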
The following snippet generates the LUTs. BRFs are stored in train_brf, an array whose columns stack the 62 bands of each of the five cameras, and the (transformed) parameters in the train_params dictionary, which is indexed by parameter name.
In [3]:
from lut_tools import lut, limits, samples, brdfModel, lhs

# Acquisition geometry for the five CHRIS/PROBA cameras, Barrax, 12.07.2003
sza = 22.4
saa = 134.7
vza = np.array([19.4, 38.78, 39.15, 55.99, 56.24])
vaa = np.array([102.4, 37.98, 165.44, 26.11, 177.06])
cams = ["3598", "3599", "359A", "359B", "359C"]
raa = vaa - saa

n_train = 200
n_bands = 62
param_names = ['bsoil', 'cbrown', 'hspot', 'n', 'psoil', 'xala', 'xcab',
               'xcar', 'xcm', 'xcw', 'xlai']

train_brf = np.zeros((n_train, n_bands * len(cams)))
train_params = {}
(pmin, pmax) = limits()
s = samples(n=n_train)
rho_b = np.zeros((n_train, n_bands))
for i, camera in enumerate(cams):
    a, trans_p, rho = brdfModel(s, vza[i], sza, raa[i])
    # Integrate the 1 nm model output over each CHRIS band pass
    for j in xrange(n_bands):
        rho_b[:, j] = rho[:, band_pass[j, :]].sum(axis=1) / bw[j]
    if i == 0:
        train_params.update(trans_p)
    train_brf[:, n_bands * i:n_bands * (i + 1)] = rho_b
We can also use Latin hypercube sampling to generate the LUT.
In [150]:
import numpy
import scipy.stats as stats
from numpy.random import uniform
from numpy.linalg import cholesky, inv

def lhs(dist, parms, siz=100, noCorrRestr=False, corrmat=None):
    '''
    Latin Hypercube sampling of any distribution.
    dist is a scipy.stats random number generator
    such as stats.norm, stats.beta, etc.
    parms is a tuple with the parameters needed for
    the specified distribution.

    :Parameters:
        - `dist`: random number generator from scipy.stats module, or a list of them.
        - `parms`: tuple of parameters as required for dist, or a list of them.
        - `siz`: number or shape tuple for the output sample
        - `noCorrRestr`: if True, does not enforce correlation structure on the sample.
        - `corrmat`: correlation matrix
    '''
    if not isinstance(dist, (list, tuple)):
        dists = [dist]
        parms = [parms]
    else:
        assert len(dist) == len(parms)
        dists = dist
    indices = rank_restr(nvars=len(dists), smp=siz,
                         noCorrRestr=noCorrRestr, Corrmat=corrmat)
    smplist = []
    for j, d in enumerate(dists):
        if not isinstance(d, (stats.rv_discrete, stats.rv_continuous)):
            raise TypeError('dist is not a scipy.stats distribution object')
        n = siz
        if isinstance(siz, (tuple, list)):
            n = numpy.product(siz)
        # force type to float for sage compatibility
        pars = tuple([float(k) for k in parms[j]])
        step = 1. / n
        perc = numpy.arange(0, 1, step)  # class boundaries
        # one uniform draw inside each stratum
        s_pos = [uniform(i, i + step) for i in perc[:]]
        v = d(*pars).ppf(s_pos)
        # shuffle the stratified values according to the rank indices
        index = map(int, indices[j] - 1)
        v = v[index]
        if isinstance(siz, (tuple, list)):
            v.shape = siz
        smplist.append(v)
    if len(dists) == 1:
        return smplist[0]
    return smplist

def rank_restr(nvars=4, smp=100, noCorrRestr=False, Corrmat=None):
    """
    Returns the indices for sampling variables with
    the desired correlation structure.

    :Parameters:
        - `nvars`: number of variables
        - `smp`: number of samples
        - `noCorrRestr`: no correlation restriction if True
        - `Corrmat`: correlation matrix. If None, assures uncorrelated samples.
    """
    if isinstance(smp, (tuple, list)):
        smp = numpy.product(smp)
    def shuf(s):
        s1 = []
        for i in xrange(nvars):
            numpy.random.shuffle(s)
            s1.append(s.copy())
        return s1
    if noCorrRestr or nvars == 1:
        # Use random permutations of the ranks 1..smp so that every stratum
        # is used exactly once (drawing with replacement could repeat strata)
        x = [numpy.random.permutation(smp) + 1 for i in xrange(nvars)]
    else:
        if Corrmat is None:
            C = numpy.identity(nvars)
        else:
            if Corrmat.shape[0] != nvars:
                raise TypeError('Correlation matrix must be of rank %s' % nvars)
            C = numpy.matrix(Corrmat)
        s0 = numpy.arange(1., smp + 1) / (smp + 1.)
        s = stats.norm().ppf(s0)
        s1 = shuf(s)
        S = numpy.matrix(s1)
        P = cholesky(C)
        Q = cholesky(numpy.corrcoef(S))
        Final = S.transpose() * inv(Q).transpose() * P.transpose()
        x = [stats.rankdata(Final.transpose()[i, ]) for i in xrange(nvars)]
    return x
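As a quick illustration of what the stratification buys us, the snippet below draws a small one-dimensional uniform LHS sample and checks that exactly one point falls in each of the ten equal-probability bins:

x = lhs(stats.uniform, (0, 1), siz=10)
# Each decile [0,0.1), [0.1,0.2), ... should hold exactly one sample
print numpy.sort(numpy.floor(x * 10))  # -> [0. 1. 2. ... 9.]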
In [162]:
# One uniform distribution per parameter, spanning [pmin, pmax]
dist = [stats.uniform for k in param_names]
pams = [(pmin[k], pmax[k] - pmin[k]) for k in param_names]
lhs_params = lhs(dist, pams, siz=200)
s = dict(zip(param_names, lhs_params))
# invtransform presumably maps the transformed samples back to the
# physical units expected by the RT model
s = invtransform(s)

train_brf = np.zeros((n_train, n_bands * len(cams)))
train_params = {}
rho_b = np.zeros((n_train, n_bands))
for i, camera in enumerate(cams):
    a, trans_p, rho = brdfModel(s, vza[i], sza, raa[i])
    for j in xrange(n_bands):
        rho_b[:, j] = rho[:, band_pass[j, :]].sum(axis=1) / bw[j]
    if i == 0:
        train_params.update(trans_p)
    train_brf[:, n_bands * i:n_bands * (i + 1)] = rho_b
The training samples are now available, and we can use them to train the "inverse GPs", in the UCL parlance. Note that these LUTs span the whole of parameter space, i.e. we are not just selecting a few parameters: even less commonly retrieved quantities such as dry matter content are included. The other interesting thing is that the parameters are in transformed space.
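For reference, the only transform visible in these notes is the one for LAI: further down, the retrieved xlai is mapped back with -2 * np.log(xlai), which implies the forward transform is presumably xlai = exp(-lai / 2). A minimal sketch, assuming that form:

def lai_to_xlai(lai):
    # Forward transform (assumed from the -2*log(.) inverse used below)
    return np.exp(-lai / 2.)

def xlai_to_lai(xlai):
    # Inverse transform, as used when reporting the retrievals
    return -2. * np.log(xlai)

print xlai_to_lai(lai_to_xlai(3.5))  # round-trips to 3.5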
Let's now build some inverse emulators. For this, we need GaussianProcess.
In [58]:
from GaussianProcess import GaussianProcess
from lut_tools import unpack

# NOTE: `p` is defined elsewhere in these working notes; `unpack` splits
# the parameter structure into an array and the list of parameter names
parameters, param_names = unpack(p)
gp_regressor = {}
for param_name in param_names:
    print "\t\t", param_name,
    gp = GaussianProcess(train_brf, train_params[param_name])
    gp_regressor[param_name] = gp
    gp_regressor[param_name].learn_hyperparameters(n_tries=2)
    print "Done!"
In [56]:
#print gp_regressor['xlai'].predict ( np.atleast_2d(train_brf[10,:]))[:2]
#print train_params['xlai'][10]
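A runnable version of that sanity check, matching the current structure of gp_regressor (training sample 10 is an arbitrary choice), would look like this:

xlai_hat, xlai_unc, xlai_der = gp_regressor['xlai'].predict(
    np.atleast_2d(train_brf[10, :]))
print "Predicted :", xlai_hat, "+/-", 1.96 * xlai_unc
print "LUT value :", train_params['xlai'][10]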
So now we have a regressor that maps from $\rho^{\lambda_{i}}(\Omega, \Omega'_{j})$ to the parameters of interest (LAI, etc.). Let's first plot an RGB composite using bands 18, 14 and 5:
In [102]:
import os
import gdal
proba_dir = "/data/netapp_4/ucfajlg/BARRAX/SPARC/" + \
"SPARC_DB/SPARC2003/PROBA/corregidasAtm/corregidasAtm_120703"
g = gdal.Open ( os.path.join ( proba_dir, "refl_030712_3599.tif" ) )
R = g.GetRasterBand ( 18 ).ReadAsArray()/10000.
G = g.GetRasterBand ( 14 ).ReadAsArray()/10000.
B = g.GetRasterBand ( 5 ).ReadAsArray()/10000.
RGB = np.dstack ( (R, G, B ))
plt.imshow ( RGB, interpolation='nearest')
plt.plot ( 150, 350, 'rx' )
plt.plot ( 250, 150, 'rx' )
Out[102]: [RGB composite of the Barrax scene; the two sample pixels are marked with red crosses]
Rather than re-training the regressors every time, we can load a previously trained set, together with the per-parameter band selection, from a pickle file:
In [107]:
import cPickle
fp = open("proba_regressors.pkl", 'rb')
f = cPickle.load ( fp )
fp.close()
gp_regressor = f['gp_regressor']
sel_bands = f['sel_bands']
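For completeness, a pickle with this structure could have been produced along these lines (a sketch; the original serialisation code is not part of these notes):

import cPickle
fp = open("proba_regressors.pkl", 'wb')
cPickle.dump({'gp_regressor': gp_regressor, 'sel_bands': sel_bands},
             fp, protocol=cPickle.HIGHEST_PROTOCOL)
fp.close()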
In [142]:
rho = []
for camera in cams:
    g = gdal.Open(os.path.join(proba_dir, "refl_030712_%s.tif" % camera))
    # Read a single pixel (all 62 bands) and scale the int16 DNs to reflectance
    buf = g.ReadRaster(151, 351, 1, 1)
    rho.append(np.frombuffer(buf, dtype=np.int16) / 10000.)
    plt.plot(np.frombuffer(buf, dtype=np.int16) / 10000.)
rho = np.array(rho)
rho = np.atleast_2d(np.r_[rho[0, :], rho[1, :], rho[2, :], rho[3, :], rho[4, :]])[:, sel_bands['xlai']]
lai, lai_unc, lai_der = gp_regressor['xlai'].predict(rho)
# -2*log(.) is monotonically decreasing, so the interval endpoints swap:
# xlai + 1.96*sigma maps to the lower LAI bound, xlai - 1.96*sigma to the upper
print "Posterior mean: %g" % (-2. * np.log(lai))
print "Posterior 5%% CI: %g" % (-2. * np.log(lai + 1.96 * lai_unc))
print "Posterior 95%% CI: %g" % (-2. * np.log(lai - 1.96 * lai_unc))

rho = []
for camera in cams:
    g = gdal.Open(os.path.join(proba_dir, "refl_030712_%s.tif" % camera))
    buf = g.ReadRaster(250, 150, 1, 1)
    rho.append(np.frombuffer(buf, dtype=np.int16) / 10000.)
    plt.plot(np.frombuffer(buf, dtype=np.int16) / 10000.)
rho = np.array(rho)
rho = np.atleast_2d(np.r_[rho[0, :], rho[1, :], rho[2, :], rho[3, :], rho[4, :]])[:, sel_bands['xlai']]
lai, lai_unc, lai_der = gp_regressor['xlai'].predict(rho)
if lai < 0:
    # A negative transformed value would break the log; clamp to xlai=1 (LAI=0)
    lai = 1
print lai
print "Posterior mean: %g" % (-2. * np.log(lai))
print "Posterior 5%% CI: %g" % (-2. * np.log(lai + 1.96 * lai_unc))
print "Posterior 95%% CI: %g" % (-2. * np.log(lai - 1.96 * lai_unc))
In [131]:
print -2*np.log(lai)