In [6]:
import numpy as np
import os
import gdal, osr
import matplotlib.pyplot as plt
import sys
import matplotlib.pyplot as plt
from scipy import ndimage as ndi
%matplotlib inline

In [7]:
#Import biomass specific libraries
from skimage.morphology import watershed
from skimage.feature import peak_local_max
from skimage.measure import regionprops
from sklearn.ensemble import RandomForestRegressor

In [8]:
chm_file = 'NEON_D17_SJER_DP3_256000_4106000_CHM.tif'
chm_dataset = gdal.Open(chm_file)

In [9]:
chm_raster = chm_dataset.GetRasterBand(1)
chm_array = chm_raster.ReadAsArray(0,0,chm_dataset.RasterXSize,chm_dataset.RasterYSize).astype(np.float)

In [10]:
np.min(chm_array)


Out[10]:
0.0

In [11]:
#Smooth the CHM using a gaussian filter to remove spurious points
chm_array_smooth = ndi.gaussian_filter(chm_array, 2, mode='constant', cval=0, truncate=2.0)
chm_array_smooth[chm_array==0] = 0

In [12]:
#Calculate local maximum points in the smoothed CHM
local_maxi = peak_local_max(chm_array_smooth,indices=False, footprint=np.ones((5, 5)))

In [13]:
#Identify all the maximum points
markers = ndi.label(local_maxi)[0]

In [14]:
#Create a CHM mask so the segmentation will only occur on the trees
chm_mask = chm_array_smooth
chm_mask[chm_array_smooth != 0] = 1

In [108]:
#Perfrom watershed segmentation        
labels = watershed(chm_array_smooth, markers, mask=chm_mask)

In [109]:
#Get the properties of each segment
tree_properties = regionprops(labels, chm_array, ['Area','BoundingBox','Centroid','Orientation','MajorAxisLength','MinorAxisLength','MaxIntensity','MinIntensity'])

In [110]:
def get_predictors(tree, chm_array, labels):
    indexes_of_tree = np.asarray(np.where(labels==tree.label)).T
    tree_data = chm_array[indexes_of_tree[:,0],indexes_of_tree[:,1]]
    full_crown = np.sum(tree_data - np.min(tree_data))
    
    def crown_geometric_volume_pth(pth):
        p = np.percentile(tree_data, pth)
        tree_data_pth = [v if v < p else p for v in tree_data]
        crown_geometric_volume_pth = np.sum(tree_data_pth - tree.min_intensity)
        return crown_geometric_volume_pth, p
   
    crown50, p50 = crown_geometric_volume_pth(50)
    crown60, p60 = crown_geometric_volume_pth(60)
    crown70, p70 = crown_geometric_volume_pth(70)
    
    return [tree.label,
            np.float(tree.area),
            tree.major_axis_length,
            tree.max_intensity,
            tree.min_intensity, 
            p50, p60, p70,
            full_crown, crown50, crown60, crown70]

In [111]:
predictors_mb = [get_predictors(tree, chm_array, labels) for tree in tree_properties]
all_training_data_mb = np.array([x[1:] for x in predictors_mb])

In [35]:
#Define the file of training data  
training_data_file = 'SJER_Biomass_Training.csv'

#Read in the training data from a CSV file
training_data = np.genfromtxt(training_data_file,delimiter=',') 

#Grab the biomass (Y) from the first line
biomass = training_data[:,0]

#Grab the biomass prdeictors from the remaining lines
biomass_predictors = training_data[:,1:12]

In [36]:
#Define paraemters for Random forest regressor
max_depth = 30

#Define regressor rules
regr_rf = RandomForestRegressor(max_depth=max_depth, random_state=2)

#Fit the biomass to regressor variables
regr_rf.fit(biomass_predictors,biomass)


Out[36]:
RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=30,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=10, n_jobs=1, oob_score=False, random_state=2,
           verbose=0, warm_start=False)

In [114]:
#Stack the predictor variables for all the individual trees
all_training_data = np.stack([area,
                              diameter,
                              max_tree_height,
                              min_tree_height,
                              percentile_50th,
                              percentile_60th,
                              percentile_70th,
                              crown_geometric_volume_full,
                              crown_geometric_volume_50th_percentile,
                              crown_geometric_volume_60th_percentile,
                              crown_geometric_volume_70th_percentile],axis=-1)

In [115]:
pred_biomass_mb = regr_rf.predict(all_training_data_mb)

In [123]:
biomass_out_mb = labels
for p, bm in zip(predictors_mb, pred_biomass_mb):
    biomass_out_mb[biomass_out_mb==p[0]] = bm

In [136]:
for ii, (x, y) in enumerate(zip(biomass_out_mb, biomass_out)):
    if np.sum(x==y)<len(x):
        print(ii)

In [43]:
#Get biomass stats for plotting
mean_biomass = np.mean(pred_biomass)
std_biomass = np.std(pred_biomass)
min_biomass = np.min(pred_biomass)
sum_biomass = np.sum(pred_biomass)

print('Sum of biomass is ',sum_biomass,' kg')


Sum of biomass is  6978251.34548  kg

In [ ]:
Sum of biomass is  6978251.34548  kg

In [ ]: