In [6]:
import numpy as np
import os
import gdal, osr
import matplotlib.pyplot as plt
import sys
import matplotlib.pyplot as plt
from scipy import ndimage as ndi
%matplotlib inline
In [7]:
#Import biomass specific libraries
from skimage.morphology import watershed
from skimage.feature import peak_local_max
from skimage.measure import regionprops
from sklearn.ensemble import RandomForestRegressor
In [8]:
chm_file = 'NEON_D17_SJER_DP3_256000_4106000_CHM.tif'
chm_dataset = gdal.Open(chm_file)
In [9]:
chm_raster = chm_dataset.GetRasterBand(1)
chm_array = chm_raster.ReadAsArray(0,0,chm_dataset.RasterXSize,chm_dataset.RasterYSize).astype(np.float)
In [10]:
np.min(chm_array)
Out[10]:
In [11]:
#Smooth the CHM using a gaussian filter to remove spurious points
chm_array_smooth = ndi.gaussian_filter(chm_array, 2, mode='constant', cval=0, truncate=2.0)
chm_array_smooth[chm_array==0] = 0
In [12]:
#Calculate local maximum points in the smoothed CHM
local_maxi = peak_local_max(chm_array_smooth,indices=False, footprint=np.ones((5, 5)))
In [13]:
#Identify all the maximum points
markers = ndi.label(local_maxi)[0]
In [14]:
#Create a CHM mask so the segmentation will only occur on the trees
chm_mask = chm_array_smooth
chm_mask[chm_array_smooth != 0] = 1
In [108]:
#Perfrom watershed segmentation
labels = watershed(chm_array_smooth, markers, mask=chm_mask)
In [109]:
#Get the properties of each segment
tree_properties = regionprops(labels, chm_array, ['Area','BoundingBox','Centroid','Orientation','MajorAxisLength','MinorAxisLength','MaxIntensity','MinIntensity'])
In [110]:
def get_predictors(tree, chm_array, labels):
indexes_of_tree = np.asarray(np.where(labels==tree.label)).T
tree_data = chm_array[indexes_of_tree[:,0],indexes_of_tree[:,1]]
full_crown = np.sum(tree_data - np.min(tree_data))
def crown_geometric_volume_pth(pth):
p = np.percentile(tree_data, pth)
tree_data_pth = [v if v < p else p for v in tree_data]
crown_geometric_volume_pth = np.sum(tree_data_pth - tree.min_intensity)
return crown_geometric_volume_pth, p
crown50, p50 = crown_geometric_volume_pth(50)
crown60, p60 = crown_geometric_volume_pth(60)
crown70, p70 = crown_geometric_volume_pth(70)
return [tree.label,
np.float(tree.area),
tree.major_axis_length,
tree.max_intensity,
tree.min_intensity,
p50, p60, p70,
full_crown, crown50, crown60, crown70]
In [111]:
predictors_mb = [get_predictors(tree, chm_array, labels) for tree in tree_properties]
all_training_data_mb = np.array([x[1:] for x in predictors_mb])
In [35]:
#Define the file of training data
training_data_file = 'SJER_Biomass_Training.csv'
#Read in the training data from a CSV file
training_data = np.genfromtxt(training_data_file,delimiter=',')
#Grab the biomass (Y) from the first line
biomass = training_data[:,0]
#Grab the biomass prdeictors from the remaining lines
biomass_predictors = training_data[:,1:12]
In [36]:
#Define paraemters for Random forest regressor
max_depth = 30
#Define regressor rules
regr_rf = RandomForestRegressor(max_depth=max_depth, random_state=2)
#Fit the biomass to regressor variables
regr_rf.fit(biomass_predictors,biomass)
Out[36]:
In [114]:
#Stack the predictor variables for all the individual trees
all_training_data = np.stack([area,
diameter,
max_tree_height,
min_tree_height,
percentile_50th,
percentile_60th,
percentile_70th,
crown_geometric_volume_full,
crown_geometric_volume_50th_percentile,
crown_geometric_volume_60th_percentile,
crown_geometric_volume_70th_percentile],axis=-1)
In [115]:
pred_biomass_mb = regr_rf.predict(all_training_data_mb)
In [123]:
biomass_out_mb = labels
for p, bm in zip(predictors_mb, pred_biomass_mb):
biomass_out_mb[biomass_out_mb==p[0]] = bm
In [136]:
for ii, (x, y) in enumerate(zip(biomass_out_mb, biomass_out)):
if np.sum(x==y)<len(x):
print(ii)
In [43]:
#Get biomass stats for plotting
mean_biomass = np.mean(pred_biomass)
std_biomass = np.std(pred_biomass)
min_biomass = np.min(pred_biomass)
sum_biomass = np.sum(pred_biomass)
print('Sum of biomass is ',sum_biomass,' kg')
In [ ]:
Sum of biomass is 6978251.34548 kg
In [ ]: