Imagery displaying observed light at night over the country of Burundi was downloaded from the NOAA VIIRS website: https://ngdc.noaa.gov/eog/viirs/download_ut_mos.html Burundi is contained within Tile 5, just slightly below the equator.
We will use satellite day and night time from 2017. Burundi's dry season spans June to late September. Imagery will be collected as close to August 15th, 2017 to minimize noise created from seasonality between the datasets and increase the likelhood the images will be cloud free.
Images downloaded from the above website are free of inherent geographic information. To convert the image from png to tiff while applying the coordinate system, I the following gdal command via the cli:
gdal_translate -a_srs EPSG:4326 -co compress=LZW SVDNB_npp_d20170815.d.00N060W.png Burundi_20170815.tiff
In [2]:
import os
import warnings
from pprint import pprint
import descarteslabs as dl
In [3]:
# *************FIND COUNTRY BOUNDARY ***************
matches = dl.places.find('burundi')
aoi = matches[0]
pprint(aoi)
shape = dl.places.shape(aoi['slug'], geom='low')
In [32]:
# *************OPEN AN EXISTING IMAGE FROM DISK ***************
from osgeo import gdal
import numpy as np
raster_data_path = "data/black-marble/night_lights_clipped.tif"
raster_dataset = gdal.Open(raster_data_path, gdal.GA_ReadOnly)
In [16]:
geo_transform = raster_dataset.GetGeoTransform()
# (416415.0, 30.0, 0.0, 6663145.0, 0.0, -30.0)
proj = raster_dataset.GetProjectionRef()
# PROJCS["WGS 84 / UTM zone 20S",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AUTHORITY["EPSG","4326"]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",-63],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",10000000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AUTHORITY["EPSG","32720"]]
bands_data = []
for b in range(1, raster_dataset.RasterCount+1):
band = raster_dataset.GetRasterBand(b)
bands_data.append(band.ReadAsArray())
# [array dtype=float32],[...], ...
bands_data = np.dstack(bands_data)
# Validate data
rows, cols, n_bands = bands_data.shape
# print (rows, cols, n_bands)
In [12]:
# *************PLOT SINGLE IMAGE ***************
import matplotlib.pyplot as plt
%matplotlib inline
plot_data = bands_data.squeeze()
plot_data.shape
plt.figure(figsize=[16,16])
plt.imshow(plot_data)
Out[12]:
In [13]:
def create_mask_from_vector(vector_data_path, cols, rows, geo_transform,
projection, target_value=1):
"""Rasterize the given vector (wrapper for gdal.RasterizeLayer)."""
print(vector_data_path)
data_source = gdal.OpenEx(vector_data_path, gdal.OF_VECTOR)
print(data_source)
layer = data_source.GetLayer(0)
driver = gdal.GetDriverByName('MEM') # In memory dataset
target_ds = driver.Create('', cols, rows, 1, gdal.GDT_UInt16)
target_ds.SetGeoTransform(geo_transform)
target_ds.SetProjection(projection)
gdal.RasterizeLayer(target_ds, [1], layer, burn_values=[target_value])
return target_ds
def vectors_to_raster(file_paths, rows, cols, geo_transform, projection):
"""Rasterize the vectors in the given directory in a single image."""
labeled_pixels = np.zeros((rows, cols))
print
for i, path in enumerate(file_paths):
label = i+1
ds = create_mask_from_vector(path, cols, rows, geo_transform,
projection, target_value=label)
band = ds.GetRasterBand(1)
labeled_pixels += band.ReadAsArray()
ds = None
return labeled_pixels
def write_geotiff(fname, data, geo_transform, projection):
"""Create a GeoTIFF file with the given data."""
driver = gdal.GetDriverByName('GTiff')
rows, cols = data.shape
dataset = driver.Create(fname, cols, rows, 1, gdal.GDT_Byte)
dataset.SetGeoTransform(geo_transform)
dataset.SetProjection(projection)
band = dataset.GetRasterBand(1)
band.WriteArray(data)
dataset = None # Close the file
Define training data:
In [14]:
output_fname = "data/black-marble/classification.tiff"
train_data_path = "data/black-marble/train/"
validation_data_path = "data/black-marble/test/"
Let's process the input training dataset. Project all the vector data, in the training dataset, into a numpy array. Each class is assigned a label (a number between 1 and the total number of classes). If the value v in the position (i, j) of this new array is not zero, that means that the pixel (i, j) must be used as a training sample of class v.
training_samples
is the list of pixels to be used for training. In our case, a pixel is a point in the nth-dimensional space of the bands.
training_labels
is a list of class labels such that the i-th position indicates the class for i-th pixel in training_samples
In [17]:
files = [f for f in os.listdir(train_data_path) if f.endswith('.shp')]
classes = [f.split('.')[0] for f in files]
print(classes)
shapefiles = [os.path.join(train_data_path, f)
for f in files if f.endswith('.shp')]
labeled_pixels = vectors_to_raster(shapefiles, rows, cols, geo_transform,
proj)
print(shapefiles, rows, cols, geo_transform, proj)
is_train = np.nonzero(labeled_pixels)
training_labels = labeled_pixels[is_train]
training_samples = bands_data[is_train]
Now that we have all our parameters, we can instantiate a RandomForestClassifier from Scikit-learn to train a model for the classification.
I have used the bare minimum to get the classification going, but will fine tune the parameters.
In [20]:
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_jobs=-1)
classifier.fit(training_samples, training_labels)
Out[20]:
To see the training data plotted:
In [ ]:
print(training_labels.shape)
print(training_samples.shape)
# Make the plot
plt.scatter(training_labels, training_samples, color='r', marker='o')
# Add some axis labels
plt.xlabel('Training Lables')
plt.ylabel('Training Samples')
# Add a title
plt.title('Training Data')
Let's apply the model to the night lights image for classification.
In [22]:
n_samples = rows*cols
flat_pixels = bands_data.reshape((n_samples, n_bands))
result = classifier.predict(flat_pixels)
classification = result.reshape((rows, cols))
In [23]:
from matplotlib import pyplot as plt
plt.figure(figsize=[16,16])
plt.imshow(classification)
Out[23]:
In [24]:
write_geotiff(output_fname, classification, geo_transform, proj)
Preprocess the test vector datasets:
In [26]:
# shapefiles = [os.path.join(validation_data_path, "%s.shp" % c)
# for c in classes]
shapefiles = ['data/black-marble/test/dark_test.shp', 'data/black-marble/test/light_test.shp']
print(['data/black-marble/test/dark_test.shp', 'data/black-marble/test/light_test.shp'])
print(rows)
print(cols)
print(geo_transform)
verification_pixels = vectors_to_raster(shapefiles, rows, cols, geo_transform,
proj)
for_verification = np.nonzero(verification_pixels)
verification_labels = verification_pixels[for_verification]
predicted_labels = classification[for_verification]
In [27]:
print("Confussion matrix:\n%s" %
metrics.confusion_matrix(verification_labels, predicted_labels))
In [28]:
target_names = ['Class %s' % s for s in classes]
print("Classification report:\n%s" %
metrics.classification_report(verification_labels, predicted_labels,
target_names=target_names))
print("Classification accuracy: %f" %
metrics.accuracy_score(verification_labels, predicted_labels))