This IPython Notebook creates a probabilistic sea floor lithology map by using Stochastic gradient descent (SGD) method.

  • trainAndSaveModel()

    This function trains the machine learning model and save it to disk. The training takes some time.

  • loadModelAndPredict(lons, lats)

    This function predicts the sea floor sediment at given locations. Input:

      lons - a numpy array of longitudes.
      lats - a numpy array of latitudes.
    

    Output:

      The probability of each sediment type
  Sediment Type                      ID
  gravel                             1 
  sand                               2
  silt                               3
  clay                               4
  calcareous ooze                    5
  radiolarian ooze                   6
  diatom ooze                        7
  sponge spicules                    8
  mixed ooze                         9
  shell and coral fragments          10
  ash and volcanic sand/gravel       11
  siliceous mud                      12
  fine-grained calcareous sediment   13

For more information and the paper http://portal.gplates.org/static/html/seafloor.html


In [ ]:
import sea_floor_map as sfm

#WARNING: THIS CELL MAY TAKE A LONG TIME. YOU MAY SKIP THIS CELL IF YOU LIKE.

#train the SGD model and save it
sfm.trainAndSaveModel()

In [ ]:
import numpy as np
import sea_floor_map as sfm

sedimentTypes=[
        'gravel',
        'sand',
        'silt',
        'clay',
        'calcareous ooze',
        'radiolarian ooze',
        'diatom ooze',
        'sponge spicules',
        'mixed ooze',
        'shell and coral fragments',
        'ash and volcanic sand/gravel',
        'siliceous mud',
        'fine-grained calcareous sediment'
    ]

# some random locations
lons = np.random.random_sample((5,)) * 360 - 180 #np.array([11,22])
lats = np.random.random_sample((5,)) * 180 - 90 #np.array([33,44])

#load model and predict
Ey = sfm.loadModelAndPredict(lons, lats)

#print the predict results
for p in zip(lons, lats, Ey):
    bestIndex = np.argmax(p[2])
    print('The most likely sediment type for location ({0:.2f}, {1:.2f}) is \"{2}\" with probability of {3:.4f}.'.
          format(p[0],p[1],sedimentTypes[bestIndex], p[2][bestIndex]))
    #for s in zip(sedimentTypes,p[2]): #print out all the probabilities if you like
        #print('{0} : {1:.4f}'.format(s[0], s[1]))

In [ ]:
import matplotlib.pyplot as pl
import scipy.stats as stats
%matplotlib inline  

#Define the region of interest. Change the coordinates here to adjust map size.
leftBottom=(-41,-21)
rightTop=(41,21)

width = rightTop[0]-leftBottom[0]
height = rightTop[1]-leftBottom[1]
lons, lats = np.meshgrid(
    np.linspace(leftBottom[0],rightTop[0],width),
    np.linspace(leftBottom[1],rightTop[1],height))

#predict
Ey = sfm.loadModelAndPredict(lons, lats)

#draw maps
fig = pl.figure(figsize=(8, 4),dpi=240,frameon=False)
ax = pl.Axes(fig, [0., 0., 1., 1.])
#ax.set_axis_off()
fig.add_axes(ax)
pl.ioff()
pl.imshow(np.rot90(np.argmax(Ey,axis=1).reshape((width,height))))
pl.title('Most Probable Class Label')
pl.xlabel('Longitude'); pl.ylabel('Latitude')
pl.show()

fig = pl.figure(figsize=(8, 4),dpi=240,frameon=False)
ax = pl.Axes(fig, [0., 0., 1., 1.])
#ax.set_axis_off()
fig.add_axes(ax)
pl.ioff()
pl.imshow(np.rot90(stats.entropy(Ey.T).reshape((width,height))))
pl.title('Entropy')
pl.xlabel('Longitude'); pl.ylabel('Latitude')
pl.show()

In [ ]: