Imports



In [1]:

    
# Imports
import math as m
import pandas as pd
import numpy as np
import scipy as sp
import os
my_plotly_api_key = os.environ.get('MY_PLOTLY_API_KEY') # retrive api_key from operating system
import plotly
plotly.tools.set_credentials_file(username='agu3rra', api_key=my_plotly_api_key) # setting up credentials; Plotly is an online service.
import plotly.plotly as py # import graphics library
import plotly.graph_objs as go
from scipy import integrate
import random

# Data loading
dfCoordinates = pd.read_csv('gpsDataUTM.csv') # reads data into dataframe

#---a--#-----#-----#-----#-----#-----#-----#-----#-----#-----#-----#-----#-----#
# Calculate volume of earth from the lowest measured point
# Idea: generate training set to a Neural Network and treat it as a regression problem

# Part 1: Generate training set
# Idea: generate a set of mathematical surface functions z=f(x,y) that go by
# close to the points we're getting from GPS. Use these funcitons to generate a
# set of (x,y,z) points and a double integral to calculate the exact valume of
# the solid delimited by it.

# Better idea: maybe the model can generelize from general space, no need to actually find functions that have values close to our sample set.
# The only remaining challenge is to select only a subset of points that match the input data size (random selection)

Main



In [2]:

    
# Determine bounds of interest
x_max = dfCoordinates['x_rel'].max()
y_max = dfCoordinates['y_rel'].max()
z_max = dfCoordinates['z_rel'].max()



In [3]:

    
# Remember: in terrain measurements, all x, y and z values will be positive
# Dev note: increasing number of samples may be required
x = np.linspace(0.0,x_max,num=500) # generate linear space for x values
y = np.linspace(0.0,y_max,num=500)
xGrid, yGrid = np.meshgrid(x,y) # generate mesh grid for plotting sample data



In [4]:

    
# Generate a equations and corresponding double integrals
# Data of interest: functions with variations bounded by 0:*_max values

# Dev note: How many double integral evaluations are needed for the model to be able to generalize well? Starting at 1000
integrate.dblquad(lambda y, x: 3 + x**2 - 2*y, 0.0, x_max, lambda x: -x, lambda x: x)









    Out[4]:





(1175537925.0989282, 1.3051092707649128e-05)



In [5]:

    
def combination(n,k): # Combination of n samples taken k at a time.
    return m.factorial(n)/(m.factorial(n-k)*m.factorial(k))



In [6]:

    
combination(156,3) # there are 620620 planes defined by all 156 distinct points in my sample data









    Out[6]:





620620.0



In [7]:

    
def combinations(iterable, r):
    # combinations('ABCD', 2) --> AB AC AD BC BD CD
    # combinations(range(4), 3) --> 012 013 023 123
    pool = tuple(iterable)
    n = len(pool)
    if r > n:
        return
    indices = list(range(r))
    yield tuple(pool[i] for i in indices)
    while True:
        for i in reversed(range(r)):
            if indices[i] != i + n - r:
                break
        else:
            return
        indices[i] += 1
        for j in range(i+1, r):
            indices[j] = indices[j-1] + 1
        yield tuple(pool[i] for i in indices)



In [8]:

    
c = combinations('ABCD',2)



In [9]:

    
for i in c:
    print(i)









    



('A', 'B')
('A', 'C')
('A', 'D')
('B', 'C')
('B', 'D')
('C', 'D')



In [10]:

    
# return a list with each of the 156 points from pandas dataframe and apply combination
# define all plane equations that can be obtained from combination of each point 3 at a time.
# evaluate bounded numerical integral of each plane
# select a subset of points that match that bounded space on the evaluated integral and match with corresponding volume calculation (double integral result)
# Your training set will be comprised of 620.620 sets of 156 points (156x3 = 468 inputs) and corresponding volume.
# Train your model on a neural network

Sample: Generating plane equation from 3 points



In [11]:

    
# Sample code: generate the plane equation of any 3 points available at the GPS dataframe



In [12]:

    
A = np.array([dfCoordinates.iloc[2].values[5],
              dfCoordinates.iloc[2].values[6],
              dfCoordinates.iloc[2].values[7]])



In [13]:

    
B = np.array([dfCoordinates.iloc[20].values[5],
              dfCoordinates.iloc[20].values[6],
              dfCoordinates.iloc[20].values[7]])



In [14]:

    
C = np.array([dfCoordinates.iloc[60].values[5],
              dfCoordinates.iloc[60].values[6],
              dfCoordinates.iloc[60].values[7]])



In [15]:

    
# A, B and C are relative coordinates collected from the GPS survey



In [16]:

    
# Plane equation: a(x-xo) + b(y-yo) + c(z-zo) = 0
# , where [xo,yo,zo] is a point in the plane (so A, B or C can be used)
# [a,b,c] is a vector perpendicular to the plane (it can be obtained from the cross product of any 2 vectors within the plane)



In [17]:

    
AB = B-A # Vector AB is within the desired plane



In [18]:

    
print(A)
print(B)
print(AB)
print(type(AB))









    



[ 214.019258     98.79277407   14.14      ]
[  0.0209151   14.59788367   9.47      ]
[-213.9983429  -84.1948904   -4.67     ]
<class 'numpy.ndarray'>



In [19]:

    
BC = C-B # Vector BC is also within the plane



In [20]:

    
VP = np.cross(AB,BC) # VP is a vector perpendicular to the desired plane



In [21]:

    
VP # VP contains a,b,c values of the plane equation









    Out[21]:





array([  164.82014359,  -494.98349017,  1371.27261237])



In [22]:

    
# Plotting plane:

# Equation coeficients and point within plane
a = VP[0]
b = VP[1]
c = VP[2]
xo = A[0]
yo = A[1]
zo = A[2]



In [23]:

    
z = ((-a*(xGrid-xo)-b*(yGrid-yo))/c)+zo # plane equation



In [24]:

    
z.shape









    Out[24]:





(500, 500)



In [25]:

    
surface = go.Surface(x=x, y=y, z=z)

xPoints = [A[0], B[0], C[0]]
yPoints = [A[1], B[1], C[1]]
zPoints = [A[2], B[2], C[2]]

points = go.Scatter3d(x=xPoints,
                    y=yPoints,
                    z=zPoints,
                    mode='markers',
                    marker=dict(size=5,opacity=0.8))
data = [surface, points]

layoutDictionary = dict(gridcolor='rgb(255, 255, 255)',
                        zerolinecolor='rgb(255, 255, 255)',
                        showbackground=True,
                        backgroundcolor='rgb(230, 230,230)')

layout = go.Layout(
    title='Plane Plot',
    scene=dict(
        xaxis=layoutDictionary,
        yaxis=layoutDictionary,
        zaxis=layoutDictionary
    )
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Plane plot')









    Out[25]:

Plotting a cube and testing double integral volume calculation



In [26]:

    
# Defining a cuboid of 5x4x10 (Volume = 200)
A = np.array([5,0,10])
B = np.array([0,4,10])
C = np.array([5,4,10])
AB = B-A # Vector AB is within the desired plane
BC = C-B # Vector AB is within the desired plane
VP = np.cross(AB,BC) # perpendicular vector
# Equation coeficients and point within plane
a = VP[0]
b = VP[1]
c = VP[2]
xo = A[0]
yo = A[1]
zo = A[2]
z = ((-a*(xGrid-xo)-b*(yGrid-yo))/c)+zo # plane equation
surface = go.Surface(x=x, y=y, z=z)

xPoints = [A[0], B[0], C[0]]
yPoints = [A[1], B[1], C[1]]
zPoints = [A[2], B[2], C[2]]

points = go.Scatter3d(x=xPoints,
                    y=yPoints,
                    z=zPoints,
                    mode='markers',
                    marker=dict(size=5,opacity=0.8))
data = [surface, points]

layoutDictionary = dict(gridcolor='rgb(255, 255, 255)',
                        zerolinecolor='rgb(255, 255, 255)',
                        showbackground=True,
                        backgroundcolor='rgb(230, 230,230)')

layout = go.Layout(
    title='Cubic Plot',
    scene=dict(
        xaxis=layoutDictionary,
        yaxis=layoutDictionary,
        zaxis=layoutDictionary
    )
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Cubic plot')









    Out[26]:



In [27]:

    
# Calculating corresponding volume with double integral.
# Expected value: 200
integrate.dblquad(lambda y, x: ((-a*(x-xo)-b*(y-yo))/c)+zo, 0.0, 5.0, lambda x: 0.0, lambda x: 4.0)









    Out[27]:





(200.0, 2.220446049250313e-12)



In [28]:

    
# Success! The above code shows us how to define a plane and calculate the corresponding volume of a bounded shape.

Main: resuming code



In [29]:

    
# return a list with each of the 155 points from pandas dataframe and apply combination
# define all plane equations that can be obtained from combination of each point 3 at a time.
dfSurvey = dfCoordinates[['x_rel',
                          'y_rel',
                          'z_rel']]



In [30]:

    
dfSurvey.head()



In [31]:

    
pointsCount = int(dfSurvey.count()[0]) # number of points available



In [32]:

    
combination(155,3) # number of possible combinations









    Out[32]:





608685.0



In [60]:

    
planeCombinations = combinations(range(pointsCount),3) # indexes all possible combinations of available points
# DEBUG planeCombinations = combinations(range(4),3) # reducing set for debugging

mlDataset = pd.DataFrame([]) # initialize new dataframe for the machine learning dataset
# Determine plane equation for each set of 3 points and calculate corresponding volume
for pointSet in planeCombinations:
    point1index = pointSet[0] # index of the first point
    point2index = pointSet[1]
    point3index = pointSet[2]
    
    # fetching actual points according to index
    point1 = np.array([dfSurvey.iloc[point1index].values[0],
                       dfSurvey.iloc[point1index].values[1],
                       dfSurvey.iloc[point1index].values[2]])
    point2 = np.array([dfSurvey.iloc[point2index].values[0],
                       dfSurvey.iloc[point2index].values[1],
                       dfSurvey.iloc[point2index].values[2]])    
    point3 = np.array([dfSurvey.iloc[point3index].values[0],
                       dfSurvey.iloc[point3index].values[1],
                       dfSurvey.iloc[point3index].values[2]])
    
    planeVectorOne = point2 - point1 # a vector within the plane
    planeVectorTwo = point3 - point2 # a vector within the plane
    perpendicularVector = np.cross(planeVectorOne, planeVectorTwo) # plane equation coeficients obtained <a,b,c>

    # saving constants to literals
    a = perpendicularVector[0]
    b = perpendicularVector[1]
    c = perpendicularVector[2]
    xo = point1[0]
    yo = point1[1]
    zo = point1[2]
    
    # Plane equation a(x-xo)+b(y-yo)+c(z-zo)=0 with isolated z:
    z = ((-a*(xGrid-xo)-b*(yGrid-yo))/c)+zo
    
    pointsInGrid = z.shape[0]
    evalutePoints = pointsInGrid - pointsCount # evaluate number of available points within your grid - samples from GPS
    if evalutePoints>0: # normal scenario
        randomIndexesX = random.sample(range(pointsInGrid), pointsCount) # generate random list of x points, where x is the number of points collected by the GPS
        randomIndexesY = random.sample(range(pointsInGrid), pointsCount)
    else: # your grid has less points than colleted from the GPS survey. Increase grid size
        print("Your grid has %s points less than required. Increase grid size." % evaluatePoints)

    mlSample = pd.DataFrame([]) # initialize new dataframe for sample storage
    
    for i in range(pointsCount):
        x_value = xGrid[randomIndexesX[i]][randomIndexesX[i]] # obtain value of corresponding random value from the xGrid
        y_value = yGrid[randomIndexesY[i]][randomIndexesY[i]]
        z_value = z[randomIndexesX[i]][randomIndexesY[i]] # Corresponding z value evaluated for x,y

        # append data to dataframe
        samplePoint = pd.Series(dict(x=x_value,
                                     y=y_value,
                                     z=z_value))
        mlSample = mlSample.append(samplePoint,
                                   ignore_index=True) # append newly converted data to dataframe

    # the dfTrainingSample contains a set of points for which we need to evaluate the corresponding bounds and volume    
    # evaluate max and min bounds
    x_min = mlSample['x'].min()
    x_max = mlSample['x'].max()
    y_min = mlSample['y'].min()
    y_max = mlSample['y'].max()
    # evaluating bounds for z is not required for evaluating the double integral (plane equation is used for z)
    
    # Evaluating corresponding volume for this point set
    volume = integrate.dblquad(lambda y, x: ((-a*(x-xo)-b*(y-yo))/c)+zo, 
                               x_min, x_max, 
                               lambda x: y_min, lambda x: y_max)
    volume = volume[0] # saving the computed volume. The second element in the list holds the error.
#     print(dfTrainingSample.head())
#     print(volume)
    # at this point of the code we have a dataframe with corresponding points in a plane and a corresponding volume.
    # This is a sample! We made one! Now on to saving it in a format compatible with the neural network
    # Saving training sample: for 155 data points >> row = 466 elements all x's, all y's, all z's + volume
    # append data to mlDataset
    
    #create numpy array to store in mlDataframe (single row)
    sampleData = np.concatenate([mlSample['x'].values,
                                 mlSample['y'].values,
                                 mlSample['z'].values])
    datasetSample = pd.Series(dict(inputData = sampleData,
                                   inputLabel = volume))
    mlDataset = mlDataset.append(datasetSample,
                                 ignore_index=True)


# Debugging        
#     print("Here's a point in the <x,y,z> grid:")
#     print(xGrid[randomIndex][randomIndex])
#     print(yGrid[randomIndex][randomIndex])
#     print(z[randomIndex][randomIndex])
#     print("%s %s %s" % (a, b, c))
#     print(point1)

# Create dataframe from list of indexed random samples



In [62]:

    
mlDataset.to_csv('mlDataset.csv',index=False) # save to file



In [63]:

    
dfNew = pd.read_csv('mlDataset.csv')



In [64]:

    
dfNew









    Out[64]:







  
    
      
      inputData
      inputLabel
    
  
  
    
      0
      [  77.22194358  202.54212629    0.8825365   13...
      5.884609e+05
    
    
      1
      [   6.17775549    0.8825365   135.91062069   2...
      8.986781e+05
    
    
      2
      [  1.95481834e+02   1.29291597e+02   1.6194544...
      1.308210e+08
    
    
      3
      [  1.79596177e+02   8.51647721e+01   2.6476094...
      -1.429454e+08



In [65]:

    
mlDataset









    Out[65]:







  
    
      
      inputData
      inputLabel
    
  
  
    
      0
      [77.2219435756, 202.542126293, 0.882536498007,...
      5.884609e+05
    
    
      1
      [6.17775548605, 0.882536498007, 135.910620693,...
      8.986781e+05
    
    
      2
      [195.481834309, 129.291596958, 161.945447384, ...
      1.308210e+08
    
    
      3
      [179.596177344, 85.1647720577, 2.64760949402, ...
      -1.429454e+08



In [66]:

    
# Saving to CSV doesn't exactly read right when trying to import. I am going to pickle it.
import pickle
file = open('mlDataset.pickle', 'wb')
pickle.dump(mlDataset, file)
file.close()



In [67]:

    
# recovering data
file = open('mlDataset.pickle', 'rb')
pickledDataset = pickle.load(file)
file.close()



In [68]:

    
pickledDataset









    Out[68]:







  
    
      
      inputData
      inputLabel
    
  
  
    
      0
      [77.2219435756, 202.542126293, 0.882536498007,...
      5.884609e+05
    
    
      1
      [6.17775548605, 0.882536498007, 135.910620693,...
      8.986781e+05
    
    
      2
      [195.481834309, 129.291596958, 161.945447384, ...
      1.308210e+08
    
    
      3
      [179.596177344, 85.1647720577, 2.64760949402, ...
      -1.429454e+08



In [37]:

    
trace1 = go.Scatter3d(
    x=dfTrainingSample['x'].values,
    y=dfTrainingSample['y'].values,
    z=dfTrainingSample['z'].values,
    mode='markers',
    marker=dict(
        size=5,
        line=dict(
            color='rgba(217, 217, 217, 0.14)',
            width=0.5
        ),
        opacity=0.8
    )
)

data = [trace1]
layout = go.Layout(
    margin=dict(
        l=0,
        r=0,
        b=0,
        t=0
    )
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='simple-3d-scatter')









    



High five! You successfuly sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~agu3rra/0 or inside your plot.ly account where it is named 'simple-3d-scatter'






    Out[37]:



In [110]:

    
xGrid.shape









    Out[110]:





(500, 500)

	x_rel	y_rel	z_rel
0	213.860510	127.527218	14.12
1	213.700623	127.211200	14.12
2	214.019258	98.792774	14.14
3	214.261816	70.346291	11.64
4	217.136937	44.608228	9.87

	inputData	inputLabel
0	[ 77.22194358 202.54212629 0.8825365 13...	5.884609e+05
1	[ 6.17775549 0.8825365 135.91062069 2...	8.986781e+05
2	[ 1.95481834e+02 1.29291597e+02 1.6194544...	1.308210e+08
3	[ 1.79596177e+02 8.51647721e+01 2.6476094...	-1.429454e+08

	inputData	inputLabel
0	[77.2219435756, 202.542126293, 0.882536498007,...	5.884609e+05
1	[6.17775548605, 0.882536498007, 135.910620693,...	8.986781e+05
2	[195.481834309, 129.291596958, 161.945447384, ...	1.308210e+08
3	[179.596177344, 85.1647720577, 2.64760949402, ...	-1.429454e+08