In [1]:
# Imports
import math as m
import pandas as pd
import numpy as np
import scipy as sp
import os
my_plotly_api_key = os.environ.get('MY_PLOTLY_API_KEY') # retrive api_key from operating system
import plotly
plotly.tools.set_credentials_file(username='agu3rra', api_key=my_plotly_api_key) # setting up credentials; Plotly is an online service.
import plotly.plotly as py # import graphics library
import plotly.graph_objs as go
from scipy import integrate
import random
# Data loading
dfCoordinates = pd.read_csv('gpsDataUTM.csv') # reads data into dataframe
#---a--#-----#-----#-----#-----#-----#-----#-----#-----#-----#-----#-----#-----#
# Calculate volume of earth from the lowest measured point
# Idea: generate training set to a Neural Network and treat it as a regression problem
# Part 1: Generate training set
# Idea: generate a set of mathematical surface functions z=f(x,y) that go by
# close to the points we're getting from GPS. Use these funcitons to generate a
# set of (x,y,z) points and a double integral to calculate the exact valume of
# the solid delimited by it.
# Better idea: maybe the model can generelize from general space, no need to actually find functions that have values close to our sample set.
# The only remaining challenge is to select only a subset of points that match the input data size (random selection)
In [2]:
# Determine bounds of interest
x_max = dfCoordinates['x_rel'].max()
y_max = dfCoordinates['y_rel'].max()
z_max = dfCoordinates['z_rel'].max()
In [3]:
# Remember: in terrain measurements, all x, y and z values will be positive
# Dev note: increasing number of samples may be required
x = np.linspace(0.0,x_max,num=500) # generate linear space for x values
y = np.linspace(0.0,y_max,num=500)
xGrid, yGrid = np.meshgrid(x,y) # generate mesh grid for plotting sample data
In [4]:
# Generate a equations and corresponding double integrals
# Data of interest: functions with variations bounded by 0:*_max values
# Dev note: How many double integral evaluations are needed for the model to be able to generalize well? Starting at 1000
integrate.dblquad(lambda y, x: 3 + x**2 - 2*y, 0.0, x_max, lambda x: -x, lambda x: x)
Out[4]:
In [5]:
def combination(n,k): # Combination of n samples taken k at a time.
return m.factorial(n)/(m.factorial(n-k)*m.factorial(k))
In [6]:
combination(156,3) # there are 620620 planes defined by all 156 distinct points in my sample data
Out[6]:
In [7]:
def combinations(iterable, r):
# combinations('ABCD', 2) --> AB AC AD BC BD CD
# combinations(range(4), 3) --> 012 013 023 123
pool = tuple(iterable)
n = len(pool)
if r > n:
return
indices = list(range(r))
yield tuple(pool[i] for i in indices)
while True:
for i in reversed(range(r)):
if indices[i] != i + n - r:
break
else:
return
indices[i] += 1
for j in range(i+1, r):
indices[j] = indices[j-1] + 1
yield tuple(pool[i] for i in indices)
In [8]:
c = combinations('ABCD',2)
In [9]:
for i in c:
print(i)
In [10]:
# return a list with each of the 156 points from pandas dataframe and apply combination
# define all plane equations that can be obtained from combination of each point 3 at a time.
# evaluate bounded numerical integral of each plane
# select a subset of points that match that bounded space on the evaluated integral and match with corresponding volume calculation (double integral result)
# Your training set will be comprised of 620.620 sets of 156 points (156x3 = 468 inputs) and corresponding volume.
# Train your model on a neural network
In [11]:
# Sample code: generate the plane equation of any 3 points available at the GPS dataframe
In [12]:
A = np.array([dfCoordinates.iloc[2].values[5],
dfCoordinates.iloc[2].values[6],
dfCoordinates.iloc[2].values[7]])
In [13]:
B = np.array([dfCoordinates.iloc[20].values[5],
dfCoordinates.iloc[20].values[6],
dfCoordinates.iloc[20].values[7]])
In [14]:
C = np.array([dfCoordinates.iloc[60].values[5],
dfCoordinates.iloc[60].values[6],
dfCoordinates.iloc[60].values[7]])
In [15]:
# A, B and C are relative coordinates collected from the GPS survey
In [16]:
# Plane equation: a(x-xo) + b(y-yo) + c(z-zo) = 0
# , where [xo,yo,zo] is a point in the plane (so A, B or C can be used)
# [a,b,c] is a vector perpendicular to the plane (it can be obtained from the cross product of any 2 vectors within the plane)
In [17]:
AB = B-A # Vector AB is within the desired plane
In [18]:
print(A)
print(B)
print(AB)
print(type(AB))
In [19]:
BC = C-B # Vector BC is also within the plane
In [20]:
VP = np.cross(AB,BC) # VP is a vector perpendicular to the desired plane
In [21]:
VP # VP contains a,b,c values of the plane equation
Out[21]:
In [22]:
# Plotting plane:
# Equation coeficients and point within plane
a = VP[0]
b = VP[1]
c = VP[2]
xo = A[0]
yo = A[1]
zo = A[2]
In [23]:
z = ((-a*(xGrid-xo)-b*(yGrid-yo))/c)+zo # plane equation
In [24]:
z.shape
Out[24]:
In [25]:
surface = go.Surface(x=x, y=y, z=z)
xPoints = [A[0], B[0], C[0]]
yPoints = [A[1], B[1], C[1]]
zPoints = [A[2], B[2], C[2]]
points = go.Scatter3d(x=xPoints,
y=yPoints,
z=zPoints,
mode='markers',
marker=dict(size=5,opacity=0.8))
data = [surface, points]
layoutDictionary = dict(gridcolor='rgb(255, 255, 255)',
zerolinecolor='rgb(255, 255, 255)',
showbackground=True,
backgroundcolor='rgb(230, 230,230)')
layout = go.Layout(
title='Plane Plot',
scene=dict(
xaxis=layoutDictionary,
yaxis=layoutDictionary,
zaxis=layoutDictionary
)
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Plane plot')
Out[25]:
In [26]:
# Defining a cuboid of 5x4x10 (Volume = 200)
A = np.array([5,0,10])
B = np.array([0,4,10])
C = np.array([5,4,10])
AB = B-A # Vector AB is within the desired plane
BC = C-B # Vector AB is within the desired plane
VP = np.cross(AB,BC) # perpendicular vector
# Equation coeficients and point within plane
a = VP[0]
b = VP[1]
c = VP[2]
xo = A[0]
yo = A[1]
zo = A[2]
z = ((-a*(xGrid-xo)-b*(yGrid-yo))/c)+zo # plane equation
surface = go.Surface(x=x, y=y, z=z)
xPoints = [A[0], B[0], C[0]]
yPoints = [A[1], B[1], C[1]]
zPoints = [A[2], B[2], C[2]]
points = go.Scatter3d(x=xPoints,
y=yPoints,
z=zPoints,
mode='markers',
marker=dict(size=5,opacity=0.8))
data = [surface, points]
layoutDictionary = dict(gridcolor='rgb(255, 255, 255)',
zerolinecolor='rgb(255, 255, 255)',
showbackground=True,
backgroundcolor='rgb(230, 230,230)')
layout = go.Layout(
title='Cubic Plot',
scene=dict(
xaxis=layoutDictionary,
yaxis=layoutDictionary,
zaxis=layoutDictionary
)
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Cubic plot')
Out[26]:
In [27]:
# Calculating corresponding volume with double integral.
# Expected value: 200
integrate.dblquad(lambda y, x: ((-a*(x-xo)-b*(y-yo))/c)+zo, 0.0, 5.0, lambda x: 0.0, lambda x: 4.0)
Out[27]:
In [28]:
# Success! The above code shows us how to define a plane and calculate the corresponding volume of a bounded shape.
In [29]:
# return a list with each of the 155 points from pandas dataframe and apply combination
# define all plane equations that can be obtained from combination of each point 3 at a time.
dfSurvey = dfCoordinates[['x_rel',
'y_rel',
'z_rel']]
In [30]:
dfSurvey.head()
Out[30]:
In [31]:
pointsCount = int(dfSurvey.count()[0]) # number of points available
In [32]:
combination(155,3) # number of possible combinations
Out[32]:
In [60]:
planeCombinations = combinations(range(pointsCount),3) # indexes all possible combinations of available points
# DEBUG planeCombinations = combinations(range(4),3) # reducing set for debugging
mlDataset = pd.DataFrame([]) # initialize new dataframe for the machine learning dataset
# Determine plane equation for each set of 3 points and calculate corresponding volume
for pointSet in planeCombinations:
point1index = pointSet[0] # index of the first point
point2index = pointSet[1]
point3index = pointSet[2]
# fetching actual points according to index
point1 = np.array([dfSurvey.iloc[point1index].values[0],
dfSurvey.iloc[point1index].values[1],
dfSurvey.iloc[point1index].values[2]])
point2 = np.array([dfSurvey.iloc[point2index].values[0],
dfSurvey.iloc[point2index].values[1],
dfSurvey.iloc[point2index].values[2]])
point3 = np.array([dfSurvey.iloc[point3index].values[0],
dfSurvey.iloc[point3index].values[1],
dfSurvey.iloc[point3index].values[2]])
planeVectorOne = point2 - point1 # a vector within the plane
planeVectorTwo = point3 - point2 # a vector within the plane
perpendicularVector = np.cross(planeVectorOne, planeVectorTwo) # plane equation coeficients obtained <a,b,c>
# saving constants to literals
a = perpendicularVector[0]
b = perpendicularVector[1]
c = perpendicularVector[2]
xo = point1[0]
yo = point1[1]
zo = point1[2]
# Plane equation a(x-xo)+b(y-yo)+c(z-zo)=0 with isolated z:
z = ((-a*(xGrid-xo)-b*(yGrid-yo))/c)+zo
pointsInGrid = z.shape[0]
evalutePoints = pointsInGrid - pointsCount # evaluate number of available points within your grid - samples from GPS
if evalutePoints>0: # normal scenario
randomIndexesX = random.sample(range(pointsInGrid), pointsCount) # generate random list of x points, where x is the number of points collected by the GPS
randomIndexesY = random.sample(range(pointsInGrid), pointsCount)
else: # your grid has less points than colleted from the GPS survey. Increase grid size
print("Your grid has %s points less than required. Increase grid size." % evaluatePoints)
mlSample = pd.DataFrame([]) # initialize new dataframe for sample storage
for i in range(pointsCount):
x_value = xGrid[randomIndexesX[i]][randomIndexesX[i]] # obtain value of corresponding random value from the xGrid
y_value = yGrid[randomIndexesY[i]][randomIndexesY[i]]
z_value = z[randomIndexesX[i]][randomIndexesY[i]] # Corresponding z value evaluated for x,y
# append data to dataframe
samplePoint = pd.Series(dict(x=x_value,
y=y_value,
z=z_value))
mlSample = mlSample.append(samplePoint,
ignore_index=True) # append newly converted data to dataframe
# the dfTrainingSample contains a set of points for which we need to evaluate the corresponding bounds and volume
# evaluate max and min bounds
x_min = mlSample['x'].min()
x_max = mlSample['x'].max()
y_min = mlSample['y'].min()
y_max = mlSample['y'].max()
# evaluating bounds for z is not required for evaluating the double integral (plane equation is used for z)
# Evaluating corresponding volume for this point set
volume = integrate.dblquad(lambda y, x: ((-a*(x-xo)-b*(y-yo))/c)+zo,
x_min, x_max,
lambda x: y_min, lambda x: y_max)
volume = volume[0] # saving the computed volume. The second element in the list holds the error.
# print(dfTrainingSample.head())
# print(volume)
# at this point of the code we have a dataframe with corresponding points in a plane and a corresponding volume.
# This is a sample! We made one! Now on to saving it in a format compatible with the neural network
# Saving training sample: for 155 data points >> row = 466 elements all x's, all y's, all z's + volume
# append data to mlDataset
#create numpy array to store in mlDataframe (single row)
sampleData = np.concatenate([mlSample['x'].values,
mlSample['y'].values,
mlSample['z'].values])
datasetSample = pd.Series(dict(inputData = sampleData,
inputLabel = volume))
mlDataset = mlDataset.append(datasetSample,
ignore_index=True)
# Debugging
# print("Here's a point in the <x,y,z> grid:")
# print(xGrid[randomIndex][randomIndex])
# print(yGrid[randomIndex][randomIndex])
# print(z[randomIndex][randomIndex])
# print("%s %s %s" % (a, b, c))
# print(point1)
# Create dataframe from list of indexed random samples
In [62]:
mlDataset.to_csv('mlDataset.csv',index=False) # save to file
In [63]:
dfNew = pd.read_csv('mlDataset.csv')
In [64]:
dfNew
Out[64]:
In [65]:
mlDataset
Out[65]:
In [66]:
# Saving to CSV doesn't exactly read right when trying to import. I am going to pickle it.
import pickle
file = open('mlDataset.pickle', 'wb')
pickle.dump(mlDataset, file)
file.close()
In [67]:
# recovering data
file = open('mlDataset.pickle', 'rb')
pickledDataset = pickle.load(file)
file.close()
In [68]:
pickledDataset
Out[68]:
In [37]:
trace1 = go.Scatter3d(
x=dfTrainingSample['x'].values,
y=dfTrainingSample['y'].values,
z=dfTrainingSample['z'].values,
mode='markers',
marker=dict(
size=5,
line=dict(
color='rgba(217, 217, 217, 0.14)',
width=0.5
),
opacity=0.8
)
)
data = [trace1]
layout = go.Layout(
margin=dict(
l=0,
r=0,
b=0,
t=0
)
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='simple-3d-scatter')
Out[37]:
In [110]:
xGrid.shape
Out[110]: