In [103]:
from __future__ import print_function
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
matplotlib.style.use('ggplot')
%matplotlib inline
After several tests we found that, in almost all cases, training the network on every possible input is much more effective than training it on randomly generated data.
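The data-generation code isn't included in this notebook, but (assuming each training example is a single integer input, consistent with "100 points: 1-100" in the results table below) the two strategies amount to something like this sketch:

import numpy as np

# Exhaustive, in order: every input from 1 to 100 exactly once.
x_inorder = np.arange(1, 101)

# Random: the same number of inputs drawn uniformly from 1..100.
rng = np.random.RandomState(0)
x_random = rng.randint(1, 101, size=100)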
In [104]:
# MSE vs. training steps for the run trained on 1-100 in order.
data_inorder = pd.read_csv('Data\\adder_inorder_data.csv')
data_inorder = data_inorder[['Steps', 'MSE']]
data_inorder = data_inorder.sort_values(['Steps'])
data_inorder.head(9)
Out[104]:
In [105]:
# The five runs trained on random data, loaded the same way.
data_rnd = []
for i in range(5):
    df = pd.read_csv('Data\\adder_random_%d_data.csv' % i)
    df = df[['Steps', 'MSE']]
    data_rnd.append(df.sort_values(['Steps']))
data_rnd_0, data_rnd_1, data_rnd_2, data_rnd_3, data_rnd_4 = data_rnd
In [106]:
# Early training: the first 20 recorded checkpoints (in-order run as blue dots).
plt.plot(data_inorder['Steps'].iloc[:20], data_inorder['MSE'].iloc[:20], 'bo')
for df in data_rnd:
    plt.plot(df['Steps'].iloc[:20], df['MSE'].iloc[:20])
plt.show()
In [107]:
# Late training: checkpoint 30 onward.
plt.plot(data_inorder['Steps'].iloc[30:], data_inorder['MSE'].iloc[30:], 'bo')
for df in data_rnd:
    plt.plot(df['Steps'].iloc[30:], df['MSE'].iloc[30:])
plt.show()
In [108]:
# Three of the random runs, zoomed to the same late-training window.
for df in (data_rnd_1, data_rnd_2, data_rnd_4):
    plt.plot(df['Steps'].iloc[30:], df['MSE'].iloc[30:])
plt.show()
Process:
Results:
| Training data | MSE |
| --- | --- |
| 100 points: 1-100, in order | 0.065861 |
| 1,000 points: random(100) | 0.028475 |
| 1,000 points: 1-100, repeated 10 times | 0.007759 |
| 10,000 points: 1-100, repeated 100 times | 409.116 |
The results suggest that iterating through every possibility several times over and training on that is the best data-gathering method. As with many things here, there is a fine line between having the right amount of data and having too much: note how the MSE blows up at 10,000 points.
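For concreteness, here is a minimal sketch of how the "1-100 N times" inputs from the table could be built (the actual generation code isn't in this notebook):

import numpy as np

base = np.arange(1, 101)        # inputs 1..100, in order
data_10x = np.tile(base, 10)    # 1,000 points: 1-100 repeated 10 times
data_100x = np.tile(base, 100)  # 10,000 points: 1-100 repeated 100 times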
I was interested in how far a neural net could extrapolate to numbers it had never seen before. This should have been an easy problem, since the function is linear, but it wasn't, because skflow doesn't let you choose the activation function for its regressor. Almost all of the single- and double-layer networks I trained failed around 200-300; one, however, stood out and correctly predicted 1-1145 after training only on 1-100.
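The skflow training code isn't shown here; as a stand-in, this is roughly how the extrapolation range can be measured, using scikit-learn's MLPRegressor as a hypothetical substitute and y = 2x standing in for the linear target:

import numpy as np
from sklearn.neural_network import MLPRegressor

# Train only on inputs 1..100.
X_train = np.arange(1, 101).reshape(-1, 1).astype(float)
y_train = 2.0 * X_train.ravel()

net = MLPRegressor(hidden_layer_sizes=(10,), max_iter=5000, random_state=0)
net.fit(X_train, y_train)

# Walk upward until the prediction drifts off the true line.
for n in range(1, 3000):
    pred = net.predict([[float(n)]])[0]
    if abs(pred - 2.0 * n) > 1.0:
        print('first failure at', n)
        break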
In [111]:
# One results CSV per swept value of the first parameter.
arr = ['100', '200', '300', '400', '500', '600', '700',
       '1000', '1100', '1200', '1300', '1400', '1500', '1600', '1700', '1800', '1900',
       '2000', '2100', '2300', '2400', '2500']
df_arr = []
for i in range(len(arr)):
    temp = pd.read_csv('Data\\determinant_' + arr[i] + '_layer_by_100.csv', header=None)
    temp = temp.T  # stored one value per column; transpose to rows
    temp.columns = ['Second', 'MSE']
    temp['First'] = arr[i]
    temp = temp.sort_values(['First', 'Second'])
    df_arr.append(temp)
len(df_arr)
Out[111]:
In [131]:
# Combine all the runs and rank every (First, Second) configuration by MSE.
result = pd.concat(df_arr)
result = result.reset_index(drop=True)
result.sort_values(['MSE'])
Out[131]:
In [113]:
# Flat arrays for the surface plot ('First' is stored as strings, so convert).
res1 = result['First'].astype(float).values
res2 = result['Second'].values
res3 = result['MSE'].values
In [114]:
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.plot_trisurf(res1, res2, res3, cmap=cm.jet, linewidth=0.2)
plt.show()
In [117]:
# Overlay every run's MSE curve against the second parameter.
for i in range(len(arr)):
    plt.plot(df_arr[i]['Second'], df_arr[i]['MSE'])
plt.show()
In [130]:
# Inspect one run in detail.
num = 5
df_arr[num][['MSE']].values
df_arr[num].head(21)
Out[130]:
In [118]:
# Same overlay, zoomed to MSE between 0.3 and 0.5.
for i in range(len(arr)):
    plt.plot(df_arr[i]['Second'], df_arr[i]['MSE'])
plt.ylim(.3, .5)
plt.show()
In [120]:
plt.plot(df_arr[0]['Second'], df_arr[0]['MSE'])
Out[120]:
In [71]:
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import matplotlib.pyplot as plt
import numpy as np
n_angles = 36
n_radii = 8
# An array of radii
# Does not include radius r=0, this is to eliminate duplicate points
radii = np.linspace(0.125, 1.0, n_radii)
# An array of angles
angles = np.linspace(0, 2*np.pi, n_angles, endpoint=False)
# Repeat all angles for each radius
angles = np.repeat(angles[...,np.newaxis], n_radii, axis=1)
# Convert polar (radii, angles) coords to cartesian (x, y) coords
# (0, 0) is added here. There are no duplicate points in the (x, y) plane
x = np.append(0, (radii*np.cos(angles)).flatten())
y = np.append(0, (radii*np.sin(angles)).flatten())
# Pringle surface
z = np.sin(-x*y)
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.plot_trisurf(x, y, z, cmap=cm.jet, linewidth=0.2)
plt.show()
In [75]:
z
Out[75]: