notebook.community

Edit and run



In [2]:

    
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline



In [12]:

    
# List the contents of the data/ directory (IPython automagic for the shell `ls`).
ls data/









    



iris.data  iris.scale



In [60]:

    
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the iris data file. It is read with header=None, so the column names
# are supplied explicitly: four feature columns (A-D) followed by the label.
df = pd.read_csv('data/iris.data', header=None, sep=',', names=['A', 'B', "C", "D", "label"])

# Keep the class labels and the four feature columns as separate objects.
label = df['label']
dataset = df[df.columns[0:4]]

# Preview the features. A cell renders only its final expression, so a bare
# `label.head()` placed before this line would be computed and discarded.
dataset.head()



In [74]:

    
# Per-column extremes of the feature columns.
dfmax = dataset.max()
dfmin = dataset.min()
# Width of each column's value range (max - min).
dflen = dfmax - dfmin

# Call-style print with a single argument behaves the same on Python 2 and
# Python 3, whereas the print statement is a syntax error on Python 3.
print(dfmax)
print(dfmin)
print(dflen)

# Show the per-column ranges as the cell result.
dflen









    



A    7.9
B    4.4
C    6.9
D    2.5
dtype: float64
A    4.3
B    2.0
C    1.0
D    0.1
dtype: float64
A    3.6
B    2.4
C    5.9
D    2.4
dtype: float64






    Out[74]:





A    3.6
B    2.4
C    5.9
D    2.4
dtype: float64



In [83]:

    
# Rescale every feature column: shift by the midpoint of the column's
# [min, max] range, then divide by half of the range width, so that the
# column minimum maps to -1 and the column maximum maps to +1.
range_midpoint = (dfmax + dfmin) / 2
range_half_width = dflen / 2
scaledDataSet = (dataset - range_midpoint) / range_half_width



In [85]:

    
# Preview the first five rescaled rows.
scaledDataSet.head(n=5)



In [91]:

    
# Overwrite the first four (feature) columns of df with their rescaled
# values; the remaining label column is not touched by this assignment.
feature_columns = df.columns[0:4]
df[feature_columns] = scaledDataSet

# Report the frame's dimensions after the assignment.
df.shape









    Out[91]:





(150, 5)



In [170]:

    
# Lookup table of the distinct class names.
# searchsorted does a binary search, so the table must be in ascending order.
# drop_duplicates() keeps first-appearance order, which is sorted only when
# the file happens to list the classes alphabetically; np.unique returns the
# distinct values already sorted, independent of row order.
tmp = pd.Series(np.unique(label.values))

# Position of one class name in the sorted lookup table.
tmp.values.searchsorted("Iris-virginica")









    Out[170]:





2



In [184]:

    
# Replace the class names in df with integer codes starting at 1, numbered by
# the sorted order of the distinct class names.
# pd.factorize(sort=True) sorts the distinct values itself, so the encoding
# does not rely on a separately built lookup table being in ascending order
# (a requirement of searchsorted-based encoding that is easy to violate).
df['label'] = pd.factorize(label, sort=True)[0] + 1









    Out[184]:






  
    
      
      A
      B
      C
      D
      label
    
  
  
    
      0
      -0.555556
      0.250000
      -0.864407
      -0.916667
      1
    
    
      1
      -0.666667
      -0.166667
      -0.864407
      -0.916667
      1
    
    
      2
      -0.777778
      0.000000
      -0.898305
      -0.916667
      1
    
    
      3
      -0.833333
      -0.083333
      -0.830508
      -0.916667
      1
    
    
      4
      -0.611111
      0.333333
      -0.864407
      -0.916667
      1
    
    
      5
      -0.388889
      0.583333
      -0.762712
      -0.750000
      1
    
    
      6
      -0.833333
      0.166667
      -0.864407
      -0.833333
      1
    
    
      7
      -0.611111
      0.166667
      -0.830508
      -0.916667
      1
    
    
      8
      -0.944444
      -0.250000
      -0.864407
      -0.916667
      1
    
    
      9
      -0.666667
      -0.083333
      -0.830508
      -1.000000
      1
    
    
      10
      -0.388889
      0.416667
      -0.830508
      -0.916667
      1
    
    
      11
      -0.722222
      0.166667
      -0.796610
      -0.916667
      1
    
    
      12
      -0.722222
      -0.166667
      -0.864407
      -1.000000
      1
    
    
      13
      -1.000000
      -0.166667
      -0.966102
      -1.000000
      1
    
    
      14
      -0.166667
      0.666667
      -0.932203
      -0.916667
      1
    
    
      15
      -0.222222
      1.000000
      -0.830508
      -0.750000
      1
    
    
      16
      -0.388889
      0.583333
      -0.898305
      -0.750000
      1
    
    
      17
      -0.555556
      0.250000
      -0.864407
      -0.833333
      1
    
    
      18
      -0.222222
      0.500000
      -0.762712
      -0.833333
      1
    
    
      19
      -0.555556
      0.500000
      -0.830508
      -0.833333
      1
    
    
      20
      -0.388889
      0.166667
      -0.762712
      -0.916667
      1
    
    
      21
      -0.555556
      0.416667
      -0.830508
      -0.750000
      1
    
    
      22
      -0.833333
      0.333333
      -1.000000
      -0.916667
      1
    
    
      23
      -0.555556
      0.083333
      -0.762712
      -0.666667
      1
    
    
      24
      -0.722222
      0.166667
      -0.694915
      -0.916667
      1
    
    
      25
      -0.611111
      -0.166667
      -0.796610
      -0.916667
      1
    
    
      26
      -0.611111
      0.166667
      -0.796610
      -0.750000
      1
    
    
      27
      -0.500000
      0.250000
      -0.830508
      -0.916667
      1
    
    
      28
      -0.500000
      0.166667
      -0.864407
      -0.916667
      1
    
    
      29
      -0.777778
      0.000000
      -0.796610
      -0.916667
      1
    
    
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      120
      0.444444
      0.000000
      0.593220
      0.833333
      3
    
    
      121
      -0.277778
      -0.333333
      0.322034
      0.583333
      3
    
    
      122
      0.888889
      -0.333333
      0.932203
      0.583333
      3
    
    
      123
      0.111111
      -0.416667
      0.322034
      0.416667
      3
    
    
      124
      0.333333
      0.083333
      0.593220
      0.666667
      3
    
    
      125
      0.611111
      0.000000
      0.694915
      0.416667
      3
    
    
      126
      0.055556
      -0.333333
      0.288136
      0.416667
      3
    
    
      127
      0.000000
      -0.166667
      0.322034
      0.416667
      3
    
    
      128
      0.166667
      -0.333333
      0.559322
      0.666667
      3
    
    
      129
      0.611111
      -0.166667
      0.627119
      0.250000
      3
    
    
      130
      0.722222
      -0.333333
      0.728814
      0.500000
      3
    
    
      131
      1.000000
      0.500000
      0.830508
      0.583333
      3
    
    
      132
      0.166667
      -0.333333
      0.559322
      0.750000
      3
    
    
      133
      0.111111
      -0.333333
      0.389831
      0.166667
      3
    
    
      134
      0.000000
      -0.500000
      0.559322
      0.083333
      3
    
    
      135
      0.888889
      -0.166667
      0.728814
      0.833333
      3
    
    
      136
      0.111111
      0.166667
      0.559322
      0.916667
      3
    
    
      137
      0.166667
      -0.083333
      0.525424
      0.416667
      3
    
    
      138
      -0.055556
      -0.166667
      0.288136
      0.416667
      3
    
    
      139
      0.444444
      -0.083333
      0.491525
      0.666667
      3
    
    
      140
      0.333333
      -0.083333
      0.559322
      0.916667
      3
    
    
      141
      0.444444
      -0.083333
      0.389831
      0.833333
      3
    
    
      142
      -0.166667
      -0.416667
      0.389831
      0.500000
      3
    
    
      143
      0.388889
      0.000000
      0.661017
      0.833333
      3
    
    
      144
      0.333333
      0.083333
      0.593220
      1.000000
      3
    
    
      145
      0.333333
      -0.166667
      0.423729
      0.833333
      3
    
    
      146
      0.111111
      -0.583333
      0.355932
      0.500000
      3
    
    
      147
      0.222222
      -0.166667
      0.423729
      0.583333
      3
    
    
      148
      0.055556
      0.166667
      0.491525
      0.833333
      3
    
    
      149
      -0.111111
      -0.166667
      0.389831
      0.416667
      3
    
  

150 rows × 5 columns



In [185]:

    
# Persist the rescaled features and encoded labels as CSV.
output_path = 'data/test.tmp'
df.to_csv(output_path)



In [ ]:



In [ ]:



In [ ]:

	A	B	C	D
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	A	B	C	D
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	A	B	C	D
0	-0.555556	0.250000	-0.864407	-0.916667
1	-0.666667	-0.166667	-0.864407	-0.916667
2	-0.777778	0.000000	-0.898305	-0.916667
3	-0.833333	-0.083333	-0.830508	-0.916667
4	-0.611111	0.333333	-0.864407	-0.916667

	A	B	C	D
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	A	B	C	D
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2