notebook.community

Edit and run



In [37]:

    
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.cross_validation import KFold
from sklearn.model_selection import cross_val_score



In [3]:

    
# Location of the abalone dataset, relative to the notebook's working directory.
ABALONE_CSV = 'resources/abalone.csv'
data = pd.read_csv(ABALONE_CSV)



In [6]:

    
def _encode_sex(label):
    """Map a raw 'Sex' label to a number: 'M' -> 1, 'I' -> 0, anything else -> -1."""
    if label == 'M':
        return 1
    if label == 'I':
        return 0
    return -1


# NOTE: the column is overwritten in place, so running this cell a second time
# turns every value into -1 (the already-encoded numbers match neither label).
data['Sex'] = data['Sex'].map(_encode_sex)



In [9]:

    
# Show the first ten rows of the frame (same rows as `head(10)`).
data.iloc[:10]









    Out[9]:







  
    
      
      Sex
      Length
      Diameter
      Height
      WholeWeight
      ShuckedWeight
      VisceraWeight
      ShellWeight
      Rings
    
  
  
    
      0
      1
      0.455
      0.365
      0.095
      0.5140
      0.2245
      0.1010
      0.150
      15
    
    
      1
      1
      0.350
      0.265
      0.090
      0.2255
      0.0995
      0.0485
      0.070
      7
    
    
      2
      -1
      0.530
      0.420
      0.135
      0.6770
      0.2565
      0.1415
      0.210
      9
    
    
      3
      1
      0.440
      0.365
      0.125
      0.5160
      0.2155
      0.1140
      0.155
      10
    
    
      4
      0
      0.330
      0.255
      0.080
      0.2050
      0.0895
      0.0395
      0.055
      7
    
    
      5
      0
      0.425
      0.300
      0.095
      0.3515
      0.1410
      0.0775
      0.120
      8
    
    
      6
      -1
      0.530
      0.415
      0.150
      0.7775
      0.2370
      0.1415
      0.330
      20
    
    
      7
      -1
      0.545
      0.425
      0.125
      0.7680
      0.2940
      0.1495
      0.260
      16
    
    
      8
      1
      0.475
      0.370
      0.125
      0.5095
      0.2165
      0.1125
      0.165
      9
    
    
      9
      -1
      0.550
      0.440
      0.150
      0.8945
      0.3145
      0.1510
      0.320
      19



In [11]:

    
# Regression target: the 'Rings' column of the frame.
target = data.loc[:, 'Rings']
# Show the first ten target values.
target.iloc[:10]









    Out[11]:





0    15
1     7
2     9
3    10
4     7
5     8
6    20
7    16
8     9
9    19
Name: Rings, dtype: int64



In [24]:

    
# Label slices in `.loc` are inclusive, so this selects every column from the
# first one up to and including 'ShellWeight'.
feature_columns = slice(None, 'ShellWeight')
features = data.loc[:, feature_columns]
# Show the first ten feature rows.
features.iloc[:10]









    Out[24]:







  
    
      
      Sex
      Length
      Diameter
      Height
      WholeWeight
      ShuckedWeight
      VisceraWeight
      ShellWeight
    
  
  
    
      0
      1
      0.455
      0.365
      0.095
      0.5140
      0.2245
      0.1010
      0.150
    
    
      1
      1
      0.350
      0.265
      0.090
      0.2255
      0.0995
      0.0485
      0.070
    
    
      2
      -1
      0.530
      0.420
      0.135
      0.6770
      0.2565
      0.1415
      0.210
    
    
      3
      1
      0.440
      0.365
      0.125
      0.5160
      0.2155
      0.1140
      0.155
    
    
      4
      0
      0.330
      0.255
      0.080
      0.2050
      0.0895
      0.0395
      0.055
    
    
      5
      0
      0.425
      0.300
      0.095
      0.3515
      0.1410
      0.0775
      0.120
    
    
      6
      -1
      0.530
      0.415
      0.150
      0.7775
      0.2370
      0.1415
      0.330
    
    
      7
      -1
      0.545
      0.425
      0.125
      0.7680
      0.2940
      0.1495
      0.260
    
    
      8
      1
      0.475
      0.370
      0.125
      0.5095
      0.2165
      0.1125
      0.165
    
    
      9
      -1
      0.550
      0.440
      0.150
      0.8945
      0.3145
      0.1510
      0.320



In [30]:

    
# Build the splitter from `sklearn.model_selection`: the legacy
# `sklearn.cross_validation.KFold(n, n_folds=...)` API was deprecated in
# scikit-learn 0.18 and removed in 0.20. The current class takes only the number
# of splits; the sample count comes from the data handed to `cross_val_score`.
from sklearn.model_selection import KFold

# Five shuffled folds with a fixed seed so the fold assignment is reproducible.
kfold = KFold(n_splits=5, shuffle=True, random_state=1)



In [38]:

    
# Mean cross-validated R^2 for random forests of 1..50 trees.
# Index 0 holds a 0.0 placeholder so that scores[n] is the result for n trees.
scores = [0.0]
for n in range(1, 51):
    # Fixed seed: each forest is reproducible for a given tree count.
    model = RandomForestRegressor(n_estimators=n, random_state=1)
    fold_r2 = cross_val_score(model, features, target, cv=kfold, scoring='r2')
    score = fold_r2.mean()
    scores.append(score)



In [41]:

    
# Print every entry of `scores` next to its index; index 0 is the 0.0
# placeholder the list was initialised with.
# NOTE(review): the recorded output shows tuples such as `(0, 0.0)`, which is what
# `print(i, v)` produces under Python 2; under Python 3 the same line prints
# `0 0.0` instead.
for i,v in enumerate(scores):
    print(i, v)









    



(0, 0.0)
(1, 0.10213869487724367)
(2, 0.33841675515802144)
(3, 0.4035798494618691)
(4, 0.44272239896668103)
(5, 0.4640207660674969)
(6, 0.4705816327587792)
(7, 0.4758306163523006)
(8, 0.4817418456255852)
(9, 0.4883478130215681)
(10, 0.49446412480247826)
(11, 0.4933965550001963)
(12, 0.4979658763976154)
(13, 0.5021364605722853)
(14, 0.5064286962257328)
(15, 0.5083311970432101)
(16, 0.5105131438322147)
(17, 0.5138482947993028)
(18, 0.5163275412739493)
(19, 0.519034688136388)
(20, 0.5186735928723822)
(21, 0.5198354233542501)
(22, 0.5201583536722211)
(23, 0.5210172709366251)
(24, 0.5224031825808091)
(25, 0.5226174639676071)
(26, 0.5238061581717052)
(27, 0.5241223522188981)
(28, 0.525053745123244)
(29, 0.5259703756348931)
(30, 0.5265378216675755)
(31, 0.5270998544306023)
(32, 0.5283894082092784)
(33, 0.5296165561229277)
(34, 0.5295639554438379)
(35, 0.529517247440473)
(36, 0.5295996267890184)
(37, 0.52913141344746)
(38, 0.5291602576334833)
(39, 0.5292409810274339)
(40, 0.529067015963163)
(41, 0.5293420056911183)
(42, 0.5295732065337546)
(43, 0.5293754946648532)
(44, 0.5291192337563251)
(45, 0.5283754660277202)
(46, 0.5285204275493458)
(47, 0.5287049445791908)
(48, 0.5296127515437734)
(49, 0.5303944554434498)
(50, 0.5305963435394535)

	Sex	Length	Diameter	Height	WholeWeight	ShuckedWeight	VisceraWeight	ShellWeight	Rings
0	1	0.455	0.365	0.095	0.5140	0.2245	0.1010	0.150	15
1	1	0.350	0.265	0.090	0.2255	0.0995	0.0485	0.070	7
2	-1	0.530	0.420	0.135	0.6770	0.2565	0.1415	0.210	9
3	1	0.440	0.365	0.125	0.5160	0.2155	0.1140	0.155	10
4	0	0.330	0.255	0.080	0.2050	0.0895	0.0395	0.055	7
5	0	0.425	0.300	0.095	0.3515	0.1410	0.0775	0.120	8
6	-1	0.530	0.415	0.150	0.7775	0.2370	0.1415	0.330	20
7	-1	0.545	0.425	0.125	0.7680	0.2940	0.1495	0.260	16
8	1	0.475	0.370	0.125	0.5095	0.2165	0.1125	0.165	9
9	-1	0.550	0.440	0.150	0.8945	0.3145	0.1510	0.320	19