In [1]:
! wget -N http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data


--2018-05-25 05:54:57--  http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... failed: Temporary failure in name resolution.
wget: unable to resolve host address ‘archive.ics.uci.edu’

In [72]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline

In [3]:
# Load the UCI Abalone dataset (fetched above via wget). The file has no
# header row, so column names are supplied explicitly; 'Rings' is the
# target variable (age proxy).
data = pd.read_csv('abalone.data', names=['Sex', 'Length', 'Diameter', 'Height', 'Whole weight', 'Shucked weight', 'Viscera weight', 'Shell weight', 'Rings'])
data.head()


Out[3]:
Sex Length Diameter Height Whole weight Shucked weight Viscera weight Shell weight Rings
0 M 0.455 0.365 0.095 0.5140 0.2245 0.1010 0.150 15
1 M 0.350 0.265 0.090 0.2255 0.0995 0.0485 0.070 7
2 F 0.530 0.420 0.135 0.6770 0.2565 0.1415 0.210 9
3 M 0.440 0.365 0.125 0.5160 0.2155 0.1140 0.155 10
4 I 0.330 0.255 0.080 0.2050 0.0895 0.0395 0.055 7

Now let's convert categorical feature 'Sex' to numerical via one-hot encoding


In [5]:
# One-hot encode the only categorical column ('Sex': M/F/I) into
# Sex_F/Sex_I/Sex_M indicator columns; numeric columns pass through
# unchanged (get_dummies only transforms object/categorical dtypes).
data = pd.get_dummies(data)
data.head()


Out[5]:
Length Diameter Height Whole weight Shucked weight Viscera weight Shell weight Rings Sex_F Sex_I Sex_M
0 0.455 0.365 0.095 0.5140 0.2245 0.1010 0.150 15 0 0 1
1 0.350 0.265 0.090 0.2255 0.0995 0.0485 0.070 7 0 0 1
2 0.530 0.420 0.135 0.6770 0.2565 0.1415 0.210 9 1 0 0
3 0.440 0.365 0.125 0.5160 0.2155 0.1140 0.155 10 0 0 1
4 0.330 0.255 0.080 0.2050 0.0895 0.0395 0.055 7 0 1 0

Analysis


In [6]:
# Summary statistics for all numeric columns.
# NOTE(review): min Height is 0.000 — physically impossible; likely a few
# bad rows worth inspecting/filtering before modeling.
data.describe()


Out[6]:
Length Diameter Height Whole weight Shucked weight Viscera weight Shell weight Rings Sex_F Sex_I Sex_M
count 4177.000000 4177.000000 4177.000000 4177.000000 4177.000000 4177.000000 4177.000000 4177.000000 4177.000000 4177.000000 4177.000000
mean 0.523992 0.407881 0.139516 0.828742 0.359367 0.180594 0.238831 9.933684 0.312904 0.321283 0.365813
std 0.120093 0.099240 0.041827 0.490389 0.221963 0.109614 0.139203 3.224169 0.463731 0.467025 0.481715
min 0.075000 0.055000 0.000000 0.002000 0.001000 0.000500 0.001500 1.000000 0.000000 0.000000 0.000000
25% 0.450000 0.350000 0.115000 0.441500 0.186000 0.093500 0.130000 8.000000 0.000000 0.000000 0.000000
50% 0.545000 0.425000 0.140000 0.799500 0.336000 0.171000 0.234000 9.000000 0.000000 0.000000 0.000000
75% 0.615000 0.480000 0.165000 1.153000 0.502000 0.253000 0.329000 11.000000 1.000000 1.000000 1.000000
max 0.815000 0.650000 1.130000 2.825500 1.488000 0.760000 1.005000 29.000000 1.000000 1.000000 1.000000

In [77]:
# Pairwise Pearson correlations. The physical measurements are strongly
# inter-correlated (>= ~0.77 among themselves); 'Shell weight' shows the
# highest correlation with the target 'Rings' (~0.63).
data.corr()


Out[77]:
Length Diameter Height Whole weight Shucked weight Viscera weight Shell weight Rings Sex_F Sex_I Sex_M
Length 1.000000 0.986812 0.827554 0.925261 0.897914 0.903018 0.897706 0.556720 0.309666 -0.551465 0.236543
Diameter 0.986812 1.000000 0.833684 0.925452 0.893162 0.899724 0.905330 0.574660 0.318626 -0.564315 0.240376
Height 0.827554 0.833684 1.000000 0.819221 0.774972 0.798319 0.817338 0.557467 0.298421 -0.518552 0.215459
Whole weight 0.925261 0.925452 0.819221 1.000000 0.969405 0.966375 0.955355 0.540390 0.299741 -0.557592 0.252038
Shucked weight 0.897914 0.893162 0.774972 0.969405 1.000000 0.931961 0.882617 0.420884 0.263991 -0.521842 0.251793
Viscera weight 0.903018 0.899724 0.798319 0.966375 0.931961 1.000000 0.907656 0.503819 0.308444 -0.556081 0.242194
Shell weight 0.897706 0.905330 0.817338 0.955355 0.882617 0.907656 1.000000 0.627574 0.306319 -0.546953 0.235391
Rings 0.556720 0.574660 0.557467 0.540390 0.420884 0.503819 0.627574 1.000000 0.250279 -0.436063 0.181831
Sex_F 0.309666 0.318626 0.298421 0.299741 0.263991 0.308444 0.306319 0.250279 1.000000 -0.464298 -0.512528
Sex_I -0.551465 -0.564315 -0.518552 -0.557592 -0.521842 -0.556081 -0.546953 -0.436063 -0.464298 1.000000 -0.522541
Sex_M 0.236543 0.240376 0.215459 0.252038 0.251793 0.242194 0.235391 0.181831 -0.512528 -0.522541 1.000000

In [7]:
# Build the feature matrix (everything except the target 'Rings') and
# standardize it to zero mean / unit variance.
# FIX(review): StandardScaler was only imported in a later cell (In[71]),
# so this cell raised NameError on a fresh Restart & Run All. Import it
# here so the notebook runs top-to-bottom.
from sklearn.preprocessing import StandardScaler

X = data.drop(columns=['Rings'])
X = StandardScaler().fit_transform(X)
y = data['Rings']

In [8]:
# FIX(review): train_test_split was only imported in a later cell (In[71]),
# breaking a fresh top-to-bottom run. Import it locally.
from sklearn.model_selection import train_test_split

# Hold out 33% for testing; fixed random_state for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.33, random_state=17)

Classification. Here the integer Rings value is treated as a class label, which makes this a multi-class problem with roughly 28 distinct classes — so accuracy scores around 0.25 are not surprising.


In [71]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

In [79]:
def score(model, X_tr=None, y_tr=None, X_te=None, y_te=None):
    """Fit `model` and print its score on the train and test splits.

    Parameters
    ----------
    model : estimator following the sklearn API (``fit(X, y)`` and
        ``score(X, y)``).
    X_tr, y_tr, X_te, y_te : optional explicit data splits. When omitted,
        the notebook-global ``X_train``/``y_train``/``X_test``/``y_test``
        are used (the original behavior), but passing them explicitly
        avoids hidden-state dependence on the kernel namespace.

    Returns
    -------
    (train_score, test_score) : tuple of floats. The original version
        returned None; returning the scores is backward-compatible since
        all existing callers ignore the return value.

    Note: ``score`` is accuracy for classifiers but R^2 for regressors,
    so values are not comparable across the two model families.
    """
    if X_tr is None:
        # Fall back to the splits created earlier in the notebook.
        X_tr, y_tr, X_te, y_te = X_train, y_train, X_test, y_test
    model.fit(X_tr, y_tr)
    train_score = model.score(X_tr, y_tr)
    test_score = model.score(X_te, y_te)
    print('Train score: {}'.format(train_score))
    print('Test score: {}'.format(test_score))
    return train_score, test_score

K-Neighbors


In [80]:
# k-NN classifier with k=29 neighbors (ad-hoc choice; no CV shown for k).
score(KNeighborsClassifier(29))


Train score: 0.3273766976411723
Test score: 0.24945612762871647

SVM + linear kernel


In [81]:
# Linear-kernel support vector classifier.
score(SVC(kernel='linear'))


Train score: 0.27662616154395997
Test score: 0.25598259608411894

Decision tree


In [82]:
# Shallow decision tree (depth capped at 4 to limit overfitting).
score(DecisionTreeClassifier(max_depth=4))


Train score: 0.2948534667619728
Test score: 0.2574329224075417

Random forest


In [83]:
# Small random forest: 10 shallow trees, 2 candidate features per split.
score(RandomForestClassifier(max_depth=4, n_estimators=10, max_features=2))


Train score: 0.3012866333095068
Test score: 0.25670775924583034

Multi-layer perceptron


In [84]:
# Multi-layer perceptron with strong L2 regularization (alpha=1);
# all other hyperparameters left at sklearn defaults.
score(MLPClassifier(alpha=1))


Train score: 0.29592566118656183
Test score: 0.2625090645395214

AdaBoost


In [85]:
# AdaBoost with default base estimator and settings.
score(AdaBoostClassifier())


Train score: 0.21300929235167976
Test score: 0.2189992748368383

Regression. Rings is really a numeric (ordinal) target, so regression is the more natural framing. Note that for regressors `.score()` reports R², which is not comparable with the classifier accuracies above.


In [89]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR

Linear regression


In [86]:
# Ordinary least squares baseline.
# NOTE: for regressors .score() is R^2, not accuracy — not directly
# comparable with the classifier scores above.
score(LinearRegression())


Train score: 0.5346804750082439
Test score: 0.5423371485898663

SVM + RBF kernel


In [87]:
# RBF-kernel SVR with weak regularization (C=1e3). The large train/test
# gap in the output below (0.65 vs 0.47) suggests overfitting.
score(SVR(kernel='rbf', C=1e3, gamma=0.1))


Train score: 0.6513960441280235
Test score: 0.4730994087829582

SVM + polynomial kernel


In [88]:
# Quadratic polynomial-kernel SVR with C=1e3.
score(SVR(kernel='poly', C=1e3, degree=2))


Train score: 0.3880144594339444
Test score: 0.2779446024994301