In [1]:
# Download the UCI Abalone dataset; -N (timestamping) skips the download
# when the local copy is already up to date.
! wget -N http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data
In [72]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
In [3]:
# Column names taken from the dataset's attribute description
# (abalone.names on the UCI repository page).
column_names = ['Sex', 'Length', 'Diameter', 'Height', 'Whole weight',
                'Shucked weight', 'Viscera weight', 'Shell weight', 'Rings']
data = pd.read_csv('abalone.data', names=column_names)
data.head()
Out[3]:
Now let's convert the categorical feature 'Sex' into numeric indicator columns via one-hot encoding
In [5]:
# Replace the non-numeric 'Sex' column with one-hot indicator columns
# (get_dummies leaves the numeric columns untouched). Note this rebinds
# `data`, so the raw categorical frame is no longer available afterwards.
data = pd.get_dummies(data)
data.head()
Out[5]:
In [6]:
# Summary statistics (count/mean/std/quartiles) for every column.
data.describe()
Out[6]:
In [77]:
# Pairwise Pearson correlations (pandas default), including the target 'Rings'.
data.corr()
Out[77]:
In [7]:
# Imported here because the notebook's sklearn import cell (In [71]) runs
# AFTER this cell on a fresh "Restart & Run All" — without this line the
# cell raises NameError on StandardScaler.
from sklearn.preprocessing import StandardScaler

# Separate features from the target, then standardize the features so
# scale-sensitive models (k-NN, SVM, MLP) aren't dominated by
# large-magnitude columns. Note fit_transform returns a plain numpy
# array, so column names are dropped from X here.
X = data.drop(columns=['Rings'])
X = StandardScaler().fit_transform(X)
y = data['Rings']
In [8]:
# Imported here because the notebook's sklearn import cell (In [71]) runs
# AFTER this cell on a fresh "Restart & Run All".
from sklearn.model_selection import train_test_split

# Hold out a third of the data for evaluation; fixed random_state makes
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.33, random_state=17)
In [71]:
# NOTE(review): this import cell executed as In [71], i.e. AFTER the cells
# above that already use StandardScaler and train_test_split — the notebook
# only ran because of out-of-order execution. These imports should live in
# the top import cell so Restart & Run All works.
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
In [79]:
def score(model, X_tr=None, y_tr=None, X_te=None, y_te=None):
    """Fit `model` and print its train/test scores.

    By default (no data arguments) this keeps the original behaviour and
    uses the notebook-global X_train/y_train/X_test/y_test split, so all
    existing `score(model)` calls work unchanged. Passing the four data
    arguments explicitly makes the function usable on any split.

    Note: `model.score` is accuracy for classifiers and R^2 for
    regressors, so numbers are not comparable across the two families.

    Returns
    -------
    (train_score, test_score) tuple (the original returned None, which
    no caller used, so returning the values is backward compatible).
    """
    if X_tr is None:
        # Fall back to the globals defined by the train_test_split cell.
        X_tr, y_tr, X_te, y_te = X_train, y_train, X_test, y_test
    model.fit(X_tr, y_tr)
    train_score = model.score(X_tr, y_tr)
    test_score = model.score(X_te, y_te)
    print('Train score: {}'.format(train_score))
    print('Test score: {}'.format(test_score))
    return train_score, test_score
In [80]:
# k-nearest neighbours with k = 29 (n_neighbors is the first positional arg).
knn = KNeighborsClassifier(n_neighbors=29)
score(knn)
In [81]:
# Support-vector classifier with a linear kernel.
linear_svc = SVC(kernel='linear')
score(linear_svc)
In [82]:
# Shallow decision tree (depth capped at 4 to limit overfitting).
tree = DecisionTreeClassifier(max_depth=4)
score(tree)
In [83]:
# Small random forest: 10 shallow trees, 2 candidate features per split.
forest = RandomForestClassifier(max_depth=4, n_estimators=10, max_features=2)
score(forest)
In [84]:
# Multilayer perceptron with L2 penalty alpha = 1.
mlp = MLPClassifier(alpha=1)
score(mlp)
In [85]:
# AdaBoost with default settings.
ada_boost = AdaBoostClassifier()
score(ada_boost)
In [89]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
In [86]:
# Ordinary least squares baseline; score() reports R^2 for regressors.
linreg = LinearRegression()
score(linreg)
In [87]:
# Support-vector regression with an RBF kernel.
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
score(svr_rbf)
In [88]:
# Support-vector regression with a degree-2 polynomial kernel.
svr_poly = SVR(kernel='poly', C=1e3, degree=2)
score(svr_poly)