In [1]:
# import
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.datasets import load_boston
#from sklearn.metrics import scorer
%matplotlib inline
Explore the wine quality data and apply Linear Regression
In [2]:
names = ['Quality', 'Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash', 'Magnesium', 'Total phenols',
         'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins', 'Color intensity', 'Hue',
         'OD280/OD315 of diluted wines', 'Proline']
wine = pd.read_csv("data/wine.data", names=names)
#print(wine.describe)
wine[:5]
Out[2]:
In [3]:
# plotting the data
fig, ax = plt.subplots(figsize=(9,6))
X = wine['Alcohol']
y = wine['Quality']
ax.plot(X, y, 'b.')
ax.set_xlabel("Alcohol")
ax.set_ylabel("Quality")
ax.margins(0.2)
ax.grid(True)
In [4]:
lReg = LinearRegression()
X = wine[list(wine.columns)[1:]]
y = wine['Quality']
#print (X[:2])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=27)
# fitting the data
lReg.fit(X_train, y_train)
# predicting
y_pred = lReg.predict(X_test)
print('R-squared:', lReg.score(X_test, y_test))
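As a quick follow-up sketch (not part of the original notebook), the fitted model's intercept and per-feature coefficients can be inspected to see how each feature contributes to the predicted quality:
In [ ]:
# sketch: inspect the learned intercept and per-feature coefficients
print("Intercept:", lReg.intercept_)
for feature, coef in zip(X.columns, lReg.coef_):
    print(f"{feature}: {coef:.4f}")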
In [5]:
# cross validation scores
scores = cross_val_score(lReg, X_train, y_train, cv=5)
print("Scores",scores)
print("Mean Score",np.mean(scores))
In [6]:
plt.scatter(y_test, y_pred)
plt.xlabel("Quality")
plt.ylabel("Predicted Quality")
Out[6]:
There are two varieties of gradient descent, distinguished by the number of training instances used to update the model parameters in each iteration. Batch gradient descent, sometimes referred to simply as gradient descent, uses all of the training instances to update the model parameters in each iteration. Stochastic gradient descent (SGD), in contrast, updates the parameters using only a single training instance per iteration, usually selected at random. SGD is often preferred when there are hundreds of thousands of training instances or more, as it converges more quickly than batch gradient descent. Batch gradient descent is a deterministic algorithm and will produce the same parameter values given the same training set.
As a stochastic algorithm, SGD can produce different parameter estimates each time it is run. SGD may not minimize the cost function quite as well as batch gradient descent because it updates the weights from only a single training instance at a time, but its approximation is often close enough, particularly for convex cost functions such as the residual sum of squares.
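To make the distinction concrete, here is a minimal NumPy sketch (not from the original notebook) contrasting a batch update, which averages the gradient over every training instance, with a stochastic update that uses one randomly chosen instance per step. The toy data, learning rate, and iteration counts are illustrative assumptions.
In [ ]:
# illustrative sketch: batch vs. stochastic gradient descent for least squares
rng = np.random.RandomState(0)
Xd = rng.rand(100, 1)                            # toy inputs
yd = 4 + 3 * Xd[:, 0] + rng.randn(100) * 0.1     # noisy line y = 4 + 3x
Xb = np.c_[np.ones(len(Xd)), Xd]                 # add a bias column
lr, n_iter = 0.1, 1000

# batch gradient descent: gradient averaged over all instances each iteration
w_batch = np.zeros(2)
for _ in range(n_iter):
    grad = 2 / len(Xb) * Xb.T.dot(Xb.dot(w_batch) - yd)
    w_batch -= lr * grad

# stochastic gradient descent: one randomly selected instance per update
w_sgd = np.zeros(2)
for _ in range(n_iter):
    i = rng.randint(len(Xb))
    grad = 2 * Xb[i] * (Xb[i].dot(w_sgd) - yd[i])
    w_sgd -= lr * grad

print("Batch GD parameters:", w_batch)
print("SGD parameters:     ", w_sgd)
Both runs should land near the true parameters (4, 3); the SGD estimate wanders slightly around the minimum because each update is based on a single noisy instance.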
In [7]:
boston = load_boston()
# splitting the data
X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, test_size=0.2, random_state=27)
# standardise the features and the target
X_scaler = StandardScaler()
y_scaler = StandardScaler()
X_train = X_scaler.fit_transform(X_train)
y_train = y_scaler.fit_transform(y_train.reshape(-1, 1)).ravel()
X_test = X_scaler.transform(X_test)
y_test = y_scaler.transform(y_test.reshape(-1, 1)).ravel()
regressor = SGDRegressor(loss='squared_loss')
scores = cross_val_score(regressor, X_train, y_train, cv=5)
print('Cross validation r-squared scores:', scores)
print('Average cross validation r-squared score:', np.mean(scores))
# fit on the full training set and evaluate on the held-out test set
regressor.fit(X_train, y_train)
print('Test set r-squared score:', regressor.score(X_test, y_test))
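As a usage note (not in the original notebook), the predictions come out on the standardised scale; a small sketch of mapping them back to the original house-price units with the fitted y_scaler:
In [ ]:
# sketch: convert standardised predictions back to the original target units
y_pred_scaled = regressor.predict(X_test)
y_pred = y_scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()
print(y_pred[:5])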
In [ ]: