In [1]:
# Chapter 3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
In [2]:
# Read the height-weight dataset
dataset = np.loadtxt('book_code/Section 2/height-weight.csv', delimiter=',')
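In [ ]:
# Optional alternative load (a sketch, not part of the original flow): pandas is
# imported above but never used. Assuming the CSV has no header row, the same
# file can be read into a DataFrame, which is what a .head() call would need.
# The column names below are assumptions based on the file name.
df = pd.read_csv('book_code/Section 2/height-weight.csv',
                 header=None, names=['height', 'weight'])
df.head()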
In [3]:
# dataset.head() only works on a pandas DataFrame; np.loadtxt returns a plain
# NumPy array, so rows are previewed by slicing instead (see below)
In [4]:
dataset.shape
Out[4]:
In [5]:
dataset[:10]
Out[5]:
In [6]:
# Separate the feature column (X) from the target column (Y)
X = dataset[:,0]
Y = dataset[:,1]
In [7]:
X.shape
Out[7]:
In [8]:
X[:10]
Out[8]:
In [9]:
plt.scatter(X[:20], Y[:20], color='red', s=30)
Out[9]:
In [10]:
# Split the data into training and test
# Training data
X_train = X[:4500]
y_train = Y[:4500]
# Test dataset
X_test = X[4500:]
y_test = Y[4500:]
In [11]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)
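In [ ]:
# Side note (a sketch, not the book's approach): the positional slice above
# assumes the CSV rows are in no particular order. If they were sorted, a random
# permutation before slicing would avoid an ordering bias. The *_alt names are
# illustrative and do not replace the variables used below.
rng = np.random.default_rng(42)            # fixed seed for reproducibility
idx = rng.permutation(len(X))
X_train_alt, y_train_alt = X[idx][:4500], Y[idx][:4500]
X_test_alt, y_test_alt = X[idx][4500:], Y[idx][4500:]
print(X_train_alt.shape, X_test_alt.shape)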
In [12]:
# scikit-learn expects a 2-D feature matrix of shape (n_samples, n_features),
# so reshape the 1-D training vector into a single-column matrix
X_train = X_train.reshape(-1, 1)
X_train.shape
Out[12]:
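In [ ]:
# Equivalent reshape (a side note): indexing with np.newaxis adds a length-1
# axis and yields the same (n_samples, 1) column matrix that scikit-learn
# expects for the features.
np.array_equal(X[:4500, np.newaxis], X_train)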
In [13]:
# Preview the first 10 training rows (X_train[:, :10] would return every row,
# since the array now has only one column)
X_train[:10]
Out[13]:
In [14]:
y_train = y_train.reshape(-1, 1)
y_train.shape
Out[14]:
In [15]:
y_train[:10]
Out[15]:
In [16]:
# Create linear regression object
regr = linear_model.LinearRegression()
In [17]:
# Train the model
regr.fit(X_train, y_train)
Out[17]:
In [18]:
# Reshape test vectors
X_test = X_test.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)
In [19]:
# Predictions
y_pred = regr.predict(X_test)
In [20]:
# The coefficients
print("Coefficients:\n{}".format(regr.coef_))
In [21]:
# Plot outputs
plt.scatter(X_test, y_test, color='black')
plt.plot(X_test, y_pred, color='blue', linewidth=3)
plt.xticks(())
plt.yticks(())
plt.show()
In [22]:
# Evaluate the fit with the mean squared error on the test set
print("Mean square error: {:0.2f}".format(mean_squared_error(y_test, y_pred)))
In [23]:
# R2 score
print("Variance Score (1 is perfect prediction) -> {:.2f}".format(r2_score(y_test, y_pred)))
In [24]:
###
# Predicting energy output of a power plant
###
power_dataset = np.loadtxt('book_code/Section 2/power.csv', delimiter=',')
In [25]:
power_dataset.shape
Out[25]:
In [26]:
power_dataset[:10]
Out[26]:
In [27]:
# Import the helper that splits data into random training and test subsets
from sklearn.model_selection import train_test_split
In [29]:
# Split features from labels: the independent variables Ambient Temperature (AT),
# Exhaust Vacuum (EV), Ambient Pressure (AP) and Relative Humidity (RH) go into X,
# and the dependent variable (energy output) goes into y
X = power_dataset[:, 0:4]
y = power_dataset[:, 4]
In [30]:
# Split the data into training and test sets (80/20)
X_training, X_test, y_training, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
In [31]:
# Test the shapes
print(X_training.shape)
print(y_training.shape)
print(X_test.shape)
print(y_test.shape)
In [32]:
# Plot energy output vs ambient temperature
plt.scatter(X_training[:, 0], y_training, color='red')
plt.xlabel('Ambient Temperature')
plt.ylabel('Energy Output')
plt.show()
In [34]:
# Energy output vs exhaust vacuum
plt.scatter(X_training[:, 1], y_training, color='red')
plt.xlabel('Exhaust Vacuum')
plt.ylabel('Energy Output')
plt.show()
In [35]:
# Energy output vs ambient pressure
plt.scatter(X_training[:, 2], y_training, color='red')
plt.xlabel('Ambient Pressure')
plt.ylabel('Energy Output')
plt.show()
In [36]:
# Energy output vs relative humidity
plt.scatter(X_training[:, 3], y_training, color='red')
plt.xlabel('Relative Humidity')
plt.ylabel('Energy Output')
plt.show()
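In [ ]:
# Optional presentation alternative (a sketch): the four scatter plots above can
# be drawn in a single 2x2 grid, which makes it easier to compare how strongly
# each feature relates to the energy output.
feature_names = ['Ambient Temperature', 'Exhaust Vacuum',
                 'Ambient Pressure', 'Relative Humidity']
fig, axes = plt.subplots(2, 2, figsize=(10, 8))
for i, ax in enumerate(axes.ravel()):
    ax.scatter(X_training[:, i], y_training, color='red', s=5)
    ax.set_xlabel(feature_names[i])
    ax.set_ylabel('Energy Output')
plt.tight_layout()
plt.show()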
In [37]:
# Create the linear regression object
power_regr = linear_model.LinearRegression()
In [38]:
# We then train the model using the training sets
power_regr.fit(X_training, y_training)
Out[38]:
In [39]:
# Make predictions using the testing set
y_predictions = power_regr.predict(X_test)
In [40]:
# The coefficients
print("Coefficients: {}".format(power_regr.coef_))
In [42]:
# Calculate the mean square error
print("Mean squared error: {:0.2f}".format(mean_squared_error(y_test, y_predictions)))
In [43]:
# Show variance score
print("Variance score (1 == perfect) ---> {:.2f}".format(r2_score(y_test, y_predictions)))
In [44]:
# Plot predictions and actual values against ambient temperature only
plt.scatter(X_test[:, 0], y_test, color='black')
plt.plot(X_test[:, 0], y_predictions, color='blue', linewidth=3)
plt.xticks(())
plt.yticks(())
plt.show()
In [ ]:
# The plot above looks scrambled because the predictions depend on all four
# features while the x-axis shows only ambient temperature, and the test rows
# are not sorted; a predicted-vs-actual plot (next cell) is a clearer diagnostic.
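In [ ]:
# A clearer diagnostic for a multivariate model (a sketch, not part of the
# original notebook): plot predicted vs actual energy output; points close to
# the dashed y = x line indicate accurate predictions.
plt.scatter(y_test, y_predictions, color='black', s=5)
lims = [y_test.min(), y_test.max()]
plt.plot(lims, lims, color='blue', linestyle='--', linewidth=2)
plt.xlabel('Actual Energy Output')
plt.ylabel('Predicted Energy Output')
plt.show()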