In [5]:
import os

import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder

from HourlyPowerConsumptions import HourlyPowerConsumptions
from visualizations import plot_regression
In [1]:
def evaluate(regressor, X_train, y_train, X_test, y_test):
    """
    Fit ``regressor`` on the training data, predict on the test data and
    print a short quality report.

    The report contains the fitted coefficients (only when the estimator
    exposes a ``coef_`` attribute), the mean squared error of the predictions
    and the value returned by ``regressor.score(X_test, y_test)``. For
    scikit-learn regressors that score is documented as R^2 (best value 1.0,
    lower is worse); the historical 'Variance score' label is kept as-is.

    :param regressor: estimator providing ``fit``, ``predict`` and ``score``
    :param X_train: training features, passed unchanged to ``regressor.fit``
    :param y_train: training targets, passed unchanged to ``regressor.fit``
    :param X_test: test features, passed to ``regressor.predict`` and ``score``
    :param y_test: true targets belonging to ``X_test``
    :return: the predictions returned by ``regressor.predict(X_test)``
    """
    regressor.fit(X_train, y_train)
    y_pred = regressor.predict(X_test)

    # Not every estimator is a linear model; only report coefficients when
    # the fitted estimator actually has them.
    if hasattr(regressor, 'coef_'):
        print('Coefficients: \n', regressor.coef_)

    # Convert to arrays so the subtraction also works for plain sequences.
    squared_errors = (np.asarray(y_pred) - np.asarray(y_test)) ** 2
    # This is the *mean* of the squared errors, so it is labelled as such.
    print("Mean squared error: %.2f" % np.mean(squared_errors))

    print('Variance score: %.2f' % regressor.score(X_test, y_test))
    return y_pred
In [2]:
def plot_curves(x, y_pred, y_test, x_label, y_label, legend):
    """
    Plot two curves over the same x values: the predicted one and the truth.

    The predicted curve is drawn first and the truth curve second, so
    ``legend[0]`` labels the prediction and ``legend[1]`` labels the truth.
    The y-axis range covers both curves (plus a 0.2 margin) so that neither
    of them is cut off.

    :param x: 1 dim array for x
    :param y_pred: 1 dim array for predicted curve
    :param y_test: 1 dim array for truth curve
    :param x_label: x label
    :param y_label: y label
    :param legend: legend (expected of size 2)
    :return: None (the figure is shown with ``plt.show()``)
    """
    # Accept lists and other sequences; existing arrays pass through as-is.
    x = np.asanyarray(x)
    y_pred = np.asanyarray(y_pred)
    y_test = np.asanyarray(y_test)

    # Plot outputs
    plt.plot(x, y_pred)
    plt.plot(x, y_test)
    plt.legend(legend, loc='upper left')

    # Use the extremes of BOTH curves; limits taken from the truth alone
    # would clip predictions that fall outside the observed range.
    y_low = min(y_pred.min(), y_test.min())
    y_high = max(y_pred.max(), y_test.max())
    plt.axis([x.min() - 0.2, x.max() + 0.2, y_low - 0.2, y_high + 0.2])

    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.show()
In [6]:
# Folder holding the hourly spreadsheets. It can be overridden through the
# TEFCON_DATA_DIR environment variable so the notebook is not tied to one
# machine; when the variable is unset the original location is used.
dir_path = os.environ.get(
    "TEFCON_DATA_DIR",
    "/Users/zoraida/Desktop/TEFCON/all-country-data/hourly",
)
# Pattern of the spreadsheet files to load.
# NOTE(review): presumably appended to dir_path by HourlyPowerConsumptions -- confirm.
pattern = "/Hourly_201*month*.xls"
year = 2013  # year to predict
country = 'ES'  # country to predict
In [7]:
# Create the consumption reader from the configured location and pattern.
# NOTE(review): skiprows / maxcolumns / hourchange presumably describe the
# spreadsheet layout -- confirm against HourlyPowerConsumptions.
pc = HourlyPowerConsumptions(
    dir_path,
    pattern,
    skiprows=9,
    maxcolumns=26,
    hourchange='3B:00:00',
)
In [9]:
# Daily aggregates for `country`, requested for `year` with num_years=3.
df = pc.historical_daily_aggregates(country, year, num_years=3)
# Drop the 2012 leap day.
# NOTE(review): the date is hard-coded, so a leap day of any other year
# would stay in the data if `year` is changed -- confirm this is intended.
not_leap_day = df.date != '2012-02-29'
df = df[not_leap_day]
In [10]:
# Preview the first rows of the filtered daily aggregates.
df.head()
Out[10]:
In [11]:
# Split by calendar year: the three years before `year` are used for
# training, `year` itself is held out for testing.
feature_columns = ['month', 'year', 'weekday']
train_rows = df[df.year.isin(range(year - 3, year))]
test_rows = df[df.year == year]

y_train = train_rows.Consumption.values
y_test = test_rows.Consumption.values
# NOTE(review): `var` is a second name for the X_train array; nothing in the
# visible notebook reads it -- kept only so the notebook state is unchanged.
X_train = var = train_rows[feature_columns].values
X_test = test_rows[feature_columns].values
In [12]:
# Shape of the training targets (Consumption values of the training years).
y_train.shape
Out[12]:
In [13]:
# Shape of the test targets (Consumption values of the year to predict).
y_test.shape
Out[13]:
In [14]:
# Shape of the training features (columns: month, year, weekday).
X_train.shape
Out[14]:
In [15]:
# Shape of the test features (columns: month, year, weekday).
X_test.shape
Out[15]:
In [16]:
# Check the container type of the training features taken via `.values`.
type(X_train)
Out[16]:
In [17]:
# Check the container type of the training targets taken via `.values`.
type(y_train)
Out[17]:
In [18]:
# One-hot encode columns 0 and 2 of the feature matrix (month and weekday);
# column 1 (year) is not listed as categorical. The encoder is fitted on the
# training rows only and then applied to both splits.
# NOTE(review): `categorical_features` and `sparse` are legacy OneHotEncoder
# arguments that newer scikit-learn releases no longer accept -- confirm the
# scikit-learn version this notebook is pinned to.
vec = OneHotEncoder(categorical_features=[0, 2], sparse=False)
vec.fit(X_train)
X_train_T = vec.transform(X_train).astype(int)
X_test_T = vec.transform(X_test).astype(int)
In [20]:
# Shape of the training features after one-hot encoding.
X_train_T.shape
Out[20]:
In [21]:
# Inspect the first encoded training row.
X_train_T[0]
Out[21]:
In [22]:
# Unfitted scikit-learn linear regression model; it is fitted inside evaluate().
regressor = LinearRegression()
In [23]:
# Fit the model on the encoded training data, print the evaluation report and
# keep the predicted consumption for the test rows (single `country`, `year`).
y_pred = evaluate(regressor, X_train_T, y_train, X_test_T, y_test)
In [ ]: