In [1]:
from __future__ import division
import os
import numpy as np
import pandas as pd
from helpers import data_provider
import matplotlib.pyplot as plt
import seaborn as sns
In [2]:
# figure adjustments for compatibility with latex
%matplotlib inline
plt.style.use('classic')
plt.rc("figure", facecolor="white")
fig_width_pt = 469.755 # Get this from LaTeX using \showthe\columnwidth
inches_per_pt = 1.0/72.27 # Convert pt to inch
golden_mean = (np.sqrt(5)-1.0)/2.0 # Aesthetic ratio
fig_width = fig_width_pt*inches_per_pt # width in inches
fig_height = fig_width*golden_mean # height in inches
fig_size = [fig_width,fig_height]
params = {'backend': 'ps',
'axes.labelsize': 10,
'text.fontsize': 10,
'legend.fontsize': 10,
'xtick.labelsize': 8,
'ytick.labelsize': 8,
'text.usetex': True,
'figure.figsize': fig_size}
plt.rc('font', **{'family': 'serif', 'serif': ['Computer Modern']})
plt.rcParams.update(params)
In [3]:
# Load the linear-regression results, indexed by the 'House' column, and
# discard the 'Unnamed: 0' column (presumably a leftover row index from when
# the CSV was written - TODO confirm).
data = (
    pd.read_csv('results_linear_regression.csv', index_col='House')
    .drop(['Unnamed: 0'], axis=1)
)
In [4]:
# Baseline model: adjusted R squared on the training set, one bar per row of
# `data` (the index supplies the x categories).
scores = data['train_r_squared_adj']
x = data.index
y = scores.values
pal = sns.color_palette("GnBu_d", len(data))
# argsort applied twice yields each row's rank; the reversed palette is then
# indexed by that rank so a bar's colour follows its score's rank.
rank = scores.argsort().argsort()
bar_colours = np.array(pal[::-1])[rank]

plt.figure(1)
sns.barplot(x=x, y=y, palette=bar_colours)
plt.ylabel('Adjusted R Squared')
plt.title('Baseline Model - Goodness of Fit')
for target in ('figures/baseline_model/adj_rsquared.eps',
               'figures/baseline_model/adj_rsquared.pdf'):
    plt.savefig(target)
plt.show()
In [8]:
# Baseline model: adjusted R squared on the test set, one bar per row of
# `data` (the index supplies the x categories).
scores = data['test_r_squared_adj']
x = data.index
y = scores.values
pal = sns.color_palette("GnBu_r", len(data))
# argsort applied twice yields each row's rank; the reversed palette is then
# indexed by that rank so a bar's colour follows its score's rank.
rank = scores.argsort().argsort()
bar_colours = np.array(pal[::-1])[rank]

plt.figure(1)
sns.barplot(x=x, y=y, palette=bar_colours)
plt.ylabel('Adjusted R Squared')
plt.title('Adjusted R Squared on Test Set')
for target in ('figures/baseline_model/adj_rsquared_test.eps',
               'figures/baseline_model/adj_rsquared_test.pdf'):
    plt.savefig(target)
plt.show()
In [20]:
# Baseline model: grouped bar chart of the training F-statistic of each
# calendar predictor, one group of four bars per row of `data`.
pos = list(range(len(data['train_F_DayOfMonth'])))
width = 0.2

fig, ax = plt.subplots(1)

# (column, colour) for each bar of a group, in left-to-right drawing order.
# The legend labels further down rely on this order.
bar_specs = [
    ('train_F_DayOfMonth', '#FFC222'),
    ('train_F_Hour', '#F78F1E'),
    ('train_F_Month', '#EE3224'),
    ('train_F_WeekDays', '#B9770E'),
]
for slot, (column, shade) in enumerate(bar_specs):
    # Each successive series is shifted right by one bar width.
    ax.bar([p + width*slot for p in pos], data[column], width,
           alpha=0.5, color=shade)

# Axis labels, title and per-group tick labels taken from the index.
ax.set_ylabel('F-statistic')
ax.set_title('Baseline Model - Importance of Predictors per House')
ax.set_xticks([p + 1.6 * width for p in pos])
ax.set_xticklabels(data.index)
ax.set_xlabel('House')

# Axis limits: the y ceiling is the largest row-wise sum of the Hour and
# Month F-statistics.
ax.set_xlim(min(pos)-width, max(pos)+width*5)
ax.set_ylim([0, max(data['train_F_Hour'] + data['train_F_Month'])])

ax.legend(['Day of Month', 'Hour', 'Month', 'Day of Week'], loc='best')
ax.grid()
for target in ('figures/baseline_model/anova.eps',
               'figures/baseline_model/anova.pdf'):
    plt.savefig(target)
plt.show()
In [18]:
# Baseline model: the `test_mae` and `test_mde` columns side by side, one
# pair of bars per row of `data`.
pos = list(range(len(data['test_mae'])))
width = 0.25

fig, ax = plt.subplots(1)

# (column, colour) for each bar of a pair, in left-to-right drawing order.
# The legend labels further down rely on this order.
bar_specs = [
    ('test_mae', '#FE2E2E'),
    ('test_mde', '#0000FF'),
]
for slot, (column, shade) in enumerate(bar_specs):
    # The second series is shifted right by one bar width.
    ax.bar([p + width*slot for p in pos], data[column], width,
           alpha=0.5, color=shade)

# Axis labels, title and per-group tick labels taken from the index.
ax.set_ylabel('Absolute Error')
ax.set_title('Baseline Model - Error Metrics')
ax.set_xticks([p + 1.0 * width for p in pos])
ax.set_xticklabels(data.index)
ax.set_xlabel('House')

# Axis limits: the y ceiling is the largest test_mae plus 0.1 of headroom.
ax.set_xlim(min(pos)-width, max(pos)+width*5)
ax.set_ylim([0, max(data['test_mae'] + 0.1)])

ax.legend(['Mean Absolute Error', 'Median Absolute Error'], loc='best')
ax.grid()
for target in ('figures/baseline_model/mae_mde.eps',
               'figures/baseline_model/mae_mde.pdf'):
    plt.savefig(target)
plt.show()
In [21]:
# Baseline model: the `test_mape` and `test_stdape` columns side by side, one
# pair of bars per row of `data`.
pos = list(range(len(data['test_mape'])))
width = 0.25

fig, ax = plt.subplots(1)

# (column, colour) for each bar of a pair, in left-to-right drawing order.
# The legend labels further down rely on this order.
bar_specs = [
    ('test_mape', '#2ECCFA'),
    ('test_stdape', '#FFBF00'),
]
for slot, (column, shade) in enumerate(bar_specs):
    # The second series is shifted right by one bar width.
    ax.bar([p + width*slot for p in pos], data[column], width,
           alpha=0.5, color=shade)

# Axis labels, title and per-group tick labels taken from the index.
ax.set_ylabel('Percentage Error')
ax.set_title('Baseline Model - Percentage Error Metrics')
ax.set_xticks([p + 1.0 * width for p in pos])
ax.set_xticklabels(data.index)
ax.set_xlabel('House')

# Axis limits: the y ceiling is the largest test_stdape plus 10 of headroom.
ax.set_xlim(min(pos)-width, max(pos)+width*5)
ax.set_ylim([0, max(data['test_stdape'] + 10)])

ax.legend(['Mean Absolute Percentage Error', 'Standard Deviation Percentage Error'], loc='best')
ax.grid()
for target in ('figures/baseline_model/mape_stdape.eps',
               'figures/baseline_model/mape_stdape.pdf'):
    plt.savefig(target)
plt.show()
In [4]:
# Load the linear-weather results, indexed by the 'House' column, and discard
# the 'Unnamed: 0' column (presumably a leftover row index from when the CSV
# was written - TODO confirm). This rebinds `data` for the cells that follow.
data = (
    pd.read_csv('results_linear_weather.csv', index_col='House')
    .drop(['Unnamed: 0'], axis=1)
)
In [8]:
# Linear weather model: grouped bar chart of the training F-statistic of each
# weather predictor, one group of four bars per row of `data`.
pos = list(range(len(data['train_F_AirTemperature'])))
width = 0.2

fig, ax = plt.subplots(1)

# (column, colour) for each bar of a group, in left-to-right drawing order.
# The legend labels further down rely on this order.
bar_specs = [
    ('train_F_AirTemperature', '#FFC222'),
    ('train_F_Relative_humitidity', '#F78F1E'),
    ('train_F_Total_horizontal_solar_irradiation', '#EE3224'),
    ('train_F_Total_rainfall', '#B9770E'),
]
for slot, (column, shade) in enumerate(bar_specs):
    # Each successive series is shifted right by one bar width.
    ax.bar([p + width*slot for p in pos], data[column], width,
           alpha=0.5, color=shade)

# Axis labels, title and per-group tick labels taken from the index.
ax.set_ylabel('F-statistic')
ax.set_title('Linear Model - Importance of Weather Predictors per House')
ax.set_xticks([p + 1.6 * width for p in pos])
ax.set_xticklabels(data.index)
ax.set_xlabel('House')

# Axis limits: the y ceiling is the largest relative-humidity F-statistic
# plus 30 of headroom.
ax.set_xlim(min(pos)-width, max(pos)+width*5)
ax.set_ylim([0, max(data['train_F_Relative_humitidity'] + 30)])

ax.legend(['Air Temperature','Relative Humidity', 'Total Solar Irradiation','Total Rainfall'], loc='best')
ax.grid()
for target in ('figures/baseline_model/anova_weather.eps',
               'figures/baseline_model/anova_weather.pdf'):
    plt.savefig(target)
plt.show()
In [3]:
# Load the dynamic-regression results, indexed by the 'House' column, and
# discard the 'Unnamed: 0' column (presumably a leftover row index from when
# the CSV was written - TODO confirm). This rebinds `data` for the cells that
# follow.
data = (
    pd.read_csv('results_dynamic_regression.csv', index_col='House')
    .drop(['Unnamed: 0'], axis=1)
)
In [6]:
# Dynamic model: adjusted R squared on the training set, one bar per row of
# `data` (the index supplies the x categories).
scores = data['train_r_squared_adj']
x = data.index
y = scores.values
pal = sns.color_palette("GnBu_d", len(data))
# argsort applied twice yields each row's rank; the reversed palette is then
# indexed by that rank so a bar's colour follows its score's rank.
rank = scores.argsort().argsort()
bar_colours = np.array(pal[::-1])[rank]

plt.figure(1)
sns.barplot(x=x, y=y, palette=bar_colours)
plt.ylabel('Adjusted R Squared')
plt.title('Dynamic Model - Goodness of Fit')
for target in ('figures/dynamic_model/adj_rsquared.eps',
               'figures/dynamic_model/adj_rsquared.pdf'):
    plt.savefig(target)
plt.show()
In [5]:
# Dynamic model: adjusted R squared on the test set, one bar per row of
# `data` (the index supplies the x categories).
scores = data['test_r_squared_adj']
x = data.index
y = scores.values
pal = sns.color_palette("GnBu_r", len(data))
# argsort applied twice yields each row's rank; the reversed palette is then
# indexed by that rank so a bar's colour follows its score's rank.
rank = scores.argsort().argsort()
bar_colours = np.array(pal[::-1])[rank]

plt.figure(1)
sns.barplot(x=x, y=y, palette=bar_colours)
plt.ylabel('Adjusted R Squared')
plt.title('Adjusted R Squared on Test Set')
for target in ('figures/dynamic_model/adj_rsquared_test.eps',
               'figures/dynamic_model/adj_rsquared_test.pdf'):
    plt.savefig(target)
plt.show()
In [14]:
# Dynamic model: the training F-statistics of the two lag predictors
# (`train_F_t1`, `train_F_t24`) side by side, one pair of bars per row of
# `data`.
pos = list(range(len(data['train_F_DayOfMonth'])))
width = 0.3

fig, ax = plt.subplots(1)

# (column, colour) for each bar of a pair, in left-to-right drawing order.
# The legend labels further down rely on this order.
bar_specs = [
    ('train_F_t1', '#FFC222'),
    ('train_F_t24', '#EE3224'),
]
for slot, (column, shade) in enumerate(bar_specs):
    # The second series is shifted right by one bar width.
    ax.bar([p + width*slot for p in pos], data[column], width,
           alpha=0.5, color=shade)

# Axis labels, title and per-group tick labels taken from the index.
ax.set_ylabel('F-statistic')
ax.set_title('Dynamic Model - Importance of Lag Predictors per House')
ax.set_xticks([p + 1 * width for p in pos])
ax.set_xticklabels(data.index)
ax.set_xlabel('House')

# Axis limits.
# NOTE(review): the y ceiling is built from train_F_Hour + train_F_t1, yet
# train_F_Hour is not drawn here and train_F_t24 is not considered; confirm
# this cannot clip a Lag 24 bar.
ax.set_xlim(min(pos)-width, max(pos)+width*5)
ax.set_ylim([0, max(data['train_F_Hour'] + data['train_F_t1'])])

ax.legend(['Lag 1','Lag 24'], loc='best')
ax.grid()
for target in ('figures/dynamic_model/anova.eps',
               'figures/dynamic_model/anova.pdf'):
    plt.savefig(target)
plt.show()
In [15]:
# Dynamic model: the `test_mae` and `test_mde` columns side by side, one pair
# of bars per row of `data`.
pos = list(range(len(data['test_mae'])))
width = 0.25

fig, ax = plt.subplots(1)

# (column, colour) for each bar of a pair, in left-to-right drawing order.
# The legend labels further down rely on this order.
bar_specs = [
    ('test_mae', '#FE2E2E'),
    ('test_mde', '#0000FF'),
]
for slot, (column, shade) in enumerate(bar_specs):
    # The second series is shifted right by one bar width.
    ax.bar([p + width*slot for p in pos], data[column], width,
           alpha=0.5, color=shade)

# Axis labels, title and per-group tick labels taken from the index.
ax.set_ylabel('Absolute Error')
ax.set_title('Dynamic Model - Error Metrics')
ax.set_xticks([p + 1.0 * width for p in pos])
ax.set_xticklabels(data.index)
ax.set_xlabel('House')

# Axis limits: the y ceiling is the largest test_mae plus 0.1 of headroom.
ax.set_xlim(min(pos)-width, max(pos)+width*5)
ax.set_ylim([0, max(data['test_mae'] + 0.1)])

ax.legend(['Mean Absolute Error', 'Median Absolute Error'], loc='best')
ax.grid()
for target in ('figures/dynamic_model/mae_mde.eps',
               'figures/dynamic_model/mae_mde.pdf'):
    plt.savefig(target)
plt.show()
In [16]:
# Dynamic model: the `test_mape` and `test_stdape` columns side by side, one
# pair of bars per row of `data`.
pos = list(range(len(data['test_mape'])))
width = 0.25

fig, ax = plt.subplots(1)

# (column, colour) for each bar of a pair, in left-to-right drawing order.
# The legend labels further down rely on this order.
bar_specs = [
    ('test_mape', '#2ECCFA'),
    ('test_stdape', '#FFBF00'),
]
for slot, (column, shade) in enumerate(bar_specs):
    # The second series is shifted right by one bar width.
    ax.bar([p + width*slot for p in pos], data[column], width,
           alpha=0.5, color=shade)

# Axis labels, title and per-group tick labels taken from the index.
ax.set_ylabel('Percentage Error')
ax.set_title('Dynamic Model - Percentage Error Metrics')
ax.set_xticks([p + 1.0 * width for p in pos])
ax.set_xticklabels(data.index)
ax.set_xlabel('House')

# Axis limits: the y ceiling is the largest test_stdape plus 10 of headroom.
ax.set_xlim(min(pos)-width, max(pos)+width*5)
ax.set_ylim([0, max(data['test_stdape'] + 10)])

ax.legend(['Mean Absolute Percentage Error', 'Standard Deviation Percentage Error'], loc='best')
ax.grid()
for target in ('figures/dynamic_model/mape_stdape.eps',
               'figures/dynamic_model/mape_stdape.pdf'):
    plt.savefig(target)
plt.show()
In [4]:
# Load the random-forest results, indexed by the 'House' column, and discard
# the 'Unnamed: 0' column (presumably a leftover row index from when the CSV
# was written - TODO confirm). This rebinds `data` for the cells that follow.
data = (
    pd.read_csv('results_forest_regression.csv', index_col='House')
    .drop(['Unnamed: 0'], axis=1)
)
In [11]:
# Random forest model: adjusted R^2 on the training set next to the test set,
# one pair of bars per row of `data`.
pos = list(range(len(data['train_r_squared_adj'])))
width = 0.25

fig, ax = plt.subplots(1)

# (column, colour) for each bar of a pair, in left-to-right drawing order.
# The legend labels further down rely on this order.
bar_specs = [
    ('train_r_squared_adj', '#086A87'),
    ('test_r_squared_adj', '#04B486'),
]
for slot, (column, shade) in enumerate(bar_specs):
    # The second series is shifted right by one bar width.
    ax.bar([p + width*slot for p in pos], data[column], width,
           alpha=0.5, color=shade)

# Label and title are set once. The earlier version first set a title copied
# from another chart ('Dynamic Model - Percentage Error Metrics') and then
# overwrote it, and set the same y label twice.
ax.set_ylabel(r'Adjusted $R^2$')
ax.set_title('Random Forest Model - Goodness of Fit')
ax.set_xticks([p + 1.0 * width for p in pos])
ax.set_xticklabels(data.index)
ax.set_xlabel('House')

# Axis limits: fixed y range of [0, 1.2].
ax.set_xlim(min(pos)-width, max(pos)+width*5)
ax.set_ylim([0, 1.2])

ax.legend([r'Adjusted $R^2$ on Training set', r'Adjusted $R^2$ on Test set'], loc='best')
for target in ('figures/forest_model/adj_rsquared.eps',
               'figures/forest_model/adj_rsquared.pdf'):
    plt.savefig(target)
plt.show()
In [12]:
# Random forest model: the `test_mae` and `test_mde` columns side by side,
# one pair of bars per row of `data`.
pos = list(range(len(data['test_mae'])))
width = 0.25

fig, ax = plt.subplots(1)

# (column, colour) for each bar of a pair, in left-to-right drawing order.
# The legend labels further down rely on this order.
bar_specs = [
    ('test_mae', '#FE2E2E'),
    ('test_mde', '#0000FF'),
]
for slot, (column, shade) in enumerate(bar_specs):
    # The second series is shifted right by one bar width.
    ax.bar([p + width*slot for p in pos], data[column], width,
           alpha=0.5, color=shade)

# Axis labels, title and per-group tick labels taken from the index.
ax.set_ylabel('Absolute Error')
ax.set_title('Random Forest Model - Error Metrics')
ax.set_xticks([p + 1.0 * width for p in pos])
ax.set_xticklabels(data.index)
ax.set_xlabel('House')

# Axis limits: the y ceiling is the largest test_mae plus 0.1 of headroom.
ax.set_xlim(min(pos)-width, max(pos)+width*5)
ax.set_ylim([0, max(data['test_mae'] + 0.1)])

ax.legend(['Mean Absolute Error', 'Median Absolute Error'], loc='best')
ax.grid()
for target in ('figures/forest_model/mae_mde.eps',
               'figures/forest_model/mae_mde.pdf'):
    plt.savefig(target)
plt.show()
In [13]:
# Random forest model: the `test_mape` and `test_stdape` columns side by
# side, one pair of bars per row of `data`.
pos = list(range(len(data['test_mape'])))
width = 0.25

fig, ax = plt.subplots(1)

# (column, colour) for each bar of a pair, in left-to-right drawing order.
# The legend labels further down rely on this order.
bar_specs = [
    ('test_mape', '#2ECCFA'),
    ('test_stdape', '#FFBF00'),
]
for slot, (column, shade) in enumerate(bar_specs):
    # The second series is shifted right by one bar width.
    ax.bar([p + width*slot for p in pos], data[column], width,
           alpha=0.5, color=shade)

# Axis labels, title and per-group tick labels taken from the index.
ax.set_ylabel('Percentage Error')
ax.set_title('Random Forest Model - Percentage Error Metrics')
ax.set_xticks([p + 1.0 * width for p in pos])
ax.set_xticklabels(data.index)
ax.set_xlabel('House')

# Axis limits: the y ceiling is the largest test_stdape plus 10 of headroom.
ax.set_xlim(min(pos)-width, max(pos)+width*5)
ax.set_ylim([0, max(data['test_stdape'] + 10)])

ax.legend(['Mean Absolute Percentage Error', 'Standard Deviation Percentage Error'], loc='best')
ax.grid()
for target in ('figures/forest_model/mape_stdape.eps',
               'figures/forest_model/mape_stdape.pdf'):
    plt.savefig(target)
plt.show()
In [17]:
# Random forest model: grouped bar chart of the `importance_*` columns, one
# group of six bars per row of `data`.
pos = list(range(len(data['importance_DayOfMonth'])))
width = 0.15

fig, ax = plt.subplots(1)

# (column, colour) for each bar of a group, in left-to-right drawing order.
# The legend labels further down rely on this order.
bar_specs = [
    ('importance_DayOfMonth', '#FFC222'),
    ('importance_Hour', '#F78F1E'),
    ('importance_Month', '#EE3224'),
    ('importance_WeekDays', '#B9770E'),
    ('importance_t1', '#00BFFF'),
    ('importance_t24', '#5FB404'),
]
n_bars = len(bar_specs)
for slot, (column, shade) in enumerate(bar_specs):
    # Each successive series is shifted right by one bar width.
    ax.bar([p + width*slot for p in pos], data[column], width,
           alpha=0.5, color=shade)

ax.set_ylabel('Feature Importance')
ax.set_title('Random Forest Model - Importance of Predictors per House')
# Put each tick midway between the first and last bar position of its group.
# The previous 1.6*width offset was tuned for a four-bar group.
# NOTE(review): this is the exact group centre only for centre-aligned bars
# (believed to be the matplotlib default since 2.0) - confirm for the
# installed version.
ax.set_xticks([p + width*(n_bars - 1)/2.0 for p in pos])
ax.set_xticklabels(data.index)
ax.set_xlabel('House')

# The right limit must clear the sixth bar, which sits at max(pos) + 5*width.
# The previous bound of max(pos) + width*5 cut through (or excluded) the
# last group's 'Lag 24' bar.
ax.set_xlim(min(pos)-width, max(pos)+width*(n_bars + 1))
ax.set_ylim([0, 1])

ax.legend(['Day Of Month','Hour', 'Month', 'Day of Week', 'Lag 1', 'Lag 24'], loc='best')
ax.grid()
for target in ('figures/forest_model/anova.eps',
               'figures/forest_model/anova.pdf'):
    plt.savefig(target)
plt.show()
In [3]:
# Load the XGBoost results, indexed by the 'House' column, and discard the
# 'Unnamed: 0' column (presumably a leftover row index from when the CSV was
# written - TODO confirm). This rebinds `data` for the cells that follow.
data = (
    pd.read_csv('results_xgboost_regression.csv', index_col='House')
    .drop(['Unnamed: 0'], axis=1)
)
In [4]:
# Show pandas' summary statistics for the currently loaded results table;
# as the last expression of the cell, the result is rendered as its output.
data.describe()
Out[4]:
In [5]:
# XGBoost: grouped bar chart of five `importance_*` columns (the chart title
# calls them the highly important predictors), one group per row of `data`.
pos = list(range(len(data['importance_DayOfMonth'])))
width = 0.15

fig, ax = plt.subplots(1)

# (column, colour) for each bar of a group, in left-to-right drawing order.
# The legend labels further down rely on this order.
bar_specs = [
    ('importance_Hour', '#FFC222'),
    ('importance_air_temperature', '#F78F1E'),
    ('importance_irradiation', '#EE3224'),
    ('importance_t1', '#B9770E'),
    ('importance_t24', '#00BFFF'),
]
for slot, (column, shade) in enumerate(bar_specs):
    # Each successive series is shifted right by one bar width.
    ax.bar([p + width*slot for p in pos], data[column], width,
           alpha=0.5, color=shade)

# Axis labels, title and per-group tick labels taken from the index.
ax.set_ylabel('Feature Importance')
ax.set_title('Extreme Gradient Boosting - Highly Important Predictors')
ax.set_xticks([p + 1.6 * width for p in pos])
ax.set_xticklabels(data.index)
ax.set_xlabel('House')

# Axis limits: fixed y range of [0, 3500].
ax.set_xlim(min(pos)-width, max(pos)+width*5)
ax.set_ylim([0, 3500])

ax.legend(['Hour','Air temperature', 'Average solar irradiation', 'Lag 1', 'Lag 24'], loc='best')
ax.grid()
for target in ('figures/xgboost/high_importance.eps',
               'figures/xgboost/high_importance.pdf'):
    plt.savefig(target)
plt.show()
In [14]:
# XGBoost: grouped bar chart of five `importance_*` columns (the chart title
# calls them the less important predictors), one group per row of `data`.
pos = list(range(len(data['importance_DayOfMonth'])))
width = 0.15

fig, ax = plt.subplots(1)

# (column, colour, legend label) for each bar of a group, in left-to-right
# drawing order. Keeping the label next to its column fixes the earlier
# legend, which named the humidity bar 'Total rainfall' and the rainfall bar
# 'Average humidity'.
bar_specs = [
    ('importance_Month', '#FFC222', 'Month'),
    ('importance_DayOfMonth', '#F78F1E', 'Day of month'),
    ('importance_WeekDays', '#EE3224', 'Day of week'),
    ('importance_humidity', '#B9770E', 'Average humidity'),
    ('importance_rainfall', '#00BFFF', 'Total rainfall'),
]
for slot, (column, shade, _label) in enumerate(bar_specs):
    # Each successive series is shifted right by one bar width.
    ax.bar([p + width*slot for p in pos], data[column], width,
           alpha=0.5, color=shade)

# Axis labels, title and per-group tick labels taken from the index.
ax.set_ylabel('Feature Importance')
ax.set_title('Ensemble of Decision Trees - Less Important Predictors')
ax.set_xticks([p + 1.6 * width for p in pos])
ax.set_xticklabels(data.index)
ax.set_xlabel('House')

# Axis limits: fixed y range of [0, 2000].
ax.set_xlim(min(pos)-width, max(pos)+width*5)
ax.set_ylim([0, 2000])

# Labels are listed in the same order in which the bars were drawn.
ax.legend([label for _column, _shade, label in bar_specs], loc='best')
ax.grid()
for target in ('figures/xgboost/low_importance.eps',
               'figures/xgboost/low_importance.pdf'):
    plt.savefig(target)
plt.show()
In [ ]: