Analysis of the Energy Efficiency dataset from the UCI Machine Learning Repository.
In [1]:
import numpy as np
import pandas as pd
# IPython magic: pulls numpy and matplotlib names (including `pylab`) into the
# interactive namespace and renders figures inline in the notebook.
%pylab inline
# Switch matplotlib to its 'ggplot' style sheet for the plots drawn from here on.
pylab.style.use('ggplot')
import seaborn as sns
In [2]:
# Read the energy-efficiency CSV (path is relative to the working directory) into a DataFrame.
data_df = pd.read_csv(filepath_or_buffer='energy_efficiency.csv')
In [4]:
# Preview the first five rows of the loaded table.
data_df.head(n=5)
Out[4]:
In [9]:
# Draw one joint plot per input column against Y1 to eyeball how each relates to it.
#
# The input columns are derived directly from data_df (everything except Y1 and Y2),
# so this cell runs on a fresh kernel without needing a `feature_df` variable that
# has not been created yet at this point in the notebook.
#
# sns.jointplot is a figure-level function that creates its own figure, so no
# pylab.figure() call is made here; one would only leave an empty figure behind
# for every column.
for fname in data_df.drop(['Y1', 'Y2'], axis=1):
    sns.jointplot(x=fname, y='Y1', data=data_df)
In [10]:
# Input columns only: data_df with the Y1 and Y2 columns removed.
feature_df = data_df.drop(columns=['Y1', 'Y2'])
In [11]:
# Correlation of every feature column with Y1, displayed as a bar chart.
y1_corrs = feature_df.corrwith(data_df['Y1'])
y1_corrs.plot.bar()
Out[11]:
In [15]:
# Pairwise correlations among the feature columns, drawn as a heatmap
# with the coefficient written in each cell.
f_corrs = feature_df.corr(method='pearson')
sns.heatmap(data=f_corrs, annot=True)
Out[15]:
In [17]:
import statsmodels.formula.api as sm
In [24]:
# Ordinary least squares regression of Y1 on X4, X2 and X7 through the formula interface;
# the cell ends on the fitted model's summary table so it is displayed.
y1_model = sm.ols(formula='Y1 ~ X4 + X2 + X7', data=data_df)
y1_result = y1_model.fit()
y1_result.summary()
Out[24]:
In [26]:
# Draw one joint plot per feature column against Y2 to eyeball how each relates to it.
#
# sns.jointplot is a figure-level function that creates its own figure, so no
# pylab.figure() call is made here; one would only leave an empty figure behind
# for every column.
for fname in feature_df:
    sns.jointplot(x=fname, y='Y2', data=data_df)
In [27]:
# Correlation of every feature column with Y2, displayed as a bar chart.
y2_corrs = feature_df.corrwith(data_df['Y2'])
y2_corrs.plot.bar()
Out[27]:
In [28]:
# Ordinary least squares regression of Y2 on X4, X2 and X7 through the formula interface.
y2_model = sm.ols(formula='Y2 ~ X4 + X2 + X7', data=data_df)
# Fit the Y2 model itself. Fitting y1_model here would silently report the Y1
# regression under the Y2 name.
y2_result = y2_model.fit()
# Last expression of the cell: displays the fitted model's summary table.
y2_result.summary()
Out[28]:
In [ ]: