In [827]:
import numpy as np
from matplotlib import pyplot as plt
import numpy.lib.recfunctions as nlr
%matplotlib inline
In [828]:
Data = np.genfromtxt('recs2009_public.csv',delimiter=',',\
dtype=[('WALLTYPE', '<i8'),('HDD65', '<i8'),('DIPSTICK','i8'),('FUELHEAT','i8'),('WINDOWS','i8'),\
('PELHEAT','i8'),('PGASHEAT','i8'),('TOTHSQFT','<i8'),('TOTALBTUSPH','<f8')],\
skip_header=1,usecols=(24,6,315,430,609,699,705,829,908))
In [829]:
Data.dtype.fields
Out[829]:
In [830]:
Data.shape
Out[830]:
In [831]:
area = np.where(Data['TOTHSQFT']>0)
Data = Data[area]
wall = np.where(Data['WALLTYPE']<8)
Data = Data[wall]
#dipstick = np.where(Data['DIPSTICK']==0)
#Data = Data[dipstick]
fuel = np.where(Data['FUELHEAT']!=-2)
Data = Data[fuel]
#elepay = np.where(Data['PELHEAT']!=-2)
#Data = Data[elepay]
#gaspay = np.where(Data['PGASHEAT']!=-2)
#Data = Data[gaspay]
energy = Data['TOTALBTUSPH']/Data['TOTHSQFT']
Data_1 = nlr.drop_fields(Data,('TOTHSQFT','TOTALBTUSPH'))
Data_2 = nlr.append_fields(Data_1,'energy',data=energy)
In [832]:
Data_2.shape
Out[832]:
In [833]:
Data_2
Out[833]:
In [834]:
N = len(Data_2)
DM_material = np.zeros((N,7))
for i in range(N):
DM_material[i][Data_2['WALLTYPE'][i]-1] = 1
DM_material
Out[834]:
In [835]:
DM_fuel = np.zeros((N,9))
for i in range(N):
if Data_2['FUELHEAT'][i] == 21:
DM_fuel[i][8] = 1
elif Data_2['FUELHEAT'][i] == 8 or Data_2['FUELHEAT'][i] == 7 or Data_2['FUELHEAT'][i] == 9:
DM_fuel[i][Data_2['FUELHEAT'][i]-2] = 1
else:
DM_fuel[i][Data_2['FUELHEAT'][i]-1] = 1
DM_fuel
Out[835]:
In [836]:
DM_gaspay = np.zeros((N,4))
for i in range(N):
if Data_2['PGASHEAT'][i] == -2:
DM_gaspay[i][3] = 1
else:
DM_gaspay[i][Data_2['PGASHEAT'][i]-1] = 1
DM_gaspay
Out[836]:
In [837]:
DM_elepay = np.zeros((N,4))
for i in range(N):
if Data_2['PELHEAT'][i] == -2:
DM_elepay[i][3] = 1
else:
DM_elepay[i][Data_2['PELHEAT'][i]-1] = 1
DM_elepay
Out[837]:
In [838]:
DM_dipstick = np.zeros((N,3))
for i in range(N):
if Data_2['DIPSTICK'][i] == -2:
DM_dipstick[i][2] = 1
else:
DM_dipstick[i][Data_2['DIPSTICK'][i]] = 1
DM_dipstick
Out[838]:
In [839]:
DM_windows = np.zeros((N,8))
for i in range(N):
if Data_2['WINDOWS'][i] == 0:
DM_windows[i][0] = 1
elif Data_2['WINDOWS'][i] == 10:
DM_windows[i][1] = 1
elif Data_2['WINDOWS'][i] == 20:
DM_windows[i][2] = 1
elif Data_2['WINDOWS'][i] == 30:
DM_windows[i][3] = 1
elif Data_2['WINDOWS'][i] == 41:
DM_windows[i][4] = 1
elif Data_2['WINDOWS'][i] == 42:
DM_windows[i][5] = 1
elif Data_2['WINDOWS'][i] == 50:
DM_windows[i][6] = 1
else:
DM_windows[i][7] = 1
DM_windows
Out[839]:
In [840]:
print('The minimum heating degree days is '+str(min(Data['HDD65'])))
print('The maximum heating degree days is '+str(max(Data['HDD65'])))
In [841]:
fig1= plt.figure(figsize=(15,15))
plt.subplot(321)
plt.hist(Data['HDD65'],bins=25)
plt.title('Zone 1')
plt.xlabel('wall material')
plt.ylabel('Heat Consumption Energy [Thousand BTU]')
plt.subplot(322)
plt.hist(Data['HDD65'],bins=9)
plt.title('Zone 2')
plt.xlabel('wall material')
plt.ylabel('Heat Consumption Energy [Thousand BTU]')
plt.subplot(323)
plt.hist(Data['HDD65'],bins=20)
plt.title('Zone 3')
plt.xlabel('wall material')
plt.ylabel('Heat Consumption Energy [Thousand BTU]')
plt.subplot(324)
plt.hist(Data['HDD65'],bins=19)
plt.title('Zone 4')
plt.xlabel('wall material')
plt.ylabel('Heat Consumption Energy [Thousand BTU]')
plt.subplot(325)
plt.hist(Data['HDD65'],bins=18)
plt.title('Zone 5')
plt.xlabel('wall material')
plt.ylabel('Heat Consumption Energy [Thousand BTU]')
Out[841]:
In [842]:
def Tc(hdd, T_bound):
Tc_matrix = np.zeros((len(hdd), len(T_bound)+1))
for (i,t) in enumerate(hdd):
# first chunk
if t <= T_bound[0]:
Tc_matrix[i,0] = t
continue
else:
Tc_matrix[i,0] = T_bound[0]
# chunks in the middle
n = 1
while(n < len(T_bound)-1 and t > T_bound[n]):
Tc_matrix[i,n] = T_bound[n+1] - T_bound[n]
n += 1
if(n < len(T_bound) and t <= T_bound[n]):
Tc_matrix[i,n] = t - T_bound[n-1]
continue
# last chunk
if(t > T_bound[-1]):
if(len(T_bound)>1):
Tc_matrix[i,-2] = T_bound[-1] - T_bound[-2]
Tc_matrix[i,-1] = t - T_bound[-1]
return Tc_matrix
In [1280]:
# get T_bound
num_chunk = 25
H_bound = np.linspace(min(Data['HDD65']),max(Data['HDD65']),num_chunk+1)[1:-1]
H_bound
Out[1280]:
In [1281]:
DM_hdd = Tc(Data_2['HDD65'],H_bound)
DM_hdd.shape
Out[1281]:
In [1282]:
energy = Data_2['energy']
energy.shape
Out[1282]:
In [1297]:
X = np.hstack((DM_material,DM_fuel,DM_hdd,DM_dipstick))
X
Out[1297]:
In [1284]:
#from scipy import linalg
np.linalg.inv(np.dot(X.T,X))
Out[1284]:
In [1285]:
beta_hat = (linalg.inv(np.dot(X.T,X)).dot(X.T)).dot(energy)
beta_hat
Out[1285]:
In [1286]:
predicted = np.dot(X, beta_hat)
Y = energy
print(Y)
print(predicted)
SSres = (Y-predicted).T.dot(Y-predicted)
SSres
ave_y = np.mean(Y)
SStot = (Y-ave_y).T.dot(Y-ave_y)
R2 = 1-SSres/SStot
R2
Out[1286]:
In [1287]:
P = len(X[0])
MSE = SSres/(N-P)
MSE
Out[1287]:
In [1288]:
a = linalg.inv((X.T).dot(X))
a_diagonal = a.diagonal()
a_diagonal
Out[1288]:
In [1289]:
S_beta_k_sqaure = MSE*a_diagonal
S_beta_k = np.sqrt(S_beta_k_sqaure)
S_beta_k
Out[1289]:
Create a 95% confidence level.
In [1290]:
from scipy.stats import t
t_1 = t.isf(0.025,N-P)
t_1
Out[1290]:
In [1291]:
low_CI = beta_hat-t_1*S_beta_k
high_CI = beta_hat+t_1*S_beta_k
CI = np.vstack((low_CI, high_CI)).T
CI
Out[1291]:
In [1292]:
diff = np.diff(CI)
diff
Out[1292]:
In [1293]:
np.where(diff<50)
Out[1293]:
In [1294]:
T = beta_hat/S_beta_k
T
Out[1294]:
In [1295]:
alpha = 0.01
t_2 = t.isf(0.005,N-P)
t_2
Out[1295]:
In [1296]:
for i in range(len(T)):
if -t_2 < T[i] < t_2:
print((i,T[i]))
In [ ]:
In [ ]: