In [2]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams
import datetime as dt
from sklearn import datasets, linear_model
%matplotlib inline
In [3]:
# Read 'recs2009_public.csv' into a float array. The first line of the file
# is skipped as a header; with genfromtxt's default float dtype, fields that
# cannot be parsed as numbers come back as nan.
data = np.genfromtxt(
    'recs2009_public.csv',
    delimiter=',',
    skip_header=1,
)
In [4]:
# Column 827 of the raw table; plotted in later cells on the axis labelled
# 'Total Residential Area [SF]'.
totalSF = data[:, 827]
In [5]:
# Column 839 of the raw table; plotted in later cells on the axis labelled
# 'Residential Electric Consumption [kWH]'.
totalKWH = data[:, 839]
In [6]:
# Column 785 of the raw table -- presumably household income; TODO confirm
# against the dataset codebook. Not referenced again in the visible cells.
income = data[:, 785]
In [7]:
# Column 760 of the raw table -- presumably an age field; TODO confirm
# against the dataset codebook. Not referenced again in the visible cells.
age = data[:, 760]
In [8]:
# Column 759 of the raw table -- presumably number of household members;
# TODO confirm against the dataset codebook. Not referenced again in the
# visible cells.
members = data[:, 759]
In [9]:
# Column 758 of the raw table -- presumably an education-level code; TODO
# confirm against the dataset codebook. Not referenced again in the visible
# cells.
education = data[:, 758]
In [10]:
# Overview figure: every record, totalSF on x against totalKWH on y,
# drawn as red circle markers.
plt.figure(figsize=(15,10))
plt.plot(totalSF,totalKWH,'ro')
plt.ylabel('Residential Electric Consumption [kWH]')
# The x-axis was unlabelled here; use the same label as the train/test
# figures so the plot stands on its own.
plt.xlabel('Total Residential Area [SF]')
Out[10]:
In [11]:
# Training portion: everything except the last 6040 records. The split is
# purely positional (no shuffling).
SFtrain, KWHtrain = totalSF[:-6040], totalKWH[:-6040]
In [12]:
# Quick look at the training areas. The parenthesized form runs on both
# Python 2 and Python 3 (the bare print statement is a SyntaxError on 3).
print(SFtrain)
In [13]:
# Held-out portion: the last 6040 records, i.e. exactly the rows left out
# of the training slices.
SFtest, KWHtest = totalSF[-6040:], totalKWH[-6040:]
In [14]:
# Quick look at the held-out areas. The parenthesized form runs on both
# Python 2 and Python 3 (the bare print statement is a SyntaxError on 3).
print(SFtest)
In [15]:
# Training rows only: SFtrain on x, KWHtrain on y, red circle markers.
fig, ax = plt.subplots(figsize=(15, 10))
ax.plot(SFtrain, KWHtrain, 'ro')
ax.set_ylabel('Residential Electric Consumption Train Data [kWH]')
ax.set_xlabel('Total Residential Area [SF]')
Out[15]:
In [16]:
# Held-out rows only: SFtest on x, KWHtest on y, red circle markers.
fig, ax = plt.subplots(figsize=(15, 10))
ax.plot(SFtest, KWHtest, 'ro')
ax.set_ylabel('Residential Electric Consumption [kWH]')
ax.set_xlabel('Total Residential Area [SF]')
Out[16]:
In [17]:
from scipy import stats
In [18]:
# Simple linear regression of KWHtrain (y) on SFtrain (x). Only slope and
# intercept are used by the visible cells that follow.
(slope, intercept, r_value, p_value, std_err) = stats.linregress(
    SFtrain,
    KWHtrain,
)
In [19]:
# Display the fitted slope: change in KWHtrain per unit of SFtrain.
slope
Out[19]:
In [20]:
# Display the fitted intercept: predicted consumption at an area of zero.
intercept
Out[20]:
In [21]:
# Apply the fitted line to the held-out areas to get predicted consumption.
KWHpredict = intercept + slope * SFtest
In [22]:
# Compare observed held-out consumption (blue dots) with the values the
# fitted line predicts for the same areas (red dots).
plt.figure(figsize=(15,10))
plt.plot(SFtest,KWHtest,'.b')
plt.plot(SFtest,KWHpredict,'.r')
# Axis labels were missing; reuse the wording of the earlier figures so the
# plot is readable on its own.
plt.xlabel('Total Residential Area [SF]')
plt.ylabel('Residential Electric Consumption [kWH]')
plt.legend(['true data','predicted data'])
plt.title('Prediction on testing data')
Out[22]:
In [46]:
# Column 34 against column 461 of the raw table. What these two columns
# represent is not evident from the notebook -- TODO document them.
plt.scatter(
    data[:, 34],
    data[:, 461],
)
Out[46]:
In [29]:
# Keep only the rows where column 34 equals column 461 (boolean-mask row
# selection; rows where either value is nan never compare equal).
full = data[data[:, 34] == data[:, 461]]
In [31]:
# Same column (827) that totalSF is taken from, restricted to the filtered
# rows in `full`.
SFfull = full[:, 827]
In [32]:
# Display the filtered area column as a sanity check.
SFfull
Out[32]:
In [35]:
# Same column (839) that totalKWH is taken from, restricted to the filtered
# rows in `full`.
KWHfull = full[:, 839]
In [36]:
# Number of rows that survived the filter.
SFfull.shape[0]
Out[36]:
In [38]:
# Positional split of the filtered areas: the last 4718 rows are held out,
# everything before them is used for training.
SFfulltrain, SFfulltest = SFfull[:-4718], SFfull[-4718:]
In [39]:
# Positional split of the filtered consumption values: the last 4718 rows
# are held out, everything before them is used for training.
KWHfulltrain, KWHfulltest = KWHfull[:-4718], KWHfull[-4718:]
In [40]:
# Training rows of the filtered subset: SFfulltrain on x, KWHfulltrain on y,
# red circle markers.
fig, ax = plt.subplots(figsize=(15, 10))
ax.plot(SFfulltrain, KWHfulltrain, 'ro')
ax.set_ylabel('Residential Electric Consumption Full[kWH]')
ax.set_xlabel('Total Residential Area [SF]')
Out[40]:
In [41]:
# Simple linear regression of KWHfulltrain (y) on SFfulltrain (x). Only
# slopef and interceptf are used by the visible cells that follow.
(slopef, interceptf, r_valuef, p_valuef, std_errf) = stats.linregress(
    SFfulltrain,
    KWHfulltrain,
)
In [42]:
# Display the slope fitted on the filtered subset: change in KWHfulltrain
# per unit of SFfulltrain.
slopef
Out[42]:
In [43]:
# Display the intercept fitted on the filtered subset.
interceptf
Out[43]:
In [44]:
# Apply the filtered-subset fit to the held-out filtered areas.
KWHfullpredict = interceptf + slopef * SFfulltest
In [45]:
# Compare observed held-out consumption in the filtered subset (blue dots)
# with the values the filtered-subset fit predicts (red dots).
plt.figure(figsize=(15,10))
plt.plot(SFfulltest,KWHfulltest,'.b')
plt.plot(SFfulltest,KWHfullpredict,'.r')
# Axis labels were missing; reuse the wording of the earlier figures so the
# plot is readable on its own.
plt.xlabel('Total Residential Area [SF]')
plt.ylabel('Residential Electric Consumption [kWH]')
plt.legend(['true data','predicted data'])
plt.title('Prediction on testing data')
Out[45]:
In [ ]: