notebook.community

Edit and run



In [2]:

    
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams
import datetime as dt
from sklearn import datasets, linear_model
%matplotlib inline



In [3]:

    
# Load 'recs2009_public.csv' as a 2-D float array, skipping its header row.
# NOTE(review): genfromtxt stores NaN for any field it cannot parse as a
# number -- confirm none of the columns used below are affected.
data = np.genfromtxt(
    'recs2009_public.csv',
    delimiter=',',
    skip_header=1
)



In [4]:

    
# Column 827 for every row; the plots label this 'Total Residential Area [SF]'.
totalSF = data[:, 827]



In [5]:

    
# Column 839 for every row; the plots label this
# 'Residential Electric Consumption [kWH]'.
totalKWH = data[:, 839]



In [6]:

    
# Column 785 -- presumably household income, going by the variable name;
# TODO confirm against the survey codebook.
income = data[:, 785]



In [7]:

    
# Column 760 -- presumably an age field, going by the variable name;
# TODO confirm against the survey codebook.
age = data[:, 760]



In [8]:

    
# Column 759 -- presumably the number of household members, going by the
# variable name; TODO confirm against the survey codebook.
members = data[:, 759]



In [9]:

    
# Column 758 -- presumably an education-level code, going by the variable
# name; TODO confirm against the survey codebook.
education = data[:, 758]



In [10]:

    
# Scatter (red circles, no connecting line) of totalKWH against totalSF for
# every row of the data.
plt.figure(figsize=(15,10))
plt.plot(totalSF,totalKWH,'ro')
plt.ylabel('Residential Electric Consumption [kWH]')
# The x axis was left unlabelled here; use the same label as the train and
# test scatter plots so the figure stands on its own.
plt.xlabel('Total Residential Area [SF]')









    Out[10]:





<matplotlib.text.Text at 0x7f16a54bc250>



In [11]:

    
# Training portion: everything except the final 6040 rows, taken in file
# order (no shuffling).
SFtrain, KWHtrain = totalSF[:-6040], totalKWH[:-6040]



In [12]:

    
# Quick look at the training areas. The parenthesised form prints the same
# text on Python 2 and also runs on Python 3, where the bare print statement
# is a SyntaxError.
print(SFtrain)









    



[ 5075.  3136.   528. ...,   644.   834.   952.]



In [13]:

    
# Test portion: the final 6040 rows, i.e. exactly the rows left out of the
# training slice.
SFtest, KWHtest = totalSF[-6040:], totalKWH[-6040:]



In [14]:

    
# Quick look at the test areas. The parenthesised form prints the same text
# on Python 2 and also runs on Python 3, where the bare print statement is a
# SyntaxError.
print(SFtest)









    



[ 3640.  2025.  3068. ...,  4581.  1728.  4920.]



In [15]:

    
# Training split: KWHtrain against SFtrain as red circles on a 15x10 figure.
plt.figure(figsize=(15, 10))
plt.plot(SFtrain, KWHtrain, 'ro')
# Label both axes so the figure can be read without the surrounding cells.
plt.ylabel('Residential Electric Consumption Train Data [kWH]')
plt.xlabel('Total Residential Area [SF]')









    Out[15]:





<matplotlib.text.Text at 0x7f16ad163b50>



In [16]:

    
# Test split: KWHtest against SFtest as red circles on a 15x10 figure.
plt.figure(figsize=(15, 10))
plt.plot(SFtest, KWHtest, 'ro')
# Label both axes so the figure can be read without the surrounding cells.
plt.ylabel('Residential Electric Consumption [kWH]')
plt.xlabel('Total Residential Area [SF]')









    Out[16]:





<matplotlib.text.Text at 0x7f16ace7dc50>



In [17]:

    
from scipy import stats



In [18]:

    
# Ordinary least-squares line KWHtrain ~ slope * SFtrain + intercept.
# linregress also returns the correlation coefficient, the two-sided p-value
# and the standard error of the slope.
(slope, intercept,
 r_value, p_value, std_err) = stats.linregress(SFtrain, KWHtrain)



In [19]:

    
# Display the slope of the line fitted to (SFtrain, KWHtrain).
slope









    Out[19]:





2.0735207102480704



In [20]:

    
# Display the intercept of the line fitted to (SFtrain, KWHtrain).
intercept









    Out[20]:





6817.2690298966618



In [21]:

    
# Evaluate the fitted line at every test-set area.
KWHpredict = intercept + slope * SFtest



In [22]:

    
# Overlay the fitted-line predictions (red dots) on the held-out
# observations (blue dots).
plt.figure(figsize=(15,10))
plt.plot(SFtest,KWHtest,'.b')
plt.plot(SFtest,KWHpredict,'.r')
# Legend entries follow the order of the two plot calls above.
plt.legend(['true data','predicted data'])
# Both axes were unlabelled; reuse the labels from the other scatter plots.
# The title stays last so the cell's displayed value is unchanged.
plt.xlabel('Total Residential Area [SF]')
plt.ylabel('Residential Electric Consumption [kWH]')
plt.title('Prediction on testing data')









    Out[22]:





<matplotlib.text.Text at 0x7f16acd4f450>



In [46]:

    
# Scatter column 34 against column 461 for every row. These are the two
# columns the row filter below compares for equality.
# NOTE(review): the meaning of these columns is not visible here -- confirm
# against the survey codebook.
plt.scatter(data[:, 34], data[:, 461])









    Out[46]:





<matplotlib.collections.PathCollection at 0x7f16a863bc50>






    



/opt/anaconda/envs/np18py27-1.9/lib/python2.7/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors == str('face'):



In [29]:

    
# Keep only the rows whose column 34 equals column 461. A 1-D boolean mask
# selects the same rows, in the same order, as indexing with np.where(mask).
# Rows where either value is NaN are dropped because NaN never compares equal.
rows_match = data[:, 34] == data[:, 461]
full = data[rows_match]



In [31]:

    
# Column 827 (the same column as totalSF), restricted to the filtered rows.
SFfull = full[:, 827]



In [32]:

    
# Echo the filtered area column; the notebook shows numpy's abbreviated repr.
SFfull









    Out[32]:





array([ 5075.,  3136.,   528., ...,  4581.,  1728.,  4920.])



In [35]:

    
# Column 839 (the same column as totalKWH), restricted to the filtered rows.
KWHfull = full[:, 839]



In [36]:

    
# Number of rows that passed the column-34 == column-461 filter; the split
# sizes in the next cells are chosen against this count.
len(SFfull)









    Out[36]:





9472



In [38]:

    
# Split the filtered areas in file order: the last 4718 rows are held out for
# testing and everything before them is used for training.
SFfulltrain, SFfulltest = SFfull[:-4718], SFfull[-4718:]



In [39]:

    
# Same split point as the area column, so each consumption value stays
# paired with its own row's area.
KWHfulltrain, KWHfulltest = KWHfull[:-4718], KWHfull[-4718:]



In [40]:

    
# Training split of the filtered rows: KWHfulltrain against SFfulltrain as
# red circles on a 15x10 figure.
plt.figure(figsize=(15, 10))
plt.plot(SFfulltrain, KWHfulltrain, 'ro')
# Label both axes so the figure can be read without the surrounding cells.
plt.ylabel('Residential Electric Consumption Full[kWH]')
plt.xlabel('Total Residential Area [SF]')









    Out[40]:





<matplotlib.text.Text at 0x7f16acbfb190>



In [41]:

    
# Ordinary least-squares line fitted to the filtered training split. The
# trailing 'f' keeps these results separate from the unfiltered fit.
(slopef, interceptf,
 r_valuef, p_valuef, std_errf) = stats.linregress(SFfulltrain, KWHfulltrain)



In [42]:

    
# Display the slope of the line fitted to (SFfulltrain, KWHfulltrain).
slopef









    Out[42]:





2.0986028355898076



In [43]:

    
# Display the intercept of the line fitted to (SFfulltrain, KWHfulltrain).
interceptf









    Out[43]:





6939.3870321567492



In [44]:

    
# Evaluate the filtered-data line at every held-out area.
KWHfullpredict = interceptf + slopef * SFfulltest



In [45]:

    
# Overlay the filtered-data predictions (red dots) on the held-out
# observations (blue dots).
plt.figure(figsize=(15,10))
plt.plot(SFfulltest,KWHfulltest,'.b')
plt.plot(SFfulltest,KWHfullpredict,'.r')
# Legend entries follow the order of the two plot calls above.
plt.legend(['true data','predicted data'])
# Both axes were unlabelled; reuse the labels from the other scatter plots.
# The title stays last so the cell's displayed value is unchanged.
plt.xlabel('Total Residential Area [SF]')
plt.ylabel('Residential Electric Consumption [kWH]')
plt.title('Prediction on testing data')









    Out[45]:





<matplotlib.text.Text at 0x7f16a8724890>



In [ ]: