In [1]:
    
from __future__ import division
import os
import numpy as np 
import pandas as pd
from helpers import data_provider
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import seaborn as sns
    
In [3]:
    
%matplotlib inline
plt.style.use('classic')
plt.rc("figure", facecolor="white")
fig_width_pt = 469.755  # Get this from LaTeX using \showthe\columnwidth
inches_per_pt = 1.0/72.27               # Convert pt to inch
golden_mean = (np.sqrt(5)-1.0)/2.0         # Aesthetic ratio
fig_width = fig_width_pt*inches_per_pt  # width in inches
fig_height = fig_width*golden_mean      # height in inches
fig_size =  [fig_width,fig_height]
params = {'backend': 'ps',
          'axes.labelsize': 10,
          'text.fontsize': 10,
          'legend.fontsize': 10,
          'xtick.labelsize': 8,
          'ytick.labelsize': 8,
          'text.usetex': True,
          'figure.figsize': fig_size}
plt.rc('font', **{'family': 'serif', 'serif': ['Computer Modern']})
plt.rcParams.update(params)
def plot_consumption(figure_name, which_house, data):
    """Plot one house's consumption series and save it as PDF and EPS.

    The figure is drawn on Matplotlib figure 1 (cleared first), written to
    ``figures/electricity_consumption/<figure_name>_<which_house>.pdf`` and
    ``.eps``, and then shown.

    Parameters
    ----------
    figure_name : str
        Prefix of the output file names.
    which_house : int or str
        House identifier; used in the title and in the file names.
    data : array-like or pandas.Series
        Values handed directly to ``plt.plot``.
    """
    out_dir = 'figures/electricity_consumption/'
    # savefig does not create missing directories, so make sure the target
    # exists instead of failing on a fresh checkout.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    plt.figure(1)
    plt.clf()
    plt.axes([0.125,0.2,0.95-0.125,0.95-0.2])
    plt.plot(data, color='b')
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.xlabel('Time')
    plt.ylabel('Consumption (kwh)')
    # A separating space is required after the house number; without it the
    # title reads e.g. "House nr. 1Electricity Consumption".
    plt.title('House nr. '+str(which_house)+' Electricity Consumption')
    base_path = out_dir+figure_name+'_'+str(which_house)
    plt.savefig(base_path+'.pdf')
    plt.savefig(base_path+'.eps')
    plt.show()
def insta_plot(vals):
    """Show a quick 12x6 inch line plot of ``vals`` without saving it.

    ``vals`` is passed straight to ``plt.plot``; the figure gets a fixed
    title, axis labels, a grid and x tick labels rotated by 45 degrees.
    """
    plt.figure(figsize=(12, 6))
    plt.plot(vals, color='b')
    plt.grid(True)
    plt.xticks(rotation=45)
    plt.xlabel('Time')
    plt.ylabel('Consumption (kwh)')
    plt.title('Unregistered Electricity Consumption')
    plt.show()
    
    
In [3]:
    
# Draw and save the consumption figure of every house in the list.
houses = [
    1, 2, 4, 5, 6, 7, 8, 9, 10,
    12, 13, 15, 16, 17, 18, 19, 20,
]
for house_nr in houses:
    data = data_provider.load_aggregate(house_nr)
    plot_consumption(
        figure_name='electricity_consumption',
        which_house=house_nr,
        data=data['Aggregate'],
    )
    
    
In [9]:
    
# Save important summary statistics for each house
# Builds a dict mapping 'house_<nr>' to the describe() output of that house's
# loaded data, then hands the dict to write_to_file.
d = {}
houses=[1,2,4,5,6,7,8,9,10,12,13,15,16,17,18,19,20]
for house_nr in houses:
    data = data_provider.load_aggregate(house_nr)
    key = 'house_'+str(house_nr)
    d[key] = data.describe()
    
# NOTE(review): write_to_file is neither defined nor imported in the visible
# cells of this notebook -- confirm where it comes from, otherwise this line
# raises NameError on a fresh kernel.
write_to_file('summary_statistics.txt',d)
    
In [28]:
    
# Assemble one wide frame with a column per house. All frames are loaded
# first and concatenated in a single call: growing a DataFrame with
# pd.concat inside the loop re-copies the accumulated data on every
# iteration, and seeding it with an empty DataFrame is unnecessary.
frames = [data_provider.load_aggregate(house_nr) for house_nr in houses]
df = pd.concat(frames, ignore_index=True, axis=1)
names = ['House '+str(house) for house in houses]
df.columns = names
    
In [36]:
    
# Box plot of every column of df (pandas plotting), saved as EPS and PDF.
plt.figure(1)
plt.clf()
left, bottom, right, top = 0.125, 0.2, 0.95, 0.95
ax = plt.axes([left, bottom, right - left, top - bottom])
_ = df.boxplot(ax=ax)
plt.xticks(rotation=45)
ax.set_ylabel('Consumption (kwh)')
ax.set_title('Electricity Consumption Summary per House')
for ext in ('eps', 'pdf'):
    plt.savefig('figures/houses_box.' + ext)
    
    
In [40]:
    
# Box plot of every column of df (seaborn), saved as EPS and PDF.
plt.figure(1)
plt.clf()
left, bottom, right, top = 0.125, 0.2, 0.95, 0.95
ax = plt.axes([left, bottom, right - left, top - bottom])
sns.boxplot(data=df, ax=ax)
plt.xticks(rotation=45)
ax.set_ylabel('Consumption (kwh)')
ax.set_title('Electricity Consumption Summary per House')
for ext in ('eps', 'pdf'):
    plt.savefig('figures/test_houses_box.' + ext)
    
    
In [4]:
    
# Read houses.csv, parsing the 'Time' column as dates and using it as index.
data = pd.read_csv(
    'houses.csv',
    index_col='Time',
    parse_dates=['Time'],
)
    
In [5]:
    
# Relabel the columns of `data` as 'House <nr>', in the order of `houses`.
houses = [
    1, 2, 4, 5, 6, 7, 8, 9, 10,
    12, 13, 15, 16, 17, 18, 19, 20,
]
names = ['House ' + str(house) for house in houses]
data.columns = names
    
In [22]:
    
# The columns were relabelled to 'House <nr>' (with a space) in the cell
# above, so attribute access such as data.House_1 no longer resolves; select
# the column by its label and the day by its date string instead.
insta_plot(data.loc['2015-02-08', 'House 1'])
    
    
In [54]:
    
# Columns of `data` are labelled 'House <nr>' (with a space) after the rename
# cell, so data.House_16 raises AttributeError; select by label instead.
insta_plot(data.loc['2014-05-06', 'House 16'])
    
    
In [60]:
    
# Columns of `data` are labelled 'House <nr>' (with a space) after the rename
# cell, so data.House_18 raises AttributeError; select by label instead.
insta_plot(data.loc['2015-03-22', 'House 18'])
    
    
In [4]:
    
# Read houses_clean.csv indexed by its parsed 'Time' column and relabel the
# columns as 'House <nr>', in the order of `houses`.
df = pd.read_csv(
    'houses_clean.csv',
    index_col='Time',
    parse_dates=['Time'],
)
houses = [
    1, 2, 4, 5, 6, 7, 8, 9, 10,
    12, 13, 15, 16, 17, 18, 19, 20,
]
names = ['House ' + str(house) for house in houses]
df.columns = names
    
In [5]:
    
# Box plot of every column of df (seaborn), saved as EPS and PDF.
plt.figure(1)
plt.clf()
left, bottom, right, top = 0.125, 0.2, 0.95, 0.95
ax = plt.axes([left, bottom, right - left, top - bottom])
sns.boxplot(data=df, ax=ax)
plt.xticks(rotation=45)
ax.set_ylabel('Consumption (kWh)')
ax.set_title('Electricity Consumption Summary per House')
for ext in ('eps', 'pdf'):
    plt.savefig('figures/sns_houses_clean_box.' + ext)
    
    
In [6]:
    
# Box plot of every column of df (pandas plotting), saved as EPS and PDF.
plt.figure(1)
plt.clf()
left, bottom, right, top = 0.125, 0.2, 0.95, 0.95
ax = plt.axes([left, bottom, right - left, top - bottom])
_ = df.boxplot(ax=ax)
plt.xticks(rotation=45)
ax.set_ylabel('Consumption (kwh)')
ax.set_title('Electricity Consumption Summary per House')
for ext in ('eps', 'pdf'):
    plt.savefig('figures/houses_clean_box.' + ext)
    
    
In [11]:
    
# Display the describe() summary of df as the cell output.
df.describe()
    
    Out[11]:
In [10]:
    
# NOTE(review): `d` is only filled earlier in this notebook, from
# data_provider.load_aggregate(...), not from `df` (houses_clean.csv) --
# confirm it is the intended payload for the "clean" file. write_to_file is
# also neither defined nor imported in the visible cells.
write_to_file('clean_summary_statistics.txt',d)
    
In [30]:
    
# Read houses_clean.csv (indexed by the parsed 'Time' column) and continue on
# a deep copy so the frame as loaded stays available in data_process.
data_process = pd.read_csv(
    'houses_clean.csv',
    index_col='Time',
    parse_dates=['Time'],
)
data = data_process.copy()
    
In [34]:
    
# Display the 0.95 quantile of each column of `data` as the cell output.
data.quantile(0.95)
    
    Out[34]:
In [35]:
    
# Keep only values at or below their column's 0.95 quantile; everything above
# becomes NaN. Re-running this cell masks the already-masked frame again.
upper_limits = data.quantile(0.95)
data = data.where(data <= upper_limits)
    
In [38]:
    
# Normalised histogram of the non-missing House_1 values with a normal
# density (same mean and standard deviation) drawn on top.
x = data.House_1.dropna(axis=0)
mu = x.mean()
sigma = x.std()
# the histogram of the data
# `density=True` replaces the `normed` keyword, which Matplotlib removed in 3.1.
n, bins, patches = plt.hist(x, 50, density=True, facecolor='green', alpha=0.75)
# add a 'best fit' line
# mlab.normpdf was removed in Matplotlib 3.1 as well, so the normal pdf is
# evaluated directly with NumPy at the bin edges.
y = np.exp(-0.5 * ((bins - mu) / sigma) ** 2) / (sigma * np.sqrt(2 * np.pi))
l = plt.plot(bins, y, 'r--', linewidth=1)
plt.xlabel('Consumption')
plt.ylabel('Frequency')
plt.grid(True)
plt.show()
    
    
In [43]:
    
# Relabel the columns of `data` as 'House <nr>', in the order of `houses`.
houses = [
    1, 2, 4, 5, 6, 7, 8, 9, 10,
    12, 13, 15, 16, 17, 18, 19, 20,
]
names = ['House ' + str(house) for house in houses]
data.columns = names
    
In [85]:
    
# Marker styling passed as `flierprops` to the box plot below: small grey
# dots, unfilled, with no connecting line.
flierprops = {
    'marker': '.',
    'markerfacecolor': 'grey',
    'markersize': 4,
    'fillstyle': 'none',
    'linestyle': 'none',
}
    
In [86]:
    
# Box plot of every column of `data` (seaborn) using the custom flier
# markers, saved as EPS and PDF under figures/95_percent/.
plt.figure(1)
plt.clf()
left, bottom, right, top = 0.125, 0.2, 0.95, 0.95
ax = plt.axes([left, bottom, right - left, top - bottom])
sns.boxplot(data=data, flierprops=flierprops, ax=ax)
plt.xticks(rotation=45)
ax.set_ylabel('Consumption (kwh)')
ax.set_title('Electricity Consumption Summary per House')
for ext in ('eps', 'pdf'):
    plt.savefig('figures/95_percent/sns_houses_clean_box_95.' + ext)
    
    
In [ ]: