In [1]:
from __future__ import division
import os
import numpy as np
import pandas as pd
from helpers import data_provider
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import seaborn as sns
In [3]:
%matplotlib inline
# Use matplotlib's "classic" style sheet, then force a white figure background.
plt.style.use('classic')
plt.rcParams['figure.facecolor'] = 'white'
# Figure geometry: the width matches the LaTeX column width and the height
# follows the golden ratio.

# Column width in points (obtain it from LaTeX with \showthe\columnwidth).
fig_width_pt = 469.755
# Conversion factor from TeX points to inches.
inches_per_pt = 1.0 / 72.27
# Height/width ratio.
golden_mean = 0.5 * (np.sqrt(5) - 1.0)

# Final size in inches, as [width, height].
fig_width = fig_width_pt * inches_per_pt
fig_height = fig_width * golden_mean
fig_size = [fig_width, fig_height]
# rcParams overrides for LaTeX-rendered figures sized by `fig_size`.
params = {
    'backend': 'ps',
    'axes.labelsize': 10,
    # 'font.size' is the base text size; the former 'text.fontsize' key is
    # not a valid rcParam in current matplotlib and makes update() raise.
    'font.size': 10,
    'legend.fontsize': 10,
    'xtick.labelsize': 8,
    'ytick.labelsize': 8,
    'text.usetex': True,
    'figure.figsize': fig_size,
}
# Serif text using Computer Modern.
plt.rc('font', **{'family': 'serif', 'serif': ['Computer Modern']})
plt.rcParams.update(params)
def plot_consumption(figure_name, which_house, data):
    """Plot one house's consumption, save it as PDF and EPS, then show it.

    The figure is drawn on matplotlib figure number 1, which is cleared
    first, so repeated calls reuse the same figure.

    Parameters
    ----------
    figure_name : str
        Prefix of the output file name.
    which_house
        House identifier; its ``str()`` form is used in the title and in
        the output file name.
    data
        Values handed directly to ``plt.plot``.

    The files are written to
    ``figures/electricity_consumption/<figure_name>_<which_house>.pdf`` and
    ``.eps``; the directory is created if it does not exist yet.
    """
    plt.figure(1)
    plt.clf()
    # Explicit axes rectangle leaves room at the bottom for the rotated tick labels.
    plt.axes([0.125, 0.2, 0.95 - 0.125, 0.95 - 0.2])
    plt.plot(data, color='b')
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.xlabel('Time')
    plt.ylabel('Consumption (kwh)')
    # A space separates the house number from the rest of the title.
    plt.title('House nr. ' + str(which_house) + ' Electricity Consumption')

    out_dir = 'figures/electricity_consumption'
    # savefig does not create missing directories.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    base_path = out_dir + '/' + figure_name + '_' + str(which_house)
    for extension in ('.pdf', '.eps'):
        plt.savefig(base_path + extension)
    plt.show()
def insta_plot(vals):
    """Show a quick 12x6 inch line plot of ``vals`` without saving it.

    ``vals`` is handed straight to ``plt.plot``; the title, axis labels and
    grid are fixed.
    """
    plt.figure(figsize=(12, 6))
    plt.plot(vals, color='b')
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.ylabel('Consumption (kwh)')
    plt.xlabel('Time')
    plt.title('Unregistered Electricity Consumption')
    plt.show()
In [3]:
# Plot and save the consumption figure of every house in the list.
houses = [1, 2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 15, 16, 17, 18, 19, 20]
for house_nr in houses:
    data = data_provider.load_aggregate(house_nr)
    # Only the 'Aggregate' attribute of the loaded data is plotted.
    plot_consumption('electricity_consumption', house_nr, data.Aggregate)
In [9]:
# Collect describe() statistics per house, keyed 'house_<nr>', and hand the
# resulting dict to write_to_file.
# NOTE(review): write_to_file is neither defined nor imported in the visible
# cells - confirm where it comes from.
houses = [1, 2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 15, 16, 17, 18, 19, 20]
d = {}
for house_nr in houses:
    data = data_provider.load_aggregate(house_nr)
    d['house_' + str(house_nr)] = data.describe()
write_to_file('summary_statistics.txt', d)
In [28]:
# Combine the aggregate readings of all houses into one wide frame.
# `houses` comes from the preceding cell.
# Every frame is loaded first and concatenated once: calling pd.concat inside
# the loop re-copies the growing frame on each iteration.
house_frames = [data_provider.load_aggregate(house_nr) for house_nr in houses]
df = pd.concat(house_frames, ignore_index=True, axis=1)
# The assignment below requires exactly one column per entry in `houses`.
names = ['House ' + str(house) for house in houses]
df.columns = names
In [36]:
# Pandas box plot of `df`, one box per column, saved as EPS and PDF.
plt.figure(1)
plt.clf()
# Explicit axes rectangle leaves room at the bottom for the rotated tick labels.
ax = plt.axes([0.125, 0.2, 0.95 - 0.125, 0.95 - 0.2])
# Draw on the axes created above. The former `_ = ...` assignment is dropped:
# it overwrote IPython's last-output variable and a mid-cell expression is
# never echoed anyway.
df.boxplot(ax=ax)
plt.xticks(rotation=45)
plt.ylabel('Consumption (kwh)')
plt.title('Electricity Consumption Summary per House')
for target in ('figures/houses_box.eps', 'figures/houses_box.pdf'):
    plt.savefig(target)
In [40]:
# Seaborn box plot of `df`, saved as EPS and PDF.
plt.figure(1)
plt.clf()
# Axes rectangle given as [left, bottom, width, height] in figure fractions.
box_axes = plt.axes([0.125, 0.2, 0.95 - 0.125, 0.95 - 0.2])
sns.boxplot(data=df, ax=box_axes)
plt.xticks(rotation=45)
plt.title('Electricity Consumption Summary per House')
plt.ylabel('Consumption (kwh)')
for target in ('figures/test_houses_box.eps', 'figures/test_houses_box.pdf'):
    plt.savefig(target)
In [4]:
# Load houses.csv with its 'Time' column parsed as dates and used as the index.
data = pd.read_csv(
    'houses.csv',
    index_col='Time',
    parse_dates=['Time'],
)
In [5]:
# Relabel the columns of `data` as 'House <nr>', in the order of `houses`.
houses = [1, 2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 15, 16, 17, 18, 19, 20]
names = ['House {}'.format(house) for house in houses]
data.columns = names
In [22]:
# The columns of `data` are labelled 'House <nr>' (with a space) at this point,
# so attribute access such as data.House_1 does not resolve; select by label.
insta_plot(data['House 1'].loc['2015-02-08'])
In [54]:
# The columns of `data` are labelled 'House <nr>' (with a space) at this point,
# so attribute access such as data.House_16 does not resolve; select by label.
insta_plot(data['House 16'].loc['2014-05-06'])
In [60]:
# The columns of `data` are labelled 'House <nr>' (with a space) at this point,
# so attribute access such as data.House_18 does not resolve; select by label.
insta_plot(data['House 18'].loc['2015-03-22'])
In [4]:
# Load houses_clean.csv indexed by its parsed 'Time' column, then relabel the
# columns as 'House <nr>' in the order of `houses`.
df = pd.read_csv('houses_clean.csv', index_col='Time', parse_dates=['Time'])
houses = [1, 2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 15, 16, 17, 18, 19, 20]
names = ['House %s' % house for house in houses]
df.columns = names
In [5]:
# Seaborn box plot of the frame loaded from houses_clean.csv, saved as EPS and PDF.
plt.figure(1)
plt.clf()
# Axes rectangle given as [left, bottom, width, height] in figure fractions.
clean_axes = plt.axes([0.125, 0.2, 0.95 - 0.125, 0.95 - 0.2])
sns.boxplot(data=df, ax=clean_axes)
plt.xticks(rotation=45)
plt.title('Electricity Consumption Summary per House')
plt.ylabel('Consumption (kWh)')
for target in ('figures/sns_houses_clean_box.eps',
               'figures/sns_houses_clean_box.pdf'):
    plt.savefig(target)
In [6]:
# Pandas box plot of the frame loaded from houses_clean.csv, saved as EPS and PDF.
plt.figure(1)
plt.clf()
# Explicit axes rectangle leaves room at the bottom for the rotated tick labels.
ax = plt.axes([0.125, 0.2, 0.95 - 0.125, 0.95 - 0.2])
# Draw on the axes created above. The former `_ = ...` assignment is dropped:
# it overwrote IPython's last-output variable and a mid-cell expression is
# never echoed anyway.
df.boxplot(ax=ax)
plt.xticks(rotation=45)
plt.ylabel('Consumption (kwh)')
plt.title('Electricity Consumption Summary per House')
for target in ('figures/houses_clean_box.eps', 'figures/houses_clean_box.pdf'):
    plt.savefig(target)
In [11]:
# Display summary statistics for every column of `df` (loaded from houses_clean.csv).
df.describe()
Out[11]:
In [10]:
# Presumably writes the statistics dict `d` to 'clean_summary_statistics.txt'.
# NOTE(review): in the visible cells `d` is only filled from
# data_provider.load_aggregate(...).describe(), not from the cleaned `df` -
# confirm these are the statistics intended for this file.
# NOTE(review): write_to_file is neither defined nor imported in the visible
# cells - confirm where it comes from.
write_to_file('clean_summary_statistics.txt',d)
In [30]:
# load the data set
data_process = pd.read_csv('houses_clean.csv',parse_dates=['Time'], index_col='Time')
data = data_process.copy(deep=True)
In [34]:
# Display the 0.95 quantile of every column of `data`.
data.quantile(0.95)
Out[34]:
In [35]:
# Keep only values at or below each column's 0.95 quantile; larger values are
# masked to NaN.
# The filter is computed from the untouched `data_process` frame rather than
# from `data` itself, so re-running this cell yields the same result instead
# of trimming the already-trimmed data again.
upper_limit = data_process.quantile(0.95)
data = data_process[data_process <= upper_limit]
In [38]:
# Density histogram of House_1 with a normal curve using the sample mean and
# standard deviation.
x = data.House_1.dropna(axis=0)
mu = x.mean()
sigma = x.std()

# `density=True` replaces the `normed` keyword, which matplotlib has removed.
n, bins, patches = plt.hist(x, 50, density=True, facecolor='green', alpha=0.75)

# Normal pdf evaluated at the bin edges, written out with numpy because
# matplotlib.mlab.normpdf has been removed from matplotlib.
y = np.exp(-0.5 * ((bins - mu) / sigma) ** 2) / (sigma * np.sqrt(2 * np.pi))
l = plt.plot(bins, y, 'r--', linewidth=1)

plt.xlabel('Consumption')
plt.ylabel('Frequency')
plt.grid(True)
plt.show()
In [43]:
# Relabel the columns of the filtered `data` frame as 'House <nr>', in the
# order of `houses`.
houses = [1, 2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 15, 16, 17, 18, 19, 20]
names = ['House ' + str(nr) for nr in houses]
data.columns = names
In [85]:
# Marker properties passed as `flierprops` to the box plot below:
# small, unfilled grey dots without a connecting line.
flierprops = {
    'marker': '.',
    'markerfacecolor': 'grey',
    'markersize': 4,
    'fillstyle': 'none',
    'linestyle': 'none',
}
In [86]:
# Seaborn box plot of the quantile-filtered `data`, drawn with the custom
# `flierprops` markers and saved as EPS and PDF.
plt.figure(1)
plt.clf()
# Axes rectangle given as [left, bottom, width, height] in figure fractions.
trimmed_axes = plt.axes([0.125, 0.2, 0.95 - 0.125, 0.95 - 0.2])
sns.boxplot(data=data, flierprops=flierprops, ax=trimmed_axes)
plt.xticks(rotation=45)
plt.title('Electricity Consumption Summary per House')
plt.ylabel('Consumption (kwh)')
for target in ('figures/95_percent/sns_houses_clean_box_95.eps',
               'figures/95_percent/sns_houses_clean_box_95.pdf'):
    plt.savefig(target)
In [ ]: