In [1]:
# import packages for analysis and modeling
# data frame operations
import pandas as pd
# pathname pattern expansion
import glob
# dates treatment
import datetime
import os
# regular expressions
import re
# Plots
import matplotlib.pyplot as plt
In [2]:
# Hourly consumption file from ENTSO-e
file_name = 'Hourly_2013_month10.xls'
In [3]:
# read excel file
wb = pd.read_excel(file_name, skiprows=9, na_values=[u'n.a.'], keep_default_na=False)
In [4]:
# show columns names
wb.columns
Out[4]:
In [5]:
# show first rows of the data frame
wb.head()
Out[5]:
In [6]:
hourchange='3B:00:00'
In [7]:
# delete a column
del wb[hourchange]
In [8]:
# show columns names
wb.columns
Out[8]:
In [9]:
# show first rows of the data frame
wb.head()
Out[9]:
In [10]:
# check types
wb.dtypes
Out[10]:
In [11]:
# create new column as a date object
wb['date'] = pd.to_datetime(wb['Day'])
In [12]:
# show columns names
wb.columns
Out[12]:
In [13]:
# check types
wb.dtypes
Out[13]:
In [14]:
# Add new columns with info about weekday, month and year
wb['weekday'] = wb.date.apply(lambda x: x.weekday())
wb['month'] = wb.date.apply(lambda x: x.month)
wb['year'] = wb.date.apply(lambda x: x.year)
In [15]:
# show columns names
wb.columns
Out[15]:
In [16]:
# show first rows of the data frame
wb.head()
Out[16]:
In [17]:
# change column name for 3A:00 to 3:00
wb = wb.rename(columns=lambda x: re.sub(r'^(\d{1})A:.+', 'H0\\1', x))
In [19]:
# change columns names with regular expressions
wb = wb.rename(columns=lambda x: re.sub(r'^(\d{2}):.+', 'H\\1', x))
In [42]:
# show first rows of the data frame
wb.head()
Out[42]:
In [43]:
# show info about the data frame
wb.describe()
Out[43]:
In [21]:
# Set index to make operations easier
wb = wb.set_index(['Country', 'year', "month", "weekday", "date", "Day"])
In [22]:
# show first rows of the data frame
wb.head()
Out[22]:
In [23]:
# Compute daily consumption
wb['Consumption'] = wb.sum(axis='columns')
In [24]:
# reset index
wb = wb.reset_index()
In [25]:
#Select a country
df_ES = wb[wb.Country == 'ES']
In [26]:
# delete column
del df_ES['Country']
In [27]:
# show first rows of the data frame
df_ES.head()
Out[27]:
In [28]:
# show only the selected columns
df_ES[['date', 'weekday', 'Consumption']]
Out[28]:
In [29]:
# Plot the month consumption
plt.figure()
graphic = df_ES.plot(x='date', y='Consumption', title='Consumption in Spain 10/2013', kind='area')
graphic.set_ylabel('MWh')
plt.show()
In [30]:
# BoxPlots of the hourly values
plt.figure()
df_ES.iloc[:,5:29].boxplot(return_type='dict')
plt.show()
In [31]:
# Compute average hourly consumption per weekday
prot_day = df_ES.groupby(['weekday']).mean()
In [32]:
# delete columns that are not needed
prot_day = prot_day.drop(['year', 'month', 'Consumption'], axis='columns')
# show the data frame
prot_day
Out[32]:
In [33]:
# transpose the data frame
prot_dayT=prot_day.transpose()
In [41]:
# Change columns names
prot_dayT.columns = ['Monday', 'Tuesday', 'Wednesday', 'Thrusday', 'Friday', 'Saturday', 'Sunday']
# show the data frame
prot_dayT
Out[41]:
In [36]:
# Plot prototype weekdays
prot_dayT.plot(title= 'Average Consumption per Weekday for Spain 10/2013')
plt.show()