In [1]:
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
from datetime import timedelta
from matplotlib.ticker import MaxNLocator
%matplotlib inline
In [2]:
# plotting parameters
figure_size = (6.25984252, 4.1456693) # = 15.9, 10.53 cm
def rgb(rgb_255_tuple):
    return tuple(v/255 for v in rgb_255_tuple)
colour_op = rgb((13, 103, 53))
colour_s = rgb((254, 205, 8))
colour_p = rgb((237, 29, 36))
sns.set_palette('muted')
In [3]:
df = pd.read_csv('CS1 data 1 min_2017-09-19.pdi.gz')
# also make forward filled
time_interval = pd.Timedelta(minutes=1)
df = df.pivot_table(values='Value', columns='TagName', index=(pd.to_datetime(df['Date'].str.cat(df['Time'], sep=' ')) - time_interval))
df.index.name = 'DateTime'
df['Time_30'] = df.index.floor('30min').time
df['EskomDayOfWeek'] = df.index.dayofweek + 1
# replace holidays with Eskom days
h_path = '..\\holidays.csv'
holidays = np.genfromtxt(h_path, dtype=np.dtype('M'), delimiter=',', usecols=0)
holidays = list(holidays)
holidays_numbers = np.genfromtxt(h_path, dtype=int, delimiter=',', usecols=1)
holidays_numbers = list(holidays_numbers)
for h_n, h_d in zip(holidays_numbers, holidays):
    df.loc[df.index.date == h_d, 'EskomDayOfWeek'] = h_n
# drop weekends
df = df[df['EskomDayOfWeek'] <= 5]
df = df.drop('EskomDayOfWeek', axis=1)
df.head()
Out[3]:
In [4]:
df = df.rename(columns = {'KDCE_S03_30INS00_00ILT#501.FA_PV': '30L D1',
'KDCE_S03_30INS00_00ILT#502.FA_PV': '30L D2',
'KDCE_S03_43INS00_00ILT#503.FA_PV': '43L D1a',
'KDCE_S03_43INS00_01ILT#503.FA_PV': '43L D1b',
'KDCE_S03_43INS00_00ILT#504.FA_PV': '43L D2a',
'KDCE_S03_43INS00_01ILT#504.FA_PV': '43L D2b',
'KDCE_S03_30INS00_00IFT#501.FA_PV': 'Flow from 43L',
'KDCE_S01_31INS01_00IFT#505.FA_PV': 'Flow to K1'})
In [5]:
status_col_dict = {}
for c in df.columns:
    if '_Machine.FA_RF' in c:  # pump status
        # e.g. a tag of the form 'KDCE_S03_44PMP01_Machine.FA_RF' becomes '44L P1' (assumed tag format)
        item = c.split('_')[2].split('PMP')
        level = item[0] + 'L'
        pump_num = 'P' + item[1][1]
        status_col_dict[c] = level + ' ' + pump_num
df = df.rename(columns = status_col_dict)
In [6]:
df.head()
Out[6]:
In [7]:
df[['43L D1a','43L D1b']].plot()
# use a
Out[7]:
In [8]:
df[['43L D2a','43L D2b']].plot()
# use a
Out[8]:
In [9]:
df[['43L D1a','43L D2a']].plot()
Out[9]:
In [10]:
df[['43L D1a','43L D2a']].sum(axis=1).plot()
Out[10]:
In [11]:
df['44L Total pumps'] = df[[c for c in df.columns if '44L P' in c]].sum(axis=1, skipna=False)
df['44L Level'] = df[['43L D1a', '43L D2a']].sum(axis=1) # new percentage on 10 ML, but 5 ML
df.drop(['30L D2', '43L D1a', '43L D2a', '43L D1b', '43L D2b', '30L D1', 'Flow to K1'], axis=1, inplace=True)
In [12]:
df.head()
Out[12]:
In [13]:
df['44L Total pumps'].hist()
Out[13]:
In [14]:
flow_per_pump_if_1_run = df[df['44L Total pumps'] == 1]['Flow from 43L'].mean()
flow_per_pump_if_2_run = df[df['44L Total pumps'] == 2]['Flow from 43L'].mean()/2
count_per_pump_if_1_run = df[df['44L Total pumps'] == 1]['Flow from 43L'].count()
count_per_pump_if_2_run = df[df['44L Total pumps'] == 2]['Flow from 43L'].count()
print('Average flow per pump if one running = {:.1f} L/s'.format(flow_per_pump_if_1_run))
print('Average flow per pump if two running = {:.1f} L/s'.format(flow_per_pump_if_2_run))
ans = (flow_per_pump_if_1_run*count_per_pump_if_1_run + flow_per_pump_if_2_run*count_per_pump_if_2_run) / (count_per_pump_if_1_run+count_per_pump_if_2_run)
print('Weighted average flow per pump = {:.1f} L/s'.format(ans))
ans = float('{:.1f}'.format(ans))
print(ans)
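The value above is the count-weighted average of the two per-pump estimates (the symbols here are only notation for the code above):

$$\bar{q} = \frac{\bar{q}_1 n_1 + \bar{q}_2 n_2}{n_1 + n_2}$$

where $\bar{q}_1$ and $\bar{q}_2$ are the mean per-pump flows observed with one or two pumps running, and $n_1$, $n_2$ are the corresponding sample counts. The rounded result is used below as the assumed constant flow rate per pump.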
In [15]:
def calculate_inflows(level_name, pump_flowrates, capacity, feeding_level_name=None):
    # feeding_level_name is the level feeding this level
    number_of_pumps = len(pump_flowrates)
    calc_pump_flows = [np.nan]
    calc_inflows = [np.nan]
    for i in range(1, len(df.index)):
        pump_status = {}
        for j, _ in enumerate(pump_flowrates):
            pump_str = 'P' + str(j+1)
            pump_status[pump_str] = df[level_name + ' ' + pump_str].values[i]
        l_new = df[level_name + ' Level'].values[i]
        l_old = df[level_name + ' Level'].values[i-1]
        pump_flow_from_prev_lev = 0 if feeding_level_name is None else df[feeding_level_name + ' PumpFlow'].values[i]
        if np.isnan([v for k, v in pump_status.items()] + [l_new] + [l_old] + [pump_flow_from_prev_lev]).any():
            # any missing input -> no flow can be calculated for this minute
            ans_outflow_pumps = np.nan
            ans_inflow = np.nan
        else:
            delta_level = l_new - l_old
            if delta_level != 0:
                ans_outflow_pumps = 0
                for ps, pf in zip(pump_status.items(), pump_flowrates):
                    ans_outflow_pumps += ps[1] * pf
                # mass balance: level change (% -> litres) over one minute converted to L/s,
                # plus pump outflow, less the flow pumped in from the feeding level
                ans_inflow = ((l_new - l_old) / 100 * capacity) / 60 + ans_outflow_pumps - pump_flow_from_prev_lev
            else:
                ans_outflow_pumps = np.nan
                ans_inflow = np.nan
        calc_pump_flows.append(ans_outflow_pumps)
        calc_inflows.append(ans_inflow)
    df[level_name + ' PumpFlow'] = calc_pump_flows
    df[level_name + ' Inflow'] = calc_inflows
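calculate_inflows is a per-minute mass balance on the dam; with level $L$ in %, dam capacity $C$ in litres and flows in L/s, the relation implemented above is

$$Q_{\mathrm{in},t} = \frac{(L_t - L_{t-1})\,C/100}{60} + \sum_p s_{p,t}\,q_p - Q_{\mathrm{feed},t}$$

where $s_{p,t}$ is the status (0 or 1) of pump $p$, $q_p$ its assumed flow rate, and $Q_{\mathrm{feed},t}$ the flow pumped in from the feeding level (the symbols are only notation for the code above).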
In [16]:
calculate_inflows('44L', [ans]*4, 5000000)
In [17]:
df.head()
Out[17]:
In [18]:
df2 = df.copy()
date_start = '2017-05-31'
date_end = '2017-06-01'
df2 = df2[(df2.index>=date_start) & (df2.index<date_end)]
inflow_profiles = []
overall_day_completeness_count = 0
overall_day_completeness_max = 0
overall_completeness_count = 0
overall_completeness_max = 0
for l in ['44L']:
    grouped = df2.set_index('Time_30')[l + ' Inflow'].groupby('Time_30')
    grouped_mean = grouped.mean()
    inflow_profiles.append(grouped_mean)
    # completeness: fraction of the 48 half-hour slots x 30 one-minute samples that are present
    print('{} completeness: {:6.2f}% → {:6.2f}%'.format(l, grouped.count().sum()/48/30*100, grouped_mean.count()/48*100))  # grouped.count()/30
    overall_day_completeness_count += grouped.count().sum()
    overall_day_completeness_max += 48*30
    overall_completeness_count += grouped_mean.count()
    overall_completeness_max += 48
print(' ------ -------')
print('All completeness: {:6.2f}% → {:6.2f}%'.format(overall_day_completeness_count/overall_day_completeness_max*100, overall_completeness_count/overall_completeness_max*100))
pd.DataFrame(inflow_profiles).T.to_csv(r'..\..\simulations\Case_study_1\input\CS1_dam_inflow_profiles.csv.gz', compression='gzip')
In [19]:
df_real = pd.read_csv('CS1 data 1s 05-31_2017-09-27_pivot.csv.gz')
df_real = df_real.set_index('DateTime')
df_real.index = pd.to_datetime(df_real.index)
df_real.head()
Out[19]:
In [20]:
df_real = df_real.rename(columns = {'KDCE_S03_30INS00_00ILT#501.FA_PV': '30L D1',
'KDCE_S03_43INS00_00ILT#503.FA_PV': '43L D1a',
'KDCE_S03_43INS00_00ILT#504.FA_PV': '43L D2a',
'KDCE_S03_30INS00_00IFT#501.FA_PV': 'Flow from 43L'})
status_col_dict = {}
for c in df_real.columns:
    if '_Machine.FA_RF' in c:  # pump status
        item = c.split('_')[2].split('PMP')
        level = item[0] + 'L'
        pump_num = 'P' + item[1][1]
        status_col_dict[c] = level + ' ' + pump_num
df_real = df_real.rename(columns = status_col_dict)
df_real['44L Status'] = df_real[[c for c in df_real.columns if '44L P' in c]].sum(axis=1, skipna=False)
df_real = df_real.rename(columns = {'30L D1': '30L Level'})
df_real['44L Level'] = df_real[['43L D1a', '43L D2a']].sum(axis=1) # new percentage on 10 ML
df_real.drop(['43L D1a', '43L D2a', 'Flow from 43L', '30L Level'], axis=1, inplace=True)
df_real.head()
Out[20]:
In [21]:
date_range_start = '2017-05-31'
date_range_end = '2017-06-01'
level_list = ['44L']
level_tag_list = [l + ' Level' for l in level_list]
status_tag_list = [l + ' Status' for l in level_list]
tag_list = level_tag_list + status_tag_list
df_real2 = df_real.loc[date_range_start:date_range_end]
print('Data completeness:', df_real2[tag_list].count(axis=0).sum()/(2 * 60*60*24))
df_real2[level_tag_list].plot()
df_real2[status_tag_list].plot()
Out[21]:
In [22]:
df_real2['44L Status'] = df_real2['44L Status'].interpolate('pchip')
df_real2['44L Level'] = df_real2['44L Level'].interpolate('pchip')
In [23]:
df_real2[['44L Status', '44L Level']].to_csv(r'..\..\simulations\Case_study_1\input\CS1_data_for_validation.csv.gz', compression='gzip')
If the simulation outputs are reasonably close to the real data, the simulation is considered "correct".
Run the simulation in validation_mode, using the initial values for the dam level and following the recorded pump statuses instead of the scheduler.
In [24]:
df_sim = pd.read_csv(r'..\..\simulations\Case_study_1\output\CS1_simulation_data_export_validation.csv.gz')
df_sim['time_clean'] = pd.to_datetime(df_sim['seconds'], unit='s') + timedelta(days=17317)  # shift the epoch-based seconds to 2017-05-31
df_sim.head()
Out[24]:
In [25]:
def rmse(real, sim):
    return np.sqrt(((real - sim) ** 2).mean())

def r2(x, y):
    return stats.pearsonr(x, y)[0] ** 2
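For reference, the two goodness-of-fit measures defined above are the root-mean-square error and the squared Pearson correlation coefficient between the real and simulated series:

$$\mathrm{RMSE} = \sqrt{\frac{1}{N}\sum_{t=1}^{N}\left(y^{\mathrm{real}}_t - y^{\mathrm{sim}}_t\right)^2}, \qquad R^2 = r_{xy}^2$$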
In [26]:
x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2['44L Status'].values
y2 = df_sim['44L Status'].values
print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))
In [27]:
fig, ax1 = plt.subplots(figsize=(figure_size[0], figure_size[1]/1.5), dpi=1200)
x = df_sim['time_clean']
ax1.plot(x, df_real2['44L Status'], marker=None, label='Actual data', zorder=10)
ax1.plot(x, df_sim['44L Status'], marker=None, label='Simulation data', zorder=11, color=sns.color_palette('muted')[2])
ax1.set_xlabel('Time of day')
ax1.set_ylabel('Number of pumps running')
ax1.set_ylim([0, 4])
ax1.yaxis.set_major_locator(MaxNLocator(integer=True))
ax1.axvspan('2017-05-31 00:00:00','2017-05-31 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2017-05-31 06:00:00','2017-05-31 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2017-05-31 07:00:00','2017-05-31 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2017-05-31 10:00:00','2017-05-31 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2017-05-31 18:00:00','2017-05-31 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2017-05-31 20:00:00','2017-05-31 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2017-05-31 22:00:00','2017-05-31 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)
ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))
ax1.grid(which='major', alpha=0.5)
#ax1.grid(which='minor', alpha=0.25)
handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)
fig.tight_layout()
plt.show()
fig.savefig('output/CS1_validation_44l_status.png', bbox_inches='tight')
plt.close('all')
In [28]:
x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2['44L Level'].values
y2 = df_sim['44L Level'].values
print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))
In [29]:
fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)
x = df_sim['time_clean']
ax1.plot(x, df_real2['44L Level'], marker=None, label='Actual data', zorder=10)
ax1.plot(x, df_sim['44L Level'], marker=None, label='Simulation data', zorder=11, color=sns.color_palette('muted')[2])
ax1.set_xlabel('Time of day')
ax1.set_ylabel('Dam level (%)')
ax1.set_ylim([0, 100])
ax1.axvspan('2017-05-31 00:00:00','2017-05-31 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2017-05-31 06:00:00','2017-05-31 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2017-05-31 07:00:00','2017-05-31 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2017-05-31 10:00:00','2017-05-31 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2017-05-31 18:00:00','2017-05-31 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2017-05-31 20:00:00','2017-05-31 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2017-05-31 22:00:00','2017-05-31 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)
ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))
ax1.grid(which='major', alpha=0.5)
#ax1.grid(which='minor', alpha=0.25)
handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)
fig.tight_layout()
plt.show()
fig.savefig('output/CS1_validation_44l_level.png', bbox_inches='tight')
plt.close('all')
In [30]:
def tou_shade(ax, date):
    y_m_d = '{}-{:02}-{:02}'.format(date.year, date.month, date.day)
    ax.axvspan(y_m_d + ' 00:00:00', y_m_d + ' 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
    ax.axvspan(y_m_d + ' 06:00:00', y_m_d + ' 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
    ax.axvspan(y_m_d + ' 07:00:00', y_m_d + ' 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
    ax.axvspan(y_m_d + ' 10:00:00', y_m_d + ' 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
    ax.axvspan(y_m_d + ' 18:00:00', y_m_d + ' 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
    ax.axvspan(y_m_d + ' 20:00:00', y_m_d + ' 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
    ax.axvspan(y_m_d + ' 22:00:00', y_m_d + ' 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

def sim_plot(case_study, factor, mode, level_name, level_limits, x, y, y_lim=100, save_fig=False, chart_type_override=None):
    fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)
    if mode != 'power' and mode != 'power_raw':
        for l in level_limits:
            ax1.plot([x[0], x[len(x)-1]], [l, l], ls=':', color=sns.color_palette('muted')[0], label='Dam level limits')
        lines1 = ax1.plot(x, df[level_name + ' Level'])
        ax2 = ax1.twinx()
        lines2 = ax2.plot(x, df[level_name + ' Status'], color=sns.color_palette('muted')[2])
        tou_shade(ax2, x[0])
        ax1.set_ylabel('Dam level (%)')
        ax1.set_ylim([0, y_lim])
        ax2.set_ylabel('Number of pumps running')
        ax2.yaxis.set_major_locator(MaxNLocator(integer=True))
        ax1.set_zorder(ax2.get_zorder()+1)
        ax1.patch.set_visible(False)
        handles, labels = ax1.get_legend_handles_labels()
        handles2, labels2 = ax2.get_legend_handles_labels()
        del handles[1]
        del labels[1]
        handles += handles2
        labels += labels2
        order = [3, 1, 4, 0, 5, 2]
        ax1.legend([handles[idx] for idx in order], [labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)
        ax1.yaxis.label.set_color(sns.color_palette('muted')[0])
        ax2.yaxis.label.set_color(sns.color_palette('muted')[2])
    else:
        lbl = 'Power (resampled, step plot)' if mode == 'power' else 'Power (raw data, 1 second)'
        ax1.plot(x, y, drawstyle='steps-post', label=lbl, zorder=3)
        tou_shade(ax1, x[0])
        ax1.set_ylabel('Power (kW)')
        ax1.set_ylim([0, y_lim])
        handles, labels = ax1.get_legend_handles_labels()
        order = [1, 0, 2, 3]
        ax1.legend([handles[idx] for idx in order], [labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)
    ax1.set_xlabel('Time of day')
    if mode != 'power' and mode != 'power_raw':
        ax1.yaxis.set_ticks(np.arange(0, 101, 10))
    ax1.xaxis.set_major_locator(mdates.HourLocator())
    ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    fig.autofmt_xdate(rotation=90)
    ax1.set_xlim((min(x), max(x)))
    ax1.grid(which='major', alpha=0.5)
    fig.tight_layout()
    if save_fig:
        if mode == 'power_raw':
            chart_type = 'power_raw'
        elif mode == 'power':
            chart_type = 'power_resampled'
        else:
            chart_type = 'dam_level_and_status'
        filename = 'output/CS{}_{}_{}_{}.png'.format(case_study, level_name, factor, chart_type)
        fig.savefig(filename, bbox_inches='tight')
    return fig
In [31]:
df = pd.read_csv(r'..\..\simulations\Case_study_1\output\CS1_simulation_data_export_1-factor.csv.gz')
df['time_clean'] = pd.to_datetime(df['seconds'], unit='s')
df.head()
Out[31]:
In [32]:
for c in df.columns:
    if 'Level' in c:
        print('{} Min: {}% Max: {}%'.format(c.replace(' Level', ''), df[c].min(), df[c].max()))
In [33]:
fig = sim_plot(1, 1, 'level', '44L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)
In [34]:
fig = sim_plot(1, 1, 'power_raw', '44L', None, df['time_clean'], df['Pump system total power'], y_lim=4000, save_fig=True)
In [35]:
resampled = pd.DataFrame()
resampled['total_30_minute'] = df.set_index('time_clean')['Pump system total power'].resample('30T').mean()
resampled.to_csv('output/CS1_resampled_power_1_factor.csv')
fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)
#fig.patch.set_facecolor('grey')
x = resampled.index
y = resampled['total_30_minute']
ax1.plot(x,y, 'x', label='Power (data points resampled to 30 minutes)', zorder=4)
ax1.plot(x,y, '--', label='Power (resampled, line plot)', zorder=2)
ax1.plot(x,y, drawstyle='steps-post', label='Power (resampled, step plot)', zorder=3)
ax1.set_xlabel('Time of day')
ax1.set_ylabel('Power (kW)')
ax1.set_ylim([0, 4000])
ax1.axvspan('1970-01-01 00:00:00','1970-01-01 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('1970-01-01 06:00:00','1970-01-01 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('1970-01-01 07:00:00','1970-01-01 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('1970-01-01 10:00:00','1970-01-01 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('1970-01-01 18:00:00','1970-01-01 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('1970-01-01 20:00:00','1970-01-01 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('1970-01-01 22:00:00','1970-01-01 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)
ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))
ax1.grid(which='major', alpha=0.5)
#ax1.grid(which='minor', alpha=0.25)
plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode='expand', borderaxespad=0.)
fig.tight_layout()
plt.show()
fig.savefig('output/CS1_1_power_resampled.png', bbox_inches='tight')
plt.close('all')
In [36]:
df = pd.read_csv(r'..\..\simulations\Case_study_1\output\CS1_simulation_data_export_2-factor.csv.gz')
df['time_clean'] = pd.to_datetime(df['seconds'], unit='s')
df.head()
Out[36]:
In [37]:
for c in df.columns:
    if 'Level' in c:
        print('{} Min: {}% Max: {}%'.format(c.replace(' Level', ''), df[c].min(), df[c].max()))
In [38]:
fig = sim_plot(1, 2, 'level', '44L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)
In [39]:
resampled = pd.DataFrame()
resampled['total_30_minute'] = df.set_index('time_clean')['Pump system total power'].resample('30T').mean()
resampled.to_csv('output/CS1_resampled_power_2_factor.csv')
fig = sim_plot(1, 2, 'power', '44L', None, resampled.index, resampled['total_30_minute'], y_lim=4000, save_fig=True)
In [40]:
df = pd.read_csv(r'..\..\simulations\Case_study_1\output\CS1_simulation_data_export_n-factor.csv.gz')
df['time_clean'] = pd.to_datetime(df['seconds'], unit='s')
df.head()
Out[40]:
In [41]:
for c in df.columns:
    if 'Level' in c:
        print('{} Min: {}% Max: {}%'.format(c.replace(' Level', ''), df[c].min(), df[c].max()))
In [42]:
fig = sim_plot(1, 'n', 'level', '44L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)
In [43]:
resampled = pd.DataFrame()
resampled['total_30_minute'] = df.set_index('time_clean')['Pump system total power'].resample('30T').mean()
resampled.to_csv('output/CS1_resampled_power_n_factor.csv')
fig = sim_plot(1, 'n', 'power', '44L', None, resampled.index, resampled['total_30_minute'], y_lim=4000, save_fig=True)
In [44]:
scores = pd.read_csv('scoring_results.csv').set_index('Score')
scores
Out[44]:
In [45]:
sns.set()
score_sim = (scores['1-factor']['Performance'], scores['2-factor']['Performance'], scores['n-factor']['Performance'])
score_feat = (scores['1-factor']['Feature'], scores['2-factor']['Feature'], scores['n-factor']['Feature'])
ind = np.arange(len(score_sim)) # the x locations for the groups
width = 0.5 # the width of the bars: can also be len(x) sequence
plt.figure(figsize=figure_size)
p1 = plt.bar(ind, score_sim, width)
p2 = plt.bar(ind, score_feat, width, bottom=score_sim)
plt.ylabel('Scores')
plt.xlabel('Control system')
#'x-factored simulation')
plt.xticks(ind, ('1-factor', '2-factor', r'$n$-factor'))
#plt.yticks(np.arange(0, 101, 10))
plt.gca().yaxis.set_major_locator(plt.NullLocator())
#plt.legend((p1[0], p2[0]), ('Performance score', 'Feature score'), loc='best')
plt.legend((p1[0], p2[0]), ('Performance score', 'Feature score'), bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode="expand", borderaxespad=0.)
for r1, r2 in zip(p1, p2):
    h1 = r1.get_height()
    h2 = r2.get_height()
    plt.text(r1.get_x() + r1.get_width() / 2., h1 / 2., "%.2f" % h1, ha="center", va="center", color='white')
    plt.text(r2.get_x() + r2.get_width() / 2., h1 + h2 / 2., "%.2f" % h2, ha="center", va="center", color='white')
    plt.text(r2.get_x() + r2.get_width() / 2., h1 + h2 + 1.5, "%.2f" % (h1 + h2),
             ha="center", va="center", color='black', weight='bold')
plt.grid(False)
plt.tight_layout()
plt.savefig('output/CS1_final_scores.png', bbox_inches='tight', dpi=1200)
In [ ]: