In [1]:
from __future__ import division
from pulp import *
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.mlab as mlab
import pandas as pd
import csv
import datetime
import sqlite3
import math
import glob
import seaborn as sns
%matplotlib inline
from pandas import *
from scipy import optimize
from scipy import stats
from datetime import date, timedelta, datetime
import timeit
from collections import OrderedDict
from IPython.display import clear_output
from IPython.display import HTML
from matplotlib.ticker import MultipleLocator
In [2]:
from sqlite3 import OperationalError
def query_EVSE_eventids(startdate, enddate):
# Query each event, for each event run the optimization based on corresponding energy cost to minimize
# the sum of energy cost every 15 minutes.
print('Start to query EV power at "Alcopark"')
# Get distinct event_ids for
conn = sqlite3.connect("sessionData2016.db")
cursor = conn.cursor()
query=("SELECT DISTINCT(DRIVER_ID) FROM sessionData2016")
query_cond1=("where ENERGY>0 and ENERGY!='' AND")
query_cond2=("STATION LIKE '%ALCO%' AND") # ALCOBASE
query_cond4=("(strftime(SESSION_START_TIME) between ") # MONTH IN JAN
cur = cursor.execute(query+" "+query_cond1+" "+query_cond2+" "+query_cond4+"'"+startdate+"'"+' AND '+"'"+enddate+"'"+')')
rows = cur.fetchall()
driver_ids=[]
for row in rows: driver_ids.append(row[0])
driver_ids=np.array(driver_ids)
driver_dicts={}
for driver_id in driver_ids:
try:
query=("SELECT DISTINCT(EVENT_ID) FROM sessionData2016 WHERE DRIVER_ID=")
query_cond=str(driver_id)
query_cond1=(" AND ENERGY>0 and ENERGY!='' AND")
query_cond2=("STATION LIKE '%ALCO%' AND") # ALCOPARK BASE
query_cond4=("strftime(SESSION_START_TIME) between ") # MONTH IN JAN
cur = cursor.execute(query+" "+query_cond+ " "+query_cond1+" "+query_cond2+" "+query_cond4+"'"+startdate+"'"+' AND '+"'"+enddate+"'")
rows = cur.fetchall()
event_ids=[]
for row in rows: event_ids.append(row[0])
driver_dicts.update({driver_id:event_ids})
except OperationalError:
print('Oops! That was no valid Driver ID: ' + str(driver_id))
conn.close()
return driver_dicts
In [3]:
driver_dicts=query_EVSE_eventids('2016-01-01', '2016-10-30')
In [4]:
print ('There are '+str(len(driver_dicts))+" drivers at Alcopark")
In [5]:
# query event_ids for driver_id with the most charging events
ordered_driver = OrderedDict(sorted(driver_dicts.viewitems(), key=lambda x: len(x[1]), reverse=True))
def query_event_sessions(startdate, enddate, ranks):
for rank in range(ranks):
event_sessions = pd.DataFrame([])
event_ids = ordered_driver.values()[rank]
for event_id in event_ids:
conn = sqlite3.connect("sessionData2016.db")
cursor = conn.cursor()
query=("SELECT DRIVER_ID,EVENT_ID, SESSION_START_TIME AS START,SESSION_END_TIME AS END, SESSION_LENGTH AS LENGTH, ENERGY, STATION FROM sessionData2016 WHERE ENERGY!='' AND EVENT_ID=")
query_cond=str(event_id)
cur = cursor.execute(query+query_cond)
df = pd.DataFrame(cur.fetchall())
df.columns=['driver_id','event_id','start','end','length','cumulative_energy','station']
try:
event_sessions = pd.concat([event_sessions,df])
except ValueError:
print('Oops! That was no valid Event ID: ' + str(event_id))
event_sessions.to_csv('results/FlexDrivers/NewEVStatus/alcopark/2016/'+ordered_driver.keys()[rank]+'_EventSessions.csv')
return event_sessions
In [6]:
event_sessions = query_event_sessions('2016-01-01', '2016-10-30',len(ordered_driver))
In [7]:
# df_header = pd.read_csv('results/FlexDrivers/NewEVStatus/2015-07-01.csv')
filenames = glob.glob('results/FlexDrivers/NewEVStatus/alcopark/2016/'+'*.csv')
dfs = pd.DataFrame()
for filename in filenames:
df = pd.read_csv(filename)
df.index = df[df.columns[0]]
dfs = pd.concat([dfs,df])
dfs.drop(dfs.columns[[0]], axis=1, inplace=True)
dfs.to_csv('results/FlexDrivers/NewEVStatus/alcopark/' + '2016-EV-Sessions.csv')
In [26]:
dfs2 = dfs[dfs['station'].str.contains('ALCOGARSTATIONS')]
In [29]:
for station in set(dfs2['station']):
station_sessions = []
station_sessions = dfs[(dfs.station==station) & (dfs.start >= '2015-01-01')]
station_sessions.start = station_sessions.start.apply(lambda x: datetime.strptime(x,'%Y-%m-%d %H:%M:%S'))
# station_sessions.end = station_sessions.end.apply(lambda x: datetime.strptime(x,'%Y-%m-%d %H:%M:%S'))
# station_sessions.loc[:,'duration'] = (station_sessions.end - station_sessions.start).apply(lambda x: timedelta.total_seconds(x) / 3600).values
station_sessions.loc[:,'time'] = map(lambda x: x.strftime('%H:%M'),station_sessions.start)
station_sessions.loc[:,'hour'] = map(lambda x: int(x.strftime('%H')),station_sessions.start)
# station_sessions.loc[:,'timestamp'] = station_sessions.time.apply(lambda x: int(x.split(':')[0])*3600 + int(x.split(':')[0])*60).values
# station_sessions.loc[:,'overnight'] = map(lambda x,y: y.timetuple().tm_yday - x.timetuple().tm_yday,station_sessions.start,station_sessions.end)
# station_sessions.loc[:,'fleetstatus'] = map(lambda x: 1 if x >=1 else 0,station_sessions.overnight)
# print 'In Station ' + station + ':'
print len(list(set(station_sessions.driver_id))), len(list(set(station_sessions.event_id))), station_sessions.cumulative_energy.sum(),min(station_sessions.start)
# print station_sessions.groupby(['station'])['event_id'].size()
In [25]:
set(dfs.station)
Out[25]:
In [9]:
sns.set_context('poster')
i=0
for station in set(dfs['station']):
station_sessions = []
station_sessions = dfs[dfs.station==station]
station_sessions.start = station_sessions.start.apply(lambda x: datetime.strptime(x,'%Y-%m-%d %H:%M:%S'))
station_sessions.end = station_sessions.end.apply(lambda x: datetime.strptime(x,'%Y-%m-%d %H:%M:%S'))
station_sessions.loc[:,'duration'] = (station_sessions.end - station_sessions.start).apply(lambda x: timedelta.total_seconds(x) / 3600).values
station_sessions.loc[:,'time'] = map(lambda x: x.strftime('%H:%M'),station_sessions.start)
station_sessions.loc[:,'hour'] = map(lambda x: int(x.strftime('%H')),station_sessions.start)
station_sessions.loc[:,'timestamp'] = station_sessions.time.apply(lambda x: int(x.split(':')[0])*3600 + int(x.split(':')[0])*60).values
station_sessions.loc[:,'overnight'] = map(lambda x,y: y.timetuple().tm_yday - x.timetuple().tm_yday,station_sessions.start,station_sessions.end)
station_sessions.loc[:,'fleetstatus'] = map(lambda x: 1 if x >=1 else 0,station_sessions.overnight)
fig, axes = plt.subplots(2,2,figsize=(18, 12), facecolor='w', edgecolor='k', sharey=False)
fig.subplots_adjust(hspace = .3, wspace=.2)
fig.suptitle('Charging Sessions in Station ID: '+station, fontsize=20, color='r')
# axes[0,0].scatter(station_sessions.timestamp / 3600, station_sessions.duration, alpha=0.5)
axes[0,0].scatter(station_sessions[station_sessions.overnight<1].timestamp / 3600, station_sessions[station_sessions.overnight<1].duration,alpha=0.5)
axes[0,0].scatter(station_sessions[station_sessions.overnight>=1].timestamp / 3600, station_sessions[station_sessions.overnight>=1].duration,facecolor='r',alpha=0.8, s=100)
axes[0,0].set_ylabel('Charging Session Duration, [hour]')
axes[0,0].set_xlabel('Session Start Time')
majorLocator = MultipleLocator(2)
minorLocator = MultipleLocator(1)
axes[0,0].set_xlim(0,24)
axes[0,0].legend(['Daytime Charging Sessions','Overnight Charging Sessions'], loc= 'upper left')
axes[0,0].xaxis.set_major_locator(majorLocator)
# axes.set_xticklabels(range(-2,26,2))
grp = station_sessions.groupby(['station','driver_id'])['event_id'].size()
fleetstatus_grp = pd.DataFrame(station_sessions.groupby(['driver_id'])['fleetstatus'].mean())
grp_df = pd.DataFrame()
grp_df.loc[:,'driver_id'] = grp.index.levels[1]
grp_df.loc[:,'session_num'] = grp.values
grp_df.loc[:,'fleetstatus'] = fleetstatus_grp.fleetstatus.values
grp_sort = grp_df.sort_values(['session_num'],ascending=False)
axes[0,0].set_title('(a) Charging Sessions StartTime and Duration')
sns.barplot(grp_sort.driver_id,grp_sort.session_num,palette="BuGn_d",ax = axes[0,1])
axes[0,1].set_xticklabels(grp_df.index+1)
axes[0,1].set_title('(b) Number of Charging Sessions')
axes[0,1].set_ylabel('Number of Charging Sessions')
axes[0,1].set_xlabel('# of Driver')
sns.distplot(station_sessions.hour, bins=range(24), ax = axes[1,0])
sns.distplot(station_sessions.duration, bins=range(100), color = 'green',ax = axes[1,1])
axes[1,0].set_ylabel('Probability, [%]')
axes[1,0].set_xlabel('Hour of Day')
axes[1,0].set_title('(c) Distribution of Arrival Time')
axes[1,0].set_xlim(0,24)
axes[1,1].set_ylabel('Probability, [%]')
axes[1,1].set_xlabel('Hours')
axes[1,1].set_title('(d) Distribution of Session Duration')
axes[1,1].set_xlim(0,100)
i+=1
grp_sort.to_csv('results/FlexDrivers/NewEVStatus/alcopark/StationSessions/'+'Station_'+str(i)+'.csv')
fig.savefig('results/FlexDrivers/NewEVStatus/alcopark/StationSessions/'+'Station_'+str(i)+'.png', dpi=300, format='png',bbox_inches='tight')
In [143]:
event_sessions
Out[143]:
Load flexibility depends on the charging duration and the overall flexibility as the ratio of the duration that a car is plugged but not charging to the overall session duration.
$$l_{flex}=\frac{d_{session}-d_{charge}}{d_{session}}$$
In [56]:
Driver_Power = pd.read_csv('results/FlexDrivers/2016EVData/'+ordered_driver.keys()[0]+'.csv')
Driver_Power.index = Driver_Power.ix[:,0].apply(lambda x: datetime.strptime(x,'%Y-%m-%d %H:%M:%S'))
Driver_Power.drop(Driver_Power.columns[[0]], axis=1, inplace=True)
Driver_Power.dropna(axis=1, how='all')
# Summarize Each Driver's Charging Behaviours
Driver_Status = pd.DataFrame()
for eventid in Driver_Power.columns:
Driver_Status['power'] = Driver_Power[eventid]
Driver_Status=Driver_Status.fillna(-1)
for i in range(len(Driver_Status.columns)):
Driver_Status.loc[Driver_Status[Driver_Status.columns[i]] > 0,Driver_Status.columns[i]] = 1
# Create each driver's charging behavior to csv
Driver_Summary=Driver_Status
Driver_Summary['day'] = map(lambda x: x.strftime('%Y-%m-%d'),Driver_Summary.index)
Driver_Summary['hour'] = map(lambda x: x.strftime('%H'),Driver_Summary.index)
Driver_Summary['hour'] = Driver_Summary.hour.astype(int)
Driver_Summary['power'] = Driver_Summary.power.astype(int)
with open('results/FlexDrivers/2016EVFlexSummary/'+ordered_driver.keys()[0]+'_summary.csv','wb') as csvfile:
writer = csv.writer(csvfile, delimiter=',')
header = ['day', 'StartTime','ChargingLength','SessionLength', 'Flexibility']
writer.writerow(header)
for day in sorted(set(Driver_Summary['day'])):
df1 = Driver_Summary.loc[(Driver_Summary['power'] >= 0) & (Driver_Summary['day'] == day)]
df2 = Driver_Summary.loc[(Driver_Summary['power'] >= 1) & (Driver_Summary['day'] == day)]
SessionLength = timedelta.total_seconds(df1.index[-1]-df1.index[0])
ChargingLength = timedelta.total_seconds(df2.index[-1]-df2.index[0])
if SessionLength != 0:
Flexibility = (SessionLength-ChargingLength)/SessionLength
else:
Flexibility = 0
StartTime = df1.index[0].strftime('%H:%M')
writer.writerow([day, StartTime,ChargingLength,SessionLength, Flexibility])
In [129]:
ordered_driver = OrderedDict(sorted(driver_dicts.viewitems(), key=lambda x: len(x[1]), reverse=True))
# stat summary of start time of all drivers
with open('results/FlexDrivers/'+'StartTime_summary.csv','wb') as csvfile:
writer = csv.writer(csvfile, delimiter=',')
header = ['driverid','count', 'mean','std','min', '10%','25%', '50%', '75%','90%', 'max']
writer.writerow(header)
for rank in range(len(ordered_driver.keys())):
df = pd.read_csv('results/FlexDrivers/EVFlexSummary/'+ordered_driver.keys()[rank]+'_summary.csv')
df['StartSeconds'] = map(lambda x: int(x.split(":")[0]) * 3600 + int(x.split(":")[1]) * 60,df['StartTime'])
writer.writerow([ordered_driver.keys()[rank],len(df['StartSeconds']),df['StartSeconds'].mean(),
df['StartSeconds'].std(),df['StartSeconds'].min(),np.percentile(df['StartSeconds'],10),
np.percentile(df['StartSeconds'],25),
np.percentile(df['StartSeconds'],50),np.percentile(df['StartSeconds'],75),
np.percentile(df['StartSeconds'],90),df['StartSeconds'].max()])
# stat summary of charging length of all drivers
with open('results/FlexDrivers/'+'ChargingLength_summary.csv','wb') as csvfile:
writer = csv.writer(csvfile, delimiter=',')
header = ['driverid','count', 'mean','std','min', '10%','25%', '50%', '75%','90%', 'max']
writer.writerow(header)
for rank in range(len(ordered_driver.keys())):
df = pd.read_csv('results/FlexDrivers/EVFlexSummary/'+ordered_driver.keys()[rank]+'_summary.csv')
writer.writerow([ordered_driver.keys()[rank],len(df['ChargingLength']),df['ChargingLength'].mean(),
df['ChargingLength'].std(),df['ChargingLength'].min(),np.percentile(df['ChargingLength'],10),
np.percentile(df['ChargingLength'],25),
np.percentile(df['ChargingLength'],50),np.percentile(df['ChargingLength'],75),
np.percentile(df['ChargingLength'],90),df['ChargingLength'].max()])
# stat summary of session length of all drivers
with open('results/FlexDrivers/'+'SessionLength_summary.csv','wb') as csvfile:
writer = csv.writer(csvfile, delimiter=',')
header = ['driverid','count', 'mean','std','min', '10%','25%', '50%', '75%','90%', 'max']
writer.writerow(header)
for rank in range(len(ordered_driver.keys())):
df = pd.read_csv('results/FlexDrivers/EVFlexSummary/'+ordered_driver.keys()[rank]+'_summary.csv')
writer.writerow([ordered_driver.keys()[rank],len(df['SessionLength']),df['SessionLength'].mean(),
df['SessionLength'].std(),df['SessionLength'].min(),np.percentile(df['SessionLength'],10),
np.percentile(df['SessionLength'],25),
np.percentile(df['SessionLength'],50),np.percentile(df['SessionLength'],75),
np.percentile(df['SessionLength'],90),df['SessionLength'].max()])
# stat summary of flexibility of all drivers
with open('results/FlexDrivers/'+'Flexibility_summary.csv','wb') as csvfile:
writer = csv.writer(csvfile, delimiter=',')
header = ['driverid','count', 'mean','std','min', '10%','25%', '50%', '75%','90%', 'max']
writer.writerow(header)
for rank in range(len(ordered_driver.keys())):
df = pd.read_csv('results/FlexDrivers/EVFlexSummary/'+ordered_driver.keys()[rank]+'_summary.csv')
writer.writerow([ordered_driver.keys()[rank],len(df['Flexibility']),df['Flexibility'].mean(),
df['Flexibility'].std(),df['Flexibility'].min(),np.percentile(df['Flexibility'],10),
np.percentile(df['Flexibility'],25),
np.percentile(df['Flexibility'],50),np.percentile(df['Flexibility'],75),
np.percentile(df['Flexibility'],90),df['Flexibility'].max()])
Plot the distribution of each driver's average charging flexibility
In [205]:
from matplotlib.ticker import FuncFormatter
# plot all drivers' flexiblity
df = pd.read_csv('results/FlexDrivers/'+'Flexibility_summary.csv')
sns.set_context('poster')
fig, (axes) = plt.subplots(1,1,figsize=(10, 6), facecolor='w', edgecolor='k')
# fig.subplots_adjust(hspace = .1, wspace=.1)
# sns.distplot(df['mean'],color='m')
axes.set_ylabel('Charging Session Events')
axes.set_xlabel('Flexibility')
num_bins = 10
n, bins, patches = axes.hist(df['mean'], num_bins)
# To plot correct percentages in the y axis
to_percentage = lambda y, pos: str(round( ( y / float(len(df['mean'])) ) * 100.0, 1)) + '%'
axes.yaxis.set_major_formatter(FuncFormatter(to_percentage))
# ax1.set_xlim(0,1.1)
axes.set_ylabel('Probability')
axes.set_xlabel('Average Flexibility')
axes.set_title("Distribution of Drivers' Average Flexibility (Total Drivers: 143)")
fig.savefig('results/FlexDrivers/'+'AllFlexEV.png', dpi=300, format='png',bbox_inches='tight')
In [206]:
sns.set_context('poster')
fig, (axes) = plt.subplots(1,1,figsize=(10, 6), facecolor='w', edgecolor='k')
# fig.subplots_adjust(hspace = .1, wspace=.1)
num_bins = 10
n, bins, patches = axes.hist(df['count'], num_bins)
# To plot correct percentages in the y axis
to_percentage = lambda y, pos: str(round( ( y / float(len(df['count'])) ) * 100.0, 1)) + '%'
axes.yaxis.set_major_formatter(FuncFormatter(to_percentage))
# ax1.set_xlim(0,1.1)
axes.set_ylabel('Probability')
axes.set_xlabel('Number of Charging Sessions')
axes.set_title('Distribution of Charging Sessions (Total Sessions: '+str(df['count'].sum())+')')
fig.savefig('results/FlexDrivers/'+'EVChargingSessions.png', dpi=300, format='png',bbox_inches='tight')
In [277]:
df = pd.read_csv('results/FlexDrivers/'+'Flexibility_summary.csv')
df2 = df.ix[0:20,:].sort(['mean'])
top5id=df.ix[0:20,:].sort(['10%'],ascending=0)['driverid'][0:5]
fig, (axes) = plt.subplots(2,2,figsize=(18, 12), facecolor='w', edgecolor='k',sharex=True)
# fig.subplots_adjust(hspace = .1, wspace=.1)
axes[0,0].bar(range(len(df2['mean'])),df2['mean'])
axes[0,0].set_ylabel('Average Charging Flexibility')
axes[0,0].set_xlabel('Drivers')
axes[0,0].set_title("Top 20 Drivers's Average Flexibility")
# axes[0].legend(df2.driverid)
axes[0,1].bar(range(len(df.ix[0:20,:].sort(['10%'])['10%'])),df.ix[0:20,:].sort(['10%'])['10%'],facecolor='green',alpha=0.5)
axes[0,1].set_ylabel('Charging Flexibility at 10% Percentile')
axes[0,1].set_xlabel('Drivers')
axes[0,1].set_title("Top 20 Drivers's Charging Flexibility at 10% Percentile")
axes[1,0].bar(range(len(df.ix[0:20,:].sort(['25%'])['25%'])),df.ix[0:20,:].sort(['25%'])['25%'],facecolor='green',alpha=0.5)
axes[1,0].set_ylabel('Charging Flexibility at 25% Percentile')
axes[1,0].set_xlabel('Drivers')
axes[1,0].set_title("Top 20 Drivers's Charging Flexibility at 25% Percentile")
axes[1,1].bar(range(len(df.ix[0:20,:].sort(['50%'])['50%'])),df.ix[0:20,:].sort(['50%'])['50%'],facecolor='green',alpha=0.5)
axes[1,1].set_ylabel('Charging Flexibility at 50% Percentile')
axes[1,1].set_xlabel('Drivers')
axes[1,1].set_title("Top 20 Drivers's Charging Flexibility at 50% Percentile")
for i in range(len(top5id)):
axes[0,1].text(5,0.6-i*0.05,'Driver ID '+str(i+1)+': '+str(top5id.values[i]),fontsize=16,
bbox={'facecolor':'y', 'alpha':0.5})
fig.savefig('results/FlexDrivers/'+'BestFlexEV.png', dpi=300, format='png',bbox_inches='tight')
Top 5 Drivers' Charging Flexibility
In [267]:
top5id = [178085, 225969, 307497, 342601, 182981]
for i in range(5):
df = pd.read_csv('results/FlexDrivers/EVData/'+str(top5id[i])+'.csv')
Driver_Status=df.fillna(-1)
Driver_Status.loc[Driver_Status['power'] > 0.1,'power'] = 1
Driver_Status.loc[(Driver_Status['power'] >= 0) & (Driver_Status['power'] <= 0.1),'power'] = 0
data = Driver_Status.power.values.reshape(len(Driver_Status)/96,96)
data = data.astype(float)
sns.set_context('poster')
fig, (axes) = plt.subplots(1,1,figsize=(10, 6), facecolor='w', edgecolor='k')
# fig.subplots_adjust(hspace = .1, wspace=.1)
cmap = sns.cubehelix_palette(light=1, as_cmap=True)
axes.pcolor(data,cmap=cmap)
# sns.heatmap(data.T, annot=False, linewidths=.5, ax=axes[1])
majorLocator = MultipleLocator(8)
minorLocator = MultipleLocator(1)
axes.xaxis.set_major_locator(majorLocator)
axes.set_xticklabels(range(-2,26,2))
axes.set_xlim(0,96)
axes.set_ylim(0,len(data))
axes.set_ylabel('Charging Session Events')
axes.set_xlabel('Hours')
axes.set_title('All Charging Sessions of #' + ' Driver '+str(top5id[i]))
fig.savefig('results/FlexDrivers/'+'FlexEV'+str(top5id[i])+'.png', dpi=300, format='png',bbox_inches='tight')
In [ ]: