In [9]:
from __future__ import division

# from pulp import *
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.mlab as mlab
import pandas as pd
import csv
import datetime
import sqlite3
import math

import seaborn as sns
%matplotlib inline

from pandas import *
from scipy import optimize
from scipy import stats
from datetime import date, timedelta
import timeit
from collections import OrderedDict

from IPython.display import clear_output
from IPython.display import HTML
from matplotlib.ticker import MultipleLocator

Step 1: Get distinct user_ids and event_ids of all Electric Vehicles Except Fleet Vehicles at the ALCOPARK.


In [17]:
from sqlite3 import OperationalError

def query_EVSE_eventids(startdate, enddate):
    # Query each event, for each event run the optimization based on corresponding energy cost to minimize 
    # the sum of energy cost every 15 minutes.
    print('Start to query EV power at "Alcopark"')

    # Get distinct event_ids for 
    conn = sqlite3.connect("sessionData2016.db")
    cursor = conn.cursor()
    query=("SELECT DISTINCT(DRIVER_ID) FROM sessionData2016")
    query_cond1=("where ENERGY>0 and ENERGY!='' AND")
    query_cond2=("STATION LIKE '%ALCO%' AND") # ALCOBASE

    query_cond4=("(strftime(SESSION_START_TIME) between ") # MONTH IN JAN
    cur = cursor.execute(query+" "+query_cond1+" "+query_cond2+" "+query_cond4+"'"+startdate+"'"+' AND '+"'"+enddate+"'"+')')
    rows = cur.fetchall()
    driver_ids=[]
    for row in rows: driver_ids.append(row[0])
    driver_ids=np.array(driver_ids)
    
    driver_dicts={}
    for driver_id in driver_ids:
        try:
            query=("SELECT DISTINCT(EVENT_ID) FROM sessionData2016 WHERE DRIVER_ID=")
            query_cond=str(driver_id)
            query_cond1=(" AND ENERGY>0 and ENERGY!='' AND")
            query_cond2=("STATION LIKE '%ALCO%' AND") # ALCOPARK BASE
            query_cond4=("strftime(SESSION_START_TIME) between ") # MONTH IN JAN
            cur = cursor.execute(query+" "+query_cond+ " "+query_cond1+" "+query_cond2+" "+query_cond4+"'"+startdate+"'"+' AND '+"'"+enddate+"'")

            rows = cur.fetchall()
            event_ids=[]
            for row in rows: event_ids.append(row[0])
            driver_dicts.update({driver_id:event_ids})
        except OperationalError:
            print('Oops! That was no valid Driver ID: ' + str(driver_id))
        
    conn.close()
    return driver_dicts

In [18]:
driver_dicts=query_EVSE_eventids('1/1/16', '8/1/16')


Start to query EV power at "Alcopark Base"
Oops! That was no valid Driver ID: Unregistered

In [58]:
print ('There are '+str(len(driver_dicts))+" drivers at AlcoBASE")


There are 2 drivers at AlcoBASE

In [27]:
# query event_ids for driver_id with the most charging events
ordered_driver = OrderedDict(sorted(driver_dicts.viewitems(), key=lambda x: len(x[1]), reverse=True))

def query_driver_sessions(startdate, enddate, ranks):
    
    for rank in range(ranks):
        Driver_Power = pd.DataFrame([])
        event_ids = ordered_driver.values()[rank]
        
        for event_id in event_ids:  
            conn = sqlite3.connect("minData2016.db")
            cursor = conn.cursor()
            # FORM A QUERY TO GET RELEVANT DATA, MAKE SURE TO FLOOR THE STARTING OR ENDING POINT
            # THIS WUERY WILL RETURN INTERVAL_ENERGY IN KWH, THIS REALLY IS WHAT WE ARE INTERESTED IN FOR OPTIMIZATION 
            # WE WILL NEED THE TIME FIELD TO GET THE CORRESPONDING VALUES OF ENERGY COST
            # THE REST WILL BE USED WHEN POPULATING THE MINDATA DATABASE
            query=("SELECT INTERVAL_START_DATE AS START,INTERVAL_END_DATE AS END,AVERAGE_POWER,ENERGY FROM minData2016 WHERE ENERGY!='' AND EVENT_ID=")
            query_cond=str(event_id)
            cur = cursor.execute(query+query_cond)
            df = pd.DataFrame(cur.fetchall())
            df.columns=['start','end','power','cumulative_energy']
            df['StartDate'] = df['start'].apply(lambda x: datetime.strptime(x,'%m/%d/%y %H:%M'))
            df.index = df['StartDate']
            try:
                df = df.resample('15min').bfill()
                df = df.rename(columns = {'power':event_id})
                Driver_Power = pd.concat([Driver_Power,pd.DataFrame(df[event_id])], axis = 1)
            except ValueError:
                print('Oops! That was no valid Event ID: ' + str(event_id))
    return Driver_Power

In [28]:
Driver_Power=query_driver_sessions('1/1/16', '2/1/16',1)


Oops! That was no valid Event ID: 43424623

In [64]:
# query event_ids for driver_id with the most charging events
ordered_driver = OrderedDict(sorted(driver_dicts.viewitems(), key=lambda x: len(x[1]), reverse=True))

def query_event_sessions(startdate, enddate, ranks):
    
    for rank in range(ranks):
        Driver_Power = pd.DataFrame([])
        event_ids = ordered_driver.values()[rank]
        
        for event_id in event_ids:  
            conn = sqlite3.connect("sessionData2016.db")
            cursor = conn.cursor()
            # FORM A QUERY TO GET RELEVANT DATA, MAKE SURE TO FLOOR THE STARTING OR ENDING POINT
            # THIS WUERY WILL RETURN INTERVAL_ENERGY IN KWH, THIS REALLY IS WHAT WE ARE INTERESTED IN FOR OPTIMIZATION 
            # WE WILL NEED THE TIME FIELD TO GET THE CORRESPONDING VALUES OF ENERGY COST
            # THE REST WILL BE USED WHEN POPULATING THE MINDATA DATABASE
            query=("SELECT EVENT_ID, SESSION_START_TIME AS START,SESSION_END_TIME AS END, SESSION_LENGTH AS LENGTH, ENERGY, STATION FROM sessionData2016 WHERE ENERGY!='' AND EVENT_ID=")
            query_cond=str(event_id)
            cur = cursor.execute(query+query_cond)
            df = pd.DataFrame(cur.fetchall())
            df.columns=['event_id','start','end','length','cumulative_energy','station']
#             df['StartDate'] = df['start'].apply(lambda x: datetime.strptime(x,'%m/%d/%y %H:%M'))
#             df.index = event_id
            try:
#                 df = df.resample('15min').bfill()
#                 df = df.rename(columns = {'power':event_id})
                Driver_Power = pd.concat([Driver_Power,df])
            except ValueError:
                print('Oops! That was no valid Event ID: ' + str(event_id))
        Driver_Power.to_csv('results/FlexDrivers/2016EVData/'+ordered_driver.keys()[rank]+'_EventSessions.csv')
    return Driver_Power

In [65]:
event_sessions = query_event_sessions('1/1/16','12/31/16',len(ordered_driver))

In [116]:
import math
event_sessions = pd.read_csv('results/FlexDrivers/2016EVData/'+ordered_driver.keys()[0]+'_EventSessions.csv')
# event_sessions.index = event_sessions.ix[:,0].apply(lambda x: datetime.strptime(x,'%m/%d/%y %H:%M'))
event_sessions.drop(event_sessions.columns[[0]], axis=1, inplace=True)
event_sessions.start = event_sessions.start.apply(lambda x: datetime.strptime(x,'%m/%d/%y %H:%M'))
event_sessions.end = event_sessions.end.apply(lambda x: datetime.strptime(x,'%m/%d/%y %H:%M'))
event_sessions['duration'] = (event_sessions.end - event_sessions.start).apply(lambda x: timedelta.total_seconds(x) / 3600)
event_sessions['time'] = map(lambda x: x.strftime('%H:%M'),event_sessions.start)
event_sessions['timestamp'] = event_sessions.time.apply(lambda x: int(x.split(':')[0])*3600 + int(x.split(':')[0])*60)

In [149]:
sns.set_context('poster')

for i in range(len(ordered_driver)):
    event_sessions = pd.read_csv('results/FlexDrivers/2016EVData/'+ordered_driver.keys()[i]+'_EventSessions.csv')
    # event_sessions.index = event_sessions.ix[:,0].apply(lambda x: datetime.strptime(x,'%m/%d/%y %H:%M'))
    event_sessions.drop(event_sessions.columns[[0]], axis=1, inplace=True)
    event_sessions.start = event_sessions.start.apply(lambda x: datetime.strptime(x,'%m/%d/%y %H:%M'))
    event_sessions.end = event_sessions.end.apply(lambda x: datetime.strptime(x,'%m/%d/%y %H:%M'))
    event_sessions['duration'] = (event_sessions.end - event_sessions.start).apply(lambda x: timedelta.total_seconds(x) / 3600)
    event_sessions['time'] = map(lambda x: x.strftime('%H:%M'),event_sessions.start)
    event_sessions['hour'] = map(lambda x: int(x.strftime('%H')),event_sessions.start)
    event_sessions['timestamp'] = event_sessions.time.apply(lambda x: int(x.split(':')[0])*3600 + int(x.split(':')[0])*60)
    
    fig, (axes) = plt.subplots(1,1,figsize=(9, 5), facecolor='w', edgecolor='k', sharey=True)
    fig.subplots_adjust(hspace = .1, wspace=.1)

    axes.scatter(event_sessions.timestamp / 3600, event_sessions.duration, alpha=0.5)
    axes.set_ylabel('Charging Session Length, [hour]')
    axes.set_xlabel('Session Start Time')
    majorLocator = MultipleLocator(2)
    minorLocator = MultipleLocator(1)
    axes.set_xlim(0,24)
    axes.xaxis.set_major_locator(majorLocator)
    # axes.set_xticklabels(range(-2,26,2))

    axes.set_title('Drivers Charging Sessions (DriverID:'+ordered_driver.keys()[i]+')')
    fig.savefig('results/FlexDrivers/2016EVData/'+'Session_'+ordered_driver.keys()[i]+'.png', dpi=300, format='png',bbox_inches='tight')
    
    fig, (ax1, ax2) = plt.subplots(1,2, figsize=(18, 6), facecolor='w', edgecolor='k', sharex=False)

    sns.distplot(event_sessions.hour, bins=range(24), ax = ax1)
    sns.distplot(event_sessions.duration, bins=range(100), color = 'green',ax = ax2)
    ax1.set_ylabel('Probability')
    ax1.set_xlabel('Hour of Day')
    ax1.set_title('Distribution of Arrival Time')
    ax1.set_xlim(0,24)

    ax2.set_ylabel('Probability')
    ax2.set_xlabel('Hours')
    ax2.set_title('Distribution of Session Length')
    ax2.set_xlim(0,100)
    fig.savefig('results/FlexDrivers/2016EVData/'+'SessionLength_'+ordered_driver.keys()[i]+'.png', dpi=300, format='png',bbox_inches='tight')



In [143]:
event_sessions


Out[143]:
event_id start end length cumulative_energy station duration time hour timestamp
0 47749385 2016-07-28 15:59:00 2016-07-29 05:24:00 13:25:07 7.991234 ALAMEDA / ALCOBASE - 002 13.416667 15:59 15 54900
1 47741231 2016-07-28 13:30:00 2016-07-28 15:58:00 2:28:59 8.182617 ALAMEDA / ALCOBASE - 002 2.466667 13:30 13 47580
2 47737027 2016-07-28 12:31:00 2016-07-28 13:20:00 0:49:18 4.750794 ALAMEDA / ALCOBASE - 001 0.816667 12:31 12 43920
3 47689539 2016-07-27 17:03:00 2016-07-28 07:31:00 14:27:23 15.337451 ALAMEDA / ALCOBASE - 004 14.466667 17:03 17 62220
4 47684455 2016-07-27 15:07:00 2016-07-28 07:25:00 16:18:15 4.105227 ALAMEDA / ALCOBASE - 002 16.300000 15:07 15 54900
5 47667751 2016-07-27 10:42:00 2016-07-27 15:05:00 4:23:25 13.619422 ALAMEDA / ALCOBASE - 002 4.383333 10:42 10 36600
6 47667941 2016-07-27 10:40:00 2016-07-27 12:06:00 1:25:50 8.212305 ALAMEDA / ALCOBASE - 001 1.433333 10:40 10 36600
7 47656091 2016-07-27 08:35:00 2016-07-27 16:28:00 7:53:06 5.181712 ALAMEDA / ALCOBASE - 004 7.883333 08:35 8 29280
8 47618131 2016-07-26 13:58:00 2016-07-27 07:51:00 17:52:58 17.122309 ALAMEDA / ALCOBASE - 004 17.883333 13:58 13 47580
9 47565437 2016-07-25 17:16:00 2016-07-26 07:26:00 14:10:45 21.157875 ALAMEDA / ALCOBASE - 004 14.166667 17:16 17 62220
10 47544467 2016-07-25 11:07:00 2016-07-25 15:29:00 4:21:52 20.715448 ALAMEDA / ALCOBASE - 001 4.366667 11:07 11 40260
11 47544479 2016-07-25 11:07:00 2016-07-25 15:31:00 4:24:14 9.576632 ALAMEDA / ALCOBASE - 004 4.400000 11:07 11 40260
12 47446835 2016-07-22 17:49:00 2016-07-25 08:39:00 62:50:34 10.675963 ALAMEDA / ALCOBASE - 001 62.833333 17:49 17 62220
13 47425311 2016-07-22 11:16:00 2016-07-22 12:45:00 1:28:36 8.398141 ALAMEDA / ALCOBASE - 004 1.483333 11:16 11 40260
14 47357319 2016-07-21 09:40:00 2016-07-21 12:36:00 2:56:11 12.427893 ALAMEDA / ALCOBASE - 004 2.933333 09:40 9 32940
15 47356617 2016-07-21 09:32:00 2016-07-21 09:39:00 0:06:57 0.601419 ALAMEDA / ALCOBASE - 004 0.116667 09:32 9 32940
16 47316129 2016-07-20 15:16:00 2016-07-20 18:46:00 3:29:37 14.949003 ALAMEDA / ALCOBASE - 004 3.500000 15:16 15 54900
17 47313293 2016-07-20 14:24:00 2016-07-20 14:28:00 0:04:02 0.314199 ALAMEDA / ALCOBASE - 004 0.066667 14:24 14 51240
18 47219691 2016-07-19 07:54:00 2016-07-19 11:30:00 3:36:07 13.671993 ALAMEDA / ALCOBASE - 004 3.600000 07:54 7 25620
19 47190629 2016-07-18 15:34:00 2016-07-19 07:52:00 16:17:49 10.352896 ALAMEDA / ALCOBASE - 004 16.300000 15:34 15 54900
20 47185009 2016-07-18 13:47:00 2016-07-19 05:14:00 15:27:16 14.080965 ALAMEDA / ALCOBASE - 001 15.450000 13:47 13 47580
21 47071623 2016-07-15 15:38:00 2016-07-18 06:58:00 63:19:32 4.360399 ALAMEDA / ALCOBASE - 002 63.333333 15:38 15 54900
22 47060679 2016-07-15 12:34:00 2016-07-15 14:41:00 2:07:15 4.590492 ALAMEDA / ALCOBASE - 004 2.116667 12:34 12 43920
23 47018105 2016-07-14 19:00:00 2016-07-15 08:18:00 13:17:57 5.540244 ALAMEDA / ALCOBASE - 001 13.300000 19:00 19 69540
24 47018087 2016-07-14 19:00:00 2016-07-15 07:04:00 12:04:04 10.090083 ALAMEDA / ALCOBASE - 004 12.066667 19:00 19 69540
25 47013997 2016-07-14 17:14:00 2016-07-15 08:46:00 15:32:25 14.736654 ALAMEDA / ALCOBASE - 002 15.533333 17:14 17 62220
26 47012143 2016-07-14 16:30:00 2016-07-14 19:00:00 2:29:14 5.096399 ALAMEDA / ALCOBASE - 004 2.500000 16:30 16 58560
27 47006757 2016-07-14 14:38:00 2016-07-14 16:30:00 1:52:23 7.725948 ALAMEDA / ALCOBASE - 004 1.866667 14:38 14 51240
28 46990759 2016-07-14 10:32:00 2016-07-14 13:38:00 3:06:07 3.066415 ALAMEDA / ALCOBASE - 001 3.100000 10:32 10 36600
29 46934717 2016-07-13 12:21:00 2016-07-13 13:33:00 1:11:19 5.982751 ALAMEDA / ALCOBASE - 004 1.200000 12:21 12 43920
... ... ... ... ... ... ... ... ... ... ...
348 37304393 2016-01-20 11:50:00 2016-01-20 15:58:00 4:08:35 9.010077 ALAMEDA / ALCOBASE - 003 4.133333 11:50 11 40260
349 37258133 2016-01-19 15:41:00 2016-01-20 12:12:00 20:30:07 11.555791 ALAMEDA / ALCOBASE - 002 20.516667 15:41 15 54900
350 37256095 2016-01-19 14:59:00 2016-01-19 15:31:00 0:31:57 1.285771 ALAMEDA / ALCOBASE - 004 0.533333 14:59 14 51240
351 37256063 2016-01-19 14:59:00 2016-01-19 15:12:00 0:13:23 1.216567 ALAMEDA / ALCOBASE - 001 0.216667 14:59 14 51240
352 37248925 2016-01-19 12:57:00 2016-01-19 14:59:00 2:02:09 6.353370 ALAMEDA / ALCOBASE - 004 2.033333 12:57 12 43920
353 37239541 2016-01-19 10:28:00 2016-01-19 14:59:00 4:30:35 15.576818 ALAMEDA / ALCOBASE - 001 4.516667 10:28 10 36600
354 37093139 2016-01-15 12:29:00 2016-01-15 13:04:00 0:34:19 3.331161 ALAMEDA / ALCOBASE - 001 0.583333 12:29 12 43920
355 37086573 2016-01-15 10:37:00 2016-01-15 13:04:00 2:27:28 13.685199 ALAMEDA / ALCOBASE - 004 2.450000 10:37 10 36600
356 37050971 2016-01-14 18:11:00 2016-01-19 14:43:00 116:31:25 18.263008 ALAMEDA / ALCOBASE - 002 116.533333 18:11 18 65880
357 37035853 2016-01-14 13:02:00 2016-01-15 05:18:00 16:15:43 10.094934 ALAMEDA / ALCOBASE - 004 16.266667 13:02 13 47580
358 37033533 2016-01-14 12:31:00 2016-01-14 14:30:00 1:58:50 7.158699 ALAMEDA / ALCOBASE - 001 1.983333 12:31 12 43920
359 37025273 2016-01-14 10:16:00 2016-01-14 11:08:00 0:51:41 4.615899 ALAMEDA / ALCOBASE - 001 0.866667 10:16 10 36600
360 36988537 2016-01-13 17:06:00 2016-01-14 09:19:00 16:13:29 16.758352 ALAMEDA / ALCOBASE - 001 16.216667 17:06 17 62220
361 36981341 2016-01-13 14:31:00 2016-01-13 17:05:00 2:33:57 6.882283 ALAMEDA / ALCOBASE - 001 2.566667 14:31 14 51240
362 36979277 2016-01-13 13:54:00 2016-01-14 08:25:00 18:31:22 18.513451 ALAMEDA / ALCOBASE - 004 18.516667 13:54 13 47580
363 36976943 2016-01-13 13:16:00 2016-01-13 13:57:00 0:41:12 3.957393 ALAMEDA / ALCOBASE - 002 0.683333 13:16 13 47580
364 36970475 2016-01-13 11:40:00 2016-01-13 11:55:00 0:15:01 1.383565 ALAMEDA / ALCOBASE - 004 0.250000 11:40 11 40260
365 36963369 2016-01-13 09:44:00 2016-01-13 09:54:00 0:09:38 0.889396 ALAMEDA / ALCOBASE - 004 0.166667 09:44 9 32940
366 36931929 2016-01-12 18:53:00 2016-01-13 07:29:00 12:36:26 21.584146 ALAMEDA / ALCOBASE - 004 12.600000 18:53 18 65880
367 36736423 2016-01-08 15:41:00 2016-01-11 11:53:00 68:11:19 6.255723 ALAMEDA / ALCOBASE - 003 68.200000 15:41 15 54900
368 36736347 2016-01-08 15:40:00 2016-01-08 15:41:00 0:00:40 0.000230 ALAMEDA / ALCOBASE - 003 0.016667 15:40 15 54900
369 36735471 2016-01-08 15:21:00 2016-01-11 08:32:00 65:10:37 7.189394 ALAMEDA / ALCOBASE - 004 65.183333 15:21 15 54900
370 36733073 2016-01-08 14:36:00 2016-01-08 17:15:00 2:38:33 9.520745 ALAMEDA / ALCOBASE - 002 2.650000 14:36 14 51240
371 36723517 2016-01-08 12:09:00 2016-01-08 13:36:00 1:27:28 6.737920 ALAMEDA / ALCOBASE - 004 1.450000 12:09 12 43920
372 36668939 2016-01-07 14:09:00 2016-01-08 08:47:00 18:37:29 2.515590 ALAMEDA / ALCOBASE - 001 18.633333 14:09 14 51240
373 36661055 2016-01-07 12:12:00 2016-01-07 12:26:00 0:14:24 1.353738 ALAMEDA / ALCOBASE - 001 0.233333 12:12 12 43920
374 36651679 2016-01-07 09:44:00 2016-01-07 16:26:00 6:41:58 12.039354 ALAMEDA / ALCOBASE - 003 6.700000 09:44 9 32940
375 36641829 2016-01-07 08:13:00 2016-01-07 10:30:00 2:17:08 10.225253 ALAMEDA / ALCOBASE - 004 2.283333 08:13 8 29280
376 36539835 2016-01-05 12:45:00 2016-01-05 14:19:00 1:34:34 9.119121 ALAMEDA / ALCOBASE - 002 1.566667 12:45 12 43920
377 36524871 2016-01-05 09:07:00 2016-01-05 14:04:00 4:57:04 4.934621 ALAMEDA / ALCOBASE - 003 4.950000 09:07 9 32940

378 rows × 10 columns

Load flexibility depends on the charging duration and the overall flexibility as the ratio of the duration that a car is plugged but not charging to the overall session duration.

$$l_{flex}=\frac{d_{session}-d_{charge}}{d_{session}}$$

In [48]:
def query_driver_sessions(startdate, enddate, ranks):
    
    for rank in range(ranks):
        Driver_Power = pd.DataFrame([])
        event_ids = ordered_driver.values()[rank]
        
        for event_id in event_ids:  
            conn = sqlite3.connect("minData2016.db")
            cursor = conn.cursor()
            # FORM A QUERY TO GET RELEVANT DATA, MAKE SURE TO FLOOR THE STARTING OR ENDING POINT
            # THIS WUERY WILL RETURN INTERVAL_ENERGY IN KWH, THIS REALLY IS WHAT WE ARE INTERESTED IN FOR OPTIMIZATION 
            # WE WILL NEED THE TIME FIELD TO GET THE CORRESPONDING VALUES OF ENERGY COST
            # THE REST WILL BE USED WHEN POPULATING THE MINDATA DATABASE
            query=("SELECT INTERVAL_START_DATE AS START,INTERVAL_END_DATE AS END,AVERAGE_POWER,ENERGY FROM minData2016 WHERE ENERGY!='' AND EVENT_ID=")
            query_cond=str(event_id)
            cur = cursor.execute(query+query_cond)
            df = pd.DataFrame(cur.fetchall())
            df.columns=['start','end','power','cumulative_energy']
            df['StartDate'] = df['start'].apply(lambda x: datetime.strptime(x,'%m/%d/%y %H:%M'))
            df.index = df['StartDate']
            try:
                df = df.resample('15min').bfill()
                df = df.rename(columns = {'power':event_id})
                Driver_Power = pd.concat([Driver_Power,pd.DataFrame(df[event_id])], axis = 1)
            except ValueError:
                print('Oops! That was no valid Event ID: ' + str(event_id))
        
        try:
            # Save each driver's all charging sessions into csv
            Driver_Power.to_csv('results/FlexDrivers/2016EVData/'+ordered_driver.keys()[rank]+'.csv')

            # Summarize Each Driver's Charging Behaviours
            Driver_Status = pd.DataFrame()
            Driver_Status['power'] = Driver_Power.power
            Driver_Status=Driver_Status.fillna(-1)
            for i in range(len(Driver_Status.columns)):
                Driver_Status.loc[Driver_Status[Driver_Status.columns[i]] > 0,Driver_Status.columns[i]] = 1

            # Create each driver's charging behavior to csv
            Driver_Summary=Driver_Status
            Driver_Summary['day'] = map(lambda x: x.strftime('%Y-%m-%d'),Driver_Summary.index)
            Driver_Summary['hour'] = map(lambda x: x.strftime('%H'),Driver_Summary.index)
            Driver_Summary['hour'] = Driver_Summary.hour.astype(int)
            Driver_Summary['power'] = Driver_Summary.power.astype(int)

            with open('results/FlexDrivers/2016EVFlexSummary/'+ordered_driver.keys()[rank]+'_summary.csv','wb') as csvfile:
                writer = csv.writer(csvfile, delimiter=',')
                header = ['day', 'StartTime','ChargingLength','SessionLength', 'Flexibility']
                writer.writerow(header)
                for day in sorted(set(Driver_Summary['day'])):
                    df1 = Driver_Summary.loc[(Driver_Summary['power'] >= 0) & (Driver_Summary['day'] == day)]
                    df2 = Driver_Summary.loc[(Driver_Summary['power'] >= 1) & (Driver_Summary['day'] == day)]
                    SessionLength = timedelta.total_seconds(df1.index[-1]-df1.index[0])
                    ChargingLength = timedelta.total_seconds(df2.index[-1]-df2.index[0])
                    if SessionLength != 0:
                        Flexibility = (SessionLength-ChargingLength)/SessionLength
                    else:
                        Flexibility = 0
                    StartTime = df1.index[0].strftime('%H:%M')

                    writer.writerow([day, StartTime,ChargingLength,SessionLength, Flexibility])
            print("EV Driver " + ordered_driver.keys()[rank] + " is Good") 
        except AttributeError:
            print('Oops! That was no valid Event ID: ' + str(event_id))

In [49]:
query_driver_sessions('1/1/16', '12/31/16',2)


Oops! That was no valid Event ID: 43424623
Oops! That was no valid Event ID: 36468591
Oops! That was no valid Event ID: 36524871

Read each driver's charging event data


In [56]:
Driver_Power = pd.read_csv('results/FlexDrivers/2016EVData/'+ordered_driver.keys()[0]+'.csv')
Driver_Power.index = Driver_Power.ix[:,0].apply(lambda x: datetime.strptime(x,'%Y-%m-%d %H:%M:%S'))
Driver_Power.drop(Driver_Power.columns[[0]], axis=1, inplace=True)
Driver_Power.dropna(axis=1, how='all')

# Summarize Each Driver's Charging Behaviours
Driver_Status = pd.DataFrame()

for eventid in Driver_Power.columns:
    Driver_Status['power'] = Driver_Power[eventid]
    Driver_Status=Driver_Status.fillna(-1)
    for i in range(len(Driver_Status.columns)):
        Driver_Status.loc[Driver_Status[Driver_Status.columns[i]] > 0,Driver_Status.columns[i]] = 1

    # Create each driver's charging behavior to csv
    Driver_Summary=Driver_Status
    Driver_Summary['day'] = map(lambda x: x.strftime('%Y-%m-%d'),Driver_Summary.index)
    Driver_Summary['hour'] = map(lambda x: x.strftime('%H'),Driver_Summary.index)
    Driver_Summary['hour'] = Driver_Summary.hour.astype(int)
    Driver_Summary['power'] = Driver_Summary.power.astype(int)

    with open('results/FlexDrivers/2016EVFlexSummary/'+ordered_driver.keys()[0]+'_summary.csv','wb') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        header = ['day', 'StartTime','ChargingLength','SessionLength', 'Flexibility']
        writer.writerow(header)
        for day in sorted(set(Driver_Summary['day'])):
            df1 = Driver_Summary.loc[(Driver_Summary['power'] >= 0) & (Driver_Summary['day'] == day)]
            df2 = Driver_Summary.loc[(Driver_Summary['power'] >= 1) & (Driver_Summary['day'] == day)]
            SessionLength = timedelta.total_seconds(df1.index[-1]-df1.index[0])
            ChargingLength = timedelta.total_seconds(df2.index[-1]-df2.index[0])
            if SessionLength != 0:
                Flexibility = (SessionLength-ChargingLength)/SessionLength
            else:
                Flexibility = 0
            StartTime = df1.index[0].strftime('%H:%M')

            writer.writerow([day, StartTime,ChargingLength,SessionLength, Flexibility])


---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-56-01556fe335fc> in <module>()
     27             df1 = Driver_Summary.loc[(Driver_Summary['power'] >= 0) & (Driver_Summary['day'] == day)]
     28             df2 = Driver_Summary.loc[(Driver_Summary['power'] >= 1) & (Driver_Summary['day'] == day)]
---> 29             SessionLength = timedelta.total_seconds(df1.index[-1]-df1.index[0])
     30             ChargingLength = timedelta.total_seconds(df2.index[-1]-df2.index[0])
     31             if SessionLength != 0:

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas-0.18.0-py2.7-macosx-10.6-intel.egg/pandas/tseries/base.pyc in __getitem__(self, key)
    153         getitem = self._data.__getitem__
    154         if lib.isscalar(key):
--> 155             val = getitem(key)
    156             return self._box_func(val)
    157         else:

IndexError: index -1 is out of bounds for axis 0 with size 0

In [129]:
ordered_driver = OrderedDict(sorted(driver_dicts.viewitems(), key=lambda x: len(x[1]), reverse=True))

# stat summary of start time of all drivers
with open('results/FlexDrivers/'+'StartTime_summary.csv','wb') as csvfile:
    writer = csv.writer(csvfile, delimiter=',')
    header = ['driverid','count', 'mean','std','min', '10%','25%', '50%', '75%','90%', 'max']
    writer.writerow(header)
        
    for rank in range(len(ordered_driver.keys())):
        df = pd.read_csv('results/FlexDrivers/EVFlexSummary/'+ordered_driver.keys()[rank]+'_summary.csv')
        df['StartSeconds'] = map(lambda x: int(x.split(":")[0]) * 3600 + int(x.split(":")[1]) * 60,df['StartTime'])
        writer.writerow([ordered_driver.keys()[rank],len(df['StartSeconds']),df['StartSeconds'].mean(),
                        df['StartSeconds'].std(),df['StartSeconds'].min(),np.percentile(df['StartSeconds'],10),
                        np.percentile(df['StartSeconds'],25),
                        np.percentile(df['StartSeconds'],50),np.percentile(df['StartSeconds'],75),
                        np.percentile(df['StartSeconds'],90),df['StartSeconds'].max()])    

# stat summary of charging length of all drivers
with open('results/FlexDrivers/'+'ChargingLength_summary.csv','wb') as csvfile:
    writer = csv.writer(csvfile, delimiter=',')
    header = ['driverid','count', 'mean','std','min', '10%','25%', '50%', '75%','90%', 'max']
    writer.writerow(header)
        
    for rank in range(len(ordered_driver.keys())):
        df = pd.read_csv('results/FlexDrivers/EVFlexSummary/'+ordered_driver.keys()[rank]+'_summary.csv')
        writer.writerow([ordered_driver.keys()[rank],len(df['ChargingLength']),df['ChargingLength'].mean(),
                        df['ChargingLength'].std(),df['ChargingLength'].min(),np.percentile(df['ChargingLength'],10),
                        np.percentile(df['ChargingLength'],25),
                        np.percentile(df['ChargingLength'],50),np.percentile(df['ChargingLength'],75),
                        np.percentile(df['ChargingLength'],90),df['ChargingLength'].max()]) 

# stat summary of session length of all drivers
with open('results/FlexDrivers/'+'SessionLength_summary.csv','wb') as csvfile:
    writer = csv.writer(csvfile, delimiter=',')
    header = ['driverid','count', 'mean','std','min', '10%','25%', '50%', '75%','90%', 'max']
    writer.writerow(header)
        
    for rank in range(len(ordered_driver.keys())):
        df = pd.read_csv('results/FlexDrivers/EVFlexSummary/'+ordered_driver.keys()[rank]+'_summary.csv')
        writer.writerow([ordered_driver.keys()[rank],len(df['SessionLength']),df['SessionLength'].mean(),
                        df['SessionLength'].std(),df['SessionLength'].min(),np.percentile(df['SessionLength'],10),
                        np.percentile(df['SessionLength'],25),
                        np.percentile(df['SessionLength'],50),np.percentile(df['SessionLength'],75),
                        np.percentile(df['SessionLength'],90),df['SessionLength'].max()])    

# stat summary of flexibility of all drivers
with open('results/FlexDrivers/'+'Flexibility_summary.csv','wb') as csvfile:
    writer = csv.writer(csvfile, delimiter=',')
    header = ['driverid','count', 'mean','std','min', '10%','25%', '50%', '75%','90%', 'max']
    writer.writerow(header)
        
    for rank in range(len(ordered_driver.keys())):
        df = pd.read_csv('results/FlexDrivers/EVFlexSummary/'+ordered_driver.keys()[rank]+'_summary.csv')
        writer.writerow([ordered_driver.keys()[rank],len(df['Flexibility']),df['Flexibility'].mean(),
                        df['Flexibility'].std(),df['Flexibility'].min(),np.percentile(df['Flexibility'],10),
                        np.percentile(df['Flexibility'],25),
                        np.percentile(df['Flexibility'],50),np.percentile(df['Flexibility'],75),
                        np.percentile(df['Flexibility'],90),df['Flexibility'].max()])

Visualization of Non-Fleet EV's Charging Behaviors

Plot the distribution of each driver's average charging flexibility


In [205]:
from matplotlib.ticker import FuncFormatter

# plot all drivers' flexiblity
df = pd.read_csv('results/FlexDrivers/'+'Flexibility_summary.csv')

sns.set_context('poster')
fig, (axes) = plt.subplots(1,1,figsize=(10, 6), facecolor='w', edgecolor='k')
# fig.subplots_adjust(hspace = .1, wspace=.1)
# sns.distplot(df['mean'],color='m')
axes.set_ylabel('Charging Session Events')
axes.set_xlabel('Flexibility')

num_bins = 10
n, bins, patches = axes.hist(df['mean'], num_bins)
# To plot correct percentages in the y axis     
to_percentage = lambda y, pos: str(round( ( y / float(len(df['mean'])) ) * 100.0, 1)) + '%'
axes.yaxis.set_major_formatter(FuncFormatter(to_percentage))

# ax1.set_xlim(0,1.1)
axes.set_ylabel('Probability')
axes.set_xlabel('Average Flexibility')
axes.set_title("Distribution of Drivers' Average Flexibility (Total Drivers: 143)")
fig.savefig('results/FlexDrivers/'+'AllFlexEV.png', dpi=300, format='png',bbox_inches='tight')



In [206]:
sns.set_context('poster')
fig, (axes) = plt.subplots(1,1,figsize=(10, 6), facecolor='w', edgecolor='k')
# fig.subplots_adjust(hspace = .1, wspace=.1)

num_bins = 10
n, bins, patches = axes.hist(df['count'], num_bins)
# To plot correct percentages in the y axis     
to_percentage = lambda y, pos: str(round( ( y / float(len(df['count'])) ) * 100.0, 1)) + '%'
axes.yaxis.set_major_formatter(FuncFormatter(to_percentage))

# ax1.set_xlim(0,1.1)
axes.set_ylabel('Probability')
axes.set_xlabel('Number of Charging Sessions')
axes.set_title('Distribution of Charging Sessions (Total Sessions: '+str(df['count'].sum())+')')

fig.savefig('results/FlexDrivers/'+'EVChargingSessions.png', dpi=300, format='png',bbox_inches='tight')



In [8]:
df = pd.read_csv('results/FlexDrivers/2015EVFlexResults/'+'Flexibility_summary.csv')
df2 = df.ix[0:20,:].sort(['mean'])
top5id=df.ix[0:20,:].sort(['10%'],ascending=0)['driverid'][0:5]

fig, (axes) = plt.subplots(2,2,figsize=(18, 12), facecolor='w', edgecolor='k',sharex=True)
# fig.subplots_adjust(hspace = .1, wspace=.1)

axes[0,0].bar(range(len(df2['mean'])),df2['mean'])
axes[0,0].set_ylabel('Average Charging Flexibility')
axes[0,0].set_xlabel('# of Driver')
axes[0,0].set_title("Top 20 Drivers's Average Flexibility")
# axes[0].legend(df2.driverid)

axes[0,1].bar(range(len(df.ix[0:20,:].sort(['10%'])['10%'])),df.ix[0:20,:].sort(['10%'])['10%'],facecolor='green',alpha=0.5)
axes[0,1].set_ylabel('Charging Flexibility at 10th Percentile')
axes[0,1].set_xlabel('# of Driver')
axes[0,1].set_title("Top 20 Drivers's Charging Flexibility at 10th Percentile")

axes[1,0].bar(range(len(df.ix[0:20,:].sort(['25%'])['25%'])),df.ix[0:20,:].sort(['25%'])['25%'],facecolor='green',alpha=0.5)
axes[1,0].set_ylabel('Charging Flexibility at 25th Percentile')
axes[1,0].set_xlabel('# of Driver')
axes[1,0].set_title("Top 20 Drivers's Charging Flexibility at 25th Percentile")

axes[1,1].bar(range(len(df.ix[0:20,:].sort(['50%'])['50%'])),df.ix[0:20,:].sort(['50%'])['50%'],facecolor='green',alpha=0.5)
axes[1,1].set_ylabel('Charging Flexibility at 50th Percentile')
axes[1,1].set_xlabel('# of Driver')
axes[1,1].set_title("Top 20 Drivers's Charging Flexibility at 50th Percentile")

for i in range(len(top5id)):
    axes[0,1].text(5,0.6-i*0.05,'Driver ID '+str(i+1)+': '+str(top5id.values[i]),fontsize=16,
                bbox={'facecolor':'y', 'alpha':0.5})
fig.savefig('results/FlexDrivers/'+'BestFlexEV.png', dpi=300, format='png',bbox_inches='tight')


/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/IPython/kernel/__main__.py:2: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  from IPython.kernel.zmq import kernelapp as app
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/IPython/kernel/__main__.py:3: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  app.launch_new_instance()
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/IPython/kernel/__main__.py:14: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/IPython/kernel/__main__.py:19: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/IPython/kernel/__main__.py:24: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)

Top 5 Drivers' Charging Flexibility


In [6]:
top5id = [178085, 225969, 307497, 342601, 182981]

for i in range(5):
    df = pd.read_csv('results/FlexDrivers/EVData/'+str(top5id[i])+'.csv')
    Driver_Status=df.fillna(-1)
    Driver_Status.loc[Driver_Status['power'] > 0.1,'power'] = 1
    Driver_Status.loc[(Driver_Status['power'] >= 0) & (Driver_Status['power'] <= 0.1),'power'] = 0

    data = Driver_Status.power.values.reshape(len(Driver_Status)/96,96)
    data = data.astype(float)

    sns.set_context('poster')
    fig, (axes) = plt.subplots(1,1,figsize=(10, 6), facecolor='w', edgecolor='k')
    # fig.subplots_adjust(hspace = .1, wspace=.1)
    cmap = sns.cubehelix_palette(light=1, as_cmap=True)
    axes.pcolor(data,cmap=cmap)

    # sns.heatmap(data.T, annot=False, linewidths=.5, ax=axes[1])
    majorLocator = MultipleLocator(8)
    minorLocator = MultipleLocator(1)
    axes.xaxis.set_major_locator(majorLocator)
    axes.set_xticklabels(range(-2,26,2))
    axes.set_xlim(0,96)
    axes.set_ylim(0,len(data))
    axes.set_ylabel('Charging Session Events')
    axes.set_xlabel('Hour of Day')
    axes.set_title('All Charging Sessions of #' + ' Driver '+str(top5id[i]))
    
    axes.text(2, 140, "Charging", size=16, rotation=0.,color='white',
         ha="left", va="center",
         bbox=dict(boxstyle="round",
                   ec=(1.0, 0.5,0.5),
                   fc=(0.176,0.121,0.238),
                   )
         )
    axes.text(2, 120, "Plugged in not charging", size=16, rotation=0.,color='white',
         ha="left", va="center",
         bbox=dict(boxstyle="round",
                   ec=(1., 0.5, 0.5),
                   fc=(0.738,0.484,0.594),
                   )
         )
    
    fig.savefig('results/FlexDrivers/'+'FlexEV'+str(top5id[i])+'.png', dpi=300, format='png',bbox_inches='tight')



In [ ]: