In [1]:

    
import IPython.core.display as di
# This line will hide code by default when the notebook is exported as HTML
di.display_html('<script>jQuery(function() {if (jQuery("body.notebook_app").length == 0) { jQuery(".input_area").toggle(); jQuery(".prompt").toggle();}});</script>', raw=True)
# This line will add a button to toggle visibility of code blocks, for use with the HTML export version
di.display_html('''<button onclick="jQuery('.input_area').toggle(); jQuery('.prompt').toggle();">Show/Hide code</button>''', raw=True)



In [2]:

    
#Allow the created content to be interactivelly ploted inline
%matplotlib inline
#Establish width and height for all plots in the report
#pylab.rcParams['figure.figsize'] = (18, 6) #width, height



In [3]:

    
#Import needed libraries
import os
from os.path import join, getsize
import pandas as pd
from cycler import cycler
import matplotlib.pyplot as plt
from IPython.display import display
import numpy as np
import matplotlib as mpl
inline_rc = dict(mpl.rcParams)
#the next cell enables plotting tables without borders



In [4]:

    
%%html
<style>
table,td,tr,th {border:none!important}
</style>

Summary report of the CO2MPAS WLTP to NEDC CO$_2$ emission simulation model

Visit the CO2MPAS home page

Complete path to the CO2MPAS summary file used in this report:



In [5]:

    
#Specify the output folder and file containing the CO2MPAS summary output file.
folder = r'D:\co2mpas-version-trials\20160406\A8_pasqua'
file = '20160406_132859-summary.xlsx'
#new_file = '20160316_160127-summary.xlsx'
infile = join(folder, file)
#new_infile = join(folder, new_file)
df=pd.read_excel(infile, 'summary', header=[0, 1, 2], index_col=[0], skiprows=[3])
#new_df=pd.read_excel(new_infile, 'summary', header=[0, 1, 2], index_col=[0], skiprows=[3])
print(infile)
#print(new_infile)









    



D:\co2mpas-version-trials\20160406\A8_pasqua\20160406_132859-summary.xlsx



In [6]:

    
#Gather and name the basic variables used in the report according to their name in the CO2MPAS output file
NEDC = df['nedc']['prediction']['co2_emission_value']
NEDCt = df['nedc']['target']['co2_emission_value']
dNEDC = NEDC-NEDCt
UDC = df['nedc']['prediction']['co2_emission_UDC']
UDCt = df['nedc']['target']['co2_emission_UDC']
dUDC = UDC - UDCt
EUDC = df['nedc']['prediction']['co2_emission_EUDC']
EUDCt = df['nedc']['target']['co2_emission_EUDC']
dEUDC = EUDC - EUDCt
#Obtain the case number and vehicle model from the input file
df['vehicle'] = df.index
cases = df['vehicle'].str.split('_').str[-1].astype('int')
model = df['vehicle'].str.split('_').str[0]
#Create a dataframe with this data
valuesDF = pd.DataFrame({'NEDC': NEDC,'NEDCt':NEDCt, 'dNEDC':dNEDC,'UDC': UDC,'UDCt':UDCt, 'dUDC':dUDC,'EUDC': EUDC,'EUDCt':EUDCt, 'dEUDC':dEUDC,'Case':cases,'Model':model})   
if (valuesDF.NEDC.count()-valuesDF.NEDCt.count()) != 0:
    print('NOTE:',valuesDF.NEDC.count(),'NEDC 'u'CO\u2082 values provided and',valuesDF.NEDCt.count(),'target NEDC 'u'CO\u2082 values provided')
    print('      Reporting will continue only with cases containing all the needed input')
valuesDF = valuesDF.dropna()



In [7]:

    
#Gather and name the basic variables used in the report according to their name in the CO2MPAS output file
# nNEDC = new_df['nedc']['prediction']['co2_emission_value']
# nNEDCt = new_df['nedc']['target']['co2_emission_value']
# ndNEDC = nNEDC-nNEDCt
# nUDC = new_df['nedc']['prediction']['co2_emission_UDC']
# nUDCt = new_df['nedc']['target']['co2_emission_UDC']
# ndUDC = nUDC - nUDCt
# nEUDC = new_df['nedc']['prediction']['co2_emission_EUDC']
# nEUDCt = new_df['nedc']['target']['co2_emission_EUDC']
# ndEUDC = nEUDC - nEUDCt
# #Obtain the case number and vehicle model from the input file
# new_df['vehicle'] = new_df.index
# ncases = new_df['vehicle'].str.split('_').str[-1].astype('int')
# nmodel = new_df['vehicle'].str.split('_').str[0]
# #Create a dataframe with this data
# new_valuesDF = pd.DataFrame({'NEDC': nNEDC,'NEDCt':nNEDCt, 'dNEDC':ndNEDC,'UDC': nUDC,'UDCt':nUDCt, 'dUDC':ndUDC,'EUDC': nEUDC,'EUDCt':nEUDCt, 'dEUDC':ndEUDC,'Case':ncases,'Model':nmodel})   
# if (new_valuesDF.NEDC.count()-new_valuesDF.NEDCt.count()) != 0:
#     print('NOTE:',new_valuesDF.NEDC.count(),'NEDC 'u'CO\u2082 values provided and',new_valuesDF.NEDCt.count(),'target NEDC 'u'CO\u2082 values provided')
#     print('      Reporting will continue only with cases containing all the needed input')
# new_valuesDF = new_valuesDF.dropna()

Section 1. Performance of the model. All vehicles and test cases.

Error statistics for CO$_2$ emission per driving cycle

Error statistics for NEDC, UDC, and EUDC CO$_2$ emission



In [8]:

    
#Create a dataframe with the NECD, UDC, EUDC error statistics
errorsDF = pd.DataFrame(index=['Averages','StdError','Median','Mode','StdDev','Variance','Kurtosis','Skweness','Range','Minimum','Maximum','Sum','Count','Confidence level (95%)'], columns=['NEDC [gCO$_2$ km$^{-1}$]','UDC [gCO$_2$ km$^{-1}$]', 'EUDC [gCO$_2$ km$^{-1}$]'])
errorsDF.loc['Averages'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.mean(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.mean(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.mean(),2)})
errorsDF.loc['StdError'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.sem(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.sem(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.sem(),2)})
errorsDF.loc['Median'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.median(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.median(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.median(),2)})
errorsDF.loc['Mode'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':valuesDF.dNEDC.mode().iloc[0], 'UDC [gCO$_2$ km$^{-1}$]':valuesDF.dUDC.mode().iloc[0], 'EUDC [gCO$_2$ km$^{-1}$]':valuesDF.dEUDC.mode().iloc[0]})
errorsDF.loc['StdDev'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.std(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.std(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.std(),2)})
errorsDF.loc['Variance'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.var(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.var(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.var(),2)})
errorsDF.loc['Kurtosis'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.kurtosis(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.kurtosis(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.kurtosis(),2)})
errorsDF.loc['Skweness'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.skew(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.skew(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.skew(),2)})
errorsDF.loc['Range'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round((valuesDF.dNEDC.max()-valuesDF.dNEDC.min()),2), 'UDC [gCO$_2$ km$^{-1}$]':round((valuesDF.dUDC.max()-valuesDF.dUDC.min()),2), 'EUDC [gCO$_2$ km$^{-1}$]':round((valuesDF.dEUDC.max()-valuesDF.dEUDC.min()),2)})
errorsDF.loc['Minimum'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.min(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.min(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.min(),2)})
errorsDF.loc['Maximum'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.max(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.max(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.max(),2)})
errorsDF.loc['Sum'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.sum(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.sum(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.sum(),2)})
errorsDF.loc['Count'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dNEDC.count(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dUDC.count(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(valuesDF.dEUDC.count(),2)})
errorsDF.loc['Confidence level (95%)'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':2*round(valuesDF.dNEDC.sem(),2), 'UDC [gCO$_2$ km$^{-1}$]':2*round(valuesDF.dUDC.sem(),2), 'EUDC [gCO$_2$ km$^{-1}$]':2*round(valuesDF.dEUDC.sem(),2)})
errorsDF









    Out[8]:






  
    
      
      NEDC [gCO$_2$ km$^{-1}$]
      UDC [gCO$_2$ km$^{-1}$]
      EUDC [gCO$_2$ km$^{-1}$]
    
  
  
    
      Averages
      -3.61
      -8.3
      -0.89
    
    
      StdError
      0.18
      0.37
      0.12
    
    
      Median
      -3.53
      -7.46
      -1.13
    
    
      Mode
      -3.38939
      -7.36109
      -1.09069
    
    
      StdDev
      2.33
      4.72
      1.48
    
    
      Variance
      5.43
      22.3
      2.21
    
    
      Kurtosis
      -0.25
      0.23
      0.12
    
    
      Skweness
      -0.39
      -0.39
      0.39
    
    
      Range
      10.58
      24.23
      9.3
    
    
      Minimum
      -8.91
      -19.63
      -5.05
    
    
      Maximum
      1.67
      4.6
      4.25
    
    
      Sum
      -588.49
      -1352.3
      -144.78
    
    
      Count
      163
      163
      163
    
    
      Confidence level (95%)
      0.36
      0.74
      0.24



In [9]:

    
#Create a dataframe with the NECD, UDC, EUDC error statistics
# nerrorsDF = pd.DataFrame(index=['Averages','StdError','Median','Mode','StdDev','Variance','Kurtosis','Skweness','Range','Minimum','Maximum','Sum','Count','Confidence level (95%)'], columns=['NEDC [gCO$_2$ km$^{-1}$]','UDC [gCO$_2$ km$^{-1}$]', 'EUDC [gCO$_2$ km$^{-1}$]'])
# nerrorsDF.loc['Averages'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dNEDC.mean(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dUDC.mean(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dEUDC.mean(),2)})
# nerrorsDF.loc['StdError'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dNEDC.sem(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dUDC.sem(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dEUDC.sem(),2)})
# nerrorsDF.loc['Median'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dNEDC.median(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dUDC.median(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dEUDC.median(),2)})
# nerrorsDF.loc['Mode'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':new_valuesDF.dNEDC.mode().iloc[0], 'UDC [gCO$_2$ km$^{-1}$]':new_valuesDF.dUDC.mode().iloc[0], 'EUDC [gCO$_2$ km$^{-1}$]':new_valuesDF.dEUDC.mode().iloc[0]})
# nerrorsDF.loc['StdDev'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dNEDC.std(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dUDC.std(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dEUDC.std(),2)})
# nerrorsDF.loc['Variance'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dNEDC.var(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dUDC.var(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dEUDC.var(),2)})
# nerrorsDF.loc['Kurtosis'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dNEDC.kurtosis(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dUDC.kurtosis(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dEUDC.kurtosis(),2)})
# nerrorsDF.loc['Skweness'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dNEDC.skew(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dUDC.skew(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dEUDC.skew(),2)})
# nerrorsDF.loc['Range'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round((new_valuesDF.dNEDC.max()-new_valuesDF.dNEDC.min()),2), 'UDC [gCO$_2$ km$^{-1}$]':round((new_valuesDF.dUDC.max()-new_valuesDF.dUDC.min()),2), 'EUDC [gCO$_2$ km$^{-1}$]':round((new_valuesDF.dEUDC.max()-new_valuesDF.dEUDC.min()),2)})
# nerrorsDF.loc['Minimum'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dNEDC.min(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dUDC.min(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dEUDC.min(),2)})
# nerrorsDF.loc['Maximum'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dNEDC.max(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dUDC.max(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dEUDC.max(),2)})
# nerrorsDF.loc['Sum'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dNEDC.sum(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dUDC.sum(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dEUDC.sum(),2)})
# nerrorsDF.loc['Count'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dNEDC.count(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dUDC.count(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(new_valuesDF.dEUDC.count(),2)})
# nerrorsDF.loc['Confidence level (95%)'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':2*round(new_valuesDF.dNEDC.sem(),2), 'UDC [gCO$_2$ km$^{-1}$]':2*round(new_valuesDF.dUDC.sem(),2), 'EUDC [gCO$_2$ km$^{-1}$]':2*round(new_valuesDF.dEUDC.sem(),2)})
# nerrorsDF

Error statistics for CO$_2$ emission per driving cycle applying a filtering of NEDC absolute error < 25 gCO$_2$ km$^{-1}$

Removed cases and associated error of CO$_2$ emission for NEDC



In [10]:

    
#list of filtered cases
fcases = valuesDF[abs(valuesDF.dNEDC) > 25]
fcases2 = pd.DataFrame({'Absolute error gCO$_2$ km$^{-1}$':fcases.dNEDC})
print((len(fcases.dNEDC)),'cases with an absolute NEDC 'u'CO\u2082 emission error above 25 g'u'CO\u2082/km')
fcases2.columns.name='# case'
if (len(fcases.dNEDC)) != 0:
    fcases









    



0 cases with an absolute NEDC CO₂ emission error above 25 gCO₂/km



In [11]:

    
#list of filtered cases
# nfcases = new_valuesDF[abs(new_valuesDF.dNEDC) > 25]
# nfcases2 = pd.DataFrame({'Absolute error gCO$_2$ km$^{-1}$':nfcases.dNEDC})
# print((len(nfcases.dNEDC)),'cases with an absolute NEDC 'u'CO\u2082 emission error above 25 g'u'CO\u2082/km')
# nfcases2.columns.name='# case'
# if (len(nfcases.dNEDC)) != 0:
#     nfcases

Error statistics for NEDC, UDC, and EUDC CO$_2$ emission (filtered)



In [12]:

    
#Create a dataframe with the FILETERED NECD, UDC, EUDC error statistics
#removing the cases where the absolute error for NEDC is larger than 25gCO2/km
fvaluesDF = valuesDF[abs(valuesDF.dNEDC) < 25]
ferrorsDF = pd.DataFrame(index=['Averages','StdError','Median','Mode','StdDev','Variance','Kurtosis','Skweness','Range','Minimum','Maximum','Sum','Count','Confidence level (95%)'], columns=['NEDC [gCO$_2$ km$^{-1}$]','UDC [gCO$_2$ km$^{-1}$]', 'EUDC [gCO$_2$ km$^{-1}$]'])
ferrorsDF.loc['Averages'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dNEDC.mean(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dUDC.mean(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dEUDC.mean(),2)})
ferrorsDF.loc['StdError'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dNEDC.sem(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dUDC.sem(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dEUDC.sem(),2)})
ferrorsDF.loc['Median'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dNEDC.median(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dUDC.median(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dEUDC.median(),2)})
ferrorsDF.loc['Mode'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dNEDC.mode().iloc[0],2), 'UDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dUDC.mode().iloc[0],2), 'EUDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dEUDC.mode().iloc[0],2)})
ferrorsDF.loc['StdDev'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dNEDC.std(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dUDC.std(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dEUDC.std(),2)})
ferrorsDF.loc['Variance'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dNEDC.var(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dUDC.var(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dEUDC.var(),2)})
ferrorsDF.loc['Kurtosis'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dNEDC.kurtosis(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dUDC.kurtosis(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dEUDC.kurtosis(),2)})
ferrorsDF.loc['Skweness'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dNEDC.skew(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dUDC.skew(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dEUDC.skew(),2)})
ferrorsDF.loc['Range'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round((fvaluesDF.dNEDC.max()-fvaluesDF.dNEDC.min()),2), 'UDC [gCO$_2$ km$^{-1}$]':round((fvaluesDF.dUDC.max()-fvaluesDF.dUDC.min()),2), 'EUDC [gCO$_2$ km$^{-1}$]':round((valuesDF.dEUDC.max()-valuesDF.dEUDC.min()),2)})
ferrorsDF.loc['Minimum'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dNEDC.min(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dUDC.min(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dEUDC.min(),2)})
ferrorsDF.loc['Maximum'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dNEDC.max(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dUDC.max(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dEUDC.max(),2)})
ferrorsDF.loc['Sum'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dNEDC.sum(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dUDC.sum(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dEUDC.sum(),2)})
ferrorsDF.loc['Count'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dNEDC.count(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dUDC.count(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(fvaluesDF.dEUDC.count(),2)})
ferrorsDF.loc['Confidence level (95%)'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':2*round(fvaluesDF.dNEDC.sem(),2), 'UDC [gCO$_2$ km$^{-1}$]':2*round(fvaluesDF.dUDC.sem(),2), 'EUDC [gCO$_2$ km$^{-1}$]':2*round(fvaluesDF.dEUDC.sem(),2)})
if (len(valuesDF.dNEDC)-len(fvaluesDF.dNEDC)) == 0:
    print('No filtering needed, same statistics as above')
else:
    display(ferrorsDF)









    



No filtering needed, same statistics as above

Distribution of the NEDC, UDC and EUDC errors for filtered cases



In [13]:

    
#NEDC
# Create a figure instance
fig = plt.figure(1, figsize=(14, 7))
# Create an axes instance
ax = fig.add_subplot(111)
NEDC_hist = fvaluesDF.dNEDC.hist(bins=25, color='green')
NEDC_hist.set_xlabel("NEDC error [gCO$_2$ km$^{-1}$]",fontsize=14)
NEDC_hist.set_ylabel("frequency",fontsize=14)
plt.title('NEDC CO$_2$ emission error distribution', fontsize=20)
plt.ylabel("frequency",fontsize=18)
plt.tick_params(axis='x', which='major', labelsize=16)
plt.tick_params(axis='y', which='major', labelsize=16)
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
ax.set_xlim(-20, 20)
plt.show()
#UDC
fig = plt.figure(1, figsize=(14, 7))
ax = fig.add_subplot(111)
UDC_hist = fvaluesDF.dUDC.hist(bins=25, color='blue') 
UDC_hist.set_xlabel("UDC error [gCO$_2$ km$^{-1}$]",fontsize=14)
UDC_hist.set_ylabel("frequency",fontsize=14)
plt.title('UDC CO$_2$ emission error distribution', fontsize=20)
plt.ylabel("frequency",fontsize=18)
plt.tick_params(axis='x', which='major', labelsize=16)
plt.tick_params(axis='y', which='major', labelsize=16)
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
ax.set_xlim(-20, 20)
plt.show()
#EUDC
fig = plt.figure(1, figsize=(14, 7))
ax = fig.add_subplot(111)
EUDC_hist = fvaluesDF.dEUDC.hist(bins=25, color='red') 
EUDC_hist.set_xlabel("EUDC error [gCO$_2$ km$^{-1}$]",fontsize=14)
EUDC_hist.set_ylabel("frequency",fontsize=14)
plt.title('EUDC CO$_2$ emission error distribution', fontsize=20)
plt.ylabel("frequency",fontsize=18)
plt.tick_params(axis='x', which='major', labelsize=16)
plt.tick_params(axis='y', which='major', labelsize=16)
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
ax.set_xlim(-20, 20)
plt.show()

Comparative emission error per driving cycle (gCO$_2$ km$^{-1}$)



In [14]:

    
#Alternatively show boxplots
#toboxplot = [valuesDF.dNEDC,new_valuesDF.dNEDC,valuesDF.dUDC,new_valuesDF.dUDC,valuesDF.dEUDC,new_valuesDF.dEUDC]
toboxplot = [valuesDF.dNEDC,valuesDF.dUDC,valuesDF.dEUDC]
# Create a figure instance
fig = plt.figure(1, figsize=(14, 7))
# Create an axes instance
ax = fig.add_subplot(111)
# Create the boxplot with fill color
bp = ax.boxplot(toboxplot, sym='', patch_artist=True, whis=10000, showmeans=True, meanprops=(dict(marker='o',markerfacecolor='yellow')))
for box in bp['boxes']:
    # change outline color
    box.set( color='black', linewidth=1)
    # change fill color
    box.set( facecolor = '#b78adf' )
    ## Custom x-axis labels
#ax.set_xticklabels(['oldNEDC','newNEDC', 'oldUDC','newUDC', 'oldEUDC','newEUDC'],fontsize=20)
ax.set_xticklabels(['NEDC','UDC','EUDC'],fontsize=20)
## Remove top axes and right axes ticks
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
#Set y axis title
plt.title('CO$_2$ emission error by driving cycle', fontsize=20)
plt.ylabel("error [gCO$_2$ km$^{-1}$]",fontsize=18)
plt.tick_params(axis='y', which='major', labelsize=16)
ax.set_ylim(-30, 30)
plt.setp(bp['medians'], color = 'purple', linewidth = 2)
plt.show()
print('The purple box represents the 1st and 3rd quartile.\nThe dark purple line is the median.\nThe yellow dot is the mean.\nthe whiskers show the min and max values.')









    












    



The purple box represents the 1st and 3rd quartile.
The dark purple line is the median.
The yellow dot is the mean.
the whiskers show the min and max values.

Error statistics per technology type (filtered for absolute errors above 25g CO$_2$ km$^{-1}$)



In [15]:

    
#Create a dataframe with the NECD, UDC, EUDC and vehicle model and case
#CarMod = pd.DataFrame({'dNEDC':new_valuesDF.dNEDC,'dUDC':new_valuesDF.dUDC,'dEUDC':new_valuesDF.dEUDC,'Model code':new_valuesDF.Model,'Case':new_valuesDF.Case, 'index':True})         
CarMod = pd.DataFrame({'dNEDC':valuesDF.dNEDC,'dUDC':valuesDF.dUDC,'dEUDC':valuesDF.dEUDC,'Model code':valuesDF.Model,'Case':valuesDF.Case, 'index':True})         
#filter for absolute errors above 25g CO2 per km
mod_cases = CarMod[abs(CarMod.dNEDC) < 25]



In [16]:

    
#Print a dictionary with the tested technologies and their identification codes
tec = pd.DataFrame(index=['Base case','Gear configuration A','Gear configuration B','No Start/Stop','No Break energy recuperation','Variable valve lifting','Direct injection/Multipoint injection','Thermal management'])
tec['Technology code'] = ['BC','GCA','GCB','NOSS','NOBERS','VVL','DI/MPI','ThM']
tec.columns.name='Technology type'
tec









    Out[16]:






  
    
      Technology type
      Technology code
    
  
  
    
      Base case
      BC
    
    
      Gear configuration A
      GCA
    
    
      Gear configuration B
      GCB
    
    
      No Start/Stop
      NOSS
    
    
      No Break energy recuperation
      NOBERS
    
    
      Variable valve lifting
      VVL
    
    
      Direct injection/Multipoint injection
      DI/MPI
    
    
      Thermal management
      ThM



In [17]:

    
#Function that assigns the number of case to the specific technology tested for each vehicle model
def assign_technol_perCarAndCase(df):
    #looks for the case # in the input file and assigns a technology
    df_basecase = df[mod_cases['Case'] <= 27]
    df_gb1 = df[(mod_cases['Case'] > 27) & (mod_cases['Case'] <= 54)]
    df_gb2 = df[(mod_cases['Case'] > 54) & (mod_cases['Case'] <= 81)]
    df_ss = df[(mod_cases['Case'] > 81) & (mod_cases['Case'] <= 108)]
    df_bers = df[(mod_cases['Case'] > 108) & (mod_cases['Case'] <= 135)]
    #some vehicles have more possible technologies than others (long vs short) and an additional technology assignment is performed for the former group
    In_long = (mod_cases['Model code'] == '500') | (mod_cases['Model code'] == 'A4') | (mod_cases['Model code'] == 'Giulietta') | (mod_cases['Model code'] == 'Polo') | (mod_cases['Model code'] == 'Punto') | (mod_cases['Model code'] == '328i')
    In_short = (mod_cases['Model code'] == '308') | (mod_cases['Model code'] == 'Astra') | (mod_cases['Model code'] == 'X1') | (mod_cases['Model code'] == 'Zafira') | (mod_cases['Model code'] == 'Mokka')| (mod_cases['Model code'] == 'A8')
    I_vvl = (mod_cases['Case'] >= 136) & (mod_cases['Case'] <= 162)
    df_vvl = df[In_long & I_vvl]
    I_dimpi = (mod_cases['Case'] >= 163) & (mod_cases['Case'] <= 189)
    df_dimpi = df[In_long & I_dimpi]
    I_short_tm = (mod_cases['Case'] >= 136)
    I_long_tm = (mod_cases['Case'] >= 190)
    I_tm = (In_short & I_short_tm) | (In_long & I_long_tm)
    df_tm = df[I_tm]
    #Append to the original DF a column with the technology IDcode
    pd.options.mode.chained_assignment = None  # default='warn'
    df_basecase.loc[:,'Tecno'] = 'BC'
    df_gb1.loc[:,'Tecno'] = 'GCA'
    df_gb2.loc[:,'Tecno'] = 'GCB'
    df_ss.loc[:,'Tecno'] = 'NOSS'
    df_bers.loc[:,'Tecno'] = 'NOBERS'
#    df_vvl.loc[:,'Tecno'] = 'VVL'
# df_dimpi.loc[:,'Tecno'] = 'DI/MPI'
    df_tm.loc[:,'Tecno'] = 'ThM'
    bigdata = pd.concat([df_basecase,df_gb1,df_gb2,df_ss,df_bers,df_vvl,df_dimpi,df_tm], ignore_index=False)
    return bigdata



In [18]:

    
#Plot the NEDC errors per technology type in a boxplot
tech = assign_technol_perCarAndCase(mod_cases)
techBC = tech[tech['Tecno'] == 'BC']
techGCA = tech[tech['Tecno'] == 'GCA']
techGCB = tech[tech['Tecno'] == 'GCB']
techNOSS = tech[tech['Tecno'] == 'NOSS']
techBERS = tech[tech['Tecno'] == 'NOBERS']
techVVL = tech[tech['Tecno'] == 'VVL']
techDIMPI = tech[tech['Tecno'] == 'DI/MPI']
techThM = tech[tech['Tecno'] == 'ThM']
techboxplot = [techBC.dNEDC,techGCA.dNEDC,techGCB.dNEDC,techNOSS.dNEDC,techBERS.dNEDC,techVVL.dNEDC,techDIMPI.dNEDC,techThM.dNEDC]
# Create a figure instance
fig = plt.figure(1, figsize=(14, 7))
# Create an axes instance
ax = fig.add_subplot(111)
# Create the boxplot with fill color
bp = ax.boxplot(techboxplot, sym='', patch_artist=True, whis=10000, showmeans=True, meanprops=(dict(marker='o',markerfacecolor='yellow')))
for box in bp['boxes']:
    # change outline color
    box.set( color='black', linewidth=1)
    # change fill color
    box.set( facecolor = 'green' )
    ## Custom x-axis labels
ax.set_xticklabels(['BC', 'GCA', 'GCB','NOSS','NOBERS','VVL','DI/MPI','ThM'],fontsize=20)
## Remove top axes and right axes ticks
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
#Set y axis title
plt.title('NEDC CO$_2$ emission error by technology type', fontsize=20)
plt.ylabel("error [gCO$_2$ km$^{-1}$]",fontsize=18)
plt.tick_params(axis='y', which='major', labelsize=18)
ax.set_ylim(-20, 20)
plt.setp(bp['medians'], color = 'purple', linewidth = 2)
plt.show()
print('The green box represents the 1st and 3rd quartile.\nThe dark purple line is the median.\nThe yellow dot is the mean.\nthe whiskers show the min and max values.')









    












    



The green box represents the 1st and 3rd quartile.
The dark purple line is the median.
The yellow dot is the mean.
the whiskers show the min and max values.

Descriptive statistics for NEDC CO$_2$ emission error per technology type



In [19]:

    
grouped = tech.groupby('Tecno')
gNEDCmean = grouped.dNEDC.mean()
gNEDCsem = grouped.dNEDC.sem()
gNEDCmedian = grouped.dNEDC.median()
gNEDCstd = grouped.dNEDC.std()
gNEDCvar = grouped.dNEDC.var()
gNEDCskew = grouped.dNEDC.skew()
gNEDCrange = grouped.dNEDC.max()-grouped.dNEDC.min()
gNEDCmin = grouped.dNEDC.min()
gNEDCmax = grouped.dNEDC.max()
gNEDCsum = grouped.dNEDC.sum()
gNEDCcount = grouped.dNEDC.count()
gNEDC_CI95 = 2*grouped.dNEDC.sem()
NEDCerrorsTec = pd.DataFrame(index=['Averages','StdError','Median','StdDev','Variance','Kurtosis','Skweness','Range','Minimum','Maximum','Sum','Count','Confidence level (95%)'], columns=['BC','GCA', 'GCB','NOSS','NOBERS','VVL','DI/MPI','ThM'])
NEDCerrorsTec.loc['Averages'] = pd.Series.round(gNEDCmean,2)
NEDCerrorsTec.loc['StdError'] = pd.Series.round(gNEDCsem,2)
NEDCerrorsTec.loc['Median'] = pd.Series.round(gNEDCmedian,2)
NEDCerrorsTec.loc['StdDev'] = pd.Series.round(gNEDCstd,2)
NEDCerrorsTec.loc['Variance'] = pd.Series.round(gNEDCvar,2)
NEDCerrorsTec.loc['Kurtosis'] = [round(techBC.dNEDC.kurtosis(),2),round(techGCA.dNEDC.kurtosis(),2),round(techGCB.dNEDC.kurtosis(),2),round(techNOSS.dNEDC.kurtosis(),2),round(techBERS.dNEDC.kurtosis(),2),round(techVVL.dNEDC.kurtosis(),2),round(techDIMPI.dNEDC.kurtosis(),2),round(techThM.dNEDC.kurtosis(),2)]
NEDCerrorsTec.loc['Skweness'] = pd.Series.round(gNEDCskew,2)
NEDCerrorsTec.loc['Range'] = pd.Series.round(gNEDCrange,2)
NEDCerrorsTec.loc['Minimum'] = pd.Series.round(gNEDCmin,2)
NEDCerrorsTec.loc['Maximum'] = pd.Series.round(gNEDCmax,2)
NEDCerrorsTec.loc['Sum'] = pd.Series.round(gNEDCsum)
NEDCerrorsTec.loc['Count'] = pd.Series.round(gNEDCcount)
NEDCerrorsTec.loc['Confidence level (95%)'] = pd.Series.round(gNEDC_CI95,2)
NEDCerrorsTec.columns.name='NEDC error'
NEDCerrorsTec









    Out[19]:






  
    
      NEDC error
      BC
      GCA
      GCB
      NOSS
      NOBERS
      VVL
      DI/MPI
      ThM
    
  
  
    
      Averages
      -3.44
      -2
      -2.64
      -7
      -3.27
      NaN
      NaN
      -3.31
    
    
      StdError
      0.22
      0.22
      0.41
      0.26
      0.54
      NaN
      NaN
      0.2
    
    
      Median
      -3.63
      -2.07
      -3.39
      -7.09
      -2.25
      NaN
      NaN
      -3.49
    
    
      StdDev
      1.17
      1.14
      2.13
      1.34
      2.8
      NaN
      NaN
      1.04
    
    
      Variance
      1.37
      1.31
      4.52
      1.78
      7.84
      NaN
      NaN
      1.09
    
    
      Kurtosis
      -0.28
      0.09
      -1.28
      -1.18
      -0.71
      NaN
      NaN
      -0.54
    
    
      Skweness
      0.57
      -0.19
      0.38
      0.11
      -0.54
      NaN
      NaN
      0.36
    
    
      Range
      4.46
      4.65
      7
      4.25
      10.2
      NaN
      NaN
      3.9
    
    
      Minimum
      -5.51
      -4.45
      -5.51
      -8.91
      -8.54
      NaN
      NaN
      -5.22
    
    
      Maximum
      -1.04
      0.2
      1.49
      -4.67
      1.67
      NaN
      NaN
      -1.32
    
    
      Sum
      -96
      -54
      -71
      -189
      -88
      NaN
      NaN
      -89
    
    
      Count
      28
      27
      27
      27
      27
      NaN
      NaN
      27
    
    
      Confidence level (95%)
      0.44
      0.44
      0.82
      0.51
      1.08
      NaN
      NaN
      0.4



In [20]:

    
#Plot the UDC errors per technology type in a boxplot
techboxplot = [techBC.dUDC,techGCA.dUDC,techGCB.dUDC,techNOSS.dUDC,techBERS.dUDC,techVVL.dUDC,techDIMPI.dUDC,techThM.dUDC]
# Create a figure instance
fig = plt.figure(1, figsize=(14, 7))
# Create an axes instance
ax = fig.add_subplot(111)
# Create the boxplot with fill color
bp = ax.boxplot(techboxplot, sym='', patch_artist=True, whis=10000, showmeans=True, meanprops=(dict(marker='o',markerfacecolor='yellow')))
for box in bp['boxes']:
    # change outline color
    box.set( color='black', linewidth=1)
    # change fill color
    box.set( facecolor = 'blue' )
## Custom x-axis labels
ax.set_xticklabels(['BC', 'GCA', 'GCB','NOSS','NOBERS','VVL','DI/MPI','ThM'],fontsize=20)
## Remove top axes and right axes ticks
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
#Set y axis title
plt.title('UDC CO$_2$ emission error by technology type', fontsize=20)
plt.ylabel("error [gCO$_2$ km$^{-1}$]",fontsize=18)
plt.tick_params(axis='y', which='major', labelsize=18)
ax.set_ylim(-30, 30)
plt.setp(bp['medians'], color = 'purple', linewidth = 2)
plt.show()
print('The blue box represents the 1st and 3rd quartile.\nThe dark purple line is the median.\nThe yellow dot is the mean.\nthe whiskers show the min and max values.')









    












    



The blue box represents the 1st and 3rd quartile.
The dark purple line is the median.
The yellow dot is the mean.
the whiskers show the min and max values.

Descriptive statistics for UDC CO$_2$ emission error per technology type



In [21]:

    
gUDCmean = grouped.dUDC.mean()
gUDCsem = grouped.dUDC.sem()
gUDCmedian = grouped.dUDC.median()
gUDCstd = grouped.dUDC.std()
gUDCvar = grouped.dUDC.var()
gUDCskew = grouped.dUDC.skew()
gUDCrange = grouped.dUDC.max()-grouped.dUDC.min()
gUDCmin = grouped.dUDC.min()
gUDCmax = grouped.dUDC.max()
gUDCsum = grouped.dUDC.sum()
gUDCcount = grouped.dUDC.count()
gUDC_CI95 = 2*grouped.dUDC.sem()
UDCerrorsTec = pd.DataFrame(index=['Averages','StdError','Median','StdDev','Variance','Kurtosis','Skweness','Range','Minimum','Maximum','Sum','Count','Confidence level (95%)'], columns=['BC','GCA', 'GCB','NOSS','NOBERS','VVL','DI/MPI','ThM'])
UDCerrorsTec.loc['Averages'] = pd.Series.round(gUDCmean,2)
UDCerrorsTec.loc['StdError'] = pd.Series.round(gUDCsem,2)
UDCerrorsTec.loc['Median'] = pd.Series.round(gUDCmedian,2)
UDCerrorsTec.loc['StdDev'] = pd.Series.round(gUDCstd,2)
UDCerrorsTec.loc['Variance'] = pd.Series.round(gUDCvar,2)
UDCerrorsTec.loc['Kurtosis'] = [round(techBC.dUDC.kurtosis(),2),round(techGCA.dUDC.kurtosis(),2),round(techGCB.dUDC.kurtosis(),2),round(techNOSS.dUDC.kurtosis(),2),round(techBERS.dUDC.kurtosis(),2),round(techVVL.dUDC.kurtosis(),2),round(techDIMPI.dUDC.kurtosis(),2),round(techThM.dUDC.kurtosis(),2)]
UDCerrorsTec.loc['Skweness'] = pd.Series.round(gUDCskew,2)
UDCerrorsTec.loc['Range'] = pd.Series.round(gUDCrange,2)
UDCerrorsTec.loc['Minimum'] = pd.Series.round(gUDCmin,2)
UDCerrorsTec.loc['Maximum'] = pd.Series.round(gUDCmax,2)
UDCerrorsTec.loc['Sum'] = pd.Series.round(gUDCsum)
UDCerrorsTec.loc['Count'] = pd.Series.round(gUDCcount)
UDCerrorsTec.loc['Confidence level (95%)'] = pd.Series.round(gUDC_CI95,2)
UDCerrorsTec.columns.name='UDC error'
UDCerrorsTec









    Out[21]:






  
    
      UDC error
      BC
      GCA
      GCB
      NOSS
      NOBERS
      VVL
      DI/MPI
      ThM
    
  
  
    
      Averages
      -7.56
      -5.79
      -4.12
      -15.59
      -9.29
      NaN
      NaN
      -7.45
    
    
      StdError
      0.33
      0.46
      0.83
      0.51
      0.87
      NaN
      NaN
      0.26
    
    
      Median
      -7.66
      -5.81
      -4.03
      -15.67
      -7.34
      NaN
      NaN
      -7.37
    
    
      StdDev
      1.74
      2.41
      4.32
      2.63
      4.52
      NaN
      NaN
      1.34
    
    
      Variance
      3.03
      5.8
      18.63
      6.94
      20.45
      NaN
      NaN
      1.78
    
    
      Kurtosis
      0.21
      -0.78
      -1.24
      -0.66
      -0.94
      NaN
      NaN
      -0.92
    
    
      Skweness
      0.22
      0.13
      0.27
      0.27
      -0.53
      NaN
      NaN
      -0.07
    
    
      Range
      7.48
      8.23
      14.53
      9.02
      15.12
      NaN
      NaN
      4.87
    
    
      Minimum
      -11.14
      -9.83
      -9.93
      -19.63
      -17.54
      NaN
      NaN
      -9.87
    
    
      Maximum
      -3.66
      -1.6
      4.6
      -10.62
      -2.42
      NaN
      NaN
      -5
    
    
      Sum
      -212
      -156
      -111
      -421
      -251
      NaN
      NaN
      -201
    
    
      Count
      28
      27
      27
      27
      27
      NaN
      NaN
      27
    
    
      Confidence level (95%)
      0.66
      0.93
      1.66
      1.01
      1.74
      NaN
      NaN
      0.51



In [22]:

    
#Plot the EUDC errors per technology type in a boxplot
techboxplot = [techBC.dEUDC,techGCA.dEUDC,techGCB.dEUDC,techNOSS.dEUDC,techBERS.dEUDC,techVVL.dEUDC,techDIMPI.dEUDC,techThM.dEUDC]
# Create a figure instance
fig = plt.figure(1, figsize=(14, 7))
# Create an axes instance
ax = fig.add_subplot(111)
# Create the boxplot with fill color
bp = ax.boxplot(techboxplot, sym='', patch_artist=True, whis=10000, showmeans=True, meanprops=(dict(marker='o',markerfacecolor='yellow')))
for box in bp['boxes']:
    # change outline color
    box.set( color='black', linewidth=1)
    # change fill color
    box.set( facecolor = 'red' )
## Custom x-axis labels
ax.set_xticklabels(['BC', 'GCA', 'GCB','NOSS','NOBERS','VVL','DI/MPI','ThM'],fontsize=20)
## Remove top axes and right axes ticks
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
#Set y axis title
plt.title('EUDC CO$_2$ emission error by technology type', fontsize=20)
plt.ylabel("error [gCO$_2$ km$^{-1}$]",fontsize=18)
plt.tick_params(axis='y', which='major', labelsize=18)
ax.set_ylim(-20, 20)
plt.setp(bp['medians'], color = 'purple', linewidth = 2)
plt.show()
print('The red box represents the 1st and 3rd quartile.\nThe dark purple line is the median.\nThe yellow dot is the mean.\nthe whiskers show the min and max values.')









    












    



The red box represents the 1st and 3rd quartile.
The dark purple line is the median.
The yellow dot is the mean.
the whiskers show the min and max values.

Descriptive statistics for EUDC CO$_2$ emission error per technology type



In [23]:

    
gEUDCmean = grouped.dEUDC.mean()
gEUDCsem = grouped.dEUDC.sem()
gEUDCmedian = grouped.dEUDC.median()
gEUDCstd = grouped.dEUDC.std()
gEUDCvar = grouped.dEUDC.var()
gEUDCskew = grouped.dEUDC.skew()
gEUDCrange = grouped.dEUDC.max()-grouped.dEUDC.min()
gEUDCmin = grouped.dEUDC.min()
gEUDCmax = grouped.dEUDC.max()
gEUDCsum = grouped.dEUDC.sum()
gEUDCcount = grouped.dEUDC.count()
gEUDC_CI95 = 2*grouped.dEUDC.sem()
EUDCerrorsTec = pd.DataFrame(index=['Averages','StdError','Median','StdDev','Variance','Kurtosis','Skweness','Range','Minimum','Maximum','Sum','Count','Confidence level (95%)'], columns=['BC','GCA', 'GCB','NOSS','NOBERS','VVL','DI/MPI','ThM'])
EUDCerrorsTec.loc['Averages'] = pd.Series.round(gEUDCmean,2)
EUDCerrorsTec.loc['StdError'] = pd.Series.round(gEUDCsem,2)
EUDCerrorsTec.loc['Median'] = pd.Series.round(gEUDCmedian,2)
EUDCerrorsTec.loc['StdDev'] = pd.Series.round(gEUDCstd,2)
EUDCerrorsTec.loc['Variance'] = pd.Series.round(gEUDCvar,2)
EUDCerrorsTec.loc['Kurtosis'] = [round(techBC.dEUDC.kurtosis(),2),round(techGCA.dEUDC.kurtosis(),2),round(techGCB.dEUDC.kurtosis(),2),round(techNOSS.dEUDC.kurtosis(),2),round(techBERS.dEUDC.kurtosis(),2),round(techVVL.dEUDC.kurtosis(),2),round(techDIMPI.dEUDC.kurtosis(),2),round(techThM.dEUDC.kurtosis(),2)]
EUDCerrorsTec.loc['Skweness'] = pd.Series.round(gEUDCskew,2)
EUDCerrorsTec.loc['Range'] = pd.Series.round(gEUDCrange,2)
EUDCerrorsTec.loc['Minimum'] = pd.Series.round(gEUDCmin,2)
EUDCerrorsTec.loc['Maximum'] = pd.Series.round(gEUDCmax,2)
EUDCerrorsTec.loc['Sum'] = pd.Series.round(gEUDCsum)
EUDCerrorsTec.loc['Count'] = pd.Series.round(gEUDCcount)
EUDCerrorsTec.loc['Confidence level (95%)'] = pd.Series.round(gEUDC_CI95,2)
EUDCerrorsTec.columns.name='EUDC error'
EUDCerrorsTec









    Out[23]:






  
    
      EUDC error
      BC
      GCA
      GCB
      NOSS
      NOBERS
      VVL
      DI/MPI
      ThM
    
  
  
    
      Averages
      -1.05
      0.19
      -1.78
      -1.97
      0.21
      NaN
      NaN
      -0.92
    
    
      StdError
      0.18
      0.22
      0.24
      0.15
      0.36
      NaN
      NaN
      0.21
    
    
      Median
      -1.25
      0.2
      -1.78
      -2.08
      0.75
      NaN
      NaN
      -1.22
    
    
      StdDev
      0.98
      1.13
      1.27
      0.79
      1.89
      NaN
      NaN
      1.07
    
    
      Variance
      0.95
      1.27
      1.62
      0.63
      3.56
      NaN
      NaN
      1.15
    
    
      Kurtosis
      0.8
      -1.1
      0.4
      -0.6
      -0.19
      NaN
      NaN
      -1.1
    
    
      Skweness
      1.27
      -0.02
      -0.56
      0.44
      -0.38
      NaN
      NaN
      0.45
    
    
      Range
      3.39
      3.93
      5.52
      2.89
      7.58
      NaN
      NaN
      3.56
    
    
      Minimum
      -2.25
      -1.66
      -5.05
      -3.13
      -3.33
      NaN
      NaN
      -2.52
    
    
      Maximum
      1.14
      2.28
      0.46
      -0.24
      4.25
      NaN
      NaN
      1.04
    
    
      Sum
      -29
      5
      -48
      -53
      6
      NaN
      NaN
      -25
    
    
      Count
      28
      27
      27
      27
      27
      NaN
      NaN
      27
    
    
      Confidence level (95%)
      0.37
      0.43
      0.49
      0.3
      0.73
      NaN
      NaN
      0.41

Error statistics for engine parameters (NEDC prediction)



In [24]:

    
#Gather and name the engine parameters used in the report according to their name in the CO2MPAS output file
param_a = df['nedc']['prediction']['co2_params a']
param_a2 = df['nedc']['prediction']['co2_params a2']
param_b = df['nedc']['prediction']['co2_params b']
param_c = df['nedc']['prediction']['co2_params c']
param_l = df['nedc']['prediction']['co2_params l']
param_l2 = df['nedc']['prediction']['co2_params l2']
param_t0 = df['nedc']['prediction']['co2_params t0']
param_t1 = df['nedc']['prediction']['co2_params t1']
param_trg = df['nedc']['prediction']['co2_params trg']
#Create a dataframe with this data
paramsDF = pd.DataFrame({'param a': param_a,'param a2':param_a2, 'param b':param_b,'param c': param_c,'param l':param_l, 'param l2':param_l2,'param t0': param_t0,'param t1': param_t1,'param trg':param_trg,'NEDC':NEDC,'NEDC error':dNEDC})                 
paramsDF = paramsDF.dropna()
#print the basic automatic statistics
#paramsDF.describe()
paramsDFstat = pd.DataFrame(index=['Averages','StdError','Median','Mode','StdDev','Variance','Kurtosis','Skweness','Range','Minimum','Maximum','Sum','Count','Confidence level (95%)'], columns=['param a','param a2', 'param b', 'param l', 'param l2', 'param t0', 'param t1','param trg'])
paramsDFstat.loc['Averages'] = pd.Series({'param a':round(paramsDF['param a'].mean(),3), 'param a2':round(paramsDF['param a2'].mean(),3), 'param b':round(paramsDF['param b'].mean(),3),'param c':round(paramsDF['param c'].mean(),3),'param l':round(paramsDF['param l'].mean(),3),'param l2':round(paramsDF['param l2'].mean(),3),'param t0':round(paramsDF['param t0'].mean(),3),'param t1':round(paramsDF['param t1'].mean(),3),'param trg':round(paramsDF['param trg'].mean(),3)})
paramsDFstat.loc['StdError'] = pd.Series({'param a':round(paramsDF['param a'].sem(),3), 'param a2':round(paramsDF['param a2'].sem(),3), 'param b':round(paramsDF['param b'].sem(),3),'param c':round(paramsDF['param c'].sem(),3),'param l':round(paramsDF['param l'].sem(),3),'param l2':round(paramsDF['param l2'].sem(),3),'param t0':round(paramsDF['param t0'].sem(),3),'param t1':round(paramsDF['param t1'].sem(),3),'param trg':round(paramsDF['param trg'].sem(),3)})
paramsDFstat.loc['Median'] = pd.Series({'param a':round(paramsDF['param a'].median(),3), 'param a2':round(paramsDF['param a2'].median(),3), 'param b':round(paramsDF['param b'].median(),3),'param c':round(paramsDF['param c'].median(),3),'param l':round(paramsDF['param l'].median(),3),'param l2':round(paramsDF['param l2'].median(),3),'param t0':round(paramsDF['param t0'].median(),3),'param t1':round(paramsDF['param t1'].median(),3),'param trg':round(paramsDF['param trg'].median(),3)})
paramsDFstat.loc['Mode'] = pd.Series({'param a':round(paramsDF['param a'].mode().iloc[0],3), 'param a2':round(paramsDF['param a2'].mode().iloc[0],3), 'param b':round(paramsDF['param b'].mode().iloc[0],3),'param c':round(paramsDF['param c'].mode().iloc[0],3),'param l':round(paramsDF['param l'].mode().iloc[0],3),'param l2':round(paramsDF['param l2'].mode().iloc[0],3),'param t0':round(paramsDF['param t0'].mode().iloc[0],3),'param t1':round(paramsDF['param t1'].mode().iloc[0],3),'param trg':round(paramsDF['param trg'].mode().iloc[0],3)})
paramsDFstat.loc['StdDev'] = pd.Series({'param a':round(paramsDF['param a'].std(),3), 'param a2':round(paramsDF['param a2'].std(),3), 'param b':round(paramsDF['param b'].std(),3),'param c':round(paramsDF['param c'].std(),3),'param l':round(paramsDF['param l'].std(),3),'param l2':round(paramsDF['param l2'].std(),3),'param t0':round(paramsDF['param t0'].std(),3),'param t1':round(paramsDF['param t1'].std(),3),'param trg':round(paramsDF['param trg'].std(),3)})
paramsDFstat.loc['Variance'] = pd.Series({'param a':round(paramsDF['param a'].var(),3), 'param a2':round(paramsDF['param a2'].var(),3), 'param b':round(paramsDF['param b'].var(),3),'param c':round(paramsDF['param c'].var(),3),'param l':round(paramsDF['param l'].var(),3),'param l2':round(paramsDF['param l2'].var(),3),'param t0':round(paramsDF['param t0'].var(),3),'param t1':round(paramsDF['param t1'].var(),3),'param trg':round(paramsDF['param trg'].var(),3)})
paramsDFstat.loc['Kurtosis'] = pd.Series({'param a':round(paramsDF['param a'].kurtosis(),3), 'param a2':round(paramsDF['param a2'].kurtosis(),3), 'param b':round(paramsDF['param b'].kurtosis(),3),'param c':round(paramsDF['param c'].kurtosis(),3),'param l':round(paramsDF['param l'].kurtosis(),3),'param l2':round(paramsDF['param l2'].kurtosis(),3),'param t0':round(paramsDF['param t0'].kurtosis(),3),'param t1':round(paramsDF['param t1'].kurtosis(),3),'param trg':round(paramsDF['param trg'].kurtosis(),3)})
paramsDFstat.loc['Skweness'] = pd.Series({'param a':round(paramsDF['param a'].skew(),3), 'param a2':round(paramsDF['param a2'].skew(),3), 'param b':round(paramsDF['param b'].skew(),3),'param c':round(paramsDF['param c'].skew(),3),'param l':round(paramsDF['param l'].skew(),3),'param l2':round(paramsDF['param l2'].skew(),3),'param t0':round(paramsDF['param t0'].skew(),3),'param t1':round(paramsDF['param t1'].skew(),3),'param trg':round(paramsDF['param trg'].skew(),3)})
paramsDFstat.loc['Range'] = pd.Series({'param a':round((paramsDF['param a'].max()-paramsDF['param a'].min()),3), 'param a2':round((paramsDF['param a2'].max()-paramsDF['param a2'].min()),3), 'param b':round((paramsDF['param b'].max()-paramsDF['param b'].min()),3),'param c':round((paramsDF['param c'].max()-paramsDF['param c'].min()),3),'param l':round((paramsDF['param l'].max()-paramsDF['param l'].min()),3),'param l2':round((paramsDF['param l2'].max()-paramsDF['param l2'].min()),3),'param t0':round((paramsDF['param t0'].max()-paramsDF['param t0'].min()),3),'param t1':round((paramsDF['param t1'].max()-paramsDF['param t1'].min()),3),'param trg':round((paramsDF['param trg'].max()-paramsDF['param trg'].min()),3)})
paramsDFstat.loc['Minimum'] = pd.Series({'param a':round(paramsDF['param a'].min(),3), 'param a2':round(paramsDF['param a2'].min(),3), 'param b':round(paramsDF['param b'].min(),3),'param c':round(paramsDF['param c'].min(),3),'param l':round(paramsDF['param l'].min(),3),'param l2':round(paramsDF['param l2'].min(),3),'param t0':round(paramsDF['param t0'].min(),3),'param t1':round(paramsDF['param t1'].min(),3),'param trg':round(paramsDF['param trg'].min(),3)})
paramsDFstat.loc['Maximum'] = pd.Series({'param a':round(paramsDF['param a'].max(),3), 'param a2':round(paramsDF['param a2'].max(),3), 'param b':round(paramsDF['param b'].max(),3),'param c':round(paramsDF['param c'].max(),3),'param l':round(paramsDF['param l'].max(),3),'param l2':round(paramsDF['param l2'].max(),3),'param t0':round(paramsDF['param t0'].max(),3),'param t1':round(paramsDF['param t1'].max(),3),'param trg':round(paramsDF['param trg'].max(),3)})
paramsDFstat.loc['Sum'] = pd.Series({'param a':round(paramsDF['param a'].sum(),3), 'param a2':round(paramsDF['param a2'].sum(),3), 'param b':round(paramsDF['param b'].sum(),3),'param c':round(paramsDF['param c'].sum(),3),'param l':round(paramsDF['param l'].sum(),3),'param l2':round(paramsDF['param l2'].sum(),3),'param t0':round(paramsDF['param t0'].sum(),3),'param t1':round(paramsDF['param t1'].sum(),3),'param trg':round(paramsDF['param trg'].sum(),3)})
paramsDFstat.loc['Count'] = pd.Series({'param a':round(paramsDF['param a'].count(),3), 'param a2':round(paramsDF['param a2'].count(),3), 'param b':round(paramsDF['param b'].count(),3),'param c':round(paramsDF['param c'].count(),3),'param l':round(paramsDF['param l'].count(),3),'param l2':round(paramsDF['param l2'].count(),3),'param t0':round(paramsDF['param t0'].count(),3),'param t1':round(paramsDF['param t1'].count(),3),'param trg':round(paramsDF['param trg'].count(),3)})
paramsDFstat.loc['Confidence level (95%)'] = pd.Series({'param a':2*round(paramsDF['param a'].sem(),3), 'param a2':2*round(paramsDF['param a2'].sem(),3), 'param b':2*round(paramsDF['param b'].sem(),3),'param c':2*round(paramsDF['param c'].sem(),3),'param l':2*round(paramsDF['param l'].sem(),3),'param l2':2*round(paramsDF['param l2'].sem(),3),'param t0':2*round(paramsDF['param t0'].sem(),3),'param t1':2*round(paramsDF['param t1'].sem(),3),'param trg':2*round(paramsDF['param trg'].sem(),3)})
paramsDFstat









    Out[24]:






  
    
      
      param a
      param a2
      param b
      param l
      param l2
      param t0
      param t1
      param trg
    
  
  
    
      Averages
      0.078
      -0
      0.024
      -1.566
      -0.011
      2.539
      2.579
      96.71
    
    
      StdError
      0.002
      0
      0.001
      0.013
      0.001
      0.026
      0.023
      0.014
    
    
      Median
      0.079
      -0
      0.024
      -1.551
      -0.01
      2.535
      2.616
      96.688
    
    
      Mode
      0.137
      -0
      -0.003
      -1.666
      0.002
      2.632
      2.679
      96.538
    
    
      StdDev
      0.026
      0
      0.011
      0.165
      0.01
      0.331
      0.29
      0.173
    
    
      Variance
      0.001
      0
      0
      0.027
      0
      0.109
      0.084
      0.03
    
    
      Kurtosis
      0.38
      -3.056
      -0.354
      3.695
      0.626
      0.177
      -0.376
      -0.924
    
    
      Skweness
      -0.037
      -3.758
      -0.273
      -1.179
      -0.005
      0.455
      -0.573
      0.206
    
    
      Range
      0.159
      0.001
      0.052
      1.172
      0.061
      1.615
      1.245
      0.759
    
    
      Minimum
      0.01
      -0.001
      -0.008
      -2.382
      -0.041
      1.778
      1.881
      96.391
    
    
      Maximum
      0.169
      -0
      0.044
      -1.21
      0.02
      3.393
      3.126
      97.151
    
    
      Sum
      12.763
      -0.022
      3.959
      -255.256
      -1.768
      413.801
      420.39
      15763.8
    
    
      Count
      163
      163
      163
      163
      163
      163
      163
      163
    
    
      Confidence level (95%)
      0.004
      0
      0.002
      0.026
      0.002
      0.052
      0.046
      0.028

Error statistics for engine parameters applying a filtering of NEDC absolute error < 25 gCO$_2$ km$^{-1}$



In [26]:

    
#filter for absolute errors above 25g CO2 per km
# fparamsDF = paramsDF[abs(paramsDF['NEDC error']) < 25]
# fparamsDFstat = pd.DataFrame(index=['Averages','StdError','Median','Mode','StdDev','Variance','Kurtosis','Skweness','Range','Minimum','Maximum','Sum','Count','Confidence level (95%)'], columns=['param a','param a2', 'param b', 'param l', 'param l2', 'param t', 'param trg'])
# fparamsDFstat.loc['Averages'] = pd.Series({'param a':round(fparamsDF['param a'].mean(),3), 'param a2':round(fparamsDF['param a2'].mean(),3), 'param b':round(fparamsDF['param b'].mean(),3),'param l':round(fparamsDF['param l'].mean(),3),'param l2':round(fparamsDF['param l2'].mean(),3),'param t':round(fparamsDF['param t'].mean(),3),'param trg':round(fparamsDF['param trg'].mean(),3)})
# fparamsDFstat.loc['StdError'] = pd.Series({'param a':round(fparamsDF['param a'].sem(),3), 'param a2':round(fparamsDF['param a2'].sem(),3), 'param b':round(fparamsDF['param b'].sem(),3),'param l':round(fparamsDF['param l'].sem(),3),'param l2':round(fparamsDF['param l2'].sem(),3),'param t':round(fparamsDF['param t'].sem(),3),'param trg':round(fparamsDF['param trg'].sem(),3)})
# fparamsDFstat.loc['Median'] = pd.Series({'param a':round(fparamsDF['param a'].median(),3), 'param a2':round(fparamsDF['param a2'].median(),3), 'param b':round(fparamsDF['param b'].median(),3),'param l':round(fparamsDF['param l'].median(),3),'param l2':round(fparamsDF['param l2'].median(),3),'param t':round(fparamsDF['param t'].median(),3),'param trg':round(fparamsDF['param trg'].median(),3)})
# fparamsDFstat.loc['Mode'] = pd.Series({'param a':round(fparamsDF['param a'].mode().iloc[0],3), 'param a2':round(fparamsDF['param a2'].mode().iloc[0],3), 'param b':round(fparamsDF['param b'].mode().iloc[0],3),'param l':round(fparamsDF['param l'].mode().iloc[0],3),'param l2':round(fparamsDF['param l2'].mode().iloc[0],3),'param t':round(fparamsDF['param t'].mode().iloc[0],3),'param trg':round(fparamsDF['param trg'].mode().iloc[0],3)})
# fparamsDFstat.loc['StdDev'] = pd.Series({'param a':round(fparamsDF['param a'].std(),3), 'param a2':round(fparamsDF['param a2'].std(),3), 'param b':round(fparamsDF['param b'].std(),3),'param l':round(fparamsDF['param l'].std(),3),'param l2':round(fparamsDF['param l2'].std(),3),'param t':round(fparamsDF['param t'].std(),3),'param trg':round(fparamsDF['param trg'].std(),3)})
# fparamsDFstat.loc['Variance'] = pd.Series({'param a':round(fparamsDF['param a'].var(),3), 'param a2':round(fparamsDF['param a2'].var(),3), 'param b':round(fparamsDF['param b'].var(),3),'param l':round(fparamsDF['param l'].var(),3),'param l2':round(fparamsDF['param l2'].var(),3),'param t':round(fparamsDF['param t'].var(),3),'param trg':round(fparamsDF['param trg'].var(),3)})
# fparamsDFstat.loc['Kurtosis'] = pd.Series({'param a':round(fparamsDF['param a'].kurtosis(),3), 'param a2':round(fparamsDF['param a2'].kurtosis(),3), 'param b':round(fparamsDF['param b'].kurtosis(),3),'param l':round(fparamsDF['param l'].kurtosis(),3),'param l2':round(fparamsDF['param l2'].kurtosis(),3),'param t':round(fparamsDF['param t'].kurtosis(),3),'param trg':round(fparamsDF['param trg'].kurtosis(),3)})
# fparamsDFstat.loc['Skweness'] = pd.Series({'param a':round(fparamsDF['param a'].skew(),3), 'param a2':round(fparamsDF['param a2'].skew(),3), 'param b':round(fparamsDF['param b'].skew(),3),'param l':round(fparamsDF['param l'].skew(),3),'param l2':round(fparamsDF['param l2'].skew(),3),'param t':round(fparamsDF['param t'].skew(),3),'param trg':round(fparamsDF['param trg'].skew(),3)})
# fparamsDFstat.loc['Range'] = pd.Series({'param a':round((fparamsDF['param a'].max()-paramsDF['param a'].min()),3), 'param a2':round((fparamsDF['param a2'].max()-paramsDF['param a2'].min()),3), 'param b':round((fparamsDF['param b'].max()-paramsDF['param b'].min()),3),'param l':round((fparamsDF['param l'].max()-paramsDF['param l'].min()),3),'param l2':round((fparamsDF['param l2'].max()-paramsDF['param l2'].min()),3),'param t':round((fparamsDF['param t'].max()-paramsDF['param t'].min()),3),'param trg':round((fparamsDF['param trg'].max()-paramsDF['param trg'].min()),3)})
# fparamsDFstat.loc['Minimum'] = pd.Series({'param a':round(fparamsDF['param a'].min(),3), 'param a2':round(fparamsDF['param a2'].min(),3), 'param b':round(fparamsDF['param b'].min(),3),'param l':round(fparamsDF['param l'].min(),3),'param l2':round(fparamsDF['param l2'].min(),3),'param t':round(fparamsDF['param t'].min(),3),'param trg':round(fparamsDF['param trg'].min(),3)})
# fparamsDFstat.loc['Maximum'] = pd.Series({'param a':round(fparamsDF['param a'].max(),3), 'param a2':round(fparamsDF['param a2'].max(),3), 'param b':round(fparamsDF['param b'].max(),3),'param l':round(fparamsDF['param l'].max(),3),'param l2':round(fparamsDF['param l2'].max(),3),'param t':round(fparamsDF['param t'].max(),3),'param trg':round(fparamsDF['param trg'].max(),3)})
# fparamsDFstat.loc['Sum'] = pd.Series({'param a':round(fparamsDF['param a'].sum(),3), 'param a2':round(fparamsDF['param a2'].sum(),3), 'param b':round(fparamsDF['param b'].sum(),3),'param l':round(fparamsDF['param l'].sum(),3),'param l2':round(fparamsDF['param l2'].sum(),3),'param t':round(fparamsDF['param t'].sum(),3),'param trg':round(fparamsDF['param trg'].sum(),3)})
# fparamsDFstat.loc['Count'] = pd.Series({'param a':round(fparamsDF['param a'].count(),3), 'param a2':round(fparamsDF['param a2'].count(),3), 'param b':round(fparamsDF['param b'].count(),3),'param l':round(fparamsDF['param l'].count(),3),'param l2':round(fparamsDF['param l2'].count(),3),'param t':round(fparamsDF['param t'].count(),3),'param trg':round(fparamsDF['param trg'].count(),3)})
# fparamsDFstat.loc['Confidence level (95%)'] = pd.Series({'param a':2*round(fparamsDF['param a'].sem(),3), 'param a2':2*round(fparamsDF['param a2'].sem(),3), 'param b':2*round(fparamsDF['param b'].sem(),3),'param l':2*round(fparamsDF['param l'].sem(),3),'param l2':2*round(fparamsDF['param l2'].sem(),3),'param t':2*round(fparamsDF['param t'].sem(),3),'param trg':2*round(fparamsDF['param trg'].sem(),3)})
# if (len(paramsDF['NEDC error'])-len(fparamsDF['NEDC error'])) == 0:
#     print('No filtering needed, same statistics as above')
# else:
#     print('Filtered statistics')
#     display(fparamsDFstat)

Distribution of the engine parameters values for filtered cases



In [27]:

    
#Histogram for each engine parameter
#create a list with all the available engine parameters
paramlist = list(sorted(fparamsDF.columns.unique()))
for p in range(2,(len(paramlist))):
    tit = paramlist[p] + ' distribution'
    fig = plt.figure(1, figsize=(14, 7))
    plt.title(tit,fontsize=20)
    plot = fig.add_subplot(111)
    # We change the fontsize of minor ticks label 
    plot.tick_params(axis='x', which='major', labelsize=16)
    plot.tick_params(axis='y', which='major', labelsize=16)
    par_hist = fparamsDF[paramlist[p]].hist(bins=25, color='grey')
    par_hist.set_xlabel(paramlist[p],fontsize=20)
    par_hist.set_ylabel("frequency",fontsize=20)
    plot.get_xaxis().tick_bottom()
    plot.get_yaxis().tick_left()
    plt.show()



In [33]:

    
#Alternatively show normalized error boxplot for all engine parameters
paramsbp = fparamsDF.drop(fparamsDF.columns[[0,1]], axis = 1)
paramsbp_norm = (paramsbp - paramsbp.mean()) / (paramsbp.max() - paramsbp.min())
# Create a figure instance
fig = plt.figure(1, figsize=(16, 7))
# Create an axes instance
ax = fig.add_subplot(111)
# Create the boxplot with fill color
bp = ax.boxplot(paramsbp_norm.values, sym='', patch_artist=True, whis=10000, showmeans=True, meanprops=(dict(marker='o',markerfacecolor='yellow')))
for box in bp['boxes']:
    # change outline color
    box.set( color='black', linewidth=1)
    # change fill color
    box.set( facecolor = '#b78adf' )
## Custom x-axis labels
ax.set_xticklabels(['param a', 'param a2', 'param b', 'param c', 'param l', 'param l2', 'param t0','param t1', 'param trg'],fontsize=20)
## Remove top axes and right axes ticks
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
#Set y axis title
plt.title('Normalized CO$_2$ emission error per engine parameter', fontsize=20)
plt.ylabel("normalized parameter error",fontsize=18)
plt.tick_params(axis='y', which='major', labelsize=16)
ax.set_ylim(-1, 1)
plt.setp(bp['medians'], color = 'purple', linewidth = 2)
plt.show()
print('The purple box represents the 1st and 3rd quartile.\nThe dark purple line is the median.\nThe yellow dot is the mean.\nthe whiskers show the min and max values.')









    












    



The purple box represents the 1st and 3rd quartile.
The dark purple line is the median.
The yellow dot is the mean.
the whiskers show the min and max values.

Correlation between all engine parameters and NEDC error. All vehicles



In [29]:

    
#Create a heatmap with the correlation of all the engine parameters and the NEDC error
fparamNEDCerror = fparamsDF.drop('NEDC', 1)
#from seaborn.apionly import heatmap, diverging_palette
import seaborn as sns
sns.set()
# Compute the correlation matrix
corr = fparamNEDCerror.corr()
# Generate a mask for the upper triangle
mask = np.zeros_like(corr, dtype=np.bool)
mask[np.triu_indices_from(mask)] = True
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(16, 12))
# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, center = 0, linewidths=.1,  annot = True, annot_kws={"size":14}, square = True)
plt.title('Engine parameters vs engine parameters. Correlation heatmap.',fontsize=22)
plt.yticks(fontsize = 14) 
plt.xticks(fontsize = 14, rotation = 1)
cax = plt.gcf().axes[-1]
cax.tick_params(labelsize=16)
plt.show()



In [30]:

    
#Avoid using seaborn templates and go back to matplotlib templates
mpl.rcParams.update(inline_rc)

Section 2. Performance of the model. Statistics per vehicle model and case test.

Glossary of vehicle models and number of test cases considered in the report



In [35]:

    
mod_cases_stats = mod_cases.groupby(['Model code'],as_index=False).count() 
#mod_cases_stats['Brand and model'] = ['Peugeot 308','Fiat 500','Audi A4','Opel Astra','Alfa Romeo Giulietta','Volkswagen Polo','Fiat Punto','BMW X1','Opel Zafira']
mod_cases_stats['Brand and model'] = ['Audi A8']
cols = mod_cases_stats.columns.tolist()
cols = cols[-1:] + cols[:2]
mod_cases_stats = mod_cases_stats[cols]
mod_cases_stats









    Out[35]:






  
    
      
      Brand and model
      Model code
      Case
    
  
  
    
      0
      Audi A8
      A8
      163

NEDC, UDC, and EUDC CO$_2$ emission error per vehicle model (filtered for NEDC CO$_2$ emission absolute error < 25 gCO$_2$ km$^{-1}$)



In [36]:

    
#Create a dataframe with the NECD, UDC, EUDC and vehicle model and case
# CarMod = pd.DataFrame({'dNEDC':dNEDC,'dUDC':dUDC,'dEUDC':dEUDC,'Model code':model,'Case':cases})         
#filter for absolute errors above 25g CO2 per km
CarMod = tech[abs(tech.dNEDC) < 25]
#in order to create statistic tables and plots for each model car, a numeric car ID 'cid' has to be assigned to each vehicle
Carlist = list(sorted(CarMod['Model code'].unique()))
Cidlist = list(range(len(Carlist)))
CarMod.cid = CarMod['Model code'].replace(Carlist, Cidlist, regex = True)
CarMod['cod'] = CarMod.cid
dictecnos = {'BC':'o', 'GCA':'s', 'GCB':'v', 'NOSS':'p','NOBERS':'D','VVL':'4','DI/MPI':'+','ThM':'*'}
#Create a table with the error statistics for each car model
for x in range(0,len(Cidlist)):
    Car = CarMod[CarMod.cod == x]
    grouped = Car.groupby('Tecno')
    CarDF = pd.DataFrame(index=['Averages','Median', 'StdDev'], columns=['NEDC [gCO$_2$ km$^{-1}$]','UDC [gCO$_2$ km$^{-1}$]', 'EUDC [gCO$_2$ km$^{-1}$]'])
    CarDF.loc['Averages'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(Car.dNEDC.mean(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(Car.dUDC.mean(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(Car.dEUDC.mean(),2)})
    CarDF.loc['Median'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(Car.dNEDC.median(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(Car.dUDC.median(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(Car.dEUDC.median(),2)})
    CarDF.loc['StdDev'] = pd.Series({'NEDC [gCO$_2$ km$^{-1}$]':round(Car.dNEDC.std(),2), 'UDC [gCO$_2$ km$^{-1}$]':round(Car.dUDC.std(),2), 'EUDC [gCO$_2$ km$^{-1}$]':round(Car.dEUDC.std(),2)})
    CarDF.columns.name=Car.iat[0,1]
    display(CarDF)
    #plot the NEDC CO2 emission error histogram per vehicle model
    fig = plt.figure(1, figsize=(14, 7))
    plt.title(Car.iat[0,1],fontsize=20)
    plot = fig.add_subplot(111)
    plot.tick_params(axis='x', which='major', labelsize=14)
    plot.tick_params(axis='y', which='major', labelsize=14)
    plot.set_xlim(-15, 15)
    plot.get_xaxis().tick_bottom()
    plot.get_yaxis().tick_left()
    car_NEDC_hist = Car.dNEDC.hist(bins=25, color='green')
    car_NEDC_hist.set_xlabel("NEDC CO$_2$ emission error [gCO$_2$ km$^{-1}$]",fontsize=20)
    car_NEDC_hist.set_ylabel("frequency",fontsize=20)
    plt.show()
    #plot the NEDC error per vehicle and for all cases
    fig = plt.figure(1, figsize=(14, 7))
    plt.title(Car.iat[0,1],fontsize=20)
    plot = fig.add_subplot(111)
    plot.tick_params(axis='x', which='major', labelsize=14)
    plot.tick_params(axis='y', which='major', labelsize=14)
    plot.set_xlim(0, len(Car.Case))
    plot.set_ylim(-15,15)
    plot.get_xaxis().tick_bottom()
    plot.get_yaxis().tick_left()
#     car_scat = plt.scatter(Car.Case, Car.dNEDC, color='green')
    for key, group in grouped:
        plt.plot(group['Case'], group['dNEDC'], color='green', marker=dictecnos[key], label = key, linestyle='')
        first_legend = plt.legend(numpoints=1, bbox_to_anchor=(1.0, 1.), loc=1, borderaxespad=0.)
        plot.ax = plt.gca().add_artist(first_legend)
    plot.set_xlabel("Case #",fontsize=20)
    plot.set_ylabel("NEDC error [gCO$_2$ km$^{-1}$]",fontsize=20)
    line1 = plot.axhline(y=-2.5, color='grey', linestyle='-.', label='± 2.5 gCO$_2$ km$^{-1}$')
    line2 = plot.axhline(y=2.5, color='grey', linestyle='-.')
    line3 = plot.axhline(y=-4, color='black', linestyle='--', label='± 4.0 gCO$_2$ km$^{-1}$')
    line4 = plot.axhline(y=4, color='black', linestyle='--')
    plt.legend(handles=[line1, line3], loc = 3)
    plt.show()
    #plot the UDC CO2 emission error histogram per vehicle model
    fig = plt.figure(1, figsize=(14, 7))
    plt.title(Car.iat[0,1],fontsize=20)
    plot = fig.add_subplot(111)
    plot.tick_params(axis='x', which='major', labelsize=14)
    plot.tick_params(axis='y', which='major', labelsize=14)
    plot.set_xlim(-15, 15)
    plot.get_xaxis().tick_bottom()
    plot.get_yaxis().tick_left()
    car_UDC_hist = Car.dUDC.hist(bins=25, color='blue')
    car_UDC_hist.set_xlabel("UDC CO$_2$ emission error [gCO$_2$ km$^{-1}$]",fontsize=20)
    car_UDC_hist.set_ylabel("frequency",fontsize=20)
    plt.show()
    #plot the UDC error per vehicle and for all cases
    fig = plt.figure(1, figsize=(14, 7))
    plt.title(Car.iat[0,1],fontsize=20)
    plot = fig.add_subplot(111)
    plot.tick_params(axis='x', which='major', labelsize=14)
    plot.tick_params(axis='y', which='major', labelsize=14)
    plot.set_xlim(0, len(Car.Case))
    plot.set_ylim(-15,15)
    plot.get_xaxis().tick_bottom()
    plot.get_yaxis().tick_left()
#     car_scat = plt.scatter(Car.Case, Car.dUDC, color='blue')
    for key, group in grouped:
        plt.plot(group['Case'], group['dUDC'], color='blue', marker=dictecnos[key], label = key, linestyle='')
        first_legend = plt.legend(numpoints=1, bbox_to_anchor=(1.0, 1.), loc=1, borderaxespad=0.)
        plot.ax = plt.gca().add_artist(first_legend)
    plot.set_xlabel("Case #",fontsize=20)
    plot.set_ylabel("UDC error [gCO$_2$ km$^{-1}$]",fontsize=20)
    line1 = plot.axhline(y=-2.5, color='grey', linestyle='-.', label='± 2.5 gCO$_2$ km$^{-1}$')
    line2 = plot.axhline(y=2.5, color='grey', linestyle='-.')
    line3 = plot.axhline(y=-4, color='black', linestyle='--', label='± 4.0 gCO$_2$ km$^{-1}$')
    line4 = plot.axhline(y=4, color='black', linestyle='--')
    plt.legend(handles=[line1, line3], loc = 3)
    plt.show()
    #plot the EUDC CO2 emission error histogram per vehicle model
    fig = plt.figure(1, figsize=(14, 7))
    plt.title(Car.iat[0,1],fontsize=20)
    plot = fig.add_subplot(111)
    plot.tick_params(axis='x', which='major', labelsize=14)
    plot.tick_params(axis='y', which='major', labelsize=14)
    plot.set_xlim(-15, 15)
    plot.get_xaxis().tick_bottom()
    plot.get_yaxis().tick_left()
    car_EUDC_hist = Car.dEUDC.hist(bins=25, color='red')
    car_EUDC_hist.set_xlabel("EUDC CO$_2$ emission error [gCO$_2$ km$^{-1}$]",fontsize=20)
    car_EUDC_hist.set_ylabel("frequency",fontsize=20)
    plt.show()
    #plot the EUDC error per vehicle and for all cases
    fig = plt.figure(1, figsize=(14, 7))
    plt.title(Car.iat[0,1],fontsize=20)
    plot = fig.add_subplot(111)
    plot.tick_params(axis='x', which='major', labelsize=14)
    plot.tick_params(axis='y', which='major', labelsize=14)
    plot.set_xlim(0, len(Car.Case))
    plot.set_ylim(-15,15)
    plot.get_xaxis().tick_bottom()
    plot.get_yaxis().tick_left()
    #car_scat = plt.scatter(Car.Case, Car.dEUDC, color='red')
    for key, group in grouped:
        plt.plot(group['Case'], group['dEUDC'], color='red', marker=dictecnos[key], label = key, linestyle='')
        first_legend = plt.legend(numpoints=1, bbox_to_anchor=(1.0, 1.), loc=1, borderaxespad=0.)
        plot.ax = plt.gca().add_artist(first_legend)
    plot.set_xlabel("Case #",fontsize=20)
    plot.set_ylabel("EUDC error [gCO$_2$ km$^{-1}$]",fontsize=20)
    line1 = plot.axhline(y=-2.5, color='grey', linestyle='-.', label='± 2.5 gCO$_2$ km$^{-1}$')
    line2 = plot.axhline(y=2.5, color='grey', linestyle='-.')
    line3 = plot.axhline(y=-4, color='black', linestyle='--', label='± 4.0 gCO$_2$ km$^{-1}$')
    line4 = plot.axhline(y=4, color='black', linestyle='--')
    plt.legend(handles=[line1, line3], loc = 3)
    plt.show()









    






  
    
      A8
      NEDC [gCO$_2$ km$^{-1}$]
      UDC [gCO$_2$ km$^{-1}$]
      EUDC [gCO$_2$ km$^{-1}$]
    
  
  
    
      Averages
      -3.61
      -8.3
      -0.89
    
    
      Median
      -3.53
      -7.46
      -1.13
    
    
      StdDev
      2.33
      4.72
      1.48

NEDC error vs engine parameters per vehicle model



In [37]:

    
#Create a dataframe with the engine parameters, the model of the vehicle and the NEDC error for filtering
parCarDF = paramsDF
parCarDF['carmodel'] = model
# parCarDF['carid'] = CarMod.cod
parCarDF
fparCarDF = parCarDF[parCarDF['NEDC error'] < 25]
groups = fparCarDF.groupby('carmodel')



In [38]:

    
#Plotting the filtered NEDC error vs engine parameters for each vehicle model
for p in range(2,(len(paramlist))):
    fig = plt.figure(1, figsize=(14, 7))
    plot = fig.add_subplot(111)
    plot.margins(0.18)
    plot.set_prop_cycle(cycler('color', ['#5d8aa8','#e52b50','#ffbf00','#9966cc','#a4c639','#cd9575','#fbceb1','#00ffff','#b2beb5']))
    for name, group in groups:
        plt.plot(group[paramlist[p]], group['NEDC error'], marker='o', linestyle='', ms=6, label=name)
        plt.tick_params(axis='x', which='major', labelsize=14)
        plt.tick_params(axis='y', which='major', labelsize=14)
        plot.set_ylim(-15,15)
        plot.get_xaxis().tick_bottom()
        plot.get_yaxis().tick_left()
        first_legend = plt.legend(numpoints=1, bbox_to_anchor=(1.0, 1.), loc=1, borderaxespad=0.)
        plot.ax = plt.gca().add_artist(first_legend)
    plot.set_xlabel(paramlist[p],fontsize=20)
    plot.set_ylabel("NEDC error [gCO$_2$ km$^{-1}$]",fontsize=20)
    line1 = plt.axhline(y=-2.5, color='grey', linestyle='-.', label='± 2.5 gCO$_2$ km$^{-1}$')
    line2 = plt.axhline(y=2.5, color='grey', linestyle='-.')
    line3 = plt.axhline(y=-4, color='black', linestyle='--', label='± 4.0 gCO$_2$ km$^{-1}$')
    line4 = plt.axhline(y=4, color='black', linestyle='--')
    plt.legend(handles=[line1, line3], loc=3)
    plt.show()

Engine parameters vs engine parameters. Scatterplot per vehicle.



In [39]:

    
#plot engine parameters ones against the others and inform about vehicles models
import seaborn as sns
sns.set()
scatterDF = fparCarDF.drop('NEDC', 1)
sns.pairplot(scatterDF, hue="carmodel")#, palette = 'jajaj')#, kind = 'reg')
plt.show()

	NEDC [gCO$_2$ km$^{-1}$]	UDC [gCO$_2$ km$^{-1}$]	EUDC [gCO$_2$ km$^{-1}$]
Averages	-3.61	-8.3	-0.89
StdError	0.18	0.37	0.12
Median	-3.53	-7.46	-1.13
Mode	-3.38939	-7.36109	-1.09069
StdDev	2.33	4.72	1.48
Variance	5.43	22.3	2.21
Kurtosis	-0.25	0.23	0.12
Skweness	-0.39	-0.39	0.39
Range	10.58	24.23	9.3
Minimum	-8.91	-19.63	-5.05
Maximum	1.67	4.6	4.25
Sum	-588.49	-1352.3	-144.78
Count	163	163	163
Confidence level (95%)	0.36	0.74	0.24

Technology type	Technology code
Base case	BC
Gear configuration A	GCA
Gear configuration B	GCB
No Start/Stop	NOSS
No Break energy recuperation	NOBERS
Variable valve lifting	VVL
Direct injection/Multipoint injection	DI/MPI
Thermal management	ThM

NEDC error	BC	GCA	GCB	NOSS	NOBERS	VVL	DI/MPI	ThM
Averages	-3.44	-2	-2.64	-7	-3.27	NaN	NaN	-3.31
StdError	0.22	0.22	0.41	0.26	0.54	NaN	NaN	0.2
Median	-3.63	-2.07	-3.39	-7.09	-2.25	NaN	NaN	-3.49
StdDev	1.17	1.14	2.13	1.34	2.8	NaN	NaN	1.04
Variance	1.37	1.31	4.52	1.78	7.84	NaN	NaN	1.09
Kurtosis	-0.28	0.09	-1.28	-1.18	-0.71	NaN	NaN	-0.54
Skweness	0.57	-0.19	0.38	0.11	-0.54	NaN	NaN	0.36
Range	4.46	4.65	7	4.25	10.2	NaN	NaN	3.9
Minimum	-5.51	-4.45	-5.51	-8.91	-8.54	NaN	NaN	-5.22
Maximum	-1.04	0.2	1.49	-4.67	1.67	NaN	NaN	-1.32
Sum	-96	-54	-71	-189	-88	NaN	NaN	-89
Count	28	27	27	27	27	NaN	NaN	27
Confidence level (95%)	0.44	0.44	0.82	0.51	1.08	NaN	NaN	0.4

UDC error	BC	GCA	GCB	NOSS	NOBERS	VVL	DI/MPI	ThM
Averages	-7.56	-5.79	-4.12	-15.59	-9.29	NaN	NaN	-7.45
StdError	0.33	0.46	0.83	0.51	0.87	NaN	NaN	0.26
Median	-7.66	-5.81	-4.03	-15.67	-7.34	NaN	NaN	-7.37
StdDev	1.74	2.41	4.32	2.63	4.52	NaN	NaN	1.34
Variance	3.03	5.8	18.63	6.94	20.45	NaN	NaN	1.78
Kurtosis	0.21	-0.78	-1.24	-0.66	-0.94	NaN	NaN	-0.92
Skweness	0.22	0.13	0.27	0.27	-0.53	NaN	NaN	-0.07
Range	7.48	8.23	14.53	9.02	15.12	NaN	NaN	4.87
Minimum	-11.14	-9.83	-9.93	-19.63	-17.54	NaN	NaN	-9.87
Maximum	-3.66	-1.6	4.6	-10.62	-2.42	NaN	NaN	-5
Sum	-212	-156	-111	-421	-251	NaN	NaN	-201
Count	28	27	27	27	27	NaN	NaN	27
Confidence level (95%)	0.66	0.93	1.66	1.01	1.74	NaN	NaN	0.51

EUDC error	BC	GCA	GCB	NOSS	NOBERS	VVL	DI/MPI	ThM
Averages	-1.05	0.19	-1.78	-1.97	0.21	NaN	NaN	-0.92
StdError	0.18	0.22	0.24	0.15	0.36	NaN	NaN	0.21
Median	-1.25	0.2	-1.78	-2.08	0.75	NaN	NaN	-1.22
StdDev	0.98	1.13	1.27	0.79	1.89	NaN	NaN	1.07
Variance	0.95	1.27	1.62	0.63	3.56	NaN	NaN	1.15
Kurtosis	0.8	-1.1	0.4	-0.6	-0.19	NaN	NaN	-1.1
Skweness	1.27	-0.02	-0.56	0.44	-0.38	NaN	NaN	0.45
Range	3.39	3.93	5.52	2.89	7.58	NaN	NaN	3.56
Minimum	-2.25	-1.66	-5.05	-3.13	-3.33	NaN	NaN	-2.52
Maximum	1.14	2.28	0.46	-0.24	4.25	NaN	NaN	1.04
Sum	-29	5	-48	-53	6	NaN	NaN	-25
Count	28	27	27	27	27	NaN	NaN	27
Confidence level (95%)	0.37	0.43	0.49	0.3	0.73	NaN	NaN	0.41

	param a	param a2	param b	param l	param l2	param t0	param t1	param trg
Averages	0.078	-0	0.024	-1.566	-0.011	2.539	2.579	96.71
StdError	0.002	0	0.001	0.013	0.001	0.026	0.023	0.014
Median	0.079	-0	0.024	-1.551	-0.01	2.535	2.616	96.688
Mode	0.137	-0	-0.003	-1.666	0.002	2.632	2.679	96.538
StdDev	0.026	0	0.011	0.165	0.01	0.331	0.29	0.173
Variance	0.001	0	0	0.027	0	0.109	0.084	0.03
Kurtosis	0.38	-3.056	-0.354	3.695	0.626	0.177	-0.376	-0.924
Skweness	-0.037	-3.758	-0.273	-1.179	-0.005	0.455	-0.573	0.206
Range	0.159	0.001	0.052	1.172	0.061	1.615	1.245	0.759
Minimum	0.01	-0.001	-0.008	-2.382	-0.041	1.778	1.881	96.391
Maximum	0.169	-0	0.044	-1.21	0.02	3.393	3.126	97.151
Sum	12.763	-0.022	3.959	-255.256	-1.768	413.801	420.39	15763.8
Count	163	163	163	163	163	163	163	163
Confidence level (95%)	0.004	0	0.002	0.026	0.002	0.052	0.046	0.028