Transport network metrics (dis)similarities generalization

Bramka Arga Jafino

Delft University of Technology

Faculty of Technology, Policy and Management

An introduction note

In the previous analysis, metric (dis)similarities (i.e. the degree of overlap/complementarity between metrics) were analyzed only for the whole transport network of Bangladesh. Therefore, the conclusions drawn from those (dis)similarities may not hold true if the analysis is conducted on a different transport network.

To test the generalizability of the metric (dis)similarities, the criticality analysis was repeated on seven subnetworks, corresponding to seven divisions of Bangladesh. In this notebook, the criticality metric results from each subnetwork are analyzed using the Spearman rank correlation coefficient.

0. Import all required modules and files + Helper functions


In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.pylab import *
import matplotlib.colors as colors
import seaborn as sns
from __future__ import division

import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

#Modules developed for this project
from transport_network_modeling import network_visualization as net_v
from transport_network_modeling import criticality as crit


C:\Users\bjafino\anaconda3\envs\py27\lib\site-packages\ema_workbench\em_framework\optimization.py:22: ImportWarning: platypus based optimization not available
  warnings.warn("platypus based optimization not available", ImportWarning)

In [2]:
# Display the version string of the interpreter running this notebook
sys.version


Out[2]:
'2.7.14 |Anaconda custom (64-bit)| (default, Oct 15 2017, 03:34:40) [MSC v.1500 64 bit (AMD64)]'

In [3]:
#the criticality results for the subnetworks were stored in pickle format
#the following function is used to load the pickle files
import pickle
def load_obj(name):
    """Return the object stored in the pickle file ``name + '.pkl'``.

    ``name`` is the file path without its ``.pkl`` extension.

    NOTE: unpickling can execute arbitrary code, so only load result files
    that come from a trusted source.
    """
    pickle_path = name + '.pkl'
    with open(pickle_path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded
    
def calculate_correlation(file_loc):
    """Compute pairwise Spearman rank correlations between criticality metrics.

    The pickled criticality results at ``file_loc + '.pkl'`` are loaded with
    ``load_obj`` and turned into a frame with one row per key of the result
    dictionary (kept in the ``osmid`` column) and one column per metric.
    The rows are then restricted to the union of the top-100 links of every
    metric, and each pair of metrics is correlated on that subset with
    ``crit.correlate_metric_spearman``.

    This function reads the module-level list ``all_metric`` (the zero-padded
    metric names), which must be defined before the function is called.

    Parameters
    ----------
    file_loc : str
        Path of the pickle file, without the ``.pkl`` extension.

    Returns
    -------
    pandas.DataFrame
        Square frame whose index and columns are the metric names; each cell
        holds the first value returned by ``crit.correlate_metric_spearman``
        for that pair (presumably the correlation coefficient -- confirm
        against that helper).
    """
    res_dict = load_obj(file_loc)

    # Each dictionary value is expected to hold 18 metric values, labelled
    # here in the order in which the results were stored.
    crit_df = pd.DataFrame.from_dict(res_dict, orient='index')
    crit_df.columns = ['m3_01', 'm3_02', 'm5_01', 'm8_02', 'm10', 'm1_01', 'm1_02',
                       'm2_01', 'm2_02', 'm4_02', 'm6_01', 'm7_01', 'm7_02', 'm7_03',
                       'm9_01', 'm4_01', 'm8_01', 'm8_03']
    crit_df['osmid'] = crit_df.index
    crit_df.index = np.arange(0, len(crit_df), 1)

    # alter wrong values
    # m2_02: values below 1.39e-10 are treated as zero.
    crit_df['m2_02'] = crit_df.m2_02.apply(lambda val: 0 if val < 1.39e-10 else val)
    # m5_01: positive values are inverted, everything else is set to 2.
    # 1.0 / x keeps this a true division even without `from __future__
    # import division`.
    crit_df['m5_01'] = crit_df.m5_01.apply(lambda x: 1.0 / x if x > 0 else 2)

    # Reorder the columns by metric number and switch to zero-padded names.
    crit_df2 = crit_df[['osmid', 'm1_01', 'm1_02', 'm2_01', 'm2_02', 'm3_01', 'm3_02',
                        'm4_01', 'm4_02', 'm5_01', 'm6_01', 'm7_01', 'm7_02', 'm7_03',
                        'm8_01', 'm8_02', 'm8_03', 'm9_01', 'm10']]
    crit_df2.columns = ['osmid', 'm01_01', 'm01_02', 'm02_01', 'm02_02', 'm03_01',
                        'm03_02', 'm04_01', 'm04_02', 'm05_01', 'm06_01', 'm07_01',
                        'm07_02', 'm07_03', 'm08_01', 'm08_02', 'm08_03', 'm09_01',
                        'm10']

    # record top 100 critical links
    top_n = 100
    top_links = set()
    for metric in all_metric:
        nonzero = crit_df2.loc[crit_df2[metric] != 0]
        ranked = nonzero.sort_values(metric, ascending=False)
        # A positional slice simply returns every row when fewer than top_n
        # rows have a non-zero value, so no exception fallback is needed.
        top_links.update(ranked.osmid.iloc[:top_n])

    # Metric labels are taken before filtering; only the rows change below.
    metric_cols = crit_df2.columns[crit_df2.columns != 'osmid']
    crit_df2 = crit_df2.loc[crit_df2['osmid'].isin(list(top_links))]

    # calculate spearman correlation coefficient
    spearmanr_df = pd.DataFrame(np.nan, index=metric_cols, columns=metric_cols)

    for metric_a in metric_cols:
        for metric_b in metric_cols:
            # Only the first element of the returned triple is stored.
            r, _p, _n = crit.correlate_metric_spearman(df=crit_df2, m_a=metric_a,
                                                       m_b=metric_b)
            spearmanr_df.at[metric_a, metric_b] = r

    return spearmanr_df

In [4]:
# Zero-padded metric names in ascending metric order, one family per line
all_metric = [
    'm01_01', 'm01_02',
    'm02_01', 'm02_02',
    'm03_01', 'm03_02',
    'm04_01', 'm04_02',
    'm05_01',
    'm06_01',
    'm07_01', 'm07_02', 'm07_03',
    'm08_01', 'm08_02', 'm08_03',
    'm09_01',
    'm10',
]

1. Analysis of individual subnetworks


In [5]:
# For each division: the location of its pickled criticality result, followed
# by the Spearman correlation matrix computed from that result.
file_loc_barisal = r'./criticality_results/result_BD_BA2'
barisal_spearman_df = calculate_correlation(file_loc_barisal)

file_loc_chittagong = r'./criticality_results/result_BD_CG2'
chittagong_spearman_df = calculate_correlation(file_loc_chittagong)

file_loc_dhaka = r'./criticality_results/result_BD_DA2'
dhaka_spearman_df = calculate_correlation(file_loc_dhaka)

file_loc_khulna = r'./criticality_results/result_BD_KH2'
khulna_spearman_df = calculate_correlation(file_loc_khulna)

file_loc_rangpur = r'./criticality_results/result_BD_RP2'
rangpur_spearman_df = calculate_correlation(file_loc_rangpur)

file_loc_rajshahi = r'./criticality_results/result_BD_RS2'
rajshahi_spearman_df = calculate_correlation(file_loc_rajshahi)

file_loc_sylhet = r'./criticality_results/result_BD_SY2'
sylhet_spearman_df = calculate_correlation(file_loc_sylhet)


C:\Users\bjafino\anaconda3\envs\py27\lib\site-packages\ipykernel\__main__.py:46: FutureWarning: set_value is deprecated and will be removed in a future release. Please use .at[] or .iat[] accessors instead

In [6]:
# Correlation matrix of the Barisal subnetwork on a diverging -1..1 colour scale
net_v.correlation_plot(
    barisal_spearman_df,
    title='Spearman Rank Correlation Barisal',
    cmap='RdBu_r',
    vmin=-1,
    vmax=1
)



In [7]:
# Correlation matrix of the Chittagong subnetwork on a diverging -1..1 colour scale
net_v.correlation_plot(
    chittagong_spearman_df,
    title='Spearman Rank Correlation Chittagong',
    cmap='RdBu_r',
    vmin=-1,
    vmax=1
)



In [8]:
# Correlation matrix of the Dhaka subnetwork on a diverging -1..1 colour scale
net_v.correlation_plot(
    dhaka_spearman_df,
    title='Spearman Rank Correlation Dhaka',
    cmap='RdBu_r',
    vmin=-1,
    vmax=1
)



In [9]:
# Correlation matrix of the Khulna subnetwork on a diverging -1..1 colour scale
net_v.correlation_plot(
    khulna_spearman_df,
    title='Spearman Rank Correlation Khulna',
    cmap='RdBu_r',
    vmin=-1,
    vmax=1
)



In [10]:
# Correlation matrix of the Rangpur subnetwork on a diverging -1..1 colour scale
net_v.correlation_plot(
    rangpur_spearman_df,
    title='Spearman Rank Correlation Rangpur',
    cmap='RdBu_r',
    vmin=-1,
    vmax=1
)



In [11]:
# Correlation matrix of the Rajshahi subnetwork on a diverging -1..1 colour scale
net_v.correlation_plot(
    rajshahi_spearman_df,
    title='Spearman Rank Correlation Rajshahi',
    cmap='RdBu_r',
    vmin=-1,
    vmax=1
)



In [12]:
# Correlation matrix of the Sylhet subnetwork on a diverging -1..1 colour scale
net_v.correlation_plot(
    sylhet_spearman_df,
    title='Spearman Rank Correlation Sylhet',
    cmap='RdBu_r',
    vmin=-1,
    vmax=1
)


2. Robustness of the rank correlations

The aim of this analysis is to assess the robustness of the correlations among the metrics. The question to be answered is: are the correlations network-dependent? To answer it, this analysis identifies correlations that hold true across the subnetworks.

Some distance-based indicators are used here:

  • Mean (if the mean approaches 1 or -1, the high degree of (dis)similarity between the two metrics is robust)
  • Range (difference between the highest and the lowest value)
  • Consistency of correlation: presence of contradicting correlation directions (negative in some subnetworks while positive in others)
  • Standard deviation? (there are too few data points to calculate a meaningful standard deviation)
  • Skewness???

In [13]:
# Per-division correlation matrices, gathered in one list
all_ = [
    barisal_spearman_df,
    chittagong_spearman_df,
    dhaka_spearman_df,
    khulna_spearman_df,
    rangpur_spearman_df,
    rajshahi_spearman_df,
    sylhet_spearman_df,
]

In [14]:
# Create a frame in which every cell holds the list of correlation values
# found at that position in each frame of all_ (same order as all_).
# Cells are matched by position, so every frame in all_ must share the
# row/column order of barisal_spearman_df.
n_rows, n_cols = barisal_spearman_df.shape

# Fill a 2-D object array first: assigning a list to a single element of an
# object array stores the list itself, so pandas never has the chance to
# interpret the list as values to broadcast over several cells.
cell_lists = np.empty((n_rows, n_cols), dtype=object)
for i in range(n_rows):
    for j in range(n_cols):
        cell_lists[i, j] = [corr_df.iloc[i, j] for corr_df in all_]

all_spearman_df = pd.DataFrame(cell_lists, index=barisal_spearman_df.index,
                               columns=barisal_spearman_df.columns)

In [15]:
# Sanity check: show the top-left 4x4 corner of the frame; each cell should
# hold one list with a correlation value per entry of all_
all_spearman_df.iloc[:4, :4]


Out[15]:
m01_01 m01_02 m02_01 m02_02
m01_01 [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] [0.8785180309863647, 0.9615696257552463, 0.981... [0.16260483187024613, 0.36833381234059276, 0.3... [0.19874557776615032, 0.3923253305851553, 0.46...
m01_02 [0.8785180309863648, 0.9615696257552463, 0.981... [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] [0.07940566755926337, 0.40138087569875075, 0.4... [0.12110260735768588, 0.42505992622624766, 0.4...
m02_01 [0.16260483187024613, 0.36833381234059276, 0.3... [0.07940566755926337, 0.40138087569875075, 0.4... [0.9999999999999998, 1.0, 0.9999999999999999, ... [0.9911669780873109, 0.989309771995661, 0.9815...
m02_02 [0.19874557776615032, 0.3923253305851553, 0.46... [0.12110260735768588, 0.42505992622624766, 0.4... [0.9911669780873109, 0.989309771995661, 0.9815... [0.9999999999999999, 0.9999999999999999, 1.0, ...

In [21]:
def contrast(l):
    """Return 1 when the values in ``l`` do not disagree in sign, else 0.

    The result is 1 if every value is <= 0 or every value is >= 0 (zeros are
    compatible with either sign), and 0 if ``l`` contains both a strictly
    negative and a strictly positive value.
    """
    lowest, highest = np.min(l), np.max(l)
    # Checking the extremes is enough: all values are non-positive when the
    # largest is, and all are non-negative when the smallest is.
    same_sign = highest <= 0 or lowest >= 0
    return 1 if same_sign else 0

In [22]:
# Reduce each cell's list of per-subnetwork correlations to a single number:
# its mean, its range (max - min) and its sign-consistency flag
mean_spearman_df = all_spearman_df.applymap(np.mean)
range_spearman_df = all_spearman_df.applymap(lambda values: np.max(values) - np.min(values))
contrast_spearman_df = all_spearman_df.applymap(contrast)

In [23]:
# Mean correlation per metric pair, on the same diverging -1..1 colour scale
net_v.correlation_plot(
    mean_spearman_df,
    title='Mean of Spearman-rank correlation across subnetworks',
    cmap='RdBu_r',
    vmin=-1,
    vmax=1
)



In [24]:
# Range (max - min) of the correlations per metric pair, sequential colour scale
net_v.correlation_plot(
    range_spearman_df,
    title='Range of Spearman-rank correlation across subnetworks',
    cmap='Oranges',
    vmin=0,
    vmax=1.6
)



In [25]:
# Sign-consistency flag per metric pair (values are 0 or 1)
net_v.correlation_plot(
    contrast_spearman_df,
    title='Consistency of Spearman-rank correlation \n(1: consistent, negative/positive correlations in all subnetworks)',
    cmap='Oranges',
    vmin=0,
    vmax=1
)



In [ ]: