In the previous analysis, the (dis)similarities between the metrics (i.e. their degree of overlap/complementarity) were analyzed for the whole transport network of Bangladesh. Therefore, the conclusions drawn from those (dis)similarities may not hold if the analysis is conducted for a different transport network.
To test the generalizability of the metric (dis)similarities, the criticality analysis was repeated for seven subnetworks, corresponding to seven divisions of Bangladesh. In this notebook, the criticality metric results from each subnetwork are analyzed using the Spearman rank correlation coefficient.
In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.pylab import *
import matplotlib.colors as colors
import seaborn as sns
from __future__ import division
import os
import sys
# Put the parent directory of the working directory on sys.path (only once),
# so that the project modules imported further down can be found.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
#Modules developed for this project
from transport_network_modeling import network_visualization as net_v
from transport_network_modeling import criticality as crit
In [2]:
# Display the version string of the interpreter running this notebook.
sys.version
Out[2]:
In [3]:
#the criticality results for the subnetworks were stored in pickle format
#the following function is used to load the pickle files
import pickle
def load_obj(name):
    """Load and return the object pickled in the file ``name + '.pkl'``.

    Parameters
    ----------
    name : str
        Path of the pickle file without its ``.pkl`` extension.

    Returns
    -------
    object
        Whatever object was stored in the pickle file.

    Notes
    -----
    Unpickling can execute arbitrary code, so only load files that come
    from a trusted source.
    """
    pickle_path = name + '.pkl'
    with open(pickle_path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded
def calculate_correlation(file_loc, metrics=None, top_n=100):
    """Build the pairwise Spearman correlation matrix of the criticality metrics.

    The pickled criticality results at ``file_loc`` are loaded into a
    DataFrame, two metric columns are corrected, the rows are restricted to
    the links ranked in the top ``top_n`` of at least one metric, and
    ``crit.correlate_metric_spearman`` is evaluated for every ordered pair of
    metric columns.

    Parameters
    ----------
    file_loc : str
        Path of the pickle file without the ``.pkl`` extension (it is passed
        to ``load_obj``).
    metrics : list of str, optional
        Renamed metric columns (``'m01_01'`` ... ``'m10'``) used to select the
        top-ranked links. Defaults to all 18 metric columns. It only affects
        the row selection; the returned matrix always covers all metrics.
    top_n : int, optional
        Number of highest-valued links recorded per metric (default 100).

    Returns
    -------
    pandas.DataFrame
        Square frame indexed and labelled by the 18 metric names; each cell
        holds the first value returned by ``crit.correlate_metric_spearman``
        for that (row metric, column metric) pair.
    """
    res_dict = load_obj(file_loc)
    crit_df = pd.DataFrame.from_dict(res_dict, orient='index')
    # assumes every value in the pickled dict lists the 18 metric results in
    # exactly this order -- TODO confirm against the code that wrote the pickle
    crit_df.columns = ['m3_01', 'm3_02', 'm5_01', 'm8_02', 'm10', 'm1_01', 'm1_02',
                       'm2_01', 'm2_02', 'm4_02', 'm6_01', 'm7_01', 'm7_02', 'm7_03',
                       'm9_01', 'm4_01', 'm8_01', 'm8_03']
    # Keep the dict keys as an 'osmid' column and switch to a 0..n-1 index.
    crit_df['osmid'] = crit_df.index
    crit_df.index = np.arange(0, len(crit_df), 1)

    # Alter wrong values: m2_02 values below 1.39e-10 become 0, and m5_01 is
    # replaced by its reciprocal when positive and by 2 otherwise.
    # NOTE(review): the rationale for the 1.39e-10 threshold and the fill
    # value 2 is not visible in this notebook.
    crit_df['m2_02'] = crit_df.m2_02.apply(lambda val: 0 if val < 1.39e-10 else val)
    # 1.0 / x keeps true division even without `from __future__ import division`.
    crit_df['m5_01'] = crit_df.m5_01.apply(lambda x: 1.0 / x if x > 0 else 2)

    # Reorder the metric columns and rename them with zero-padded numbers.
    raw_order = ['m1_01', 'm1_02', 'm2_01', 'm2_02', 'm3_01', 'm3_02', 'm4_01',
                 'm4_02', 'm5_01', 'm6_01', 'm7_01', 'm7_02', 'm7_03', 'm8_01',
                 'm8_02', 'm8_03', 'm9_01', 'm10']
    metric_cols = ['m01_01', 'm01_02', 'm02_01', 'm02_02', 'm03_01', 'm03_02',
                   'm04_01', 'm04_02', 'm05_01', 'm06_01', 'm07_01', 'm07_02',
                   'm07_03', 'm08_01', 'm08_02', 'm08_03', 'm09_01', 'm10']
    crit_df2 = crit_df[['osmid'] + raw_order].copy()
    crit_df2.columns = ['osmid'] + metric_cols

    # Default to the local metric list so the function does not depend on a
    # notebook-level global being defined before it is called.
    if metrics is None:
        metrics = metric_cols

    # Record the top `top_n` critical links of every metric (zero scores excluded).
    topn_ids = set()
    for metric in metrics:
        nonzero = crit_df2.loc[crit_df2[metric] != 0]
        try:
            ranked = nonzero.sort_values(metric, ascending=False)
            topn_ids.update(ranked.osmid[:top_n])
        except Exception:
            # NOTE(review): best-effort fallback kept from the original code
            # (all non-zero links of this metric); which error it was meant
            # to handle is not visible here.
            topn_ids.update(nonzero.sort_values(metric).osmid)
    top_links = crit_df2.loc[crit_df2['osmid'].isin(list(topn_ids))]

    # Calculate the Spearman correlation coefficient for every metric pair.
    spearmanr_df = pd.DataFrame(np.nan, index=metric_cols, columns=metric_cols)
    for m_a in metric_cols:
        for m_b in metric_cols:
            r, _p, _n = crit.correlate_metric_spearman(df=top_links, m_a=m_a, m_b=m_b)
            spearmanr_df.at[m_a, m_b] = r
    return spearmanr_df
In [4]:
# Metric names in their display order (zero-padded so they also sort correctly).
all_metric = [
    'm01_01', 'm01_02',
    'm02_01', 'm02_02',
    'm03_01', 'm03_02',
    'm04_01', 'm04_02',
    'm05_01',
    'm06_01',
    'm07_01', 'm07_02', 'm07_03',
    'm08_01', 'm08_02', 'm08_03',
    'm09_01',
    'm10',
]
In [5]:
# For each division: the location of its pickled criticality results (without
# the '.pkl' extension) followed by the Spearman matrix computed from it.

# Barisal
file_loc_barisal = r'./criticality_results/result_BD_BA2'
barisal_spearman_df = calculate_correlation(file_loc_barisal)

# Chittagong
file_loc_chittagong = r'./criticality_results/result_BD_CG2'
chittagong_spearman_df = calculate_correlation(file_loc_chittagong)

# Dhaka
file_loc_dhaka = r'./criticality_results/result_BD_DA2'
dhaka_spearman_df = calculate_correlation(file_loc_dhaka)

# Khulna
file_loc_khulna = r'./criticality_results/result_BD_KH2'
khulna_spearman_df = calculate_correlation(file_loc_khulna)

# Rangpur
file_loc_rangpur = r'./criticality_results/result_BD_RP2'
rangpur_spearman_df = calculate_correlation(file_loc_rangpur)

# Rajshahi
file_loc_rajshahi = r'./criticality_results/result_BD_RS2'
rajshahi_spearman_df = calculate_correlation(file_loc_rajshahi)

# Sylhet
file_loc_sylhet = r'./criticality_results/result_BD_SY2'
sylhet_spearman_df = calculate_correlation(file_loc_sylhet)
In [6]:
# Plot the Barisal correlation matrix; vmin/vmax of -1 and 1 span the possible
# values of a correlation coefficient (presumably the colour-scale limits).
net_v.correlation_plot(
    barisal_spearman_df,
    title='Spearman Rank Correlation Barisal',
    cmap='RdBu_r',
    vmin=-1,
    vmax=1
)
In [7]:
# Plot the Chittagong correlation matrix; vmin/vmax of -1 and 1 span the possible
# values of a correlation coefficient (presumably the colour-scale limits).
net_v.correlation_plot(
    chittagong_spearman_df,
    title='Spearman Rank Correlation Chittagong',
    cmap='RdBu_r',
    vmin=-1,
    vmax=1
)
In [8]:
# Plot the Dhaka correlation matrix; vmin/vmax of -1 and 1 span the possible
# values of a correlation coefficient (presumably the colour-scale limits).
net_v.correlation_plot(
    dhaka_spearman_df,
    title='Spearman Rank Correlation Dhaka',
    cmap='RdBu_r',
    vmin=-1,
    vmax=1
)
In [9]:
# Plot the Khulna correlation matrix; vmin/vmax of -1 and 1 span the possible
# values of a correlation coefficient (presumably the colour-scale limits).
net_v.correlation_plot(
    khulna_spearman_df,
    title='Spearman Rank Correlation Khulna',
    cmap='RdBu_r',
    vmin=-1,
    vmax=1
)
In [10]:
# Plot the Rangpur correlation matrix; vmin/vmax of -1 and 1 span the possible
# values of a correlation coefficient (presumably the colour-scale limits).
net_v.correlation_plot(
    rangpur_spearman_df,
    title='Spearman Rank Correlation Rangpur',
    cmap='RdBu_r',
    vmin=-1,
    vmax=1
)
In [11]:
# Plot the Rajshahi correlation matrix; vmin/vmax of -1 and 1 span the possible
# values of a correlation coefficient (presumably the colour-scale limits).
net_v.correlation_plot(
    rajshahi_spearman_df,
    title='Spearman Rank Correlation Rajshahi',
    cmap='RdBu_r',
    vmin=-1,
    vmax=1
)
In [12]:
# Plot the Sylhet correlation matrix; vmin/vmax of -1 and 1 span the possible
# values of a correlation coefficient (presumably the colour-scale limits).
net_v.correlation_plot(
    sylhet_spearman_df,
    title='Spearman Rank Correlation Sylhet',
    cmap='RdBu_r',
    vmin=-1,
    vmax=1
)
The aim of this analysis is to assess the robustness of the correlations among the metrics. The question to be answered is: are the correlations network-dependent? To answer it, this analysis identifies the correlations that hold true across all subnetworks.
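Some distance-based indicators are used here: for each pair of metrics, the mean and the range (maximum minus minimum) of the correlation coefficient across the seven subnetworks, and a consistency flag that indicates whether the coefficient has the same sign in every subnetwork.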
In [13]:
# The per-division Spearman matrices, collected in one list so that they can
# be combined cell by cell.
all_ = [
    barisal_spearman_df,
    chittagong_spearman_df,
    dhaka_spearman_df,
    khulna_spearman_df,
    rangpur_spearman_df,
    rajshahi_spearman_df,
    sylhet_spearman_df,
]
In [14]:
# Create a DataFrame, labelled like the per-division matrices, in which every
# cell holds the list of correlation values found at that position in each
# frame of `all_` (positions are matched with .iloc, in the order of `all_`).
#
# Each column is built as an object-dtype Series of lists and the frame is
# assembled in one step. This avoids `astype(list)` (`list` is not a dtype and
# only works because it is coerced to object) and avoids assigning a list to a
# single cell through `.iloc[i, j] = ...`, which newer pandas releases can
# reject with "Must have equal len keys and value when setting with an iterable".
all_spearman_df = pd.DataFrame(
    {
        col: pd.Series(
            [[frame.iloc[i, j] for frame in all_]
             for i in range(barisal_spearman_df.shape[0])],
            index=barisal_spearman_df.index,
            dtype=object
        )
        for j, col in enumerate(barisal_spearman_df.columns)
    },
    columns=barisal_spearman_df.columns
)
In [15]:
# Sanity check: show the top-left 4 x 4 corner of the combined frame.
all_spearman_df.iloc[0:4, 0:4]
Out[15]:
In [21]:
def contrast(l):
    """Return 1 when the values in ``l`` do not mix signs, otherwise 0.

    The values count as consistent when the largest and smallest are both
    <= 0 or both >= 0, so zeros are compatible with either sign.

    Parameters
    ----------
    l : sequence of numbers
        Values to inspect (here: one correlation coefficient per subnetwork).

    Returns
    -------
    int
        1 if all values are non-positive or all are non-negative, else 0.
    """
    highest = np.max(l)
    lowest = np.min(l)
    all_non_positive = highest <= 0 and lowest <= 0
    all_non_negative = highest >= 0 and lowest >= 0
    return 1 if (all_non_positive or all_non_negative) else 0
In [22]:
# Reduce the list stored in every cell to a single summary value:
# its mean, its range (max - min) and its sign-consistency flag.
mean_spearman_df = all_spearman_df.applymap(np.mean)
range_spearman_df = all_spearman_df.applymap(lambda coeffs: np.max(coeffs) - np.min(coeffs))
contrast_spearman_df = all_spearman_df.applymap(contrast)
In [23]:
# Plot the cell-wise mean of the correlations; vmin/vmax of -1 and 1 span the
# possible values of a mean correlation coefficient.
net_v.correlation_plot(
    mean_spearman_df,
    title='Mean of Spearman-rank correlation across subnetworks',
    cmap='RdBu_r',
    vmin=-1,
    vmax=1
)
In [24]:
# Plot the cell-wise range (max - min) of the correlations.
# NOTE(review): vmax=1.6 is below the largest possible range of 2; confirm
# that no cell exceeds it.
net_v.correlation_plot(
    range_spearman_df,
    title='Range of Spearman-rank correlation across subnetworks',
    cmap='Oranges',
    vmin=0,
    vmax=1.6
)
In [25]:
# Plot the sign-consistency flags (0 or 1) produced by contrast().
net_v.correlation_plot(
    contrast_spearman_df,
    title='Consistency of Spearman-rank correlation \n(1: consistent, negative/positive correlations in all subnetworks)',
    cmap='Oranges',
    vmin=0,
    vmax=1
)
In [ ]: