LTC details: normalized (nodes)


In [1]:
%matplotlib inline

import pandas as pd
import pylab
import matplotlib.pyplot as plt
import openpyxl

In [3]:
# Load the detailed LTC results; the file is a semicolon-separated CSV.
df = pd.read_csv(
    "../../data/processed/result_detail_ltc.csv",
    sep=";",
)
df.head()


Out[3]:
id_node lvl ltc
0 128656 0 4
1 128656 1 7676
2 128656 2 9455
3 128656 3 9707
4 128656 4 9747

In [6]:
# Normalize lineal_threshold (the `ltc` column): cast the values to float and
# divide them by the fixed constant 256490.
# NOTE(review): 256490 looks like (number of distinct nodes - 1) -- the per-level
# node counts printed later in this notebook sum to 256491 -- confirm.
# NOTE: df['ltc'] is overwritten in place, so running this cell a second time
# divides the already-normalized values again.
df['ltc'] = df['ltc'].astype(float).div(256490)
df.head()


Out[6]:
id_node lvl ltc
0 128656 0 0.000016
1 128656 1 0.029927
2 128656 2 0.036863
3 128656 3 0.037846
4 128656 4 0.038001

In [7]:
# For each node (id_node) keep the row of its last, i.e. highest, level.
# GroupBy.last() simply takes the final entry of every group in the frame's
# current row order, so the rows are first sorted by `lvl`; otherwise the result
# would silently depend on how the CSV happens to be ordered.
# mergesort is stable: rows with equal `lvl` keep their original relative order,
# so for an input that is already ordered the result is unchanged.
# The result is indexed by id_node and has the columns `lvl` and `ltc`.
df2 = df.sort_values('lvl', kind='mergesort').groupby(['id_node']).last()
df2.head()


Out[7]:
lvl ltc
id_node
2 2 0.000101
3 2 0.000234
4 5 0.055827
5 1 0.000097
6 1 0.000327

In [8]:
# Count how many rows of df2 (one row per node) fall on each value of `lvl`.
df2.groupby('lvl').size()


Out[8]:
lvl
0     42129
1     63189
2     73260
3     51118
4     11516
5     14861
6       211
7       171
8        22
9         8
10        4
12        2
dtype: int64

In [9]:
# Plot the number of df2 rows per `lvl` value with a log-scaled y axis.
nodes_per_level = df2.groupby('lvl').size()
ax = nodes_per_level.plot(logy=True)

# Label the axes object returned by the plot call directly.
ax.set_title("Nodes vs levels")
ax.set_xlabel("Levels")
_ = ax.set_ylabel("Nodes")  # assign to `_` to suppress the Text repr output



In [14]:
def print_full(x):
    """Print ``x`` without pandas' row truncation.

    ``display.max_rows`` is lifted only for the duration of the ``print`` call.
    The option is changed through ``pd.option_context`` so that the value that
    was active before the call is restored afterwards, even if printing raises,
    instead of being reset to the library default.

    Parameters
    ----------
    x : object
        Object to print, typically a pandas Series or DataFrame.
    """
    # None means "unlimited" for display.max_rows, so no row count is needed.
    with pd.option_context('display.max_rows', None):
        print(x)

In [15]:
# Summary statistics of the df2 values grouped by `lvl`, printed without row
# truncation.
stats_by_level = df2[['lvl','ltc']].groupby(['lvl']).describe()
print_full(stats_by_level)


                    ltc
lvl                    
0   count  42129.000000
    mean       0.000007
    std        0.000016
    min        0.000004
    25%        0.000004
    50%        0.000004
    75%        0.000004
    max        0.000752
1   count  63189.000000
    mean       0.000125
    std        0.000264
    min        0.000008
    25%        0.000016
    50%        0.000039
    75%        0.000121
    max        0.003462
2   count  73260.000000
    mean       0.001004
    std        0.001108
    min        0.000012
    25%        0.000238
    50%        0.000597
    75%        0.001353
    max        0.009022
3   count  51118.000000
    mean       0.005272
    std        0.005090
    min        0.000016
    25%        0.001552
    50%        0.003552
    75%        0.007431
    max        0.042364
4   count  11516.000000
    mean       0.004471
    std        0.005204
    min        0.000035
    25%        0.001575
    50%        0.002657
    75%        0.004936
    max        0.050942
5   count  14861.000000
    mean       0.035239
    std        0.010734
    min        0.000125
    25%        0.034317
    50%        0.034317
    75%        0.036586
    max        0.103408
6   count    211.000000
    mean       0.043693
    std        0.022972
    min        0.001513
    25%        0.035976
    50%        0.041097
    75%        0.057039
    max        0.114106
7   count    171.000000
    mean       0.053063
    std        0.022264
    min        0.009314
    25%        0.038206
    50%        0.048064
    75%        0.062104
    max        0.206690
8   count     22.000000
    mean       0.060929
    std        0.026563
    min        0.011817
    25%        0.052497
    50%        0.062254
    75%        0.079687
    max        0.108714
9   count      8.000000
    mean       0.072538
    std        0.033621
    min        0.015061
    25%        0.067301
    50%        0.087148
    75%        0.093168
    max        0.097411
10  count      4.000000
    mean       0.070088
    std        0.029425
    min        0.027818
    25%        0.061866
    50%        0.079346
    75%        0.087568
    max        0.093840
12  count      2.000000
    mean       0.103445
    std        0.020249
    min        0.089126
    25%        0.096285
    50%        0.103445
    75%        0.110604
    max        0.117763

In [11]:
# Pearson correlation (the default method of DataFrame.corr) between lvl and ltc.
df2[['lvl', 'ltc']].corr(method="pearson")


Out[11]:
lvl ltc
lvl 1.000000 0.673117
ltc 0.673117 1.000000

In [12]:
# Spearman rank correlation between lvl and ltc.
level_and_ltc = df2[['lvl', 'ltc']]
level_and_ltc.corr(method="spearman")


Out[12]:
lvl ltc
lvl 1.000000 0.893708
ltc 0.893708 1.000000

In [13]:
# Kendall tau correlation between lvl and ltc.
level_and_ltc = df2[['lvl', 'ltc']]
level_and_ltc.corr(method="kendall")


Out[13]:
lvl ltc
lvl 1.000000 0.773618
ltc 0.773618 1.000000

In [ ]: