Attacco e failure con Graph-tool

Importo librerie e definisco funzioni fondamentali


In [2]:
from graph_tool.all import *
import matplotlib
from matplotlib import pyplot
import numpy
import pandas
import math
#import seaborn


# Network providers to analyse and the hex colour assigned to each for plotting.
# NOTE(review): "Roma" is presumably the aggregate city-wide network — confirm.
gestore = ["Tim", "Vodafone", "Wind", "Tre", "Roma"]
colori = ['#004184','#ff3300','#ff8000','#018ECC', '#4d4d4d']

In [3]:
# Build a graph-tool graph from an adjacency matrix
def conversione(grafo, adiacenza):
    """Populate `grafo` from the adjacency matrix `adiacenza`.

    Adds one vertex per matrix row and one undirected edge (i, j) for every
    non-zero entry in the strictly upper triangle, so each pair is added once
    (same pairs as the original i < j double loop).

    Parameters
    ----------
    grafo : graph_tool.Graph
        Graph to fill in place (expected empty and undirected).
    adiacenza : numpy.ndarray
        Square matrix; any non-zero entry marks an edge.
    """
    grafo.add_vertex(len(adiacenza))
    # Vectorised edge extraction: the original per-element Python double loop
    # was O(n^2) interpreter work; numpy.nonzero on the strict upper triangle
    # yields the same (i, j) pairs with i < j in one shot.
    righe, colonne = numpy.nonzero(numpy.triu(adiacenza, k=1))
    grafo.add_edge_list(numpy.column_stack((righe, colonne)))
                
def costruzione(gestore, cartella="/home/protoss/Documenti/Siscomp_datas/data"):
    """Build the network of one provider from its adjacency CSV and save it.

    Parameters
    ----------
    gestore : str
        Provider name; locates "AdiacenzaEuclidea_<gestore>.csv".
    cartella : str
        Data directory holding the input CSV. The graph is also saved there,
        so that the later load_graph(".../GTool<gestore>.xml") calls find it.
        (Bug fix: the original saved "GTool<gestore>.xml" to the current
        working directory, while every loader reads from this data directory.)
    """
    grafo = graph_tool.Graph(directed=False)
    adiacenza = numpy.genfromtxt(
        "{0}/AdiacenzaEuclidea_{1}.csv".format(cartella, gestore),
        delimiter=',', dtype='int')
    conversione(grafo, adiacenza)
    grafo.save("{0}/GTool{1}.xml".format(cartella, gestore))
    #graph_tool.draw.graph_draw(grafoTre)

In [4]:
# Topological observables
def averageLength(grafo):
    """Mean shortest-path length of `grafo`, via its distance histogram."""
    conteggi, bins = graph_tool.stats.distance_histogram(grafo)
    # distance_histogram returns one more bin edge than counts: drop the
    # trailing edge so the arrays line up, then take the count-weighted mean.
    bins = bins[:-1]
    return numpy.average(bins, weights=conteggi)

def diameter(grafo):
    """Graph diameter: the largest distance present in the histogram."""
    conteggi = graph_tool.stats.distance_histogram(grafo)[0]
    return len(conteggi) - 1

def clustering(grafo):
    """Average local clustering coefficient over all vertices of `grafo`."""
    locale = graph_tool.clustering.local_clustering(grafo)
    return numpy.average(numpy.asarray(locale.a))

Cicli input/output grafi


In [5]:
# Run only the first time, to build and save each provider's network;
# the saved .xml files are reloaded by all later cells.
for compagnia in gestore:
    %time costruzione(compagnia)


CPU times: user 13.1 s, sys: 96 ms, total: 13.2 s
Wall time: 13.2 s
CPU times: user 11.6 s, sys: 64 ms, total: 11.7 s
Wall time: 11.5 s
CPU times: user 26.4 s, sys: 252 ms, total: 26.6 s
Wall time: 26.2 s
CPU times: user 7.04 s, sys: 68 ms, total: 7.1 s
Wall time: 7 s
CPU times: user 3min 40s, sys: 1.68 s, total: 3min 41s
Wall time: 3min 40s

Analisi preliminare delle reti


In [6]:
def topologia(compagnia):
    """Load the saved network of `compagnia` and compute its initial topology.

    Returns
    -------
    dict
        Provider name ('Rete'), relative giant-component size ('GC %'),
        diameter ('D'), average path length ('<l>'), clustering ('C'),
        mean degree ('<k>'), percolation criterion ('<k^2>/<k>') and the
        critical removal fraction ('f').

    Bug fix: the original appended every result to lists that were *local*
    to this function, so all computed values were silently discarded and the
    next cell relied on hidden kernel state. Returning the results makes the
    computation usable on a fresh kernel.
    """
    grafo = load_graph("/home/protoss/Documenti/Siscomp_datas/data/GTool{0}.xml".format(compagnia))

    graphSize = grafo.num_vertices()
    giantCluster = graph_tool.topology.label_largest_component(grafo)
    giantCluster = graph_tool.GraphView(grafo, vfilt=giantCluster)

    # Degree sequence as a real numpy array: the original numpy.mean(map(...))
    # breaks under Python 3, where map() returns a one-shot iterator.
    listaGradi = numpy.array([grafo.vertex(v).out_degree()
                              for v in range(graphSize)], dtype=float)
    kMedio = numpy.mean(listaGradi)
    kQuadroMedio = numpy.mean(numpy.power(listaGradi, 2))
    criterion = kQuadroMedio / kMedio  # Molloy-Reed ratio <k^2>/<k>

    return {
        'Rete': compagnia,
        'GC %': giantCluster.num_vertices() / float(graphSize),
        'D': diameter(grafo),
        '<l>': averageLength(grafo),
        'C': clustering(grafo),
        '<k>': kMedio,
        '<k^2>/<k>': criterion,
        # Critical removal fraction f_c = 1 - 1/(k - 1), with k = <k^2>/<k>.
        'f': 1 - (1 / (criterion - 1)),
    }

In [15]:
# Load each network and run the initial topological study.
# NOTE(review): topologia() stores its results in lists that are *local* to
# the function, so the names used below (azienda, relSizeGC, diametro, ...)
# are NOT defined by this cell on a fresh kernel — this relies on hidden
# state and raises NameError under Restart & Run All. TODO: have topologia()
# return its results and build the DataFrame from them.
for compagnia in gestore:
    %time topologia(compagnia)

datiInitial = pandas.DataFrame()
datiInitial['Rete'] = azienda
datiInitial['GC %'] = relSizeGC
datiInitial['D'] = diametro
datiInitial['<l>'] = cammino
datiInitial['C'] = cluster
datiInitial['<k>'] = gradomedio
datiInitial['<k^2>/<k>'] = criterio
datiInitial['f'] = fcritica

datiInitial.to_csv("/home/protoss/Documenti/SistemiComplessi/data/Iuri/DatiIniziali.csv")


CPU times: user 6.14 s, sys: 24 ms, total: 6.17 s
Wall time: 1.01 s
CPU times: user 5.18 s, sys: 28 ms, total: 5.2 s
Wall time: 827 ms
CPU times: user 16.9 s, sys: 252 ms, total: 17.1 s
Wall time: 2.61 s
CPU times: user 2.8 s, sys: 16 ms, total: 2.81 s
Wall time: 524 ms
CPU times: user 8min 56s, sys: 3.49 s, total: 8min 59s
Wall time: 1min 15s

In [19]:
datiInitial


Out[19]:
Rete GC % D <l> C <k> <k^2>/<k> f
0 Tim 1 2 1.963367 0.894802 64.291572 358.738414 0.997205
1 Vodafone 1 2 1.970122 0.872207 52.884246 243.253262 0.995872
2 Wind 1 2 1.958821 0.887913 97.346300 477.743154 0.997902
3 Tre 1 2 1.970320 0.880245 41.373477 239.418636 0.995806
4 Roma 1 2 1.967178 0.889167 239.142034 1353.163123 0.999260

Simulazione attacco, andamento grandezze topologiche in funzione dei nodi rimossi


In [8]:
#PERCENT ATTACK!!  (targeted attack: remove highest-degree vertices first)
def attackPercent(compagnia, steps):
    """Simulate a degree-targeted attack on the network of `compagnia`.

    Each of `steps` rounds removes the `passo = N // steps` highest-degree
    vertices; after every round the topological observables are appended to
    the module-level lists (ascisse, aziendaFinal, diametro, cammino,
    cluster, relSizeGC, gradomedio, criterio) created by the driver cells.

    Fixes over the original:
    - DataFrame.sort() (long deprecated, removed from pandas) -> sort_values()
    - explicit floor division for `passo` and the percentage step, keeping the
      Python 2 integer-division behaviour under Python 3
    - degree list built as a real list (a map() object is single-use and
      breaks numpy.mean under Python 3)
    """
    grafoFinal = load_graph("/home/protoss/Documenti/Siscomp_datas/data/GTool{0}.xml".format(compagnia))

    def gradiCorrenti():
        # Degree of every surviving vertex (out_degree == total degree for an
        # undirected graph).
        return [grafoFinal.vertex(v).out_degree()
                for v in range(grafoFinal.num_vertices())]

    def registra(gradi):
        # Append <k> and the Molloy-Reed ratio <k^2>/<k> for this round.
        gradi = numpy.asarray(gradi, dtype=float)
        gradomedio.append(numpy.mean(gradi))
        criterio.append(numpy.mean(gradi ** 2) / numpy.mean(gradi))

    graphSize = grafoFinal.num_vertices()
    passo = graphSize // steps  # vertices removed per round

    i = 0
    ascisse.append(i)
    aziendaFinal.append(compagnia)
    diametro.append(diameter(grafoFinal))
    cammino.append(averageLength(grafoFinal))
    cluster.append(clustering(grafoFinal))

    listaGradi = gradiCorrenti()
    registra(listaGradi)
    relSizeGC.append(1)

    while grafoFinal.num_vertices() > passo:
        # Rank the surviving vertices by degree, highest first.
        gradiFinal = pandas.DataFrame(listaGradi, columns=['grado'])
        gradiFinal = gradiFinal.reset_index()
        gradiFinal = gradiFinal.sort_values(['grado'], ascending=False)
        daRimuovere = gradiFinal['index'].values[:passo]
        # NOTE(review): batch remove_vertex with an index array — confirm the
        # installed graph-tool version handles index shifting for array input.
        grafoFinal.remove_vertex(daRimuovere)

        giantCluster = graph_tool.topology.label_largest_component(grafoFinal)
        giantCluster = graph_tool.GraphView(grafoFinal, vfilt=giantCluster)
        grafoFinal = Graph(grafoFinal, prune=True)

        graphSize = graphSize - passo
        i += 100 // steps
        ascisse.append(i)
        aziendaFinal.append(compagnia)

        diametro.append(diameter(grafoFinal))
        cammino.append(averageLength(grafoFinal))
        cluster.append(clustering(grafoFinal))
        relSizeGC.append(giantCluster.num_vertices() / float(graphSize))

        listaGradi = gradiCorrenti()
        registra(listaGradi)

#PERCENT FAILURE!!  (random failures: remove uniformly chosen vertices)
def failurePercent(compagnia, steps):
    """Simulate random node failures on the network of `compagnia`.

    Each of `steps` rounds removes `passo = N // steps` vertices chosen
    uniformly at random (no degree bias, unlike attackPercent). After every
    round the observables are appended to the module-level lists (ascisse,
    aziendaFinal, diametro, cammino, cluster, relSizeGC, gradomedio,
    criterio) created by the driver cells.

    Fixes over the original:
    - explicit floor division, keeping Python 2 integer-division behaviour
    - degree list built as a real list (map() breaks numpy.mean in Python 3)
    - the throwaway DataFrame used only to fabricate an index column is
      replaced by a direct numpy.random.permutation of the vertex range
    NOTE(review): no RNG seed is set anywhere, so runs are not reproducible.
    """
    grafoFinal = load_graph("/home/protoss/Documenti/Siscomp_datas/data/GTool{0}.xml".format(compagnia))

    def gradiCorrenti():
        # Degree of every surviving vertex.
        return [grafoFinal.vertex(v).out_degree()
                for v in range(grafoFinal.num_vertices())]

    def registra(gradi):
        # Append <k> and the Molloy-Reed ratio <k^2>/<k> for this round.
        gradi = numpy.asarray(gradi, dtype=float)
        gradomedio.append(numpy.mean(gradi))
        criterio.append(numpy.mean(gradi ** 2) / numpy.mean(gradi))

    graphSize = grafoFinal.num_vertices()
    passo = graphSize // steps  # vertices removed per round

    i = 0
    ascisse.append(i)
    aziendaFinal.append(compagnia)
    diametro.append(diameter(grafoFinal))
    cammino.append(averageLength(grafoFinal))
    cluster.append(clustering(grafoFinal))

    registra(gradiCorrenti())
    relSizeGC.append(1)

    while grafoFinal.num_vertices() > passo:
        # Pick `passo` distinct vertices uniformly at random.
        daRimuovere = numpy.random.permutation(grafoFinal.num_vertices())[:passo]
        grafoFinal.remove_vertex(daRimuovere)

        giantCluster = graph_tool.topology.label_largest_component(grafoFinal)
        giantCluster = graph_tool.GraphView(grafoFinal, vfilt=giantCluster)
        grafoFinal = Graph(grafoFinal, prune=True)

        graphSize = graphSize - passo
        i += 100 // steps
        ascisse.append(i)
        aziendaFinal.append(compagnia)

        diametro.append(diameter(grafoFinal))
        cammino.append(averageLength(grafoFinal))
        cluster.append(clustering(grafoFinal))
        relSizeGC.append(giantCluster.num_vertices() / float(graphSize))

        registra(gradiCorrenti())

In [17]:
# Targeted-attack run. "Roma" is excluded here — presumably because it is by
# far the largest network (cf. the multi-hour timings elsewhere); confirm.
gestore = ["Tim", "Vodafone", "Wind", "Tre"]
#gestore = ["Roma"]
#compagnia = "Tre"

# Module-level accumulators filled in by attackPercent(): one entry per
# removal step per provider.
indice = []
diametro = []
cammino = []
cluster = []
gradomedio = []
criterio = []
relSizeGC = []

aziendaFinal = []
ascisse = []

for compagnia in gestore:
    %time attackPercent(compagnia, 100)

#attackPercent(compagnia, 100)

# Assemble one long-format table: one row per (provider, removed-percentage).
datiFinal = pandas.DataFrame()
datiFinal['percent'] = ascisse

datiFinal['Provider'] = aziendaFinal
datiFinal['diameter'] = diametro
datiFinal['average path length'] = cammino
datiFinal['clustering'] = cluster
datiFinal['average degree'] = gradomedio
datiFinal['soglia percolativa'] = criterio

datiFinal['GCsize'] = relSizeGC
#datiFinal.to_csv("/home/protoss/Documenti/SistemiComplessi/data/Iuri/GtoolAttackDataForSeaborn.csv")
datiFinal.head()


CPU times: user 1min 3s, sys: 260 ms, total: 1min 3s
Wall time: 14.4 s
CPU times: user 1min 3s, sys: 88 ms, total: 1min 3s
Wall time: 14.2 s
CPU times: user 2min 24s, sys: 2.6 s, total: 2min 27s
Wall time: 27.4 s
CPU times: user 32 s, sys: 116 ms, total: 32.2 s
Wall time: 9.01 s
Out[17]:
percent Provider diameter average path length clustering average degree soglia percolativa GCsize
0 0 Tim 2 1.963367 0.894802 64.291572 358.738414 1.000000
1 1 Tim 4 2.349932 0.861790 40.995975 156.940162 1.000000
2 2 Tim 7 3.215234 0.824935 30.469222 86.327819 0.996516
3 3 Tim 9 3.873727 0.800481 25.241056 64.123710 0.989443
4 4 Tim 11 4.269235 0.788518 21.501185 49.697912 0.985782

In [8]:
# Random-failure run over all five networks (including "Roma").
gestore = ["Tim", "Vodafone", "Wind", "Tre", "Roma"]
#gestore = ["Tim", "Vodafone", "Wind", "Tre"]
#gestore = "Roma"
# Module-level accumulators filled in by failurePercent(): one entry per
# removal step per provider.
diametro = []
cammino = []
cluster = []
gradomedio = []
criterio = []
relSizeGC = []

aziendaFinal = []
ascisse = []

for compagnia in gestore:
    %time failurePercent(compagnia, 100)

#attackPercent(gestore, 50)

# Assemble one long-format table and persist it for later plotting.
datiFinal = pandas.DataFrame()
datiFinal['percent'] = ascisse

datiFinal['Provider'] = aziendaFinal
datiFinal['diameter'] = diametro
datiFinal['average path length'] = cammino
datiFinal['clustering'] = cluster
datiFinal['average degree'] = gradomedio
datiFinal['soglia percolativa'] = criterio

datiFinal['GCsize'] = relSizeGC
datiFinal.to_csv("/home/protoss/Documenti/SistemiComplessi/data/Iuri/GtoolFailureDataForSeaborn.csv")
datiFinal.head()


CPU times: user 4min 37s, sys: 1.47 s, total: 4min 38s
Wall time: 48.1 s
CPU times: user 3min 51s, sys: 1.2 s, total: 3min 53s
Wall time: 40.8 s
CPU times: user 11min 29s, sys: 6.98 s, total: 11min 36s
Wall time: 1min 51s
CPU times: user 2min 17s, sys: 632 ms, total: 2min 18s
Wall time: 26.3 s
CPU times: user 4h 20s, sys: 2min 11s, total: 4h 2min 31s
Wall time: 33min 45s
Out[8]:
percent Provider diameter average path length clustering average degree soglia percolativa GCsize
0 0 Tim 2 1.963367 0.894802 64.291572 358.738414 1
1 1 Tim 2 1.963895 0.895117 62.749856 354.308847 1
2 2 Tim 2 1.963644 0.895267 62.569106 352.398315 1
3 3 Tim 2 1.963417 0.895500 62.336657 349.722856 1
4 4 Tim 2 1.963149 0.895582 62.168246 347.416905 1