Percolation


In [5]:
import numpy, networkx, pandas

# import graph_tool
# from graph_tool.all import *

# from matplotlib import pyplot

# %matplotlib inline

In [2]:
# simple parallelization

# import multiprocessing
# cpus = multiprocessing.cpu_count()
# pool = multiprocessing.Pool(processes=cpus)
# pool.map(...)
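
A hypothetical sketch of the parallelization hinted at above. Pool.map needs a picklable, module-level function, so the nested analyzeSingleGraph closures defined below cannot be handed to it directly; a worker that receives all of its inputs explicitly works instead. The worker name, the Erdős–Rényi test graph and the step count here are illustrative, not part of the analysis.

In [ ]:
# hypothetical sketch: parallel per-step analysis with multiprocessing.Pool
# (assumes networkx 1.x, where connected_component_subgraphs still exists)
import multiprocessing

def analyzeRemoval(args):
    # args = (graph, orderedNodes, index): relative giant-cluster size
    # after removing the first `index` nodes of `orderedNodes`
    graph, orderedNodes, index = args
    newGraph = graph.copy()
    newGraph.remove_nodes_from(orderedNodes[0:index])
    try:
        giantCluster = max(networkx.connected_component_subgraphs(newGraph),
                           key=networkx.number_of_nodes)
        return numpy.true_divide(networkx.number_of_nodes(giantCluster),
                                 networkx.number_of_nodes(newGraph))
    except ValueError:  # max() on an empty sequence: every node was removed
        return 0

testGraph = networkx.erdos_renyi_graph(200, 0.05)
testNodes = numpy.random.permutation(testGraph.nodes())
testIndices = numpy.linspace(0, 200, num=11, dtype='int')

pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
relativeSizes = pool.map(analyzeRemoval,
                         [(testGraph, testNodes, i) for i in testIndices])
pool.close()
pool.join()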

Random failure


In [12]:
def randomFailure(graph, steps=101):
    """Remove nodes in random order, tracking the giant cluster at each step."""
    initialGraph = graph
    initialGraphSize = networkx.number_of_nodes(initialGraph)
    numbersOfNodesToRemove = numpy.linspace(0, initialGraphSize, num=steps, dtype='int')
    initialNodes = initialGraph.nodes()
    randomizedNodes = numpy.random.permutation(initialNodes)
    
    def analyzeSingleGraph(index):
        # TODO: check whether list comprehensions can easily be parallelized;
        # they read much better than map()
        newGraph = initialGraph.copy()
        newGraph.remove_nodes_from(randomizedNodes[0:index])
        newGraphSize = networkx.number_of_nodes(newGraph)
        subgraphs = networkx.connected_component_subgraphs(newGraph)
        try:
            giantCluster = max(subgraphs, key=networkx.number_of_nodes)
            giantClusterSize = networkx.number_of_nodes(giantCluster)
            relativeGiantClusterSize = numpy.true_divide(giantClusterSize, newGraphSize)
            diameter = networkx.diameter(giantCluster)
        except ValueError:  # max() on an empty sequence: every node has been removed
            relativeGiantClusterSize = 0
            diameter = 0
        return relativeGiantClusterSize, diameter
    
    # TODO: parallelize this map
    failureResults = map(analyzeSingleGraph, numbersOfNodesToRemove)
    failureDataframe = pandas.DataFrame(failureResults, columns=['relativeGiantClusterSize', 'diameter'])
    ascisse = numpy.linspace(0, 100, num=steps, dtype='int')  # percentage of removed nodes
    failureDataframe['percentuale'] = ascisse
    
    return failureDataframe
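
A quick sanity check of randomFailure on a small synthetic graph (a hypothetical cell, not part of the provider analysis): the relative giant-cluster size should stay near 1 while few nodes are removed and fall towards 0 as the removed fraction grows.

In [ ]:
# hypothetical smoke test on an Erdos-Renyi graph
testGraph = networkx.erdos_renyi_graph(100, 0.05)
testResults = randomFailure(testGraph, steps=11)
print testResults[['percentuale', 'relativeGiantClusterSize', 'diameter']]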

Intentional attack


In [13]:
def intentionalAttack(graph, steps=101):
    """Remove nodes in decreasing order of initial degree, tracking the giant cluster."""
    initialGraph = graph
    initialGraphSize = networkx.number_of_nodes(initialGraph)
    numbersOfNodesToRemove = numpy.linspace(0, initialGraphSize, num=steps, dtype='int')
    
    # sort nodes by decreasing initial degree: hubs are removed first
    initialDegrees = initialGraph.degree()
    degreeDataframe = pandas.DataFrame(initialDegrees.items(), columns=['ID', 'degree'])
    degreeDataframe.sort_values(by='degree', ascending=False, inplace=True) # TODO: check whether one of the two columns can be dropped
    sortedNodes = degreeDataframe['ID'].values # TODO: use degreeDataframe.ID
    
    def analyzeSingleGraph(number):
        # TODO: check whether list comprehensions can easily be parallelized;
        # they read much better than map()
        newGraph = initialGraph.copy()
        newGraph.remove_nodes_from(sortedNodes[0:number]) # TODO: look for a faster ordering
        newGraphSize = networkx.number_of_nodes(newGraph)
        subgraphs = networkx.connected_component_subgraphs(newGraph)
        try:
            giantCluster = max(subgraphs, key=networkx.number_of_nodes)
            giantClusterSize = networkx.number_of_nodes(giantCluster)
            relativeGiantClusterSize = numpy.true_divide(giantClusterSize, newGraphSize)
            diameter = networkx.diameter(giantCluster)
        except ValueError:  # max() on an empty sequence: every node has been removed
            relativeGiantClusterSize = 0
            diameter = 0
        return relativeGiantClusterSize, diameter
    
    # TODO: parallelize this map
    attackResults = map(analyzeSingleGraph, numbersOfNodesToRemove)
    attackDataframe = pandas.DataFrame(attackResults, columns=['relativeGiantClusterSize', 'diameter'])
    ascisse = numpy.linspace(0, 100, num=steps, dtype='int')  # percentage of removed nodes
    attackDataframe['percentuale'] = ascisse
    
    return attackDataframe
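
The two functions differ only in the removal order, which is exactly what the classic robustness results hinge on: scale-free networks tolerate random failures well but are fragile against degree-targeted attacks. A hypothetical comparison on a synthetic Barabási–Albert graph (not one of the provider networks) makes this visible:

In [ ]:
# hypothetical comparison: random failure vs. hub-targeted attack
# on a synthetic scale-free graph
scaleFreeGraph = networkx.barabasi_albert_graph(200, 2)
failureCurve = randomFailure(scaleFreeGraph, steps=11)
attackCurve = intentionalAttack(scaleFreeGraph, steps=11)
# the attack curve should collapse much earlier than the failure curve
print failureCurve['relativeGiantClusterSize'].values
print attackCurve['relativeGiantClusterSize'].values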

In [14]:
# providers to analyze and the colors used when plotting their curves
#gestori = ["Tim", "Vodafone", "Wind", "Tre", "Roma"]
#colori = ['#004184','#ff3300','#ff8000','#018ECC', '#4d4d4d']

gestori = ["Tim", "Vodafone", "Wind", "Tre"]
colori = ['#004184','#ff3300','#ff8000','#018ECC']

#gestori = ["Tim", "Tre"]
#colori = ['#004184','#018ECC']

In [15]:
# data reading, calculations, data writing

# TODO: parallelize across providers
for provider in gestori:
    
    # read the Euclidean adjacency matrix and build the provider graph
    adjacencyMatrix = numpy.genfromtxt("../data/graphs/adiacenzaEuclidea_{0}.csv".format(provider),
                                       delimiter=',',
                                       dtype='int')
    providerGraph = networkx.Graph(adjacencyMatrix)
    
    # calculate results
    print provider, "random failure:"
    %time failureResults = randomFailure(providerGraph, steps=101) # default: steps=101
    print provider, "intentional attack:"
    %time attackResults = intentionalAttack(providerGraph, steps=101)
    
    # write on file
    failureResults.to_csv('../data/percolation/randomFailure_{0}.csv'.format(provider), index=False)
    attackResults.to_csv('../data/percolation/intentionalAttack_{0}.csv'.format(provider), index=False)


Tim random failure:
CPU times: user 12min 18s, sys: 6.36 s, total: 12min 25s
Wall time: 12min 18s
Tim intentional attack:
CPU times: user 4min, sys: 2.35 s, total: 4min 2s
Wall time: 4min
Vodafone random failure:
CPU times: user 9min 30s, sys: 5.64 s, total: 9min 36s
Wall time: 9min 30s
Vodafone intentional attack:
CPU times: user 3min 15s, sys: 1.71 s, total: 3min 16s
Wall time: 3min 15s
Wind random failure:
CPU times: user 37min 15s, sys: 21.9 s, total: 37min 37s
Wall time: 37min 15s
Wind intentional attack:
CPU times: user 10min 19s, sys: 5.12 s, total: 10min 24s
Wall time: 10min 19s
Tre random failure:
CPU times: user 6min 18s, sys: 3.06 s, total: 6min 22s
Wall time: 6min 19s
Tre intentional attack:
CPU times: user 2min 4s, sys: 816 ms, total: 2min 5s
Wall time: 2min 4s
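
A possible way to visualize the curves saved above (a sketch: it assumes the CSV files were written by the previous cell and that matplotlib is installed; its import is commented out at the top of the notebook).

In [ ]:
# sketch: plot the random-failure curves of all providers
from matplotlib import pyplot

figure, axes = pyplot.subplots()
for provider, colore in zip(gestori, colori):
    frame = pandas.read_csv('../data/percolation/randomFailure_{0}.csv'.format(provider))
    axes.plot(frame['percentuale'], frame['relativeGiantClusterSize'],
              color=colore, label=provider)
axes.set_xlabel('removed nodes (%)')
axes.set_ylabel('relative giant cluster size')
axes.legend()
pyplot.show()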
