In [1]:
import networkx as nx

In [2]:
from prettytable import PrettyTable

The data is a network of political blogs that link to each other. Most of these blogs' links are now dead.

Lada A. Adamic and Natalie Glance, "The political blogosphere and the 2004 US Election", in Proceedings of the WWW-2005 Workshop on the Weblogging Ecosystem (2005)

First we will load the data and look at some of its properties

In [3]:
G = nx.read_gml("polblogs.gml")
The label is the website and the value is 0 or 1 depending on whether the political blog is left or right leaning

In [4]:
G.node[345]


Out[4]:
{'id': 345,
 'label': u'lawdork.blogspot.com',
 'source': u'LeftyDirectory,eTalkingHead',
 'value': 0}

In [5]:
nx.is_directed(G)


Out[5]:
True

In [6]:
G.number_of_edges()


Out[6]:
19090

In [7]:
G.number_of_nodes()


Out[7]:
1490

In [8]:
nx.is_strongly_connected(G)


Out[8]:
False

In [9]:
nx.is_weakly_connected(G)


Out[9]:
False
We will only concern ourselves with the largest weakly connected component

In [10]:
i=nx.weakly_connected_component_subgraphs(G)

In [11]:
l = max(i, key = len)

In [12]:
l.number_of_nodes()


Out[12]:
1222

In [13]:
nx.is_weakly_connected(l)


Out[13]:
True

First we will look at the centrality of all the nodes in the graph

Degree Centrality


In [14]:
deg=nx.degree(G)

In [15]:
deg_cent=nx.degree_centrality(G)

In [16]:
# Rank nodes by degree centrality, highest first; ties are broken by ascending node id.
# Uses items() and index access instead of iteritems() and a tuple-parameter lambda,
# which only exist in Python 2.
deg_sort=sorted(deg_cent.items(),key=lambda kv:(-kv[1],kv[0]))

In [17]:
deg_sort[0:9]


Out[17]:
[(855, 0.3143049026192075),
 (155, 0.25789120214909333),
 (1051, 0.24378777703156482),
 (55, 0.23572867696440566),
 (641, 0.1900604432505037),
 (729, 0.17192746809939558),
 (963, 0.16453995970449967),
 (1245, 0.1584956346541303),
 (1153, 0.15379449294828745)]

Betweenness Centrality


In [18]:
bet_cent=nx.betweenness_centrality(G)

In [19]:
# Rank nodes by betweenness centrality, highest first; ties are broken by ascending node id.
# Uses items() and index access instead of iteritems() and a tuple-parameter lambda,
# which only exist in Python 2.
bet_sort=sorted(bet_cent.items(),key=lambda kv:(-kv[1],kv[0]))

In [20]:
bet_sort[0:9]


Out[20]:
[(855, 0.09860123355546516),
 (55, 0.041065409701381644),
 (1051, 0.034423597988754114),
 (155, 0.024815500156320013),
 (454, 0.020714412538725437),
 (387, 0.020319988222406524),
 (1479, 0.018325573772945088),
 (1101, 0.016309365690314936),
 (1041, 0.015458192296913366)]

Closeness Centrality


In [21]:
clo_cent=nx.closeness_centrality(G)

In [22]:
# Rank nodes by closeness centrality, highest first; ties are broken by ascending node id.
# Uses items() and index access instead of iteritems() and a tuple-parameter lambda,
# which only exist in Python 2.
clo_sort=sorted(clo_cent.items(),key=lambda kv:(-kv[1],kv[0]))

In [23]:
clo_sort[0:9]


Out[23]:
[(855, 0.2707203175398935),
 (880, 0.26707623163293015),
 (387, 0.26523353232024066),
 (935, 0.26523353232024066),
 (927, 0.2635289466369486),
 (856, 0.25996473434092904),
 (1427, 0.2583269892694826),
 (1351, 0.25596194816922097),
 (467, 0.25521849022848053)]

Eigenvector Centrality - we first need to convert the multigraph to a simple (undirected) graph


In [ ]:
G2=nx.Graph(G)

In [46]:
eig_cent = nx.eigenvector_centrality(G2)
# Rank nodes by eigenvector centrality, highest first; ties are broken by ascending node id.
# Uses items() and index access instead of iteritems() and a tuple-parameter lambda,
# which only exist in Python 2.
eig_sort = sorted(eig_cent.items(), key = lambda kv: (-kv[1], kv[0]))
eig_sort[0:9]


Out[46]:
[(155, 0.1642162196125852),
 (55, 0.16052664009981785),
 (641, 0.14928415558560892),
 (729, 0.1396364639017293),
 (363, 0.11899412475351646),
 (180, 0.1178756132221919),
 (1051, 0.11336486178426823),
 (99, 0.11121661701967456),
 (493, 0.10814306309694945)]

Like the book, we shall look at a table of the blogs for the top measures


In [47]:
# Node ids of the ten highest-ranked entries under each centrality ranking
# (degree, betweenness, closeness, eigenvector, in that order).
names1=[x[0] for x in deg_sort[:10]]
names2=[x[0] for x in bet_sort[:10]]
names3=[x[0] for x in clo_sort[:10]]
names4=[x[0] for x in eig_sort[:10]]
# Union of the four top-ten lists: every node that is in the top ten of at least one measure.
# The list order is whatever the resulting set iterates in.
names=list(set(names1)|set(names2)|set(names3)|set(names4))

In [48]:
# One row per selected node: id, label, value, raw degree, then the four
# centrality scores rounded to four decimal places.
table=[
    [
        name,
        G.node[name]['label'],
        G.node[name]['value'],
        deg[name],
        round(deg_cent[name],4),
        round(bet_cent[name],4),
        round(clo_cent[name],4),
        round(eig_cent[name],4),
    ]
    for name in names
]

In [49]:
table=sorted(table,key=lambda x: -x[3])

In [50]:
# Render the collected rows as a text table, one PrettyTable row per node.
t = PrettyTable(['ID','Link','Value','Degree','Degree Cent','Betweenness','Closeness','EigenCent'])
# Iterate over the rows directly instead of indexing, so the notebook-level name `i` is not rebound.
for row in table:
    t.add_row(row)

In [51]:
print(t)


+------+-----------------------------------+-------+--------+-------------+-------------+-----------+-----------+
|  ID  |                Link               | Value | Degree | Degree Cent | Betweenness | Closeness | EigenCent |
+------+-----------------------------------+-------+--------+-------------+-------------+-----------+-----------+
| 855  |          blogsforbush.com         |   1   |  468   |    0.3143   |    0.0986   |   0.2707  |   0.0518  |
| 155  |            dailykos.com           |   0   |  384   |    0.2579   |    0.0248   |   0.2037  |   0.1642  |
| 1051 |          instapundit.com          |   1   |  363   |    0.2438   |    0.0344   |   0.242   |   0.1134  |
|  55  |        atrios.blogspot.com        |   0   |  351   |    0.2357   |    0.0411   |   0.2368  |   0.1605  |
| 641  |       talkingpointsmemo.com       |   0   |  283   |    0.1901   |    0.0055   |   0.1875  |   0.1493  |
| 729  |       washingtonmonthly.com       |   0   |  256   |    0.1719   |    0.0147   |   0.2196  |   0.1396  |
| 963  |          drudgereport.com         |   1   |  245   |    0.1645   |    0.0086   |   0.1362  |   0.0531  |
| 1245 |         powerlineblog.com         |   1   |  236   |    0.1585   |    0.0046   |   0.1962  |   0.0743  |
| 1153 |         michellemalkin.com        |   1   |  229   |    0.1538   |    0.0142   |   0.2311  |   0.0683  |
| 1041 |           hughhewitt.com          |   1   |  225   |    0.1511   |    0.0155   |   0.2369  |   0.068   |
| 1479 |          wizbangblog.com          |   1   |  220   |    0.1478   |    0.0183   |   0.2439  |   0.0584  |
| 1101 |         lashawnbarber.com         |   1   |  219   |    0.1471   |    0.0163   |   0.2457  |   0.0557  |
| 363  |          liberaloasis.com         |   0   |  216   |    0.1451   |    0.0143   |   0.2235  |   0.119   |
| 180  |      digbysblog.blogspot.com      |   0   |  189   |    0.1269   |    0.0136   |   0.2298  |   0.1179  |
|  99  |      bodyandsoul.typepad.com      |   0   |  182   |    0.1222   |    0.0064   |   0.242   |   0.1112  |
| 454  |     newleftblogs.blogspot.com     |   0   |  179   |    0.1202   |    0.0207   |   0.2469  |   0.0955  |
| 493  |            pandagon.net           |   0   |  171   |    0.1148   |    0.0037   |   0.2199  |   0.1081  |
| 387  |      madkane.com/notable.html     |   0   |  170   |    0.1142   |    0.0203   |   0.2652  |   0.1025  |
| 642  |            talkleft.com           |   0   |  162   |    0.1088   |    0.0049   |   0.2143  |   0.1074  |
| 880  |         cayankee.blogs.com        |   1   |  142   |    0.0954   |    0.0061   |   0.2671  |   0.047   |
| 856  |           blogsofwar.com          |   1   |  122   |    0.0819   |    0.0073   |    0.26   |   0.0521  |
| 935  |          dalythoughts.com         |   1   |  114   |    0.0766   |    0.0039   |   0.2652  |   0.0534  |
| 1427 |     thomasgalvin.blogspot.com     |   1   |   93   |    0.0625   |    0.0021   |   0.2583  |   0.0512  |
| 467  |     nomoremister.blogspot.com     |   0   |   89   |    0.0598   |    0.0128   |   0.2552  |   0.0735  |
| 765  | acertainslantoflight.blogspot.com |   1   |   78   |    0.0524   |    0.0012   |   0.2552  |   0.0475  |
| 927  |         cynicalnation.com         |   1   |   61   |    0.041    |    0.0033   |   0.2635  |   0.0363  |
| 1351 |            slowplay.com           |   1   |   59   |    0.0396   |    0.0011   |   0.256   |   0.0252  |
+------+-----------------------------------+-------+--------+-------------+-------------+-----------+-----------+

Now let us look at each of the left and right leaning groups

First we will create subgraphs for each of the groups


In [29]:
nodes0=[]
nodes1=[]

In [30]:
# Split the node ids by their 'value' attribute: 0 goes to nodes0, anything else to nodes1.
# Both lists are rebuilt from scratch here rather than appended to, so re-running
# this cell cannot accumulate duplicate ids.
# G.nodes(data=True) yields (node, attribute-dict) pairs in networkx 1.x and 2.x alike.
nodes0=[n for n,d in G.nodes(data=True) if d['value']==0]
nodes1=[n for n,d in G.nodes(data=True) if d['value']!=0]

In [31]:
G_node0 = G.subgraph(nodes0)
G_node1 = G.subgraph(nodes1)

In [32]:
G_node0.number_of_nodes()


Out[32]:
758

In [62]:
G_node0.number_of_edges()


Out[62]:
8408

In [33]:
G_node1.number_of_nodes()


Out[33]:
732

In [63]:
G_node1.number_of_edges()


Out[63]:
8994

In [52]:
def createTable(graph, top_n=10):
    """Build a PrettyTable comparing centrality measures for a graph's top nodes.

    Degree, betweenness and closeness centrality are computed on ``graph``
    itself; eigenvector centrality is computed on ``nx.Graph(graph)``.
    A node gets a row if it is among the first ``top_n`` entries of at least
    one of the four rankings. Rows are ordered by decreasing degree.

    Parameters
    ----------
    graph : networkx graph
        Every node must carry 'label' and 'value' attributes.
    top_n : int, optional
        How many leading entries of each ranking to include (default 10,
        the cut-off that was previously hard-coded).

    Returns
    -------
    PrettyTable
        Columns: ID, Link, Value, Degree, Degree Cent, Betweenness,
        Closeness, EigenCent. Centrality scores are rounded to 4 decimals.
    """
    def ranked(scores):
        # Highest score first; equal scores fall back to ascending node id.
        # items() + index access works on Python 2 and 3, unlike
        # iteritems() and a tuple-parameter lambda.
        return sorted(scores.items(), key=lambda kv: (-kv[1], kv[0]))

    deg = nx.degree(graph)
    deg_cent = nx.degree_centrality(graph)
    bet_cent = nx.betweenness_centrality(graph)
    clo_cent = nx.closeness_centrality(graph)
    # Eigenvector centrality is run on a plain nx.Graph built from the input.
    eig_cent = nx.eigenvector_centrality(nx.Graph(graph))

    names1 = [node for node, _ in ranked(deg_cent)[:top_n]]
    names2 = [node for node, _ in ranked(bet_cent)[:top_n]]
    names3 = [node for node, _ in ranked(clo_cent)[:top_n]]
    names4 = [node for node, _ in ranked(eig_cent)[:top_n]]
    # Union of the per-measure top lists, so each node appears once.
    names = list(set(names1) | set(names2) | set(names3) | set(names4))

    table = [
        [
            name,
            graph.node[name]['label'],
            graph.node[name]['value'],
            deg[name],
            round(deg_cent[name], 4),
            round(bet_cent[name], 4),
            round(clo_cent[name], 4),
            round(eig_cent[name], 4),
        ]
        for name in names
    ]
    # Column 3 is the raw degree; negate it to sort from highest to lowest.
    table = sorted(table, key=lambda row: -row[3])

    t = PrettyTable(['ID','Link','Value','Degree','Degree Cent','Betweenness','Closeness','EigenCent'])
    for row in table:
        t.add_row(row)
    return t

In [53]:
print(createTable(G_node0))


+-----+-----------------------------+-------+--------+-------------+-------------+-----------+-----------+
|  ID |             Link            | Value | Degree | Degree Cent | Betweenness | Closeness | EigenCent |
+-----+-----------------------------+-------+--------+-------------+-------------+-----------+-----------+
| 155 |         dailykos.com        |   0   |  338   |    0.4465   |    0.0386   |   0.2132  |   0.1769  |
|  55 |     atrios.blogspot.com     |   0   |  329   |    0.4346   |    0.064    |   0.2524  |   0.1786  |
| 641 |    talkingpointsmemo.com    |   0   |  242   |    0.3197   |    0.0083   |   0.1851  |   0.1538  |
| 363 |       liberaloasis.com      |   0   |  214   |    0.2827   |    0.0307   |   0.2609  |   0.1419  |
| 729 |    washingtonmonthly.com    |   0   |  205   |    0.2708   |    0.012    |   0.206   |   0.1374  |
| 180 |   digbysblog.blogspot.com   |   0   |  182   |    0.2404   |    0.0135   |   0.2462  |   0.1348  |
|  99 |   bodyandsoul.typepad.com   |   0   |  180   |    0.2378   |    0.0136   |   0.2588  |   0.1319  |
| 144 |    corrente.blogspot.com    |   0   |  177   |    0.2338   |    0.0155   |   0.2577  |   0.1252  |
| 454 |  newleftblogs.blogspot.com  |   0   |  174   |    0.2299   |    0.0379   |   0.2716  |   0.1111  |
| 493 |         pandagon.net        |   0   |  165   |    0.218    |    0.0056   |   0.213   |   0.1251  |
| 387 |   madkane.com/notable.html  |   0   |  157   |    0.2074   |    0.0327   |   0.2877  |   0.1149  |
| 512 |    politicalstrategy.org    |   0   |  147   |    0.1942   |    0.0139   |   0.2794  |   0.1239  |
| 202 |   elayneriggs.blogspot.com  |   0   |  133   |    0.1757   |    0.0051   |   0.2618  |   0.103   |
| 524 | presidentboxer.blogspot.com |   0   |  119   |    0.1572   |    0.0049   |   0.2804  |   0.0998  |
|  40 |    anoldsoul.blogspot.com   |   0   |  112   |    0.148    |    0.0064   |   0.2618  |   0.0977  |
| 568 |  robschumacher.blogspot.com |   0   |   83   |    0.1096   |    0.0215   |   0.2494  |   0.0514  |
| 612 |     snunes.blogspot.com     |   0   |   77   |    0.1017   |    0.0018   |   0.254   |   0.0785  |
+-----+-----------------------------+-------+--------+-------------+-------------+-----------+-----------+

In [54]:
print(createTable(G_node1))


+------+---------------------------------------+-------+--------+-------------+-------------+-----------+-----------+
|  ID  |                  Link                 | Value | Degree | Degree Cent | Betweenness | Closeness | EigenCent |
+------+---------------------------------------+-------+--------+-------------+-------------+-----------+-----------+
| 855  |            blogsforbush.com           |   1   |  463   |    0.6334   |    0.2088   |   0.4447  |   0.1457  |
| 1051 |            instapundit.com            |   1   |  297   |    0.4063   |    0.0237   |   0.2888  |   0.2003  |
| 1153 |           michellemalkin.com          |   1   |  215   |    0.2941   |    0.015    |   0.2778  |   0.1532  |
| 963  |            drudgereport.com           |   1   |  212   |     0.29    |    0.0008   |   0.0055  |   0.1077  |
| 1101 |           lashawnbarber.com           |   1   |  212   |     0.29    |    0.0353   |   0.3668  |   0.1385  |
| 1245 |           powerlineblog.com           |   1   |  210   |    0.2873   |    0.0065   |   0.2476  |   0.1565  |
| 1041 |             hughhewitt.com            |   1   |  209   |    0.2859   |    0.0253   |   0.3385  |   0.1529  |
| 1000 | gevkaffeegal.typepad.com/the_alliance |   1   |  209   |    0.2859   |    0.0343   |   0.3417  |   0.0659  |
| 1479 |            wizbangblog.com            |   1   |  198   |    0.2709   |    0.0214   |   0.3095  |   0.1355  |
| 1112 |    littlegreenfootballs.com/weblog    |   1   |  194   |    0.2654   |    0.0223   |   0.3063  |   0.1539  |
| 1437 |           truthlaidbear.com           |   1   |  159   |    0.2175   |    0.0101   |   0.3056  |   0.1194  |
| 1461 |            vodkapundit.com            |   1   |  150   |    0.2052   |    0.0072   |   0.2866  |   0.1286  |
| 980  |         evangelicaloutpost.com        |   1   |  148   |    0.2025   |    0.0197   |   0.3589  |   0.0816  |
| 880  |           cayankee.blogs.com          |   1   |  131   |    0.1792   |    0.0052   |   0.369   |   0.1117  |
| 892  |         chrenkoff.blogspot.com        |   1   |  115   |    0.1573   |    0.0134   |   0.3308  |   0.0914  |
| 1384 |        techievampire.net/wppol        |   1   |  111   |    0.1518   |    0.0036   |   0.3617  |   0.0967  |
| 935  |            dalythoughts.com           |   1   |   97   |    0.1327   |    0.0046   |   0.3486  |   0.0957  |
| 1135 |           martinipundit.com           |   1   |   93   |    0.1272   |    0.0046   |   0.3482  |   0.0935  |
| 900  |    commonsenserunswild.typepad.com    |   1   |   90   |    0.1231   |    0.0023   |   0.3571  |   0.0944  |
| 966  |             dummocrats.com            |   1   |   80   |    0.1094   |    0.0027   |   0.3449  |   0.0805  |
| 953  |      discerningtexan.blogspot.com     |   1   |   77   |    0.1053   |    0.0012   |   0.3512  |   0.0856  |
+------+---------------------------------------+-------+--------+-------------+-------------+-----------+-----------+

It seems that the right-leaning blogs have higher degree and closeness centrality among themselves

Two blogs stick out: ID 363 and ID 1000 have relatively high degree and centralities within their respective 0 and 1 subgraphs, but are not in the overall top measurements for the whole graph


In [55]:
def getDetails(G, deg, deg_cent, bet_cent, clo_cent, eig_cent, name):
    """Return a one-row PrettyTable with the measures of a single node.

    Parameters
    ----------
    G : graph whose ``node[name]`` mapping holds 'label' and 'value'.
    deg, deg_cent, bet_cent, clo_cent, eig_cent : mappings indexed by node id
        Precomputed degree and centrality scores to look ``name`` up in.
    name : node id to report on.

    Returns
    -------
    PrettyTable
        Columns: ID, Link, Value, Degree, Degree Cent, Betweenness,
        Closeness, EigenCent. Centrality scores are rounded to 4 decimals.
    """
    attrs = G.node[name]
    t = PrettyTable(['ID','Link','Value','Degree','Degree Cent','Betweenness','Closeness','EigenCent'])
    # There is always exactly one row, so no sorting or looping is needed.
    t.add_row([
        name,
        attrs['label'],
        attrs['value'],
        deg[name],
        round(deg_cent[name], 4),
        round(bet_cent[name], 4),
        round(clo_cent[name], 4),
        round(eig_cent[name], 4),
    ])
    return t

In [56]:
print(getDetails(G,deg,deg_cent,bet_cent,clo_cent,eig_cent,363))


+-----+------------------+-------+--------+-------------+-------------+-----------+-----------+
|  ID |       Link       | Value | Degree | Degree Cent | Betweenness | Closeness | EigenCent |
+-----+------------------+-------+--------+-------------+-------------+-----------+-----------+
| 363 | liberaloasis.com |   0   |  216   |    0.1451   |    0.0143   |   0.2235  |   0.119   |
+-----+------------------+-------+--------+-------------+-------------+-----------+-----------+

In [57]:
print(getDetails(G,deg,deg_cent,bet_cent,clo_cent,eig_cent,1000))


+------+---------------------------------------+-------+--------+-------------+-------------+-----------+-----------+
|  ID  |                  Link                 | Value | Degree | Degree Cent | Betweenness | Closeness | EigenCent |
+------+---------------------------------------+-------+--------+-------------+-------------+-----------+-----------+
| 1000 | gevkaffeegal.typepad.com/the_alliance |   1   |  212   |    0.1424   |    0.0142   |   0.2374  |   0.022   |
+------+---------------------------------------+-------+--------+-------------+-------------+-----------+-----------+

I'm interested to see the makeup of these nodes' neighbors


In [58]:
def neighbor_details(G, node):
    """Count the neighbors of ``node`` by their 'value' attribute.

    Returns a two-element list ``[count0, count1]``: the number of neighbors
    whose 'value' equals 0, followed by the number of all other neighbors.

    NOTE: the neighbors are whatever ``G.neighbors(node)`` yields. On a
    networkx directed graph that is the node's successors only, so the two
    counts can add up to less than the node's total degree.
    """
    values = [G.node[nbr]['value'] for nbr in G.neighbors(node)]
    zeros = sum(1 for v in values if v == 0)
    return [zeros, len(values) - zeros]

In [59]:
neighbor_details(G, 363)


Out[59]:
[115, 0]

In [60]:
neighbor_details(G, 1000)


Out[60]:
[2, 108]

Some t-tests:


In [90]:
from scipy import stats
import numpy as np

In [91]:
# Two-sample t-test on degree centrality: value-0 blogs (nodes0) against the rest (nodes1).
# Each score is wrapped in its own list, so both samples are column arrays and
# the statistic and p-value come back as length-1 arrays.
deg_cent_0=np.array([[deg_cent[name]] for name in nodes0])
deg_cent_1=np.array([[deg_cent[name]] for name in nodes1])
stats.ttest_ind(deg_cent_0,deg_cent_1)


Out[91]:
(array([-1.10924099]), array([ 0.26750547]))

In [92]:
# Two-sample t-test on betweenness centrality: value-0 blogs (nodes0) against the rest (nodes1).
# Each score is wrapped in its own list, so both samples are column arrays and
# the statistic and p-value come back as length-1 arrays.
bet_cent_0=np.array([[bet_cent[name]] for name in nodes0])
bet_cent_1=np.array([[bet_cent[name]] for name in nodes1])
stats.ttest_ind(bet_cent_0,bet_cent_1)


Out[92]:
(array([-0.05147238]), array([ 0.95895602]))

In [93]:
# Two-sample t-test on closeness centrality: value-0 blogs (nodes0) against the rest (nodes1).
# Each score is wrapped in its own list, so both samples are column arrays and
# the statistic and p-value come back as length-1 arrays.
clo_cent_0=np.array([[clo_cent[name]] for name in nodes0])
clo_cent_1=np.array([[clo_cent[name]] for name in nodes1])
stats.ttest_ind(clo_cent_0,clo_cent_1)


Out[93]:
(array([-5.30244343]), array([  1.31485194e-07]))

In [94]:
# Two-sample t-test on eigenvector centrality: value-0 blogs (nodes0) against the rest (nodes1).
# Each score is wrapped in its own list, so both samples are column arrays and
# the statistic and p-value come back as length-1 arrays.
eig_cent_0=np.array([[eig_cent[name]] for name in nodes0])
eig_cent_1=np.array([[eig_cent[name]] for name in nodes1])
stats.ttest_ind(eig_cent_0,eig_cent_1)


Out[94]:
(array([ 6.61865308]), array([  5.04504258e-11]))