In [92]:
%pylab inline
from pandas.io import gbq
import networkx as nx


Populating the interactive namespace from numpy and matplotlib

In [90]:
query = """
SELECT CORR(a.c,b.c) corr, a.Actor1CountryCode, b.Actor1CountryCode, COUNT(*) count
FROM
(SELECT Actor1CountryCode, SQLDATE, COUNT(*) c
FROM [gdelt-bq:full.events] 
GROUP EACH BY 1,2
HAVING c > 30
) a
JOIN EACH (
SELECT Actor1CountryCode, SQLDATE, COUNT(*) c
FROM [gdelt-bq:full.events] 
GROUP EACH BY 1,2
HAVING c > 30
) b
ON a.SQLDATE = b.SQLDATE
WHERE a.Actor1CountryCode > b.Actor1CountryCode
GROUP EACH BY 2, 3
HAVING count > 100
AND corr > 0.85
ORDER BY 1 DESC
"""
countries = gbq.read_gbq(query)


Waiting on bqjob_r33880feb8302103a_000001462948da54_13 ... (4s) Current status: DONE   

In [91]:
G = nx.from_edgelist(countries[[u'a_Actor1CountryCode', u'b_Actor1CountryCode']].get_values())
nx.draw(G)



In [80]:
x = countries[[u'a_Actor1CountryCode', u'b_Actor1CountryCode', 'corr']]
x


Out[80]:
a_Actor1CountryCode b_Actor1CountryCode corr
0 USA CAN 0.951479
1 USA GBR 0.947686
2 GBR AUS 0.941443
3 GBR CAN 0.941280
4 CAN AUS 0.938083
5 USA AUS 0.933343
6 GBR CHN 0.931016
7 GBR DEU 0.924600
8 ZAF AFR 0.920514
9 USA CHN 0.918271
10 GBR FRA 0.917056
11 GBR EUR 0.914234
12 USA IND 0.911673
13 CHN CAN 0.910226
14 CHN AUS 0.908378
15 GBR AFR 0.906980
16 EUR DEU 0.906792
17 USA DEU 0.906668
18 EUR CAN 0.906349
19 USA EUR 0.904783
20 USA FRA 0.904513

21 rows × 3 columns


In [94]:
G = nx.Graph()
G.add_weighted_edges_from(countries[[u'a_Actor1CountryCode', u'b_Actor1CountryCode', 'corr']].get_values())
nx.draw_spring(G)