In [92]:
%pylab inline
from pandas.io import gbq
import networkx as nx
In [90]:
# Tunable thresholds for the country co-movement query below.
MIN_DAILY_EVENTS = 30   # keep a (country, SQLDATE) row only if it has more events than this
MIN_SHARED_DAYS = 100   # keep a country pair only if it shares more SQLDATE rows than this
MIN_CORRELATION = 0.85  # keep a country pair only if its correlation exceeds this

# Event count per (Actor1CountryCode, SQLDATE), restricted to rows with more
# than MIN_DAILY_EVENTS events.  Defined once because the main query joins this
# result against itself; the original repeated the text twice.
daily_counts_sql = """SELECT Actor1CountryCode, SQLDATE, COUNT(*) c
FROM [gdelt-bq:full.events]
GROUP EACH BY 1,2
HAVING c > {min_daily_events}""".format(min_daily_events=MIN_DAILY_EVENTS)

# Self-join the per-date counts on SQLDATE and, for every country pair, compute
# CORR() of the two count series over the dates both have in common.
#   * `a.Actor1CountryCode > b.Actor1CountryCode` keeps each unordered pair
#     once and drops self-pairs.
#   * Pairs are kept only with more than MIN_SHARED_DAYS joined rows and a
#     correlation above MIN_CORRELATION; strongest correlations come first.
# The SQL uses BigQuery legacy syntax ([project:dataset.table], GROUP EACH BY,
# JOIN EACH).  Later cells read the result columns as `a_Actor1CountryCode`,
# `b_Actor1CountryCode` and `corr`.
query = """
SELECT CORR(a.c,b.c) corr, a.Actor1CountryCode, b.Actor1CountryCode, COUNT(*) count
FROM
({daily_counts}
) a
JOIN EACH (
{daily_counts}
) b
ON a.SQLDATE = b.SQLDATE
WHERE a.Actor1CountryCode > b.Actor1CountryCode
GROUP EACH BY 2, 3
HAVING count > {min_shared_days}
AND corr > {min_correlation}
ORDER BY 1 DESC
""".format(
    daily_counts=daily_counts_sql,
    min_shared_days=MIN_SHARED_DAYS,
    min_correlation=MIN_CORRELATION,
)

# Run the query on BigQuery and load the result into a DataFrame.
countries = gbq.read_gbq(query)
In [91]:
# Build an unweighted graph with one edge per correlated country pair.
# `.values` replaces `DataFrame.get_values()`, which was deprecated in
# pandas 0.25 and removed in 1.0; it returns the same ndarray of rows.
country_pairs = countries[[u'a_Actor1CountryCode', u'b_Actor1CountryCode']].values
G = nx.from_edgelist(country_pairs)
nx.draw(G)
In [80]:
# Edge table for inspection: the two country-code columns plus their
# correlation, in that order.  The bare `x` on the last line displays it.
x = countries[[
    u'a_Actor1CountryCode',
    u'b_Actor1CountryCode',
    'corr',
]]
x
Out[80]:
In [94]:
# Build a weighted graph: each row is (country_a, country_b, corr), and the
# correlation is stored as the edge weight.
# `.values` replaces `DataFrame.get_values()`, which was deprecated in
# pandas 0.25 and removed in 1.0; it returns the same ndarray of rows.
weighted_pairs = countries[[u'a_Actor1CountryCode', u'b_Actor1CountryCode', 'corr']].values
G = nx.Graph()
G.add_weighted_edges_from(weighted_pairs)
nx.draw_spring(G)