In [10]:
%matplotlib inline
import copy
import matplotlib.colors
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
from matplotlib.patches import PathPatch
from mpl_toolkits.basemap import Basemap
import numpy as np
import matplotlib.pyplot as plt
import mpl_toolkits.basemap
reload(mpl_toolkits.basemap)
from mpl_toolkits.basemap import Basemap
import seaborn as sns
sns.set_style('white')
import geocoder
import pycountry
from pymendez import auth
from pysurvey.plot import setup_sns as setup, icolorbar
import pandas as pd
import numpy as np
import geoplotlib, geoplotlib.utils
In [2]:
# Credential looked up through pymendez's `auth` helper; it is passed as
# `key=` to geocoder.google() when the country names are geocoded below.
# NOTE(review): presumably ('google_geocode', 'key') selects the service entry
# and the field to return -- confirm against pymendez.
key = auth.auth('google_geocode', 'key')
In [5]:
# Load the address table of the offshore-leaks CSV dump, report how many rows
# it has, and show the first row as the cell output.
addresses_csv = '/Users/ajmendez/Desktop/panama/offshore_leaks_csvs-20160621/Addresses.csv'
addresses = pd.read_csv(addresses_csv)
print(len(addresses))
addresses.head(1)
Out[5]:
In [43]:
# Load the edge table of the same CSV dump, report its row count, and show the
# first row as the cell output.
edges_csv = '/Users/ajmendez/Desktop/panama/offshore_leaks_csvs-20160621/all_edges.csv'
inter = pd.read_csv(edges_csv)
print(len(inter))
inter.head(1)
Out[43]:
In [42]:
# Show the column labels of the edge table read from all_edges.csv.
inter.columns
Out[42]:
In [6]:
# One row per distinct, non-null country code seen in the address table.
# Only 'code' is populated here; the remaining columns start out empty and are
# filled in by the geocoding loop.
unique_codes = addresses['country_codes'].dropna().unique()
countries = pd.DataFrame(data={'code': unique_codes},
                         columns=['code', 'naddr', 'name', 'lat', 'lon'])
In [7]:
# Number of address rows recorded for each country code.
N = addresses['country_codes'].value_counts()

# Fill in name, coordinates and address count for every country.
# The enumerate() position is used as the row label, which works because
# `countries` was built with the default 0..n-1 index.
for row, code in enumerate(countries['code']):
    # 'XXX' rows are left empty.
    # NOTE(review): presumably a placeholder for "unknown country" -- confirm.
    if code != 'XXX':
        name = pycountry.countries.get(alpha3=code).name
        located = geocoder.google(name, key=key)
        countries.loc[row, 'name'] = name
        countries.loc[row, 'lon'] = located.lng
        countries.loc[row, 'lat'] = located.lat
        countries.loc[row, 'naddr'] = N[code]
In [8]:
# log10 of the per-country address count.
# The builtin `float` replaces `np.float`: the latter was only an alias of the
# builtin, deprecated in NumPy 1.20 and removed in 1.24, so the result is
# unchanged while the cell keeps working on current NumPy.
countries['logaddr'] = np.log10(np.array(countries['naddr'].astype(float)))
In [9]:
# World dot map: one marker per country, coloured by log10(address count).
# lon_0 is the central longitude of the projection.
# resolution = 'c' means use crude resolution coastlines.
plt.figure(figsize=(12,6))
m = Basemap(projection='robin', lon_0=0, resolution='c')
m.drawcoastlines()
m.fillcontinents(color='black', lake_color='steelblue')
# Optional graticule (parallels and meridians), currently disabled:
# m.drawparallels(np.arange(-90.,120.,30.))
# m.drawmeridians(np.arange(0.,420.,60.))
m.drawmapboundary(fill_color='steelblue')

# Project lon/lat into map coordinates.
x, y = m(np.array(countries['lon']), np.array(countries['lat']))

# Builtin `float` instead of the removed `np.float` alias (NumPy >= 1.24);
# the values are identical.
log_naddr = np.log10(np.array(countries['naddr'].astype(float)))
sca = m.scatter(x, y, c=log_naddr,
                s=50, zorder=20, lw=0, alpha=0.8,
                cmap=plt.cm.Reds)
# plt.title("Robinson Projection")
# plt.show()
In [11]:
# Semicolon-separated country table; its 'Country code' and 'Population'
# columns are used below to attach a population to each country.
# NOTE(review): with pandas' default NA handling a literal "NA" country code
# (Namibia's alpha-2 code) is parsed as missing -- confirm whether that matters.
population_csv = "/Users/ajmendez/Downloads/countries.csv"
pop = pd.read_csv(population_csv, sep=';')
pop.head()
Out[11]:
In [12]:
# Attach a population to every country; 0 marks "no population found".
countries['pop'] = 0
for code in countries['code']:
    if code != 'XXX':
        # The population table is keyed by two-letter codes, so translate the
        # three-letter code first.
        iso2 = pycountry.countries.get(alpha3=code).alpha2
        match = pop[pop['Country code'] == iso2]
        if len(match) > 0:
            countries.loc[countries['code'] == code, 'pop'] = match['Population'].values[0]
In [13]:
# Countries for which no population was found (pop is still the 0 sentinel).
# Nothing is removed here; the later histogram / ranking / map cells simply
# filter on pop > 0.
countries[countries['pop'] == 0]
Out[13]:
In [14]:
# Addresses per inhabitant; the +1 keeps the division defined for rows whose
# population is still the 0 sentinel.
addr_per_capita = countries['naddr'] / (1 + countries['pop'])
countries['ratio'] = addr_per_capita.map(np.log10)
# log10 of the population itself.
countries['logpop'] = countries['pop'].map(np.log10)
In [15]:
# Histogram of the log address-per-capita ratio, restricted to countries with
# a known population.
has_population = countries['pop'] > 0
plt.hist(countries.loc[has_population, 'ratio'].values)
plt.xlabel('Log Ratio')
Out[15]:
In [16]:
# The 15 countries with the highest 'ratio' (log10 of addresses per
# (1 + population)), among rows with a known population.
countries[countries['pop']>0].sort_values('ratio', ascending=False).head(15)
Out[16]:
In [17]:
# Choropleth of 'ratio' (log10 addresses per capita) on a Robinson projection.
# lon_0 is the central longitude of the projection.
# resolution = 'c' means use crude resolution coastlines.
fig = plt.figure(figsize=(18,9))
ax = fig.add_subplot(111)
m = Basemap(projection='robin', lon_0=0, resolution='c')
m.drawmapboundary(fill_color='steelblue')
# m.fillcontinents(color='black', lake_color='steelblue')

# Load the country polygons without drawing them; the loop below reads them
# back from m.countries / m.countries_info.
shapefile = '/Users/ajmendez/Downloads/TM_WORLD_BORDERS-0.3/TM_WORLD_BORDERS-0.3'
m.readshapefile(shapefile, 'countries', color=(1,1,1,0.5), linewidth=0.2, drawbounds=False)

cmap = copy.copy(plt.cm.Greys)
cmap.set_bad((1,0,0, 0.5))  # colour used for masked values
# Colour scale limits; vmin is also the cut-off below which a country is
# drawn as "no data".
vmin, vmax = -6, -3
norm = matplotlib.colors.Normalize(vmin, vmax, clip=True)

# Build the ISO3 -> ratio lookup once instead of re-filtering the whole
# `countries` frame for every polygon in the shapefile. Codes come from
# .unique(), so each one maps to a single row.
shaded = countries[(countries['ratio'] > vmin) & (countries['pop'] > 0)]
ratio_by_code = dict(zip(shaded['code'], shaded['ratio']))

patches = []    # polygons of countries that have a usable ratio
colors = []     # their normalised ratio, in [0, 1]
patches2 = []   # polygons of countries without data
for shape, info in zip(m.countries, m.countries_info):
    # `closed` is passed by keyword: newer matplotlib no longer accepts it
    # positionally.
    poly = Polygon(np.array(shape), closed=True)
    ratio = ratio_by_code.get(info['ISO3'])
    if ratio is None:
        patches2.append(poly)
        continue
    patches.append(poly)
    colors.append(norm(ratio))

# Countries without data: faint translucent white.
pc = PatchCollection(patches2, facecolors=(1,1,1,0.2), linewidths=0, zorder=2)
ax.add_collection(pc)

# Countries with data: grey scale. The values are already normalised, hence
# the fixed (0, 1) colour limits.
pc = PatchCollection(patches, cmap=cmap, linewidths=0, zorder=2)
pc.set_array(np.array(colors))
pc.set_clim(0,1)
ax.add_collection(pc)
None
In [21]:
# GDP table (file name: API_NY.GDP.MKTP.CD); lines starting with '#' are
# skipped as comments. Its 'Country Code' and '2011' columns are used below.
gdp_csv = "/Users/ajmendez/tmp/world/API_NY.GDP.MKTP.CD_DS2_en_csv_v2.csv"
gdp = pd.read_csv(gdp_csv, comment='#')
gdp.head()
Out[21]:
In [24]:
# Attach the 2011 GDP to every country; 0 marks "no GDP found".
# The column starts as float because the values assigned below are floats.
countries['gdp'] = 0.0
for code in countries['code']:
    if code == 'XXX':
        continue
    tmp = gdp[gdp['Country Code'] == code]
    if len(tmp) == 0:
        continue
    # Take the value from the matched row. Assigning the whole gdp['2011']
    # Series here would be aligned by row label, handing each country the GDP
    # of whichever table row happens to share its integer index.
    value = tmp['2011'].values[0]
    if pd.isnull(value):
        # No 2011 figure for this country: keep the 0 sentinel so it shows up
        # in the "missing GDP" listing.
        continue
    countries.loc[countries['code'] == code, 'gdp'] = value
In [27]:
# Countries whose 'gdp' is still the 0 sentinel, i.e. no GDP value was attached.
countries[countries['gdp'] == 0]
Out[27]:
In [47]:
# log10 of the raw address count and of the GDP.
for log_col, raw_col in (('lognaddr', 'naddr'), ('loggdp', 'gdp')):
    countries[log_col] = countries[raw_col].map(np.log10)

# Addresses per unit of GDP; the +1 keeps the division defined for rows whose
# GDP is still the 0 sentinel.
addr_per_gdp = countries['naddr'] / (1 + countries['gdp'])
countries['gdpratio'] = addr_per_gdp.map(np.log10)
In [35]:
# Histogram of 'gdpratio' (log10 of addresses per (1 + GDP)) for countries
# with a positive GDP value.
plt.hist(countries[countries['gdp'] > 0]['gdpratio'].values)
Out[35]:
In [37]:
# The 15 countries with the highest 'gdpratio', among rows with a positive GDP.
countries[countries['gdp']>0].sort_values('gdpratio', ascending=False).head(15)
Out[37]:
In [39]:
# Choropleth of 'gdpratio' (log10 addresses per unit of GDP) on a Robinson
# projection.
# lon_0 is the central longitude of the projection.
# resolution = 'c' means use crude resolution coastlines.
fig = plt.figure(figsize=(18,9))
ax = fig.add_subplot(111)
m = Basemap(projection='robin', lon_0=0, resolution='c')
m.drawmapboundary(fill_color='steelblue')
# m.fillcontinents(color='black', lake_color='steelblue')

# Load the country polygons without drawing them; the loop below reads them
# back from m.countries / m.countries_info.
shapefile = '/Users/ajmendez/Downloads/TM_WORLD_BORDERS-0.3/TM_WORLD_BORDERS-0.3'
m.readshapefile(shapefile, 'countries', color=(1,1,1,0.5), linewidth=0.2, drawbounds=False)

cmap = copy.copy(plt.cm.Greys)
cmap.set_bad((1,0,0, 0.5))  # colour used for masked values
# Colour scale limits; vmin is also the cut-off below which a country is
# drawn as "no data".
vmin, vmax = -14, -5
norm = matplotlib.colors.Normalize(vmin, vmax, clip=True)

# Build the ISO3 -> gdpratio lookup once instead of re-filtering the whole
# `countries` frame for every polygon in the shapefile. Codes come from
# .unique(), so each one maps to a single row.
shaded = countries[(countries['gdpratio'] > vmin) & (countries['gdp'] > 0)]
ratio_by_code = dict(zip(shaded['code'], shaded['gdpratio']))

patches = []    # polygons of countries that have a usable ratio
colors = []     # their normalised ratio, in [0, 1]
patches2 = []   # polygons of countries without data
for shape, info in zip(m.countries, m.countries_info):
    # `closed` is passed by keyword: newer matplotlib no longer accepts it
    # positionally.
    poly = Polygon(np.array(shape), closed=True)
    ratio = ratio_by_code.get(info['ISO3'])
    if ratio is None:
        patches2.append(poly)
        continue
    patches.append(poly)
    colors.append(norm(ratio))

# Countries without data: faint translucent white.
pc = PatchCollection(patches2, facecolors=(1,1,1,0.2), linewidths=0, zorder=2)
ax.add_collection(pc)

# Countries with data: grey scale. The values are already normalised, hence
# the fixed (0, 1) colour limits.
pc = PatchCollection(patches, cmap=cmap, linewidths=0, zorder=2)
pc.set_array(np.array(colors))
pc.set_clim(0,1)
ax.add_collection(pc)
None
In [49]:
# Scatter plot of log10 address count (x axis) against log10 population
# (y axis), one point per country.
# NOTE(review): rows whose population is still the 0 sentinel have
# logpop = -inf -- confirm whether they should be filtered out first.
countries.plot('lognaddr', 'logpop', kind='scatter')
Out[49]:
In [ ]: