In [64]:
import pandas as pd
from pycountry_convert import country_alpha2_to_continent_code
import matplotlib.pyplot as pl
import matplotlib.ticker as ticker
import seaborn as sns
In [97]:
# Load the publication coordinates table.
df = pd.read_csv("all-coordinates.csv")

# Re-label country code 'VA' as 'IT' before the continent lookup
# (presumably because the lookup cannot resolve 'VA' -- TODO confirm).
# A single .loc assignment is used on purpose: the chained form
# df[col][mask] = value may write into a temporary copy
# (SettingWithCopyWarning) and does nothing under pandas Copy-on-Write.
df.loc[df['country_short'] == 'VA', 'country_short'] = 'IT'

# Derive a continent code for every row from its two-letter country code.
df['continent'] = df.country_short.apply(country_alpha2_to_continent_code)

# Persist the annotated table next to the input file.
df.to_csv("kepler-publication-coordinates.csv")
In [89]:
# Keep only astrophysics rows whose year lies strictly between 2009 and 2019.
in_year_window = df['year'].gt(2009) & df['year'].lt(2019)
is_astrophysics = df['science'].eq('astrophysics')
df = df.loc[in_year_window & is_astrophysics]
In [90]:
# Count rows with a non-null bibcode for every (year, continent) pair.
by_year_and_continent = df.groupby(['year', 'continent'])
summ = by_year_and_continent['bibcode'].count()
summ
Out[90]:
In [91]:
# Express each (year, continent) count as a percentage of that year's total.
yearly_totals = df.groupby('year').count()['bibcode']
norm = 100 * summ / yearly_totals

# Build the full year x continent grid from the index levels that are present
# and insert zeros for combinations that had no publications.
year_level, continent_level = norm.index.levels
full_grid = pd.MultiIndex.from_product([year_level, continent_level])
norm = norm.reindex(full_grid, fill_value=0)
norm
Out[91]:
In [95]:
# Legend text and line thickness for each continent code.
labels = {'NA': 'North America', 'EU': 'Europe', 'AS': 'Asia',
          'AF': 'Africa', 'SA': 'South America', 'OC': 'Oceania'}
width = {'NA': 4, 'EU': 3, 'AS': 2, 'SA': 1, 'OC': 1, 'AF': 1}

fig = pl.figure(figsize=(7.5, 3.), dpi=300)
ax = fig.add_subplot(111)

# Minimal look: no tick marks and no frame around the axes.
ax.tick_params(length=0)
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(False)

# Light horizontal guide lines every 20%; they extend past the visible x-range.
ax.hlines(range(0, 101, 20), xmin=2000, xmax=2030, color='#dddddd', lw=1)

# One line per continent: x is the year level, y is the percentage share.
for continent in ['NA', 'EU', 'AS', 'SA', 'OC', 'AF']:
    share = norm.xs(continent, level=1)
    ax.plot(share.index.values,
            share.values,
            label=labels[continent],
            linewidth=width[continent])

ax.set_xlim([2009.5, 2018])
ax.set_ylim([-5, 100])
# Render y tick values as whole-number percentages.
ax.yaxis.set_major_formatter(ticker.FormatStrFormatter("%d%%"))
ax.set_xticks(range(2010, 2019, 1))
ax.set_yticks(range(0, 101, 20))
# Anchor the legend just outside the right edge of the axes.
ax.legend(loc='upper left', bbox_to_anchor=(1.05, 1), ncol=1)
ax.set_ylabel("Fraction of publications", size=12)
fig.tight_layout()
fig.savefig("astrophysics-publications-by-continent.png")
In [ ]: