In [1]:
from io import StringIO

import numpy
import pandas as pd
import requests
import seaborn as sns
from matplotlib import pyplot
from matplotlib.ticker import MaxNLocator
%matplotlib inline
sns.set(style='ticks')
In [18]:
#response = requests.get('http://secret-url', stream=True)
In [3]:
# Parse the JSON body of `response` into a DataFrame. `response` comes from the
# requests.get call that is commented out in this copy of the notebook, so it
# has to exist in the kernel before this cell runs.
# The decoded text is wrapped in StringIO because passing a literal JSON string
# straight to pd.read_json is deprecated in recent pandas releases.
df = pd.read_json(StringIO(response.content.decode('utf-8')))
In [4]:
# Number of rows in the frame as loaded.
len(df)
Out[4]:
In [5]:
# Keep only the rows whose 'format' value is 'iron'.
is_iron = df['format'].isin(['iron'])
df = df[is_iron]
In [6]:
# Number of rows currently in the frame.
len(df)
Out[6]:
In [7]:
# Narrow the frame to the date, place and country columns.
kept_columns = ['date', 'place', 'country']
df = df[kept_columns]
In [8]:
# Preview the first five rows.
df.head(n=5)
Out[8]:
In [9]:
# Add the derived columns in one step: the calendar year of each event, plus
# copies of 'place' and 'country' under the names 'Ironman' and 'Country'.
# `.assign` returns a new frame instead of writing into a filtered slice, and
# the vectorised `.dt.year` replaces the row-by-row `apply`.
# pd.to_datetime leaves an already-datetime column unchanged.
df = df.assign(
    Year=pd.to_datetime(df['date']).dt.year,
    Ironman=df['place'],
    Country=df['country'],
)
In [10]:
# Restrict the data to rows with Year 2005 or later.
from_2005 = df['Year'].ge(2005)
df = df[from_2005]
In [11]:
# Spot-check one randomly drawn row (no random_state is passed, so the row
# shown can differ between runs).
df.sample(n=1)
Out[11]:
In [12]:
# Keep only the three derived columns.
derived_columns = ['Year', 'Ironman', 'Country']
df = df[derived_columns]
In [13]:
# Readable replacements for the run-together place identifiers in 'Ironman'.
# Values are reproduced exactly as in the original cell, including casing.
ironman_labels = {
    'subicbayphilippines': 'Philippines',
    'mardelplata': 'Mar del plata',
    'emiliaromagna': 'Emilia romagna',
    'santarosa': 'Santa rosa',
    'northcarolina': 'North carolina',
    'laketahoe': 'Lake Tahoe',
    'loscabos': 'Los Cabos',
    'monttremblant': 'Mont tremblant',
    'coeurdalene': 'Coeur d alene',
    'westernaustralia': 'Western Australia',
    'southafrica': 'south africa',
    'lakeplacid': 'lake placid',
    'newzealand': 'New zealand',
    'vitoriagasteiz': 'Vitoria Gasteiz',
    'worldchampionship': 'Kona',
}

# Series.replace with a dict swaps exact matches and leaves every other value
# untouched. Building the column with .assign avoids the chained
# `df.Ironman[mask] = ...` form, which writes through a temporary Series:
# it raises SettingWithCopyWarning and does not update `df` at all when
# pandas copy-on-write is enabled.
df = df.assign(Ironman=df['Ironman'].replace(ironman_labels))
In [14]:
# Build the final label: title-cased place followed by the country in parentheses.
# Note: re-running this cell appends the country suffix again.
titled_place = df['Ironman'].str.title()
country_suffix = ' (' + df['Country'] + ')'
df['Ironman'] = titled_place + country_suffix
In [15]:
# Show the first row to check the formatted label.
df.head(n=1)
Out[15]:
In [16]:
# Strip plot of the events: Year on the x axis, one row per Ironman label.
a4_dims = (5, 9)  # figure size in inches (width, height)
fig, ax = pyplot.subplots(figsize=a4_dims)

# One palette entry per distinct label instead of a hard-coded 100, so the
# tick-colouring loop below cannot run past the end of the palette. The
# "deep" palette cycles when more colours are requested than it defines.
pal = sns.color_palette("deep", df['Ironman'].nunique())
sns.stripplot(x="Year", y="Ironman", ax=ax, data=df, palette=pal);
ax.invert_yaxis()
# force ticks being displayed as integer values
ax.xaxis.set_major_locator(MaxNLocator(integer=True))

# Colour each y tick label with the palette entry at the same position; this
# is intended to match the colour of that label's markers. A plain loop is
# used so the cell does not emit a list of None values as its output.
for i, label in enumerate(ax.get_yticklabels()):
    label.set_color(pal[i])
Out[16]:
In [17]:
# count the number of ironman events per year
for y in range(2005, 2020):
    # Summing the boolean mask counts matching rows without building a
    # filtered frame; years with no events print 0.
    ironmans_per_year = int((df['Year'] == y).sum())
    print('%d: %d ironmen' % (y, ironmans_per_year))