In [251]:
leagues=['NL1','PO1']
df1=nonloans[(nonloans['toCountry']!=nonloans['fromCountry'])&(nonloans['fromLeague'].isin(leagues))] \
.rename(columns={'fromLeague':'Origin'}).groupby(['Age','Origin','period'])['id'].count().unstack().unstack() \
.rolling(window=1,center=False).mean().rename(columns=leagueCodeMapping)
df2=nonloans[(nonloans['toCountry']!=nonloans['fromCountry'])&(nonloans['toLeague'].isin(leagues))] \
.rename(columns={'toLeague':'Destination'}).groupby(['Age','Destination','period'])['id'].count().unstack().unstack() \
.rolling(window=1,center=False).mean().rename(columns=leagueCodeMapping)
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(30, 16),sharex=True,sharey=True)
#df['(1995, 2000]'].plot(ax=axes[0,0], title="Age Distribution of Int'l Transfers, \nby Origin (1996-2000)", colormap='Set1', legend=False)
df1['(2000, 2005]'].plot(ax=axes[0,0], title="Age Distribution of Int'l Transfers, \nby Origin (2001-2005)", colormap='Set1', legend=True)
df1['(2005, 2010]'].plot(ax=axes[0,1], title="Age Distribution of Int'l Transfers, \nby Origin (2006-2010)", colormap='Set1', legend=True)
df1['(2010, 2015]'].plot(ax=axes[0,2], title="Age Distribution of Int'l Transfers, \nby Origin (2011-2015)", colormap='Set1', legend=True)
df2['(2000, 2005]'].plot(ax=axes[1,0], title="Age Distribution of Int'l Transfers, \nby Destination (2001-2005)", colormap='Set1', legend=True)
df2['(2005, 2010]'].plot(ax=axes[1,1], title="Age Distribution of Int'l Transfers, \nby Destination (2006-2010)", colormap='Set1', legend=True)
df2['(2010, 2015]'].plot(ax=axes[1,2], title="Age Distribution of Int'l Transfers, \nby Destination (2011-2015)", colormap='Set1', legend=True)
plt.subplots_adjust(wspace=0,hspace=.1)
fig.savefig('age_by_league.png')
In [128]:
loans[(loans['fromCountry']!=loans['toCountry'])&(loans['fromCountry'].isin(['England','Spain','Germany','Italy','France','Netherlands','Portugal','Russia','Turkey']))] \
.rename(columns={'fromCountry':'Country'}).groupby(['Age','Country'])['id'].count().unstack() \
.rolling(window=3,center=False).mean().plot.line(figsize=(9,6),title='Number of Foreign Loans, by Age & Loaning Country',colormap='Set1')
Out[128]:
In [12]:
loans[(loans['fromCountry']!=loans['toCountry'])&(loans['toCountry'].isin(['England','Spain','Germany','Italy','France','Netherlands','Portugal','Russia','Turkey']))] \
.rename(columns={'toCountry':'Country'}).groupby(['Age','Country'])['id'].count().unstack() \
.rolling(window=3,center=False).mean().plot.line(figsize=(9,6),title='Number of Foreign Loans, by Age & Receiving Country',colormap='Set1')
Out[12]:
In [11]:
nonloans[(nonloans['toCountry']!=nonloans['fromCountry'])&(nonloans['toLeague'].isin(['GB1','ES1','L1','IT1','FR1','NL1','PO1']))&(nonloans['season']>=2010)] \
.rename(columns={'toLeague':'Destination'}).groupby(['Age','Destination'])['id'].count().unstack() \
.rolling(window=3,center=False).mean().rename(columns=leagueCodeMapping).plot.line(figsize=(9,6),xlim=(15,45),title='Number of International Transfers, by Age & Destination')
Out[11]:
In [9]:
t=transfers
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(12, 8),sharex=True,sharey=True)
isLoan=t.join(pd.Series(np.where(t['isLoan'],'Loan','Transfer')).rename('Move Type')).groupby(['Age','Move Type','period'])['id'].count().unstack().unstack()
foreignVdomestic =t.join(pd.Series(np.where(t['fromCountry']==t['toCountry'],'Domestic','International')).rename('Move Type'))\
.groupby(['Age','Move Type','period'])['id'].count().unstack().unstack()
isLoan['(2000, 2005]'].plot(ax=axes[0,0],title='2001-2005')
isLoan['(2005, 2010]'].plot(ax=axes[0,1],title='2006-2010')
isLoan['(2010, 2015]'].plot(ax=axes[0,2],title='2011-2015')
foreignVdomestic['(2000, 2005]'].plot(ax=axes[1,0])
foreignVdomestic['(2005, 2010]'].plot(ax=axes[1,1])
foreignVdomestic['(2010, 2015]'].plot(ax=axes[1,2])
plt.subplots_adjust(wspace=0,hspace=0)
fig.savefig('loan_intl.png')
In [6]:
t=transfers[transfers['season'].between(2001,2015)]
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 4),sharex=True,sharey=False)
t.groupby('Age')['id'].count().plot(ax=axes[0],title=u'# of transfers\nby age')
t.groupby('Age')['feeValue'].sum().plot(ax=axes[2], title=u'total declared transfer fees (€)\nby age')
t.groupby('Age')['mv'].sum().plot(ax=axes[1],title=u'total market value of transfers (€)\nby age')
plt.subplots_adjust(wspace=.1,hspace=0)
fig.savefig('age_curves.png')
In [5]:
t[t['feeValue']>0].groupby('Age')
Out[5]:
In [4]:
t=transfers[transfers['fromCountry']!=transfers['toCountry']]
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(5, 4),sharex=True,sharey=False)
t[t['feeValue']==0].groupby('age')['id'].count().plot(ax=axes)
t[t['feeValue']>0].groupby('age')['id'].count().plot(ax=axes)
plt.subplots_adjust(wspace=.1,hspace=0)
fig.savefig('age_curves_paid_free.png')
In [6]:
t[t['feeValue']>0]['age'].describe()
Out[6]:
In [7]:
t[t['feeValue']>0]['age'].describe()
Out[7]:
In [8]:
t=transfers
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18, 6),sharex=True,sharey=True)
g=t.groupby(['age','positionGroup','period'])['id'].count().unstack().unstack() #.plot(ax=axes)
g['(2000, 2005]'].plot(ax=axes[0],title='2001-2005')
g['(2005, 2010]'].plot(ax=axes[1],title='2006-2010')
g['(2010, 2015]'].plot(ax=axes[2],title='2011-2015')
plt.subplots_adjust(wspace=0,hspace=0)
fig.savefig('loan_intl.png')
In [32]:
nonloans[(nonloans.feeValue>0)&(nonloans.season>=2000)].groupby(['season','positionGroup'])['feeValue'].mean().unstack().plot.line(figsize=(20,10))
Out[32]:
In [55]:
t=nonloans
t[(t.feeValue>0)&(t.season>=2000)&(t.positionGroup.isin(['Midfield','d']))].groupby(['season'])['feeValue'].mean().rolling(window=1,center=False).mean().plot.line(figsize=(10,5))
Out[55]:
In [280]:
t=nonloans
x=t[(t.toLeague.isin(leagueList))].groupby(['season','toCountry'])['feeValue'].count().unstack().rolling(window=1,center=False).mean()#.plot.line(figsize=(20,20))
y=t[(t.fromLeague.isin(leagueList))].groupby(['season','fromCountry'])['feeValue'].count().unstack().rolling(window=1,center=False).mean()#.plot.line(figsize=(20,20))
z=x.join(y,lsuffix='_import',rsuffix='_export').sort_index(axis=1)
for country in x.columns:
z[country+'_net']=z[country+'_import']-z[country+'_export']
z[country+'_total']=z[country+'_import']+z[country+'_export']
z=z.sort_index(axis=1)
zt=z.T
w=zt.reset_index()['index'].str.split('_',expand=True)
w.index=zt.index
w.columns=['country','flow']
zt=zt.join(w)
zt=zt.fillna(0).groupby(['country','flow']).sum()
zt.reset_index(level=0).groupby('country').apply(lambda x: x.ix[['import','export','net','total']]).drop('country',axis=1).unstack().to_excel('../data/output/import_export.xlsx')
ztr=zt.stack().unstack(level=0).swaplevel().reset_index().groupby(['season','flow']).apply(lambda x:x.T.drop(['season','flow']).rename(columns=lambda x:'number').sort_values('number',ascending=False).reset_index()).stack().unstack(level=[0,1,3])
ztr.to_excel('../data/output/import_export_ranked.xlsx')
In [277]:
zt.stack().unstack(level=0).swaplevel().reset_index().groupby(['season','flow']).apply(lambda x:x.T.drop(['season','flow']).rename(columns=lambda x:'number').sort_values('number',ascending=False).reset_index()).stack().unstack(level=[0,1,3])
Out[277]:
In [166]:
zt.reset_index(level=0)
Out[166]:
In [140]:
z.T.index
Out[140]:
In [121]:
Out[121]:
In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
from urllib2 import urlopen, Request
from bs4 import BeautifulSoup
from pandas import DataFrame
import pandas as pd
import numpy as np
import re
import os.path
import datetime
import math
import matplotlib.pyplot as plt
transfers = pd.read_csv('../data/merged/transfers.csv')
loans = pd.read_csv('../data/merged/loans.csv')
nonloans = pd.read_csv('../data/merged/nonloans.csv')
players=pd.read_csv('../data/merged/players.csv')
leagueCodeMapping = {'GB1':'Premier League (England)','FR1':'Ligue 1 (France)',
'ES1':'La Liga (Spain)','IT1':'Serie A (Italy)','L1':'Bundesliga (Germany)',
'PO1':'Primeira Liga (Portugal)','NL1':'Eredivisie (Netherlands)'}
teamIdMap = pd.concat([transfers[['toTeamId','toTeamName','season']].rename(columns={'toTeamId':'teamId','toTeamName':'teamName'}),
transfers[['fromTeamId','fromTeamName','season']].rename(columns={'fromTeamId':'teamId','fromTeamName':'teamName'})],ignore_index=True) \
.sort_values('season',ascending=False).drop_duplicates('teamId').set_index('teamId')['teamName'].to_dict()
agentIdMap = transfers[['playersAgentId','playersAgent']].sort_values('playersAgent',ascending=False).drop_duplicates('playersAgentId').set_index('playersAgentId')['playersAgent'].to_dict()
leagueList = ['GB1','ES1','L1','IT1','FR1','PO1','NL1','TR1','RU1','BE1','MLS1','UKR1','GR1','RO1','ZYP1','KR1','SER1','BOS1','BU1','DK1','SE1','NO1','LI1','SC1','ISR1','PL1','UNG1','A1','C1','MAL1','KAS1','TS1','SLO1','SL1','FI1','LUX1','AZ1']