In [22]:
big5leagues=['GB1','FR1','L1','IT1','ES1']
big5countries=['England','France','Germany','Italy','Spain']
t=nonloans[nonloans.season>2000]
t[(t.toLeague.isin(['RO1']))&(t.fromLeague!=t.toLeague)&(t.age<23)].drop_duplicates('id').groupby('season')['age'].describe() \
.unstack().drop(['count','std','min','max'],axis=1).plot.line(figsize=(10,10))
Out[22]:
In [37]:
big5leagues=['GB1','FR1','L1','IT1','ES1']
big5countries=['England','France','Germany','Italy','Spain']
leagues=big5leagues
tier3=countryInfo[countryInfo.marketTier==3].index
tier2=countryInfo[countryInfo.marketTier==2].index
tier1=countryInfo[countryInfo.marketTier==1].index
#leagues=['RO1']
t=nonloans[nonloans.season>2000]
t
under=t[(t.toLeague.isin(leagueList))&(t.toCountry.isin(tier2))&(t.fromLeague!=t.toLeague)&(t.fromCountry==t.toCountry)&(t.toCountry.isin(uefa))&(t.age<23)].drop_duplicates('id').groupby('season')['id'].count()
over=t[(t.toLeague.isin(leagueList))&(t.toCountry.isin(tier2))&(t.fromLeague!=t.toLeague)&(t.fromCountry==t.toCountry)&(t.toCountry.isin(uefa))&(t.age>=23)].drop_duplicates('id').groupby('season')['id'].count()
df=DataFrame({'age < 23':under,'age >= 23':over})
df['youth %']=df['age < 23'] / (df['age < 23'] + df['age >= 23'])
df[['age < 23','age >= 23']].plot.area(stacked=True)
#df['youth %'].plot.line()
Out[37]:
In [ ]:
t=nonloans
t=t[(t.season>1995)&(t.fromCountry!=t.toCountry)&(t.intlCaps>0)&(t.Nationality=='Brazil')]
t=t.merge(DataFrame({'toMarketTier':countryInfo.marketTier.fillna(countryInfo.federation)}),how='left',left_on='toCountry',right_index=True)
t.groupby(['season','toMarketTier']).count()['id'].unstack().plot.bar(stacked=True,figsize=(10,6))
In [143]:
t=nonloans
t=t[(t.season>1995)&(t.fromCountry!=t.toCountry)&(t.toCountry.notnull())&(t.intlCaps>0)]
t=t.merge(DataFrame({'toMarketTier':countryInfo.marketTier.fillna(countryInfo.federation)}),how='left',left_on='toCountry',right_index=True)
t=t.merge(DataFrame({'fromMarketTier':countryInfo.marketTier.fillna(countryInfo.federation)}),how='left',left_on='fromCountry',right_index=True)
t=t[(t.fromMarketTier==3)&(t.toMarketTier==1)]
s=t.groupby(['season','fromCountry']).count()['id'].unstack()
s[s.sum().sort_values(ascending=False)[:10].index].plot.bar(stacked=True,figsize=(10,6))
Out[143]:
In [23]:
t=nonloans
t=t[(t.season>1995)&(t.toCountry.notnull())&t.feeValue>0]
t=t.merge(DataFrame({'toMarketTier':countryInfo.marketTier.fillna(countryInfo.federation)}),how='left',left_on='toCountry',right_index=True)
t=t.merge(DataFrame({'fromMarketTier':countryInfo.marketTier.fillna(countryInfo.federation)}),how='left',left_on='fromCountry',right_index=True)
s=t.groupby(['season','fromMarketTier']).sum()['feeValue'].unstack()
s[s.sum().sort_values(ascending=False).index] #.plot.bar(stacked=True,figsize=(10,6))
Out[23]:
In [26]:
countryInfo.groupby('marketTier').count()
Out[26]:
In [27]:
585.845068 / 366
739.324534 / 366
#tier3
#s=t.groupby(['season','fromMarketTier']).sum()['feeValue'].unstack()
3057280000 / 161906000.0
Out[27]:
In [127]:
t=nonloans
t=t[(t.season>1995)&(t.toCountry.notnull())&(t.age>21)&(t.toCountry!=t.fromCountry)&(t.intlCaps>0)]
t=t.merge(DataFrame({'nationalityTier':countryInfo.marketTier.fillna(countryInfo.federation)}),how='left',left_on='Nationality',right_index=True)
t.groupby(['season','nationalityTier']).count()['id'].unstack().plot.bar(stacked=True,figsize=(10,6))
Out[127]:
In [122]:
t=nonloans
t=t[(t.season>1995)&(t.toCountry.notnull())&(t.toCountry!=t.fromCountry)]
t=t.merge(DataFrame({'nationalityTier':countryInfo.marketTier.fillna(countryInfo.federation)}),how='left',left_on='Nationality',right_index=True)
t.groupby(['season','nationalityTier']).sum()['feeValue'].unstack().plot.bar(stacked=True,figsize=(10,6))
Out[122]:
In [91]:
t=nonloans[(nonloans.feeValue>0)&(nonloans.toLeague.isin(leagueList))&(nonloans.season>1995)]
s=t.groupby(['season','toCountry'])['toTeamId'].nunique().unstack()
s[s.quantile(.8).sort_values(ascending=False)[:20].index].plot.bar(stacked=True,figsize=(15,10))
Out[91]:
In [91]:
t=nonloans[(nonloans.feeValue>0)&(nonloans.toLeague.isin(leagueList))&(nonloans.season>1995)]
t=t.merge(DataFrame({'nationalityTier':countryInfo.marketTier.fillna(countryInfo.federation)}),how='left',left_on='Nationality',right_index=True)
s[s.quantile(.8).sort_values(ascending=False)[:20].index].plot.bar(stacked=True,figsize=(15,10))
Out[91]:
In [88]:
t=nonloans[(nonloans.toCountry!=nonloans.fromCountry)&(nonloans.toLeague.isin(leagueList))&(nonloans.season>1995)]
s=t.groupby(['season','toCountry'])['toTeamId'].nunique().unstack()
s[s.quantile(.8).sort_values(ascending=False)[:15].index].plot.area(stacked=True,figsize=(15,6))
Out[88]:
In [2]:
t=nonloans
t=t[(t.season>1995)&(t.fromCountry!=t.toCountry)&(t.toCountry.notnull())]
t=t.merge(DataFrame({'nationalityTier':countryInfo.marketTier.fillna(countryInfo.federation)}),how='left',left_on='Nationality',right_index=True)
t=t.merge(DataFrame({'toMarketTier':countryInfo.marketTier.fillna(countryInfo.federation)}),how='left',left_on='toCountry',right_index=True)
tt=t[t.nationalityTier=='CAF']
tt=t#[(t.fromCountry=='Greece')&(t.nationalityTier==3)]
tt.groupby(['season','toMarketTier']).count()['id'].unstack().plot.bar(stacked=True,figsize=(10,6))
Out[2]:
In [39]:
t=nonloans
t=t[(t.season>1995)&(t.fromCountry!=t.toCountry)&(t.toCountry.notnull())]
t=t.merge(DataFrame({'nationalityTier':countryInfo.marketTier.fillna(countryInfo.federation)}),how='left',left_on='Nationality',right_index=True)
t=t[t.nationalityTier==2]
t.groupby(['season','Nationality']).count()['id'].unstack().plot.bar(stacked=True,figsize=(10,6))
Out[39]:
In [40]:
t=nonloans
t=t[(t.season>1995)&(t.fromCountry!=t.toCountry)&(t.toCountry.notnull())]
t=t.merge(DataFrame({'nationalityTier':countryInfo.marketTier.fillna(countryInfo.federation)}),how='left',left_on='Nationality',right_index=True)
t=t[t.nationalityTier==1]
t.groupby(['season','Nationality']).count()['id'].unstack().plot.bar(stacked=True,figsize=(10,6))
Out[40]:
In [42]:
t=nonloans
t=t[(t.season>1995)&(t.fromCountry!=t.toCountry)&(t.toCountry.notnull())]
t=t.merge(DataFrame({'nationalityTier':countryInfo.marketTier.fillna(countryInfo.federation)}),how='left',left_on='Nationality',right_index=True)
t=t[t.nationalityTier=='CONMEBOL']
t.groupby(['season','Nationality']).count()['id'].unstack().plot.bar(stacked=True,figsize=(10,6))
Out[42]:
In [43]:
t=nonloans
t=t[(t.season>1995)&(t.fromCountry!=t.toCountry)&(t.toCountry.notnull())]
t=t.merge(DataFrame({'nationalityTier':countryInfo.marketTier.fillna(countryInfo.federation)}),how='left',left_on='Nationality',right_index=True)
t=t[t.nationalityTier=='CAF']
t.groupby(['season','Nationality']).count()['id'].unstack().plot.bar(stacked=True,figsize=(10,6))
Out[43]:
In [44]:
t=nonloans
t=t[(t.season>1995)&(t.fromCountry!=t.toCountry)&(t.toCountry.notnull())]
t=t.merge(DataFrame({'nationalityTier':countryInfo.marketTier.fillna(countryInfo.federation)}),how='left',left_on='Nationality',right_index=True)
t=t[t.nationalityTier=='AFC']
t.groupby(['season','Nationality']).count()['id'].unstack().plot.bar(stacked=True,figsize=(10,6))
Out[44]:
In [8]:
DataFrame({'toMarketTier':countryInfo.marketTier.fillna(countryInfo.federation)})
Out[8]:
In [64]:
t=transfers[transfers.toTeamName=='Unattached']
t.groupby('season')['age'].median().plot.line()
Out[64]:
In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
from urllib2 import urlopen, Request
from bs4 import BeautifulSoup
from pandas import DataFrame
import pandas as pd
import numpy as np
import re
import os.path
import datetime
import math
import matplotlib.pyplot as plt
transfers = pd.read_csv('../data/merged/transfers_q.csv')
loans = pd.read_csv('../data/merged/loans.csv')
nonloans = pd.read_csv('../data/merged/nonloans.csv')
players=pd.read_csv('../data/merged/players.csv')
leagueCodeMapping = {'GB1':'Premier League (England)','FR1':'Ligue 1 (France)',
'ES1':'La Liga (Spain)','IT1':'Serie A (Italy)','L1':'Bundesliga (Germany)',
'PO1':'Primeira Liga (Portugal)','NL1':'Eredivisie (Netherlands)'}
teamIdMap = pd.concat([transfers[['toTeamId','toTeamName','season']].rename(columns={'toTeamId':'teamId','toTeamName':'teamName'}),
transfers[['fromTeamId','fromTeamName','season']].rename(columns={'fromTeamId':'teamId','fromTeamName':'teamName'})],ignore_index=True) \
.sort_values('season',ascending=False).drop_duplicates('teamId').set_index('teamId')['teamName'].to_dict()
agentIdMap = transfers[['playersAgentId','playersAgent']].sort_values('playersAgent',ascending=False).drop_duplicates('playersAgentId').set_index('playersAgentId')['playersAgent'].to_dict()
leagueList = ['GB1','ES1','L1','IT1','FR1','PO1','NL1','TR1','RU1','BE1','MLS1','UKR1','GR1','RO1','ZYP1','KR1','SER1','BOS1','BU1','DK1','SE1','NO1','LI1','SC1','ISR1','PL1','UNG1','A1','C1','MAL1','KAS1','TS1','SLO1','SL1','FI1','LUX1','AZ1','IR1']
countryInfo=pd.read_excel('../data/reference/countries.xls').set_index('country')
top20clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:20].index
top50clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:50].index
top100clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:100].index
uefa=countryInfo[countryInfo.federation=='UEFA'].index
caf=countryInfo[countryInfo.federation=='CAF'].index
concacaf=countryInfo[countryInfo.federation=='CONCACAF'].index
concacaf_no_uscan=countryInfo[countryInfo.federation=='CONCACAF'].drop(['United States','Canada']).index
conmebol=countryInfo[countryInfo.federation=='CONMEBOL'].index
afc=countryInfo[countryInfo.federation=='AFC'].index
afc_no_anzac=countryInfo[countryInfo.federation=='AFC'].drop(['Australia','New Zealand']).index