In [2]:
big5leagues=['GB1','FR1','L1','IT1','ES1']
#big5leagues=['ZYP1']
big5ids=nonloans[(nonloans.fromLeague.isin(big5leagues))|(nonloans.toLeague.isin(big5leagues))].id
t=nonloans[(nonloans.feeValue>0)&(nonloans.id.isin(big5ids))]
t=nonloans[nonloans.season.between(1995,2015)]
#t=t[t.feeValue>0]
s=t[t.fromCountry!=t.toCountry].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()
s=s[:100]
#s['value']=pd.qcut(s.id, 10, labels=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 10,labels=False)
#s.id=10+s.id.max()-s.id
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/transfers.csv',index=False)
s
Out[2]:
In [2]:
t=loans[loans.season.between(1995,2015)]
s=t[t.fromCountry!=t.toCountry].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
s=s[:100]
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 10,labels=False)
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/loans.csv',index=False)
s
Out[2]:
In [27]:
big5leagues=['GB1','FR1','L1','IT1','ES1']
#big5leagues=['ZYP1']
big5ids=nonloans[(nonloans.fromLeague.isin(big5leagues))|(nonloans.toLeague.isin(big5leagues))].id
t=nonloans[(nonloans.feeValue>0)&(nonloans.id.isin(big5ids))]
t=nonloans
#t=t[t.feeValue>0]
s1=t[(t.fromCountry!=t.toCountry)&(t.season.between(2001,2005))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
s2=t[(t.fromCountry!=t.toCountry)&(t.season.between(2011,2015))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()
s=s1.rename(columns={'id':'before'}).merge(s2,how='right').sort_values('id',ascending=False).fillna(1)
s=s[:100]
s['diff']=((s['id']/s['id'].sum())-(s['before']/s['before'].sum()))/((s['before']/s['before'].sum()))
#s['diff']=((s['id'])-(s['before']))/((s['before']))
s=s.sort_values('id',ascending=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 10,labels=False)
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/transfer_change.csv',index=False)
s
Out[27]:
In [3]:
big5leagues=['GB1','FR1','L1','IT1','ES1']
#big5leagues=['ZYP1']
big5ids=nonloans[(nonloans.fromLeague.isin(big5leagues))|(nonloans.toLeague.isin(big5leagues))].id
t=nonloans[(nonloans.feeValue>0)&(nonloans.id.isin(big5ids))]
t=loans
#t=t[t.feeValue>0]
s1=t[(t.fromCountry!=t.toCountry)&(t.season.between(2001,2005))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
s2=t[(t.fromCountry!=t.toCountry)&(t.season.between(2011,2015))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()
s=s1.rename(columns={'id':'before'}).merge(s2,how='right').sort_values('id',ascending=False).fillna(1)
s=s[:100]
s['diff']=((s['id']/s['id'].sum())-(s['before']/s['before'].sum()))/((s['before']/s['before'].sum()))
#s['diff']=((s['id'])-(s['before']))/((s['before']))
s=s.sort_values('id',ascending=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 5,labels=False)*2
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/loan_change.csv',index=False)
s
Out[3]:
In [2]:
big5leagues=['GB1','FR1','L1','IT1','ES1']
#big5leagues=['ZYP1']
big5ids=nonloans[(nonloans.fromLeague.isin(big5leagues))|(nonloans.toLeague.isin(big5leagues))].id
t=nonloans[(nonloans.feeValue>0)&(nonloans.id.isin(big5ids))]
t=nonloans[nonloans.feeValue>0]
#t=t[t.feeValue>0]
s1=t[(t.fromCountry!=t.toCountry)&(t.season.between(2001,2005))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
s2=t[(t.fromCountry!=t.toCountry)&(t.season.between(2011,2015))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()
s=s1.rename(columns={'id':'before'}).merge(s2,how='right').sort_values('id',ascending=False).fillna(1)
s=s[:100]
s['diff']=((s['id']/s['id'].sum())-(s['before']/s['before'].sum()))/((s['before']/s['before'].sum()))
#s['diff']=((s['id'])-(s['before']))/((s['before']))
s=s.sort_values('id',ascending=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 5,labels=False)*2
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/paid_transfer_change.csv',index=False)
s
Out[2]:
In [ ]:
big5leagues=['GB1','FR1','L1','IT1','ES1']
#big5leagues=['ZYP1']
big5ids=nonloans[(nonloans.fromLeague.isin(big5leagues))|(nonloans.toLeague.isin(big5leagues))].id
t=nonloans[(nonloans.feeValue>0)&(nonloans.id.isin(big5ids))]
t=nonloans[~nonloans.Nationality.isin(uefa)]
#t=t[t.feeValue>0]
s1=t[(t.fromCountry!=t.toCountry)&(t.season.between(2001,2005))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
s2=t[(t.fromCountry!=t.toCountry)&(t.season.between(2011,2015))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()
s=s1.rename(columns={'id':'before'}).merge(s2,how='right').sort_values('id',ascending=False).fillna(1)
s=s[:100]
s['diff']=((s['id']/s['id'].sum())-(s['before']/s['before'].sum()))/((s['before']/s['before'].sum()))
#s['diff']=((s['id'])-(s['before']))/((s['before']))
s=s.sort_values('id',ascending=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 5,labels=False)*2
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/non_european_transfer_change.csv',index=False)
s
In [ ]:
t=nonloans[~nonloans.Nationality.isin(uefa)]
#t=t[t.feeValue>0]
s1=t[(t.fromCountry!=t.toCountry)&(t.season.between(2001,2005))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
s2=t[(t.fromCountry!=t.toCountry)&(t.season.between(2011,2015))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()
s=s1.rename(columns={'id':'before'}).merge(s2,how='right').sort_values('id',ascending=False).fillna(0)
s=s[:100]
s['diff']=((s['id']/s['id'].sum())-(s['before']/s['before'].sum()))/((s['before']/s['before'].sum()))
s['diff']=((s['id'])-(s['before']))/((s['before']))
s=s.sort_values('id',ascending=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 5,labels=False)*2
s=s.reset_index(drop=True)
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/non_euro_transfer_change.csv',index=False)
s
In [5]:
t=nonloans[nonloans.Nationality.isin(tier3)]
#t=t[t.feeValue>0]
s1=t[(t.fromCountry!=t.toCountry)&(t.season.between(2001,2005))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
s2=t[(t.fromCountry!=t.toCountry)&(t.season.between(2011,2015))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()
s=s1.rename(columns={'id':'before'}).merge(s2,how='right').sort_values('id',ascending=False).fillna(0)
s=s[:100]
s['diff']=((s['id']/s['id'].sum())-(s['before']/s['before'].sum()))/((s['before']/s['before'].sum()))
s['diff']=((s['id'])-(s['before']))/((s['before']))
s=s.sort_values('id',ascending=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 10,labels=False)
s=s.reset_index(drop=True)
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/tier3_transfer_change.csv',index=False)
s
Out[5]:
In [5]:
t=nonloans[(nonloans.Nationality=='Brazil')&(nonloans.feeValue>0)]
#t=t[t.feeValue>0]
s1=t[(t.fromCountry!='Brazil')&(t.toCountry!='Brazil')&(t.fromCountry!=t.toCountry)&(t.season.between(2001,2005))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
s2=t[(t.fromCountry!='Brazil')&(t.toCountry!='Brazil')&(t.fromCountry!=t.toCountry)&(t.season.between(2011,2015))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()
s=s1.rename(columns={'id':'before'}).merge(s2,how='right').sort_values('id',ascending=False).fillna(1)
s=s[:100]
s['diff']=((s['id']/s['id'].sum())-(s['before']/s['before'].sum()))/((s['before']/s['before'].sum()))
s['diff']=((s['id'])-(s['before']))/((s['before']))
s=s.sort_values('id',ascending=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 1,labels=False)*5
s=s.reset_index(drop=True)
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/brazil_transfer_change.csv',index=False)
s
Out[5]:
In [4]:
t=nonloans[(nonloans.Nationality=='Brazil')&(nonloans.feeValue>0)]
#t=t[t.feeValue>0]
s1=t[(t.fromCountry!=t.toCountry)&(t.season.between(2001,2005))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
s2=t[(t.fromCountry!=t.toCountry)&(t.season.between(2011,2015))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()
s=s1.rename(columns={'id':'before'}).merge(s2,how='right').sort_values('id',ascending=False).fillna(1)
s=s[:100]
s['diff']=((s['id']/s['id'].sum())-(s['before']/s['before'].sum()))/((s['before']/s['before'].sum()))
s['diff']=((s['id'])-(s['before']))/((s['before']))
s=s.sort_values('id',ascending=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 1,labels=False)*5
s=s.reset_index(drop=True)
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/paid_brazil_transfer_change.csv',index=False)
s
Out[4]:
In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
from urllib2 import urlopen, Request
from bs4 import BeautifulSoup
from pandas import DataFrame
import pandas as pd
import numpy as np
import re
import os.path
import datetime
import math
import matplotlib.pyplot as plt
transfers=pd.read_csv('../data/merged/transfers_q.csv',index_col=False, warn_bad_lines =True,error_bad_lines=False)
loans = pd.read_csv('../data/merged/loans.csv')
nonloans = pd.read_csv('../data/merged/nonloans.csv')
players=pd.read_csv('../data/merged/players.csv')
leagueCodeMapping = {'GB1':'Premier League (England)','FR1':'Ligue 1 (France)',
'ES1':'La Liga (Spain)','IT1':'Serie A (Italy)','L1':'Bundesliga (Germany)',
'PO1':'Primeira Liga (Portugal)','NL1':'Eredivisie (Netherlands)'}
teamIdMap = pd.concat([transfers[['toTeamId','toTeamName','season']].rename(columns={'toTeamId':'teamId','toTeamName':'teamName'}),
transfers[['fromTeamId','fromTeamName','season']].rename(columns={'fromTeamId':'teamId','fromTeamName':'teamName'})],ignore_index=True) \
.sort_values('season',ascending=False).drop_duplicates('teamId').set_index('teamId')['teamName'].to_dict()
agentIdMap = transfers[['playersAgentId','playersAgent']].sort_values('playersAgent',ascending=False).drop_duplicates('playersAgentId').set_index('playersAgentId')['playersAgent'].to_dict()
leagueList = ['GB1','ES1','L1','IT1','FR1','PO1','NL1','TR1','RU1','BE1','MLS1','UKR1','GR1','RO1','ZYP1','KR1','SER1','BOS1','BU1','DK1','SE1','NO1','LI1','SC1','ISR1','PL1','UNG1','A1','C1','MAL1','KAS1','TS1','SLO1','SL1','FI1','LUX1','AZ1']
countryInfo=pd.read_excel('../data/reference/countries.xls').set_index('country')
top20clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:20].index
top50clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:50].index
top100clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:100].index
uefa=countryInfo[countryInfo.federation=='UEFA'].index
caf=countryInfo[countryInfo.federation=='CAF'].index
concacaf=countryInfo[countryInfo.federation=='CONCACAF'].index
concacaf_no_uscan=countryInfo[countryInfo.federation=='CONCACAF'].drop(['United States','Canada']).index
conmebol=countryInfo[countryInfo.federation=='CONMEBOL'].index
afc=countryInfo[countryInfo.federation=='AFC'].index
afc_no_anzac=countryInfo[countryInfo.federation=='AFC'].drop(['Australia','New Zealand']).index
tier3=countryInfo[countryInfo.marketTier==3].index
leagues=['GB1']
eplIds=nonloans[(nonloans.fromLeague.isin(leagues))|(nonloans.toLeague.isin(leagues))].id
nonEnglandIds=nonloans[(nonloans.fromCountry!='England')&(nonloans.toCountry!='England')].id
t=nonloans
s=t[(t.id.isin(eplIds))&(t.fromCountry!=t.toCountry)&(t.toCountry!='England')&(t.fromCountry!='England')].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()
s=s[:80]
#s['value']=pd.qcut(s.id, 10, labels=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 6,labels=False)*2
#s.id=10+s.id.max()-s.id
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/epl_transfers.csv',index=False)
s
Out[1]:
In [2]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
from urllib2 import urlopen, Request
from bs4 import BeautifulSoup
from pandas import DataFrame
import pandas as pd
import numpy as np
import re
import os.path
import datetime
import math
import matplotlib.pyplot as plt
transfers=pd.read_csv('../data/merged/transfers_q.csv',index_col=False, warn_bad_lines =True,error_bad_lines=False)
loans = pd.read_csv('../data/merged/loans.csv')
nonloans = pd.read_csv('../data/merged/nonloans.csv')
players=pd.read_csv('../data/merged/players.csv')
leagues=['L1','GB1','FR1','ES1','IT1']
countries=['Germany','England','France','Spain','Italy']
eplIds=nonloans[(nonloans.fromLeague.isin(leagues))|(nonloans.toLeague.isin(leagues))].id
t=nonloans
s=t[(t.id.isin(eplIds))&(t.fromCountry!=t.toCountry)&(~t.fromCountry.isin(countries))&(~t.toCountry.isin(countries))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()
s=s[:100]
#s['value']=pd.qcut(s.id, 10, labels=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 10,labels=False)
#s.id=10+s.id.max()-s.id
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/tier1_transfers.csv',index=False)
s
Out[2]:
In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
from urllib2 import urlopen, Request
from bs4 import BeautifulSoup
from pandas import DataFrame
import pandas as pd
import numpy as np
import re
import os.path
import datetime
import math
import matplotlib.pyplot as plt
transfers=pd.read_csv('../data/merged/transfers_q.csv',index_col=False, warn_bad_lines =True,error_bad_lines=False)
loans = pd.read_csv('../data/merged/loans.csv')
nonloans = pd.read_csv('../data/merged/nonloans.csv')
players=pd.read_csv('../data/merged/players.csv')
leagueCodeMapping = {'GB1':'Premier League (England)','FR1':'Ligue 1 (France)',
'ES1':'La Liga (Spain)','IT1':'Serie A (Italy)','L1':'Bundesliga (Germany)',
'PO1':'Primeira Liga (Portugal)','NL1':'Eredivisie (Netherlands)'}
teamIdMap = pd.concat([transfers[['toTeamId','toTeamName','season']].rename(columns={'toTeamId':'teamId','toTeamName':'teamName'}),
transfers[['fromTeamId','fromTeamName','season']].rename(columns={'fromTeamId':'teamId','fromTeamName':'teamName'})],ignore_index=True) \
.sort_values('season',ascending=False).drop_duplicates('teamId').set_index('teamId')['teamName'].to_dict()
agentIdMap = transfers[['playersAgentId','playersAgent']].sort_values('playersAgent',ascending=False).drop_duplicates('playersAgentId').set_index('playersAgentId')['playersAgent'].to_dict()
leagueList = ['GB1','ES1','L1','IT1','FR1','PO1','NL1','TR1','RU1','BE1','MLS1','UKR1','GR1','RO1','ZYP1','KR1','SER1','BOS1','BU1','DK1','SE1','NO1','LI1','SC1','ISR1','PL1','UNG1','A1','C1','MAL1','KAS1','TS1','SLO1','SL1','FI1','LUX1','AZ1']
countryInfo=pd.read_excel('../data/reference/countries.xls').set_index('country')
top20clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:20].index
top50clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:50].index
top100clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:100].index
uefa=countryInfo[countryInfo.federation=='UEFA'].index
caf=countryInfo[countryInfo.federation=='CAF'].index
concacaf=countryInfo[countryInfo.federation=='CONCACAF'].index
concacaf_no_uscan=countryInfo[countryInfo.federation=='CONCACAF'].drop(['United States','Canada']).index
conmebol=countryInfo[countryInfo.federation=='CONMEBOL'].index
afc=countryInfo[countryInfo.federation=='AFC'].index
afc_no_anzac=countryInfo[countryInfo.federation=='AFC'].drop(['Australia','New Zealand']).index
tier3=countryInfo[countryInfo.marketTier==3].index