In [2]:
big5leagues=['GB1','FR1','L1','IT1','ES1']
#big5leagues=['ZYP1']
big5ids=nonloans[(nonloans.fromLeague.isin(big5leagues))|(nonloans.toLeague.isin(big5leagues))].id
t=nonloans[(nonloans.feeValue>0)&(nonloans.id.isin(big5ids))]
t=nonloans[nonloans.season.between(1995,2015)]
#t=t[t.feeValue>0]
s=t[t.fromCountry!=t.toCountry].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()
s=s[:100]
#s['value']=pd.qcut(s.id, 10, labels=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 10,labels=False)
#s.id=10+s.id.max()-s.id
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/transfers.csv',index=False)
s


Out[2]:
source target id value decile
0 England Scotland 612 1.000000 9
1 Brazil Portugal 597 0.975490 9
2 Scotland England 568 0.928105 9
3 France Belgium 463 0.756536 9
4 Slovenia Austria 413 0.674837 9
5 Bosnia-Herzegovina Croatia 399 0.651961 9
6 Slovakia Czech Republic 397 0.648693 9
7 France England 380 0.620915 9
8 Czech Republic Slovakia 377 0.616013 9
9 Croatia Bosnia-Herzegovina 370 0.604575 9
10 Belgium Netherlands 359 0.586601 8
11 Serbia Bosnia-Herzegovina 347 0.566993 8
12 Greece Cyprus 345 0.563725 8
13 Germany Turkey 314 0.513072 8
14 Netherlands Belgium 297 0.485294 8
15 Cyprus Greece 278 0.454248 8
16 France Luxembourg 274 0.447712 8
17 Ukraine Russia 271 0.442810 8
18 Spain England 268 0.437908 8
19 Portugal Brazil 260 0.424837 8
20 England Ireland 249 0.406863 7
21 Austria Germany 246 0.401961 7
22 Portugal Spain 241 0.393791 7
23 Portugal Cyprus 235 0.383987 7
24 Bosnia-Herzegovina Serbia 233 0.380719 7
25 Austria Slovenia 229 0.374183 7
26 Belgium France 227 0.370915 7
27 Germany Austria 225 0.367647 7
28 Croatia Slovenia 213 0.348039 7
29 Sweden Norway 210 0.343137 7
... ... ... ... ... ...
70 England United States 138 0.225490 2
71 Switzerland France 137 0.223856 2
72 Portugal Romania 133 0.217320 2
73 Sweden Denmark 127 0.207516 2
74 Turkey Germany 126 0.205882 2
75 England Italy 125 0.204248 2
76 Brazil Spain 124 0.202614 2
77 Denmark Norway 124 0.202614 2
78 Croatia Germany 123 0.200980 2
79 Switzerland Italy 121 0.197712 2
80 Denmark Sweden 120 0.196078 1
81 Spain Argentina 118 0.192810 1
82 Germany Poland 117 0.191176 1
83 Spain France 117 0.191176 1
84 Portugal England 117 0.191176 1
85 Germany Greece 116 0.189542 1
86 Serbia Greece 114 0.186275 1
87 Portugal France 113 0.184641 1
88 Brazil Greece 113 0.184641 1
89 Ireland Northern Ireland 112 0.183007 1
90 Italy France 111 0.181373 0
91 Cyprus Portugal 111 0.181373 0
92 Finland Sweden 111 0.181373 0
93 Luxembourg France 110 0.179739 0
94 Luxembourg Germany 109 0.178105 0
95 Austria Croatia 106 0.173203 0
96 Romania Hungary 106 0.173203 0
97 Norway Denmark 106 0.173203 0
98 Slovakia Poland 105 0.171569 0
99 Belgium Germany 104 0.169935 0

100 rows × 5 columns


In [2]:
t=loans[loans.season.between(1995,2015)]
s=t[t.fromCountry!=t.toCountry].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
s=s[:100]
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 10,labels=False)
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/loans.csv',index=False)
s


Out[2]:
source target id value decile
0 England Scotland 262 1.000000 9
1 Brazil Portugal 173 0.660305 9
2 Italy Spain 169 0.645038 9
3 England Spain 112 0.427481 9
4 Portugal Spain 99 0.377863 9
5 Scotland England 93 0.354962 9
6 Belgium Netherlands 92 0.351145 9
7 France Belgium 91 0.347328 9
8 Czech Republic Slovakia 90 0.343511 9
9 England France 90 0.343511 9
10 France England 89 0.339695 8
11 Portugal Brazil 89 0.339695 8
12 Italy England 83 0.316794 8
13 England Italy 82 0.312977 8
14 England Belgium 81 0.309160 8
15 England Netherlands 81 0.309160 8
16 Spain Portugal 67 0.255725 8
17 Italy Switzerland 64 0.244275 8
18 Ukraine Russia 61 0.232824 8
19 Spain England 60 0.229008 8
20 Italy France 59 0.225191 7
21 Italy Slovenia 58 0.221374 7
22 Italy Belgium 56 0.213740 7
23 Italy Romania 55 0.209924 7
24 Spain Italy 51 0.194656 7
25 Slovakia Czech Republic 51 0.194656 7
26 France Spain 48 0.183206 7
27 Italy Portugal 47 0.179389 7
28 England Germany 45 0.171756 7
29 Brazil Italy 43 0.164122 7
... ... ... ... ... ...
70 Portugal Greece 26 0.099237 2
71 Switzerland Italy 26 0.099237 2
72 Germany England 26 0.099237 2
73 Brazil Japan 25 0.095420 2
74 Brazil Spain 24 0.091603 2
75 Croatia Italy 24 0.091603 2
76 France Italy 24 0.091603 2
77 Norway Sweden 23 0.087786 1
78 Argentina Greece 23 0.087786 1
79 Italy Turkey 23 0.087786 1
80 Germany Turkey 23 0.087786 1
81 England Greece 23 0.087786 1
82 Croatia Bosnia-Herzegovina 22 0.083969 1
83 Russia Belarus 22 0.083969 1
84 Italy Germany 22 0.083969 1
85 Russia Kazakhstan 22 0.083969 1
86 England Turkey 22 0.083969 1
87 Italy Hungary 22 0.083969 1
88 Austria Germany 22 0.083969 1
89 Croatia Slovenia 21 0.080153 0
90 France Greece 21 0.080153 0
91 Portugal Belgium 21 0.080153 0
92 Portugal Italy 21 0.080153 0
93 Argentina Italy 20 0.076336 0
94 England United States 20 0.076336 0
95 Italy Netherlands 20 0.076336 0
96 Brazil Germany 20 0.076336 0
97 Spain Belgium 19 0.072519 0
98 Brazil Korea, South 18 0.068702 0
99 Belgium Israel 18 0.068702 0

100 rows × 5 columns


In [27]:
big5leagues=['GB1','FR1','L1','IT1','ES1']
#big5leagues=['ZYP1']
big5ids=nonloans[(nonloans.fromLeague.isin(big5leagues))|(nonloans.toLeague.isin(big5leagues))].id
t=nonloans[(nonloans.feeValue>0)&(nonloans.id.isin(big5ids))]
t=nonloans
#t=t[t.feeValue>0]
s1=t[(t.fromCountry!=t.toCountry)&(t.season.between(2001,2005))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
s2=t[(t.fromCountry!=t.toCountry)&(t.season.between(2011,2015))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()

s=s1.rename(columns={'id':'before'}).merge(s2,how='right').sort_values('id',ascending=False).fillna(1)
s=s[:100]
s['diff']=((s['id']/s['id'].sum())-(s['before']/s['before'].sum()))/((s['before']/s['before'].sum()))
#s['diff']=((s['id'])-(s['before']))/((s['before']))
s=s.sort_values('id',ascending=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 10,labels=False)
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/transfer_change.csv',index=False)
s


Out[27]:
source target before id diff value decile
8 Slovenia Austria 81.0 206 0.253312 1.000000 9
1 England Scotland 145.0 205 -0.303273 0.995146 9
100 Serbia Bosnia-Herzegovina 24.0 194 2.983525 0.941748 9
43 Croatia Bosnia-Herzegovina 39.0 193 1.438764 0.936893 9
2 Scotland England 128.0 187 -0.280040 0.907767 9
23 Bosnia-Herzegovina Croatia 55.0 166 0.487382 0.805825 9
3 France Belgium 116.0 154 -0.345756 0.747573 9
0 Brazil Portugal 177.0 145 -0.596288 0.703883 9
11 Slovakia Czech Republic 69.0 145 0.035609 0.703883 9
181 Bosnia-Herzegovina Serbia 15.0 141 3.632387 0.684466 9
19 Czech Republic Slovakia 59.0 125 0.044083 0.606796 8
9 Greece Cyprus 75.0 124 -0.185225 0.601942 8
21 Cyprus Greece 58.0 115 -0.022882 0.558252 8
86 Austria Slovenia 27.0 114 1.080741 0.553398 8
150 Czech Republic Austria 17.0 111 2.217741 0.538835 8
12 Germany Turkey 64.0 111 -0.145288 0.538835 8
16 Spain England 61.0 110 -0.111331 0.533981 8
116 Slovakia Austria 21.0 109 1.557904 0.529126 8
5 Belgium Netherlands 88.0 109 -0.389591 0.529126 8
48 Croatia Slovenia 37.0 108 0.438464 0.524272 8
81 Croatia Austria 28.0 104 0.830427 0.504854 7
4 France England 111.0 102 -0.547150 0.495146 7
301 Portugal Cyprus 10.0 101 3.977353 0.490291 7
10 Netherlands Belgium 70.0 100 -0.295990 0.485437 7
24 Austria Germany 54.0 97 -0.114772 0.470874 7
115 Serbia Montenegro 21.0 97 1.276300 0.470874 7
190 Spain Greece 15.0 97 2.186820 0.470874 7
26 France Luxembourg 52.0 96 -0.090202 0.466019 7
17 Portugal Brazil 59.0 92 -0.231555 0.446602 7
74 Montenegro Serbia 30.0 82 0.347006 0.398058 7
... ... ... ... ... ... ... ...
33 Spain Portugal 45.0 50 -0.452436 0.242718 2
18 France Spain 59.0 49 -0.590719 0.237864 2
62 England Germany 33.0 49 -0.268256 0.237864 2
454 Northern Ireland Ireland 7.0 49 2.449650 0.237864 2
67 Germany Netherlands 32.0 48 -0.260789 0.233010 2
314 Bosnia-Herzegovina Austria 10.0 48 1.365474 0.233010 2
141 Luxembourg France 18.0 47 0.286774 0.228155 2
58 Turkey Germany 33.0 45 -0.327990 0.218447 2
170 France Turkey 16.0 45 0.386020 0.218447 2
131 Austria Hungary 19.0 45 0.167175 0.218447 2
83 England Italy 28.0 44 -0.225589 0.213592 1
251 Luxembourg Germany 12.0 44 0.806960 0.213592 1
22 France Switzerland 55.0 44 -0.605754 0.213592 1
215 Sweden Finland 13.0 43 0.630055 0.208738 1
365 Poland Slovakia 8.0 43 1.648839 0.208738 1
118 Finland Sweden 20.0 42 0.034895 0.203883 1
102 Denmark Sweden 23.0 42 -0.100091 0.203883 1
127 Luxembourg Belgium 20.0 42 0.034895 0.203883 1
94 Bulgaria Greece 24.0 41 -0.158121 0.199029 1
76 Italy France 29.0 41 -0.303273 0.199029 1
44 Italy Switzerland 39.0 40 -0.494557 0.194175 0
245 Netherlands Turkey 12.0 40 0.642691 0.194175 0
592 Italy Slovenia 5.0 40 2.942457 0.194175 0
61 Portugal England 33.0 39 -0.417592 0.189320 0
78 Serbia Hungary 29.0 38 -0.354253 0.184466 0
445 Romania Cyprus 7.0 38 1.675239 0.184466 0
38 England France 42.0 38 -0.554127 0.184466 0
1208 Cyprus Bulgaria 2.0 38 8.363336 0.184466 0
63 Germany Switzerland 32.0 38 -0.414791 0.184466 0
64 Romania Hungary 32.0 38 -0.414791 0.184466 0

100 rows × 7 columns


In [3]:
big5leagues=['GB1','FR1','L1','IT1','ES1']
#big5leagues=['ZYP1']
big5ids=nonloans[(nonloans.fromLeague.isin(big5leagues))|(nonloans.toLeague.isin(big5leagues))].id
t=nonloans[(nonloans.feeValue>0)&(nonloans.id.isin(big5ids))]
t=loans
#t=t[t.feeValue>0]
s1=t[(t.fromCountry!=t.toCountry)&(t.season.between(2001,2005))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
s2=t[(t.fromCountry!=t.toCountry)&(t.season.between(2011,2015))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()

s=s1.rename(columns={'id':'before'}).merge(s2,how='right').sort_values('id',ascending=False).fillna(1)
s=s[:100]
s['diff']=((s['id']/s['id'].sum())-(s['before']/s['before'].sum()))/((s['before']/s['before'].sum()))
#s['diff']=((s['id'])-(s['before']))/((s['before']))
s=s.sort_values('id',ascending=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 5,labels=False)*2
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/loan_change.csv',index=False)
s


Out[3]:
source target before id diff value decile
0 England Scotland 34.0 127 0.096302 1.000000 8
5 Italy Spain 22.0 94 0.254038 0.740157 8
15 Brazil Portugal 13.0 76 0.715835 0.598425 8
20 England Spain 10.0 70 1.054487 0.551181 8
27 Portugal Spain 7.0 65 1.725340 0.511811 8
81 Czech Republic Slovakia 3.0 64 5.261294 0.503937 8
336 Italy Slovenia 1.0 55 15.142399 0.433071 8
24 England Netherlands 9.0 52 0.695767 0.409449 8
14 England Italy 14.0 47 -0.014685 0.370079 8
8 France Belgium 19.0 45 -0.304873 0.354331 8
13 Italy England 15.0 44 -0.139072 0.346457 8
7 Belgium Netherlands 20.0 41 -0.398329 0.322835 8
3 England France 30.0 41 -0.598886 0.322835 8
127 Italy Romania 2.0 40 4.869963 0.314961 8
48 Italy Portugal 5.0 34 0.995788 0.267717 8
214 Slovakia Czech Republic 1.0 32 8.391941 0.251969 8
6 Portugal Brazil 21.0 31 -0.566741 0.244094 8
53 Spain Italy 5.0 30 0.760989 0.236220 8
18 Ukraine Russia 12.0 28 -0.315171 0.220472 8
47 England Germany 5.0 28 0.643590 0.220472 8
337 Italy Croatia 1.0 26 6.630952 0.204724 6
192 Portugal Turkey 1.0 24 6.043956 0.188976 6
33 Italy Switzerland 7.0 24 0.006279 0.188976 6
338 Croatia Italy 1.0 23 5.750458 0.181102 6
10 Spain Portugal 18.0 23 -0.624975 0.181102 6
17 Italy France 13.0 23 -0.480734 0.181102 6
35 Italy Belgium 6.0 23 0.125076 0.181102 6
4 Scotland England 24.0 22 -0.730960 0.173228 6
12 Spain England 17.0 22 -0.620179 0.173228 6
36 England Portugal 6.0 22 0.076160 0.173228 6
... ... ... ... ... ... ... ...
134 Germany Spain 2.0 11 0.614240 0.086614 0
150 Belgium England 2.0 11 0.614240 0.086614 0
76 Croatia Slovenia 3.0 11 0.076160 0.086614 0
145 Croatia Bosnia-Herzegovina 2.0 11 0.614240 0.086614 0
74 Portugal Greece 4.0 11 -0.192880 0.086614 0
67 Portugal England 4.0 11 -0.192880 0.086614 0
185 Uruguay Brazil 1.0 11 2.228480 0.086614 0
80 Germany Turkey 3.0 11 0.076160 0.086614 0
55 Argentina Italy 5.0 11 -0.354304 0.086614 0
346 Russia Italy 1.0 11 2.228480 0.086614 0
16 Spain France 13.0 11 -0.751655 0.086614 0
345 Italy Netherlands 1.0 11 2.228480 0.086614 0
168 Ukraine Belarus 2.0 11 0.614240 0.086614 0
279 Argentina Chile 1.0 11 2.228480 0.086614 0
50 Spain Brazil 5.0 11 -0.354304 0.086614 0
294 Belgium Israel 1.0 10 1.934982 0.078740 0
116 France Germany 2.0 10 0.467491 0.078740 0
347 Russia Poland 1.0 10 1.934982 0.078740 0
63 Russia Brazil 4.0 10 -0.266255 0.078740 0
348 England Greece 1.0 10 1.934982 0.078740 0
349 Ukraine Brazil 1.0 10 1.934982 0.078740 0
29 Argentina Spain 7.0 10 -0.580717 0.078740 0
351 Egypt Belgium 1.0 9 1.641484 0.070866 0
354 Russia Greece 1.0 9 1.641484 0.070866 0
114 France Greece 2.0 9 0.320742 0.070866 0
353 Belgium Spain 1.0 9 1.641484 0.070866 0
352 Germany Bosnia-Herzegovina 1.0 9 1.641484 0.070866 0
23 Spain Germany 10.0 9 -0.735852 0.070866 0
350 Iceland Norway 1.0 9 1.641484 0.070866 0
355 Romania Portugal 1.0 9 1.641484 0.070866 0

100 rows × 7 columns


In [2]:
big5leagues=['GB1','FR1','L1','IT1','ES1']
#big5leagues=['ZYP1']
big5ids=nonloans[(nonloans.fromLeague.isin(big5leagues))|(nonloans.toLeague.isin(big5leagues))].id
t=nonloans[(nonloans.feeValue>0)&(nonloans.id.isin(big5ids))]
t=nonloans[nonloans.feeValue>0]
#t=t[t.feeValue>0]
s1=t[(t.fromCountry!=t.toCountry)&(t.season.between(2001,2005))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
s2=t[(t.fromCountry!=t.toCountry)&(t.season.between(2011,2015))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()

s=s1.rename(columns={'id':'before'}).merge(s2,how='right').sort_values('id',ascending=False).fillna(1)
s=s[:100]
s['diff']=((s['id']/s['id'].sum())-(s['before']/s['before'].sum()))/((s['before']/s['before'].sum()))
#s['diff']=((s['id'])-(s['before']))/((s['before']))
s=s.sort_values('id',ascending=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 5,labels=False)*2
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/paid_transfer_change.csv',index=False)
s


Out[2]:
source target before id diff value decile
0 France England 52.0 65 -0.392261 1.000000 8
2 Spain England 31.0 62 -0.027617 0.953846 8
3 Netherlands England 26.0 43 -0.195914 0.661538 8
5 Italy England 21.0 42 -0.027617 0.646154 8
31 Germany Turkey 10.0 42 1.042004 0.646154 8
1 Scotland England 37.0 42 -0.448107 0.646154 8
26 Germany England 11.0 38 0.679570 0.584615 8
50 Spain Italy 7.0 34 1.361501 0.523077 8
32 Portugal Spain 9.0 34 0.836723 0.523077 8
35 Austria Germany 9.0 32 0.728681 0.492308 8
9 Switzerland Germany 18.0 28 -0.243702 0.430769 8
14 France Spain 16.0 25 -0.240326 0.384615 8
33 Brazil Italy 9.0 25 0.350532 0.384615 8
22 Portugal England 12.0 23 -0.068133 0.353846 8
18 England Spain 14.0 23 -0.201257 0.353846 8
60 France Italy 6.0 22 0.782702 0.338462 8
49 Argentina Italy 7.0 22 0.528030 0.338462 8
86 Italy Germany 4.0 21 1.552505 0.323077 8
43 England Germany 8.0 21 0.276252 0.323077 8
387 Croatia Italy 1.0 20 8.723828 0.307692 6
108 England Turkey 4.0 20 1.430957 0.307692 6
10 Netherlands Germany 18.0 19 -0.486798 0.292308 6
72 Italy France 5.0 18 0.750289 0.276923 6
17 Italy Spain 14.0 18 -0.374897 0.276923 6
37 England Italy 8.0 18 0.093931 0.276923 6
13 Brazil Portugal 16.0 17 -0.483422 0.261538 6
120 France Belgium 3.0 17 1.755085 0.261538 6
27 Belgium England 11.0 17 -0.248613 0.261538 6
46 England France 7.0 16 0.111295 0.246154 6
359 Portugal Italy 1.0 16 6.779062 0.246154 6
... ... ... ... ... ... ... ...
397 Spain Russia 1.0 9 3.375723 0.138462 0
29 Brazil Russia 10.0 9 -0.562428 0.138462 0
396 Poland Italy 1.0 9 3.375723 0.138462 0
15 Belgium France 15.0 9 -0.708285 0.138462 0
267 Russia Turkey 1.0 9 3.375723 0.138462 0
144 Argentina Portugal 3.0 9 0.458574 0.138462 0
103 Switzerland England 4.0 9 0.093931 0.138462 0
77 Italy Argentina 5.0 9 -0.124855 0.138462 0
96 Belarus Russia 4.0 9 0.093931 0.138462 0
110 France Russia 4.0 9 0.093931 0.138462 0
131 England Russia 3.0 9 0.458574 0.138462 0
395 Netherlands Portugal 1.0 9 3.375723 0.138462 0
95 Croatia Belgium 4.0 8 -0.027617 0.123077 0
246 Serbia Italy 1.0 8 2.889531 0.123077 0
138 Netherlands France 3.0 8 0.296510 0.123077 0
400 Ukraine Brazil 1.0 8 2.889531 0.123077 0
399 Norway Netherlands 1.0 8 2.889531 0.123077 0
250 Slovakia Poland 1.0 8 2.889531 0.123077 0
135 Germany Netherlands 3.0 8 0.296510 0.123077 0
281 Turkey China 1.0 8 2.889531 0.123077 0
92 Poland England 4.0 8 -0.027617 0.123077 0
309 Sweden Turkey 1.0 8 2.889531 0.123077 0
19 Ukraine Russia 14.0 8 -0.722176 0.123077 0
38 Brazil England 8.0 8 -0.513809 0.123077 0
123 Italy Turkey 3.0 8 0.296510 0.123077 0
230 Belgium Italy 2.0 8 0.944766 0.123077 0
52 Germany Austria 7.0 8 -0.444353 0.123077 0
55 Germany Denmark 6.0 8 -0.351745 0.123077 0
398 Norway China 1.0 8 2.889531 0.123077 0
156 Bosnia-Herzegovina Croatia 3.0 7 0.134447 0.107692 0

100 rows × 7 columns


In [ ]:
big5leagues=['GB1','FR1','L1','IT1','ES1']
#big5leagues=['ZYP1']
big5ids=nonloans[(nonloans.fromLeague.isin(big5leagues))|(nonloans.toLeague.isin(big5leagues))].id
t=nonloans[(nonloans.feeValue>0)&(nonloans.id.isin(big5ids))]
t=nonloans[~nonloans.Nationality.isin(uefa)]
#t=t[t.feeValue>0]
s1=t[(t.fromCountry!=t.toCountry)&(t.season.between(2001,2005))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
s2=t[(t.fromCountry!=t.toCountry)&(t.season.between(2011,2015))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()

s=s1.rename(columns={'id':'before'}).merge(s2,how='right').sort_values('id',ascending=False).fillna(1)
s=s[:100]
s['diff']=((s['id']/s['id'].sum())-(s['before']/s['before'].sum()))/((s['before']/s['before'].sum()))
#s['diff']=((s['id'])-(s['before']))/((s['before']))
s=s.sort_values('id',ascending=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 5,labels=False)*2
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/non_european_transfer_change.csv',index=False)
s

In [ ]:
t=nonloans[~nonloans.Nationality.isin(uefa)]
#t=t[t.feeValue>0]
s1=t[(t.fromCountry!=t.toCountry)&(t.season.between(2001,2005))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
s2=t[(t.fromCountry!=t.toCountry)&(t.season.between(2011,2015))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()

s=s1.rename(columns={'id':'before'}).merge(s2,how='right').sort_values('id',ascending=False).fillna(0)

s=s[:100]
s['diff']=((s['id']/s['id'].sum())-(s['before']/s['before'].sum()))/((s['before']/s['before'].sum()))
s['diff']=((s['id'])-(s['before']))/((s['before']))
s=s.sort_values('id',ascending=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 5,labels=False)*2
s=s.reset_index(drop=True)
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/non_euro_transfer_change.csv',index=False)
s

In [5]:
t=nonloans[nonloans.Nationality.isin(tier3)]
#t=t[t.feeValue>0]
s1=t[(t.fromCountry!=t.toCountry)&(t.season.between(2001,2005))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
s2=t[(t.fromCountry!=t.toCountry)&(t.season.between(2011,2015))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()

s=s1.rename(columns={'id':'before'}).merge(s2,how='right').sort_values('id',ascending=False).fillna(0)

s=s[:100]
s['diff']=((s['id']/s['id'].sum())-(s['before']/s['before'].sum()))/((s['before']/s['before'].sum()))
s['diff']=((s['id'])-(s['before']))/((s['before']))
s=s.sort_values('id',ascending=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 10,labels=False)
s=s.reset_index(drop=True)
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/tier3_transfer_change.csv',index=False)
s


Out[5]:
source target before id diff value decile
0 Slovenia Austria 79.0 202 1.556962 1.000000 9
1 Croatia Bosnia-Herzegovina 38.0 191 4.026316 0.945545 9
2 Serbia Bosnia-Herzegovina 24.0 183 6.625000 0.905941 9
3 Bosnia-Herzegovina Croatia 53.0 162 2.056604 0.801980 9
4 Bosnia-Herzegovina Serbia 13.0 137 9.538462 0.678218 9
5 Slovakia Czech Republic 69.0 132 0.913043 0.653465 9
6 Czech Republic Slovakia 59.0 120 1.033898 0.594059 9
7 Austria Slovenia 26.0 110 3.230769 0.544554 9
8 Slovakia Austria 17.0 109 5.411765 0.539604 9
9 Czech Republic Austria 17.0 106 5.235294 0.524752 8
10 Croatia Slovenia 36.0 106 1.944444 0.524752 8
11 Croatia Austria 25.0 101 3.040000 0.500000 8
12 Hungary Austria 26.0 71 1.730769 0.351485 8
13 Slovenia Croatia 28.0 62 1.214286 0.306931 8
14 Czech Republic Germany 15.0 62 3.133333 0.306931 8
15 Slovakia Poland 9.0 55 5.111111 0.272277 8
16 Austria Croatia 12.0 54 3.500000 0.267327 8
17 Croatia Germany 19.0 53 1.789474 0.262376 8
18 Serbia Montenegro 15.0 53 2.533333 0.262376 8
19 England Ireland 43.0 51 0.186047 0.252475 7
20 Poland Germany 29.0 51 0.758621 0.252475 7
21 Serbia Greece 17.0 48 1.823529 0.237624 7
22 Bosnia-Herzegovina Austria 8.0 47 4.875000 0.232673 7
23 Montenegro Serbia 16.0 44 1.750000 0.217822 7
24 Poland Slovakia 7.0 41 4.857143 0.202970 7
25 Sweden Norway 34.0 40 0.176471 0.198020 7
26 Austria Hungary 16.0 40 1.500000 0.198020 7
27 Ireland England 20.0 39 0.950000 0.193069 7
28 Croatia Italy 3.0 35 10.666667 0.173267 7
29 Austria Slovakia 14.0 35 1.500000 0.173267 7
... ... ... ... ... ... ... ...
70 Macedonia Serbia 2.0 22 10.000000 0.108911 2
71 Croatia Hungary 10.0 21 1.100000 0.103960 2
72 England Scotland 19.0 21 0.105263 0.103960 2
73 Germany Luxembourg 4.0 21 4.250000 0.103960 2
74 Bulgaria Cyprus 2.0 20 9.000000 0.099010 1
75 Russia Belarus 8.0 20 1.500000 0.099010 1
76 Serbia Austria 17.0 20 0.176471 0.099010 1
77 Slovakia Serbia 2.0 20 9.000000 0.099010 1
78 Serbia Italy 7.0 20 1.857143 0.099010 1
79 Serbia Slovenia 14.0 20 0.428571 0.099010 1
80 Northern Ireland Ireland 4.0 20 4.000000 0.099010 1
81 Romania Bulgaria 2.0 20 9.000000 0.099010 1
82 Finland Germany 6.0 20 2.333333 0.099010 1
83 Albania Serbia 0.0 20 inf 0.099010 1
84 Cyprus Serbia 2.0 19 8.500000 0.094059 1
85 Romania Moldova 20.0 19 -0.050000 0.094059 1
86 Poland Bulgaria 1.0 19 18.000000 0.094059 1
87 Romania Italy 8.0 19 1.375000 0.094059 1
88 Belgium Israel 2.0 19 8.500000 0.094059 1
89 Lithuania Norway 0.0 18 inf 0.089109 0
90 Cyprus Romania 7.0 18 1.571429 0.089109 0
91 Croatia Belgium 15.0 18 0.200000 0.089109 0
92 Sweden Netherlands 18.0 18 0.000000 0.089109 0
93 Italy Romania 9.0 18 1.000000 0.089109 0
94 Poland Czech Republic 5.0 17 2.400000 0.084158 0
95 Greece Bulgaria 8.0 17 1.125000 0.084158 0
96 Israel Belgium 0.0 17 inf 0.084158 0
97 Austria Germany 8.0 17 1.125000 0.084158 0
98 Moldova Romania 10.0 17 0.700000 0.084158 0
99 Greece Cyprus 19.0 17 -0.105263 0.084158 0

100 rows × 7 columns


In [5]:
t=nonloans[(nonloans.Nationality=='Brazil')&(nonloans.feeValue>0)]
#t=t[t.feeValue>0]
s1=t[(t.fromCountry!='Brazil')&(t.toCountry!='Brazil')&(t.fromCountry!=t.toCountry)&(t.season.between(2001,2005))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
s2=t[(t.fromCountry!='Brazil')&(t.toCountry!='Brazil')&(t.fromCountry!=t.toCountry)&(t.season.between(2011,2015))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()

s=s1.rename(columns={'id':'before'}).merge(s2,how='right').sort_values('id',ascending=False).fillna(1)

s=s[:100]
s['diff']=((s['id']/s['id'].sum())-(s['before']/s['before'].sum()))/((s['before']/s['before'].sum()))
s['diff']=((s['id'])-(s['before']))/((s['before']))
s=s.sort_values('id',ascending=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 1,labels=False)*5
s=s.reset_index(drop=True)
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/brazil_transfer_change.csv',index=False)
s


Out[5]:
source target before id diff value decile
0 Portugal Russia 3.0 4 0.333333 1.00 0
1 Portugal Italy 1.0 4 3.000000 1.00 0
2 Spain Italy 1.0 4 3.000000 1.00 0
3 Bulgaria Turkey 1.0 3 2.000000 0.75 0
4 Ukraine Italy 1.0 3 2.000000 0.75 0
5 Romania Saudi Arabia 1.0 3 2.000000 0.75 0
6 Portugal Saudi Arabia 1.0 3 2.000000 0.75 0
7 Portugal Spain 2.0 3 0.500000 0.75 0
8 Portugal Romania 1.0 2 1.000000 0.50 0
9 Portugal Poland 2.0 2 0.000000 0.50 0
10 Sweden Russia 1.0 2 1.000000 0.50 0
11 Spain England 3.0 2 -0.333333 0.50 0
12 Portugal China 1.0 2 1.000000 0.50 0
13 Ukraine China 1.0 2 1.000000 0.50 0
14 Ukraine Germany 1.0 2 1.000000 0.50 0
15 England China 1.0 2 1.000000 0.50 0
16 France Saudi Arabia 1.0 2 1.000000 0.50 0
17 Italy France 1.0 2 1.000000 0.50 0
18 England France 1.0 2 1.000000 0.50 0
19 Italy Portugal 1.0 2 1.000000 0.50 0
20 Italy England 1.0 2 1.000000 0.50 0
21 Turkey Italy 1.0 1 0.000000 0.25 0
22 England Spain 1.0 1 0.000000 0.25 0
23 France Portugal 1.0 1 0.000000 0.25 0
24 Italy Spain 1.0 1 0.000000 0.25 0
25 Slovakia Belgium 1.0 1 0.000000 0.25 0
26 Portugal Ukraine 1.0 1 0.000000 0.25 0
27 Romania Italy 1.0 1 0.000000 0.25 0
28 Slovakia Greece 1.0 1 0.000000 0.25 0
29 Slovenia Portugal 1.0 1 0.000000 0.25 0
... ... ... ... ... ... ... ...
70 Croatia Azerbaijan 1.0 1 0.000000 0.25 0
71 Germany Cyprus 1.0 1 0.000000 0.25 0
72 Bulgaria Ukraine 1.0 1 0.000000 0.25 0
73 Bulgaria Romania 1.0 1 0.000000 0.25 0
74 Bulgaria Macedonia 1.0 1 0.000000 0.25 0
75 Bulgaria Germany 1.0 1 0.000000 0.25 0
76 Bulgaria Azerbaijan 1.0 1 0.000000 0.25 0
77 Denmark China 1.0 1 0.000000 0.25 0
78 Netherlands Austria 1.0 1 0.000000 0.25 0
79 France United Arab Emirates 1.0 1 0.000000 0.25 0
80 Portugal Iran 1.0 1 0.000000 0.25 0
81 Portugal England 1.0 1 0.000000 0.25 0
82 Italy China 1.0 1 0.000000 0.25 0
83 Poland Moldova 1.0 1 0.000000 0.25 0
84 Netherlands Germany 1.0 1 0.000000 0.25 0
85 Netherlands England 1.0 1 0.000000 0.25 0
86 Germany England 1.0 1 0.000000 0.25 0
87 Sweden China 1.0 1 0.000000 0.25 0
88 Hungary France 1.0 1 0.000000 0.25 0
89 Hungary Spain 1.0 1 0.000000 0.25 0
90 Italy Qatar 1.0 1 0.000000 0.25 0
91 Portugal Germany 1.0 1 0.000000 0.25 0
92 Germany United Arab Emirates 1.0 1 0.000000 0.25 0
93 Italy Turkey 1.0 1 0.000000 0.25 0
94 Korea, South China 1.0 1 0.000000 0.25 0
95 Hungary Cyprus 1.0 1 0.000000 0.25 0
96 Lithuania China 1.0 1 0.000000 0.25 0
97 Austria Poland 1.0 1 0.000000 0.25 0
98 Belgium Russia 1.0 1 0.000000 0.25 0
99 Russia Mexico 1.0 1 0.000000 0.25 0

100 rows × 7 columns


In [4]:
t=nonloans[(nonloans.Nationality=='Brazil')&(nonloans.feeValue>0)]
#t=t[t.feeValue>0]
s1=t[(t.fromCountry!=t.toCountry)&(t.season.between(2001,2005))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
s2=t[(t.fromCountry!=t.toCountry)&(t.season.between(2011,2015))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()

s=s1.rename(columns={'id':'before'}).merge(s2,how='right').sort_values('id',ascending=False).fillna(1)

s=s[:100]
s['diff']=((s['id']/s['id'].sum())-(s['before']/s['before'].sum()))/((s['before']/s['before'].sum()))
s['diff']=((s['id'])-(s['before']))/((s['before']))
s=s.sort_values('id',ascending=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 1,labels=False)*5
s=s.reset_index(drop=True)
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/paid_brazil_transfer_change.csv',index=False)
s


Out[4]:
source target before id diff value decile
0 Brazil Italy 9.0 24 1.666667 1.000000 0
1 Brazil Portugal 15.0 17 0.133333 0.708333 0
2 Brazil Ukraine 10.0 12 0.200000 0.500000 0
3 Brazil Spain 10.0 10 0.000000 0.416667 0
4 Brazil France 18.0 9 -0.500000 0.375000 0
5 Brazil Russia 10.0 9 -0.100000 0.375000 0
6 Brazil England 8.0 7 -0.125000 0.291667 0
7 Brazil China 1.0 7 6.000000 0.291667 0
8 Brazil Germany 19.0 6 -0.684211 0.250000 0
9 Brazil United Arab Emirates 2.0 6 2.000000 0.250000 0
10 Brazil Turkey 7.0 6 -0.142857 0.250000 0
11 Italy Brazil 1.0 6 5.000000 0.250000 0
12 Ukraine Brazil 1.0 6 5.000000 0.250000 0
13 Portugal Brazil 5.0 5 0.000000 0.208333 0
14 Portugal Italy 1.0 4 3.000000 0.166667 0
15 Brazil Austria 1.0 4 3.000000 0.166667 0
16 Spain Italy 1.0 4 3.000000 0.166667 0
17 Portugal Russia 3.0 4 0.333333 0.166667 0
18 Brazil Bulgaria 5.0 4 -0.200000 0.166667 0
19 Ukraine Italy 1.0 3 2.000000 0.125000 0
20 Portugal Spain 2.0 3 0.500000 0.125000 0
21 Portugal Saudi Arabia 1.0 3 2.000000 0.125000 0
22 Germany Brazil 2.0 3 0.500000 0.125000 0
23 Romania Saudi Arabia 1.0 3 2.000000 0.125000 0
24 Bulgaria Turkey 1.0 3 2.000000 0.125000 0
25 Brazil Serbia 1.0 2 1.000000 0.083333 0
26 England China 1.0 2 1.000000 0.083333 0
27 Brazil Switzerland 3.0 2 -0.333333 0.083333 0
28 Portugal Poland 2.0 2 0.000000 0.083333 0
29 France Brazil 2.0 2 0.000000 0.083333 0
... ... ... ... ... ... ... ...
70 Portugal England 1.0 1 0.000000 0.041667 0
71 Italy United Arab Emirates 1.0 1 0.000000 0.041667 0
72 Portugal Iran 1.0 1 0.000000 0.041667 0
73 Russia China 1.0 1 0.000000 0.041667 0
74 Portugal Germany 1.0 1 0.000000 0.041667 0
75 Denmark Cyprus 1.0 1 0.000000 0.041667 0
76 Portugal France 1.0 1 0.000000 0.041667 0
77 Austria Poland 1.0 1 0.000000 0.041667 0
78 Belgium Russia 1.0 1 0.000000 0.041667 0
79 Brazil Saudi Arabia 1.0 1 0.000000 0.041667 0
80 Bulgaria Azerbaijan 1.0 1 0.000000 0.041667 0
81 Bulgaria Germany 1.0 1 0.000000 0.041667 0
82 Bulgaria Macedonia 1.0 1 0.000000 0.041667 0
83 Belgium Qatar 1.0 1 0.000000 0.041667 0
84 Bulgaria Romania 1.0 1 0.000000 0.041667 0
85 Bulgaria Ukraine 1.0 1 0.000000 0.041667 0
86 China Brazil 1.0 1 0.000000 0.041667 0
87 Bulgaria Brazil 1.0 1 0.000000 0.041667 0
88 Korea, South China 1.0 1 0.000000 0.041667 0
89 Germany Cyprus 1.0 1 0.000000 0.041667 0
90 Croatia Azerbaijan 1.0 1 0.000000 0.041667 0
91 Croatia Brazil 1.0 1 0.000000 0.041667 0
92 Cyprus Russia 1.0 1 0.000000 0.041667 0
93 Denmark Belgium 1.0 1 0.000000 0.041667 0
94 Denmark China 1.0 1 0.000000 0.041667 0
95 England Portugal 1.0 1 0.000000 0.041667 0
96 Finland Belgium 1.0 1 0.000000 0.041667 0
97 France Qatar 1.0 1 0.000000 0.041667 0
98 France United Arab Emirates 1.0 1 0.000000 0.041667 0
99 Slovakia Greece 1.0 1 0.000000 0.041667 0

100 rows × 7 columns


In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
from urllib2 import urlopen, Request
from bs4 import BeautifulSoup
from pandas import DataFrame
import pandas as pd
import numpy as np
import re
import os.path 
import datetime 
import math
import matplotlib.pyplot as plt

transfers=pd.read_csv('../data/merged/transfers_q.csv',index_col=False, warn_bad_lines =True,error_bad_lines=False)
loans = pd.read_csv('../data/merged/loans.csv')
nonloans = pd.read_csv('../data/merged/nonloans.csv')
players=pd.read_csv('../data/merged/players.csv')

leagueCodeMapping = {'GB1':'Premier League (England)','FR1':'Ligue 1 (France)',
                                                         'ES1':'La Liga (Spain)','IT1':'Serie A (Italy)','L1':'Bundesliga (Germany)',
                                                         'PO1':'Primeira Liga (Portugal)','NL1':'Eredivisie (Netherlands)'}

teamIdMap = pd.concat([transfers[['toTeamId','toTeamName','season']].rename(columns={'toTeamId':'teamId','toTeamName':'teamName'}),
    transfers[['fromTeamId','fromTeamName','season']].rename(columns={'fromTeamId':'teamId','fromTeamName':'teamName'})],ignore_index=True) \
    .sort_values('season',ascending=False).drop_duplicates('teamId').set_index('teamId')['teamName'].to_dict()

agentIdMap = transfers[['playersAgentId','playersAgent']].sort_values('playersAgent',ascending=False).drop_duplicates('playersAgentId').set_index('playersAgentId')['playersAgent'].to_dict()

leagueList = ['GB1','ES1','L1','IT1','FR1','PO1','NL1','TR1','RU1','BE1','MLS1','UKR1','GR1','RO1','ZYP1','KR1','SER1','BOS1','BU1','DK1','SE1','NO1','LI1','SC1','ISR1','PL1','UNG1','A1','C1','MAL1','KAS1','TS1','SLO1','SL1','FI1','LUX1','AZ1']

countryInfo=pd.read_excel('../data/reference/countries.xls').set_index('country')


top20clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:20].index
top50clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:50].index
top100clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:100].index

uefa=countryInfo[countryInfo.federation=='UEFA'].index
caf=countryInfo[countryInfo.federation=='CAF'].index
concacaf=countryInfo[countryInfo.federation=='CONCACAF'].index
concacaf_no_uscan=countryInfo[countryInfo.federation=='CONCACAF'].drop(['United States','Canada']).index
conmebol=countryInfo[countryInfo.federation=='CONMEBOL'].index
afc=countryInfo[countryInfo.federation=='AFC'].index
afc_no_anzac=countryInfo[countryInfo.federation=='AFC'].drop(['Australia','New Zealand']).index
tier3=countryInfo[countryInfo.marketTier==3].index

leagues=['GB1']
eplIds=nonloans[(nonloans.fromLeague.isin(leagues))|(nonloans.toLeague.isin(leagues))].id
nonEnglandIds=nonloans[(nonloans.fromCountry!='England')&(nonloans.toCountry!='England')].id
t=nonloans

s=t[(t.id.isin(eplIds))&(t.fromCountry!=t.toCountry)&(t.toCountry!='England')&(t.fromCountry!='England')].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()
s=s[:80]
#s['value']=pd.qcut(s.id, 10, labels=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 6,labels=False)*2
#s.id=10+s.id.max()-s.id
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/epl_transfers.csv',index=False)
s


/Users/sunkevin/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2871: DtypeWarning: Columns (0,38,40,41,45,47,49,55) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
/Users/sunkevin/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2871: DtypeWarning: Columns (45,47) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
/Users/sunkevin/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2871: DtypeWarning: Columns (40,45,47,51) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
/Users/sunkevin/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2871: DtypeWarning: Columns (32,34) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
Out[1]:
source target id value decile
0 France Italy 32 1.00000 10
1 France Spain 25 0.78125 10
2 Italy Spain 22 0.68750 10
3 Spain France 22 0.68750 10
4 Spain Italy 20 0.62500 10
5 Netherlands Italy 20 0.62500 10
6 Belgium Netherlands 19 0.59375 10
7 Argentina Spain 18 0.56250 10
8 Belgium France 16 0.50000 10
9 Netherlands Spain 15 0.46875 10
10 Argentina Italy 14 0.43750 10
11 Brazil Portugal 13 0.40625 10
12 Germany Spain 12 0.37500 8
13 Switzerland Germany 12 0.37500 8
14 France Portugal 12 0.37500 8
15 Denmark Germany 12 0.37500 8
16 Netherlands Germany 12 0.37500 8
17 Ireland Northern Ireland 11 0.34375 8
18 Portugal Spain 11 0.34375 8
19 Italy France 11 0.34375 8
20 France Greece 11 0.34375 8
21 France Belgium 10 0.31250 8
22 Germany Denmark 10 0.31250 8
23 France Germany 10 0.31250 8
24 Greece Cyprus 10 0.31250 8
25 Austria Germany 9 0.28125 6
26 Scotland United States 9 0.28125 6
27 Scotland Ireland 9 0.28125 6
28 Italy Greece 9 0.28125 6
29 Spain Netherlands 9 0.28125 6
... ... ... ... ... ...
50 Belgium Germany 7 0.21875 2
51 France Qatar 7 0.21875 2
52 Italy Germany 7 0.21875 2
53 Sweden Netherlands 7 0.21875 2
54 Denmark Sweden 6 0.18750 0
55 Northern Ireland Ireland 6 0.18750 0
56 Senegal France 6 0.18750 0
57 Cote d'Ivoire Belgium 6 0.18750 0
58 France Scotland 6 0.18750 0
59 Norway Sweden 6 0.18750 0
60 Greece Italy 6 0.18750 0
61 Sweden Norway 6 0.18750 0
62 Cameroon France 6 0.18750 0
63 Greece Spain 6 0.18750 0
64 Argentina Mexico 6 0.18750 0
65 Uruguay Italy 6 0.18750 0
66 Czech Republic Germany 6 0.18750 0
67 Netherlands Turkey 6 0.18750 0
68 Sweden Italy 6 0.18750 0
69 Netherlands Belgium 6 0.18750 0
70 Spain Argentina 6 0.18750 0
71 United States Germany 6 0.18750 0
72 Spain Turkey 6 0.18750 0
73 Italy Argentina 6 0.18750 0
74 Turkey Netherlands 5 0.15625 0
75 Germany Norway 5 0.15625 0
76 Germany Greece 5 0.15625 0
77 Belgium Switzerland 5 0.15625 0
78 Brazil France 5 0.15625 0
79 Belgium United States 5 0.15625 0

80 rows × 5 columns


In [2]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
from urllib2 import urlopen, Request
from bs4 import BeautifulSoup
from pandas import DataFrame
import pandas as pd
import numpy as np
import re
import os.path 
import datetime 
import math
import matplotlib.pyplot as plt

transfers=pd.read_csv('../data/merged/transfers_q.csv',index_col=False, warn_bad_lines =True,error_bad_lines=False)
loans = pd.read_csv('../data/merged/loans.csv')
nonloans = pd.read_csv('../data/merged/nonloans.csv')
players=pd.read_csv('../data/merged/players.csv')

leagues=['L1','GB1','FR1','ES1','IT1']
countries=['Germany','England','France','Spain','Italy']
eplIds=nonloans[(nonloans.fromLeague.isin(leagues))|(nonloans.toLeague.isin(leagues))].id
t=nonloans

s=t[(t.id.isin(eplIds))&(t.fromCountry!=t.toCountry)&(~t.fromCountry.isin(countries))&(~t.toCountry.isin(countries))].groupby(['fromCountry','toCountry']).count()[['id']].sort_values('id',ascending=False).reset_index()
#s.groupby('toCountry').apply(lambda x:x[:5]).reset_index(drop=True).sort_values('id',ascending=False).reset_index(drop=True)
#s['cumpct']=s['id'].cumsum()/s['id'].sum()
s=s[:100]
#s['value']=pd.qcut(s.id, 10, labels=False)
s['value']=s.id/s.id.max()
s['decile']=pd.qcut(s.id, 10,labels=False)
#s.id=10+s.id.max()-s.id
s=s.rename(columns={'fromCountry':'source','toCountry':'target'})
s.to_csv('../html/tier1_transfers.csv',index=False)
s


Out[2]:
source target id value decile
0 Brazil Portugal 72 1.000000 9
1 Argentina Mexico 38 0.527778 9
2 Belgium Netherlands 37 0.513889 9
3 Greece Cyprus 36 0.500000 9
4 Cyprus Greece 32 0.444444 9
5 Portugal Brazil 29 0.402778 9
6 Mexico Argentina 28 0.388889 9
7 Uruguay Argentina 27 0.375000 9
8 Netherlands Belgium 24 0.333333 9
9 Brazil Japan 24 0.333333 9
10 Argentina Uruguay 23 0.319444 8
11 Japan Brazil 22 0.305556 8
12 Portugal Greece 22 0.305556 8
13 Denmark Netherlands 21 0.291667 8
14 Argentina Portugal 20 0.277778 8
15 Brazil Switzerland 20 0.277778 8
16 Denmark Norway 19 0.263889 8
17 Norway Sweden 18 0.250000 8
18 Argentina Brazil 17 0.236111 8
19 Belgium Turkey 17 0.236111 8
20 Sweden Netherlands 16 0.222222 7
21 Portugal Turkey 16 0.222222 7
22 Portugal Cyprus 16 0.222222 7
23 Ukraine Russia 16 0.222222 7
24 Greece Poland 16 0.222222 7
25 Argentina Greece 15 0.208333 6
26 United Arab Emirates Brazil 15 0.208333 6
27 Argentina Chile 15 0.208333 6
28 Portugal Russia 15 0.208333 6
29 Sweden Norway 15 0.208333 6
... ... ... ... ... ...
70 Montenegro Serbia 11 0.152778 2
71 Netherlands Turkey 11 0.152778 2
72 Norway Belgium 11 0.152778 2
73 Argentina Switzerland 11 0.152778 2
74 Austria Greece 10 0.138889 1
75 Croatia Japan 10 0.138889 1
76 Czech Republic Austria 10 0.138889 1
77 Scotland United States 10 0.138889 1
78 Chile Argentina 10 0.138889 1
79 Greece Romania 10 0.138889 1
80 Portugal Romania 10 0.138889 1
81 Portugal Argentina 10 0.138889 1
82 Bosnia-Herzegovina Croatia 10 0.138889 1
83 Bosnia-Herzegovina Serbia 10 0.138889 1
84 Mexico Uruguay 10 0.138889 1
85 Serbia Belgium 10 0.138889 1
86 Brazil Russia 10 0.138889 1
87 Korea, South Japan 10 0.138889 1
88 Australia Netherlands 10 0.138889 1
89 Slovenia Austria 10 0.138889 1
90 Greece China 9 0.125000 0
91 Belgium Luxembourg 9 0.125000 0
92 Sweden Scotland 9 0.125000 0
93 Greece Netherlands 9 0.125000 0
94 Netherlands Cyprus 9 0.125000 0
95 Serbia Greece 9 0.125000 0
96 Portugal Belgium 9 0.125000 0
97 Greece United States 9 0.125000 0
98 Turkey Russia 9 0.125000 0
99 Scotland Ireland 9 0.125000 0

100 rows × 5 columns


In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
from urllib2 import urlopen, Request
from bs4 import BeautifulSoup
from pandas import DataFrame
import pandas as pd
import numpy as np
import re
import os.path 
import datetime 
import math
import matplotlib.pyplot as plt

transfers=pd.read_csv('../data/merged/transfers_q.csv',index_col=False, warn_bad_lines =True,error_bad_lines=False)
loans = pd.read_csv('../data/merged/loans.csv')
nonloans = pd.read_csv('../data/merged/nonloans.csv')
players=pd.read_csv('../data/merged/players.csv')

leagueCodeMapping = {'GB1':'Premier League (England)','FR1':'Ligue 1 (France)',
                                                         'ES1':'La Liga (Spain)','IT1':'Serie A (Italy)','L1':'Bundesliga (Germany)',
                                                         'PO1':'Primeira Liga (Portugal)','NL1':'Eredivisie (Netherlands)'}

teamIdMap = pd.concat([transfers[['toTeamId','toTeamName','season']].rename(columns={'toTeamId':'teamId','toTeamName':'teamName'}),
    transfers[['fromTeamId','fromTeamName','season']].rename(columns={'fromTeamId':'teamId','fromTeamName':'teamName'})],ignore_index=True) \
    .sort_values('season',ascending=False).drop_duplicates('teamId').set_index('teamId')['teamName'].to_dict()

agentIdMap = transfers[['playersAgentId','playersAgent']].sort_values('playersAgent',ascending=False).drop_duplicates('playersAgentId').set_index('playersAgentId')['playersAgent'].to_dict()

leagueList = ['GB1','ES1','L1','IT1','FR1','PO1','NL1','TR1','RU1','BE1','MLS1','UKR1','GR1','RO1','ZYP1','KR1','SER1','BOS1','BU1','DK1','SE1','NO1','LI1','SC1','ISR1','PL1','UNG1','A1','C1','MAL1','KAS1','TS1','SLO1','SL1','FI1','LUX1','AZ1']

countryInfo=pd.read_excel('../data/reference/countries.xls').set_index('country')


top20clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:20].index
top50clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:50].index
top100clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:100].index

uefa=countryInfo[countryInfo.federation=='UEFA'].index
caf=countryInfo[countryInfo.federation=='CAF'].index
concacaf=countryInfo[countryInfo.federation=='CONCACAF'].index
concacaf_no_uscan=countryInfo[countryInfo.federation=='CONCACAF'].drop(['United States','Canada']).index
conmebol=countryInfo[countryInfo.federation=='CONMEBOL'].index
afc=countryInfo[countryInfo.federation=='AFC'].index
afc_no_anzac=countryInfo[countryInfo.federation=='AFC'].drop(['Australia','New Zealand']).index
tier3=countryInfo[countryInfo.marketTier==3].index


/Users/sunkevin/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2871: DtypeWarning: Columns (0,38,40,41,45,47,49,55) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
/Users/sunkevin/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2871: DtypeWarning: Columns (45,47) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
/Users/sunkevin/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2871: DtypeWarning: Columns (40,45,47,51) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
/Users/sunkevin/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2871: DtypeWarning: Columns (32,34) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)