In [1]:
origin=loans[(loans['fromCountry']!=loans['toCountry'])&(loans['fromCountry'].isin(['England','Spain','Germany','Italy','France','Netherlands','Portugal','Russia','Turkey']))] \
    .rename(columns={'fromCountry':'Country'}).groupby(['season','Country'])['id'].count().unstack() \
    .cumsum()
destination=loans[(loans['fromCountry']!=loans['toCountry'])&(loans['toCountry'].isin(['England','Spain','Germany','Italy','France','Netherlands','Portugal','Russia','Turkey']))] \
    .rename(columns={'toCountry':'Country'}).groupby(['season','Country'])['id'].count().unstack() \
    .cumsum()


fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10,10),sharex=True,sharey=False)


origin.plot(ax=axes[0],colormap='Set1',title="cumulative # of international loans by origin country",xticks=[1995,2000,2005,2010,2015])
destination.plot(ax=axes[1],colormap='Set1',title="cumulative # of international loans by destination country",xlim=(1995,2015))

plt.subplots_adjust(wspace=0,hspace=0.1)

fig.savefig('intl_loans_by_country.png')

#ax1.set(sub1)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-9c57dbc25d52> in <module>()
----> 1 origin=loans[(loans['fromCountry']!=loans['toCountry'])&(loans['fromCountry'].isin(['England','Spain','Germany','Italy','France','Netherlands','Portugal','Russia','Turkey']))]     .rename(columns={'fromCountry':'Country'}).groupby(['season','Country'])['id'].count().unstack()     .cumsum()
      2 destination=loans[(loans['fromCountry']!=loans['toCountry'])&(loans['toCountry'].isin(['England','Spain','Germany','Italy','France','Netherlands','Portugal','Russia','Turkey']))]     .rename(columns={'toCountry':'Country'}).groupby(['season','Country'])['id'].count().unstack()     .cumsum()
      3 
      4 
      5 fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10,10),sharex=True,sharey=False)

NameError: name 'loans' is not defined

In [31]:
origin=nonloans[(nonloans['fromCountry']!=nonloans['toCountry'])&(nonloans['season']>0)&(nonloans['Age'].between(27,50))&(nonloans['fromCountry'].isin(['England','Brazil','Argentina','Spain','Germany','Italy','France','Netherlands','Portugal','Russia','Turkey']))] \
    .rename(columns={'fromCountry':'Country'}).groupby(['season','Country'])['id'].count().unstack() \
    .cumsum()
destination=nonloans[(nonloans['fromCountry']!=nonloans['toCountry'])&(nonloans['season']>0)&(nonloans['Age'].between(27,50))&(nonloans['toCountry'].isin(['England','Brazil','Argentina','Spain','Germany','Italy','France','Netherlands','Portugal','Russia','Turkey']))] \
    .rename(columns={'toCountry':'Country'}).groupby(['season','Country'])['id'].count().unstack() \
    .cumsum()


fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10,10),sharex=True,sharey=False)


origin.plot(ax=axes[0],colormap='Set1',title="cumulative # of international transfers by origin country",xticks=[1995,2000,2005,2010,2015])
destination.plot(ax=axes[1],colormap='Set1',title="cumulative # of international transfers by destination country",xlim=(1995,2015))

plt.subplots_adjust(wspace=0,hspace=0.1)

fig.savefig('intl_transfers_by_country.png')


#ax1.set(sub1)



In [332]:
origin=nonloans[(nonloans['fromCountry']!=nonloans['toCountry'])&(nonloans['season']>2010)&(nonloans['Age'].between(20,25))&(nonloans['fromCountry'].isin(['England','Spain','Germany','Italy','France','Netherlands','Portugal','Russia','Turkey']))] \
    .rename(columns={'fromCountry':'Country'}).groupby(['season','Country'])['id'].count().unstack() \
    .cumsum()
destination=nonloans[(nonloans['fromCountry']!=nonloans['toCountry'])&(nonloans['season']>0)&(nonloans['Age'].between(20,25))&(nonloans['toCountry'].isin(['England','Portugal','Russia','Turkey']))] \
    .rename(columns={'toCountry':'Country'}).groupby(['season','Country'])['id'].count().unstack() \
    #.cumsum()


fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10,10),sharex=True,sharey=False)


origin.plot(ax=axes[0],colormap='Set1',title="cumulative # of international transfers by origin country",xticks=[1995,2000,2005,2010,2015])
destination.plot(ax=axes[1],colormap='Set1',title="cumulative # of international transfers by destination country",xlim=(1995,2015))

plt.subplots_adjust(wspace=0,hspace=0.1)

fig.savefig('intl_transfers_by_country.png')


#ax1.set(sub1)



In [308]:
t=nonloans[(nonloans['toLeague']=='NL1')&(nonloans['Nationality']=='Netherlands')]
teamIdMap=t.drop_duplicates('fromTeamId')[['fromTeamId','fromTeamName']].set_index('fromTeamId').to_dict()['fromTeamName']

t.groupby('Nationality 2')[['feeValue']].count().sort_values('feeValue',ascending=False).rename(index=teamIdMap)['feeValue'].plot.pie(figsize=(10,10))


Out[308]:
<matplotlib.axes._subplots.AxesSubplot at 0x143664750>

In [28]:
t=players[players['Nationality']=='Brazil'].copy()
t['Nationality 2']=np.where(t['Nationality 2'].isnull(),t['Nationality'],t['Nationality 2'])

t.groupby('Nationality 2')[['id']].count().sort_values('id',ascending=False).rename(index=teamIdMap)['id'].plot.pie(figsize=(10,10))


Out[28]:
<matplotlib.axes._subplots.AxesSubplot at 0x11e698d50>

In [16]:
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10,5),sharex=True)

#()
t1=nonloans[(nonloans['fromCountry'].notnull())&(nonloans['toCountry'].notnull())&(nonloans['fromCountry'].notnull()!=nonloans['toCountry'])].groupby('season')[['id']].count().rename(columns={'id': '# of intl transfers','feeValue':"declared fees (euro)"})
#t2=nonloans[nonloans['fromCountry']!=nonloans['toCountry']].groupby('season')[['id','feeValue']].agg({'id':'count','feeValue':'sum'}).rename(columns={'id': '# of transfers','feeValue':"declared fees (euro)"})
t1[['# of intl transfers']].rolling(window=1,center=False).mean().plot(ax=axes,xlim=(1995,2015))
t1


Out[16]:
# of intl transfers
season
1929 1
1962 2
1964 1
1969 1
1972 1
1974 1
1976 1
1977 1
1979 4
1980 7
1981 11
1982 23
1983 35
1984 53
1985 79
1986 122
1987 178
1988 270
1989 368
1990 486
1991 677
1992 858
1993 1075
1994 1336
1995 1772
1996 2251
1997 2827
1998 3232
1999 3882
2000 4374
2001 5051
2002 5972
2003 7197
2004 8739
2005 9915
2006 10899
2007 12115
2008 12410
2009 12788
2010 13719
2011 14229
2012 14267
2013 14075
2014 13681
2015 12818

In [17]:
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10,5),sharex=True)

#()
t1=nonloans[(nonloans['fromCountry'].notnull())&(nonloans['toCountry'].notnull())&(nonloans['fromCountry'].notnull()!=nonloans['toCountry'])].groupby('season')[['feeValue']].sum().rename(columns={'id': '# of intl transfers','feeValue':"total disclosed intl transfer fees (euro)"})
#t2=nonloans[nonloans['fromCountry']!=nonloans['toCountry']].groupby('season')[['id','feeValue']].agg({'id':'count','feeValue':'sum'}).rename(columns={'id': '# of transfers','feeValue':"declared fees (euro)"})
t1[['total disclosed intl transfer fees (euro)']].rolling(window=1,center=False).mean().plot(ax=axes,xlim=(1995,2015))


Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x1211ee190>

In [18]:
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10,5),sharex=True)

#()
t1=nonloans[(nonloans['fromCountry'].notnull())&(nonloans['toCountry'].notnull())&(nonloans['fromCountry'].notnull()!=nonloans['toCountry'])&(nonloans['feeValue']>0)].groupby('season')[['id','feeValue']].agg({'id':'count','feeValue':'sum'}).rename(columns={'id': '# of paid intl transfers','feeValue':"declared fees (euro)"})
#t2=nonloans[nonloans['fromCountry']!=nonloans['toCountry']].groupby('season')[['id','feeValue']].agg({'id':'count','feeValue':'sum'}).rename(columns={'id': '# of transfers','feeValue':"declared fees (euro)"})
t2=nonloans[(nonloans['fromCountry'].notnull())&(nonloans['toCountry'].notnull())&(nonloans['fromCountry']!=nonloans['toCountry'])&(nonloans['feeValue']==0)].groupby('season')[['id','feeValue']].agg({'id':'count','feeValue':'sum'}).rename(columns={'id': '# of free intl transfers','feeValue':"declared fees (euro)"})
t3=nonloans[(nonloans['fromCountry'].notnull())&(nonloans['toCountry'].notnull())&(nonloans['fromCountry']!=nonloans['toCountry'])&(nonloans['feeValue'].isnull())].groupby('season')[['id','feeValue']].agg({'id':'count','feeValue':'sum'}).rename(columns={'id': '# of undisclosed intl transfers','feeValue':"declared fees (euro)"})

t1[['# of paid intl transfers']].rolling(window=1,center=False).mean().plot(ax=axes,xlim=(1995,2015))
t2[['# of free intl transfers']].rolling(window=1,center=False).mean().plot(ax=axes,xlim=(1995,2015))
t3[['# of undisclosed intl transfers']].rolling(window=1,center=False).mean().plot(ax=axes,xlim=(1995,2015))


Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0x107123a90>

In [19]:
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10,5),sharex=True)

#()
t1=nonloans[(nonloans['fromCountry'].notnull())&(nonloans['toCountry'].notnull())&(nonloans['fromCountry'].notnull()!=nonloans['toCountry'])&(nonloans['feeValue']>0)].groupby('season')[['id','feeValue','mv']].agg({'id':'count','feeValue':'sum','mv':'sum'}).rename(columns={'id': '# of paid intl transfers','feeValue':"transfer fees",'mv':'paid market value'})
#t2=nonloans[nonloans['fromCountry']!=nonloans['toCountry']].groupby('season')[['id','feeValue']].agg({'id':'count','feeValue':'sum'}).rename(columns={'id': '# of transfers','feeValue':"declared fees (euro)"})
t2=nonloans[(nonloans['fromCountry'].notnull())&(nonloans['toCountry'].notnull())&(nonloans['fromCountry']!=nonloans['toCountry'])&(nonloans['feeValue']==0)].groupby('season')[['id','mv']].agg({'id':'count','mv':'sum'}).rename(columns={'id': '# of free intl transfers','mv':"free market value"})
t3=nonloans[(nonloans['fromCountry'].notnull())&(nonloans['toCountry'].notnull())&(nonloans['fromCountry']!=nonloans['toCountry'])&(nonloans['feeValue'].isnull())].groupby('season')[['id','mv']].agg({'id':'count','mv':'sum'}).rename(columns={'id': '# of undisclosed intl transfers','mv':"undisclosed market value"})

t1[['transfer fees']].rolling(window=1,center=False).mean().plot(ax=axes,xlim=(1995,2015))
t1[['paid market value']].rolling(window=1,center=False).mean().plot(ax=axes,xlim=(1995,2015))
t2[['free market value']].rolling(window=1,center=False).mean().plot(ax=axes,xlim=(1995,2015))
t3[['undisclosed market value']].rolling(window=1,center=False).mean().plot(ax=axes,xlim=(1995,2015))


Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0x11debbd50>

In [53]:
r= nonloans[(~(nonloans['fromLeague'].fillna('Other').str.startswith('Other')))&(~(nonloans['toLeague'].fillna('Other').str.startswith('Other')))&(nonloans['fromCountry'].notnull())&(nonloans['toCountry'].notnull())&(nonloans['fromCountry']!=nonloans['toCountry'])&(nonloans['feeValue'].isnull())].sort_values('mv',ascending=False) \
    [['id','Display name','fromCountry','fromTeamName','toCountry','toTeamName','mv','fee']] \

r


Out[53]:
id Display name fromCountry fromTeamName toCountry toTeamName mv fee
8429 7691 Wálter Pandiani Spain Dep. La Coruña England Birmingham 10000000.0 ?
31261 5880 Morgan De Sanctis Italy Udinese Spain Sevilla FC 8000000.0 ?
140565 110326 Jucilei United Arab Emirates Al-Jazira China SD Luneng 7000000.0 ?
27954 9415 Fernando Baiano Brazil São Caetano Spain Málaga CF 6000000.0 ?
3037 3875 Jermain Defoe United States Toronto England Sunderland 6000000.0 ?
3297 4038 Steve Finnan England Liverpool Spain Espanyol 5500000.0 ?
57488 5854 Sébastien Frey Italy Genoa Turkey Bursaspor 5000000.0 ?
29207 15094 Robert Spain Real Betis Saudi Arabia Ittihad 5000000.0 ?
15998 21821 Luis Jiménez Italy Ternana United Arab Emirates Al-Ahli (UAE) 5000000.0 ?
33132 22750 Nilmar Qatar El-Jaish Brazil Internacional 5000000.0 ?
161567 23882 Gastón Fernández Argentina San Lorenzo Mexico Tigres 5000000.0 ?
127604 45720 Wagner Brazil Fluminense China TJ Teda 4750000.0 ?
81157 13936 Zied Jaziri Turkey Gaziantepspor France Troyes 4500000.0 ?
2808 3776 Nicolás Medina Spain Real Murcia Argentina CA Rosario 4500000.0 ?
129184 27861 Senijad Ibricic Russia Loko Moscow Turkey Gaziantepspor 4500000.0 ?
90377 13972 Tonel Portugal Sporting CP Croatia Dinamo Zagreb 4500000.0 ?
60353 3777 Sebastián Saja Mexico América Spain Córdoba CF 4500000.0 ?
86560 30758 Darío Cvitanich France OGC Nice Mexico CF Pachuca 4000000.0 ?
94864 15109 Moisés Moura Portugal Braga Qatar Al-Rayyan 4000000.0 ?
12008 26456 Mauro Zárate Argentina CA Vélez England West Ham 4000000.0 ?
93286 45673 Edgar Silva Portugal Vit. Guimarães United Arab Emirates Al-Shabab 3800000.0 ?
138541 67917 Rafael Carioca Russia Spartak Moscow Brazil Atlético-MG 3750000.0 ?
16202 39378 Jozy Altidore England Sunderland United States Toronto 3500000.0 ?
14045 38414 Ryan Donk Belgium Club Brugge Turkey Kasimpasa 3500000.0 ?
11624 3904 Ousmane Dabo England Manchester City Italy Lazio 3500000.0 ?
61353 18539 Ernesto Farías Italy US Palermo Argentina River Plate 3500000.0 ?
11009 65 Dimitar Berbatov England Fulham FC France Monaco 3500000.0 ?
29206 15094 Robert Saudi Arabia Ittihad Mexico Monterrey 3500000.0 ?
109787 13103 Björn Vleminckx Belgium Club Brugge Turkey K. Erciyesspor 3500000.0 ?
96169 40858 Nuno André Coelho Portugal Braga Turkey Balikesirspor 3500000.0 ?
... ... ... ... ... ... ... ... ...
159827 74488 Juan Diego González Colombia Once Caldas Mexico Santos NaN ?
159831 74488 Juan Diego González Argentina San Lorenzo Colombia Envigado NaN ?
159941 82718 Oswaldo Minda Ecuador Deportivo Quito United States Chivas USA NaN ?
160024 112240 Camargo Brazil Paraná United States Houston NaN ?
160091 131096 Fabián Castillo Colombia Deportivo Cali United States FC Dallas\t NaN ?
160262 172285 Camilo Sanvezzo Malta QFC Korea, South Gyeongnam FC NaN ?
160706 25677 Jeremiah White United States Revolution Serbia OFK Beograd NaN ?
160731 30686 Milován Mirosevic United Arab Emirates Al-Ain FC United States Columbus NaN ?
160736 30686 Milován Mirosevic Chile CDUC Argentina Racing Club NaN ?
160800 44036 Martín Bonjour Argentina Olimpo Uruguay Rentistas NaN ?
160820 48198 Marcelo Sarvas Costa Rica LD Alajuelense United States Los Angeles NaN ?
160863 53887 Emiliano Ariel Dudar Argentina CA Tigre Brazil Vasco da Gama NaN ?
160894 63403 Claudio Bieler Argentina CA Colón Chile Colo Colo NaN ?
160955 77096 Jhon Valencia Romania Otelul Galati Colombia Indep. Medellín NaN ?
160956 77096 Jhon Valencia Colombia Indep. Medellín Romania Otelul Galati NaN ?
161124 128530 Josué Martínez Costa Rica Saprissa United States Philadelphia NaN ?
161190 156056 Porfirio López China DL Shide United States Philadelphia NaN ?
161192 156056 Porfirio López Costa Rica Puntarenas FC China DL Shide NaN ?
161444 224418 Pipico Malta Tarxien Brazil Macaé NaN ?
161594 30586 Wálter Vílchez Peru Alianza Lima Argentina Olimpo NaN ?
161630 58616 José Manuel Rivera Finland RoPS United States Chivas USA NaN ?
161635 58616 José Manuel Rivera Slovakia Spartak Trnava Hungary Budapest Honvéd NaN ?
161704 74532 Gabriel Torres Venezuela Zamora United States Colorado NaN ?
161705 74532 Gabriel Torres Colombia CD La Equidad Venezuela Zamora NaN ?
161740 79758 Juan Anangonó Ecuador CD El Nacional Argentina Argentinos NaN ?
162224 9777 Dani Fragoso Spain CE L’Hospitalet United States Chivas USA NaN ?
162732 227390 Carlos Rivas Colombia Deportivo Cali United States Orlando NaN ?
162765 260381 Dairon Asprilla Colombia Atl. Nacional United States Timbers NaN ?
162834 313286 Michael Barrios Colombia Uniautónoma United States FC Dallas\t NaN ?
162846 325614 Mauro Manotas Colombia Uniautónoma United States Houston NaN ?

3625 rows × 8 columns


In [223]:
t=nonloans[(nonloans['fromCountry']!=nonloans['toCountry'])&~(nonloans['toCountry'].isnull())&(nonloans['season'].between(2010,2015))].groupby(['toTeamId','toTeamName'])[['id','feeValue']].agg({'id':'count','feeValue':'sum'}).rename(columns={'id': '# of transfers','feeValue':"declared fees (euro)"})

t.sort_values("# of transfers",ascending=False)[:10]


Out[223]:
declared fees (euro) # of transfers
toTeamId toTeamName
162.0 Monaco 272910000.0 68
683.0 Olympiacos 72295000.0 57
3057.0 Standard Liège 34780000.0 55
410.0 Udinese Calcio 87380000.0 54
294.0 Benfica 188860000.0 52
6321.0 Vancouver 4064000.0 52
12.0 AS Roma 193799000.0 49
336.0 Sporting CP 72226000.0 47
3205.0 Kayserispor 12220000.0 45
631.0 Chelsea 552400000.0 45

In [20]:
t=nonloans[(nonloans['toTeamId'].isin([294,3057,336,720,157,172,631,683,820]))&(nonloans['toCountry']!=nonloans['fromCountry'])] 

teamIdMap=t.drop_duplicates('toTeamId')[['toTeamId','toTeamName']].set_index('toTeamId').to_dict()['toTeamName']

#t.groupby(['season','fromTeamId'])['id'].count().rolling(window=1,center=False).mean().unstack().rename(columns=teamIdMap).plot(figsize=(20,10),colormap='Set1')
t.groupby(['season','toTeamId'])['id'].count().rolling(window=1,center=False).mean().unstack().cumsum().rename(columns=teamIdMap).plot(figsize=(20,10),colormap='Set1')


Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x10a90a390>

In [221]:
teamIdMap


Out[221]:
{157.0: 'KAA Gent ',
 162.0: 'Monaco ',
 172.0: 'RSC Charleroi ',
 294.0: 'Benfica ',
 336.0: 'Sporting CP ',
 631.0: 'Chelsea ',
 683.0: 'Olympiacos ',
 720.0: 'FC Porto ',
 820.0: 'Genclerbirligi ',
 3057.0: 'Standard Li\xc3\xa8ge '}

In [27]:
nonloans[(nonloans['fromCountry']!='Romania')&(nonloans['toCountry']=='Cyprus')].groupby(('toTeamName','toLeague'))[['id']].count().sort_values('id',ascending=False)


Out[27]:
id
toTeamName toLeague
Omonia Nikosia ZYP1 66
Anor. Famagusta ZYP1 64
AEK Larnaca ZYP1 61
Apol. Limassol ZYP1 60
AEL Limassol ZYP1 59
APOEL Nicosia ZYP1 52
Nea Salamis ZYP1 48
En. Paralimni ZYP1 47
Ermis Aradippou ZYP1 45
D. Katokopias ZYP1 40
Ethnikos ZYP1 28
Olympiakos N. ZYP1 25
AEP Paphos ZYP1 21
Olympiakos N. Other Cyprus 21
Alki Larnaca ZYP1 19
APOP Kinyras Other Cyprus 17
Alki Larnaca Other Cyprus 16
AEP Paphos Other Cyprus 15
Aris Limassol ZYP1 11
Geroskipou Other Cyprus 9
Agia Napa ZYP1 9
Dighenis Morphou Other Cyprus 8
APOP Kinyras ZYP1 8
Onisilos Sotira Other Cyprus 5
APEP Pitsilia Other Cyprus 5
Aris Limassol Other Cyprus 4
Akritas Chlorakas Other Cyprus 4
ENAD Other Cyprus 4
Othellos Athienou ZYP1 4
Aradippou Other Cyprus 3
Anagennisi Dery Other Cyprus 3
Othellos Athienou Other Cyprus 3
Pafos FC ZYP1 3
AEK Larnaca Other Cyprus 3
D. Katokopias Other Cyprus 3
Karmiotissa Other Cyprus 2
Nikos & Sokrati Other Cyprus 2
Nea Salamis Other Cyprus 2
Magusa T. Gücü Other Cyprus 2
AEK Kouklia Other Cyprus 2
ZYP1 2
Cetinkaya TSK Other Cyprus 2
Agia Napa Other Cyprus 2
Frenaros Other Cyprus 1
Famagusta U21 Other Cyprus 1
Ermis Aradippou Other Cyprus 1
En. Paralimni Other Cyprus 1
Chalkanoras Other Cyprus 1
Anagennisi Dery ZYP1 1
ASIL Lysi Other Cyprus 1
PAEEK Other Cyprus 1
PO Ormidias Other Cyprus 1
Pafos FC Other Cyprus 1
APEP Pitsilia ZYP1 1
Türk Ocagi Other Cyprus 1

In [27]:
countrylist=['Romania','Cyprus','Greece']


destination=nonloans[(nonloans['toCountry']!=nonloans['fromCountry'])&(nonloans['toCountry'].isin(['Greece','Cyprus','Romania','Bulgaria','Hungary']))] \
    .rename(columns={'toCountry':'Country'}).groupby(['season','Country'])['id'].count().unstack() \
    #.cumsum()
    # .rolling(window=3,center=False).mean() \
    
#destination2=nonloans[(nonloans['toCountry']!=nonloans['fromCountry'])] \
#    .rename(columns={'toCountry':'Country'}).groupby(['season'])['id'].count()


fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10,5),sharex=True,sharey=False)


#origin.plot(ax=axes[0],colormap='Dark2',title="cumulative total # of international transfers, by source country",xticks=[1995,2000,2005,2010,2015])
destination.plot(ax=axes,colormap='Dark2',title="# of international transfers, by destination country",xlim=(1995,2015))
#destination2.plot(ax=axes,colormap='Dark2')

plt.subplots_adjust(wspace=0,hspace=0.1)

#fig.savefig('intl_transfers_by_country_cyprus_etc.png')


#ax1.set(sub1)



In [61]:
countrylist=['Romania','Cyprus','Greece']

origin=nonloans[('Italy'==nonloans['toCountry'])&(nonloans['fromCountry'].isin(['Argentina','Brazil']))] \
    .rename(columns={'fromCountry':'Country'}).groupby(['season','Country'])['id'].count().unstack() \
    .cumsum()
destination=nonloans[(nonloans['toCountry']!=nonloans['fromCountry'])&(nonloans['toCountry'].isin(['Greece','Cyprus','Romania','Bulgaria','Hungary','Poland']))] \
    .rename(columns={'toCountry':'Country'}).groupby(['season','Country'])['id'].count().unstack()    .rolling(window=3,center=False).mean() 
    #.cumsum() \

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(20,5),sharex=True,sharey=False)


origin.plot(ax=axes[0],colormap='Dark2',title="cumulative total # of international transfers, by source country",xticks=[1995,2000,2005,2010,2015])
destination.plot(ax=axes[1],colormap='Dark2',title="cumulative total # of international transfers, by destination country",xlim=(1995,2015))

plt.subplots_adjust(wspace=0,hspace=0.1)

#fig.savefig('intl_transfers_by_country_cyprus_etc.png')


#ax1.set(sub1)



In [62]:
destination


Out[62]:
Country Bulgaria Cyprus Greece Hungary Poland Romania
season
1985 NaN NaN NaN NaN NaN NaN
1986 NaN NaN NaN NaN NaN NaN
1987 NaN NaN NaN NaN 1.333333 1.666667
1988 NaN NaN NaN NaN 1.333333 2.000000
1989 NaN NaN NaN NaN 1.000000 1.666667
1990 NaN NaN 1.000000 NaN 1.000000 1.333333
1991 NaN NaN 1.333333 NaN 1.333333 1.333333
1992 NaN NaN 3.000000 NaN 2.000000 2.333333
1993 NaN NaN 4.666667 NaN 3.666667 3.666667
1994 NaN NaN 6.333333 NaN 5.000000 5.666667
1995 NaN NaN 8.333333 3.000000 6.333333 8.333333
1996 NaN NaN 10.666667 4.333333 9.666667 11.333333
1997 2.333333 2.333333 15.666667 5.000000 15.000000 13.000000
1998 5.333333 2.666667 20.666667 5.666667 17.000000 13.666667
1999 8.333333 NaN 36.333333 10.666667 17.000000 14.666667
2000 12.000000 NaN 45.000000 16.000000 19.333333 18.000000
2001 18.000000 NaN 57.333333 25.666667 26.000000 20.666667
2002 23.000000 20.333333 66.333333 30.333333 37.333333 29.666667
2003 29.666667 29.666667 77.333333 34.666667 48.000000 37.000000
2004 29.000000 53.333333 99.666667 38.666667 63.333333 58.666667
2005 37.000000 73.666667 135.666667 56.333333 73.000000 70.666667
2006 39.000000 100.333333 177.333333 67.666667 85.333333 98.333333
2007 55.000000 126.333333 198.333333 83.333333 92.666667 127.000000
2008 71.333333 151.000000 198.333333 90.000000 98.000000 157.333333
2009 88.666667 175.333333 194.000000 118.000000 104.333333 160.666667
2010 95.666667 187.000000 197.333333 127.666667 132.333333 157.000000
2011 105.000000 182.666667 177.333333 144.000000 155.000000 157.000000
2012 130.666667 182.666667 174.666667 157.000000 163.333333 177.333333
2013 143.666667 173.666667 198.333333 158.333333 158.333333 201.000000
2014 135.333333 178.333333 245.333333 154.000000 164.000000 218.666667
2015 113.000000 167.000000 250.666667 124.000000 174.000000 227.666667

In [53]:
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10,5),sharex=True,sharey=False)

country='Netherlands'

w=nonloans[(nonloans['toCountry']!=nonloans['fromCountry'])&(nonloans['feeValue']>0)&(nonloans['season']>=2000)]
t=nonloans[(nonloans['toCountry']!=nonloans['fromCountry'])&(nonloans['feeValue']>0)&(nonloans['fromCountry'].str.startswith(country))&(nonloans['season']>=2000)]
s=nonloans[(nonloans['toCountry']!=nonloans['fromCountry'])&(nonloans['feeValue']>0)&(nonloans['toCountry'].str.startswith(country))&(nonloans['season']>=2000)]

w.groupby('season')['ageFloat'].mean().rolling(window=1,center=False).mean().rename('average transfer age').plot.line(ax=axes, ls='--', legend=True)
t.groupby('season')['ageFloat'].mean().rolling(window=1,center=False).mean().rename('exit age').plot.line(ax=axes,legend=True)
s.groupby('season')['ageFloat'].mean().rolling(window=1,center=False).mean().rename('entry age').plot.line(ax=axes, ls=':', legend=True)
#t[['season','Age']].groupby('season').describe()['Age'].unstack(level=0).ix['min':'max'].plot.box(ax=axes)

#(t.groupby('season')['Age'].count()).rolling(window=2,center=False).mean().plot.line(ax=axes)


Out[53]:
<matplotlib.axes._subplots.AxesSubplot at 0x119fd0410>

In [199]:
#139150 
len(nonloans[nonloans['feeValue']!=0])


Out[199]:
87541

In [9]:
nonloans[nonloans['toCountry'].fillna('').str.startswith('Czech')].groupby('Nationality')['id'].count().sort_values(ascending=False)[:10]


Out[9]:
Nationality
Czech Republic        3783
Slovakia               489
Bosnia-Herzegovina      73
Brazil                  55
Serbia                  46
Croatia                 43
France                  20
Macedonia               19
Cote d'Ivoire           13
Slovenia                12
Name: id, dtype: int64

In [45]:
nonloans[(nonloans.feeValue>0)&(nonloans.fromCountry<>nonloans.toCountry)&nonloans.season.between(2000,2005)].groupby('fromCountry')['feeValue'].count().sort_values(ascending=False).plot.pie(figsize=(10,10))


Out[45]:
<matplotlib.axes._subplots.AxesSubplot at 0x11205bcd0>

In [23]:
origin=nonloans[('Romania'==nonloans['toCountry'])&(nonloans['Nationality'].isin(['Portugal','Italy','France','Russia','Brazil']))] \
    .rename(columns={'fromCountry':'Country'}).groupby(['season','Nationality'])['id'].count().unstack() \
    .cumsum()

destination=nonloans[('Romania'==nonloans['fromCountry'])&(nonloans['Nationality'].isin(['Portugal','Italy','France','Russia','Brazil']))] \
    .rename(columns={'toCountry':'Country'}).groupby(['season','Nationality'])['id'].count().unstack() \
    .cumsum()
    
    
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10,10),sharex=True,sharey=False)


origin.plot(ax=axes[0],colormap='Dark2',title="cumulative total # of intl transfers to Romania, by player nationality",xticks=[1995,2000,2005,2010,2015])
destination.plot(ax=axes[1],colormap='Dark2',title="cumulative total # of intl transfers from Romania, by player nationality",xlim=(1995,2015))

plt.subplots_adjust(wspace=0,hspace=0.1)

fig.savefig('intl_transfers_by_nationality_romania.png')


#ax1.set(sub1)



In [24]:
countrylist=['Brazil','England','Spain','Germany','Italy','France','Netherlands','Portugal','Russia']

origin=loans[(loans['fromCountry']!=loans['toCountry'])&(loans['fromCountry'].isin(countrylist))] \
    .rename(columns={'fromCountry':'Country'}).groupby(['season','Country'])['id'].count().unstack() \
    .cumsum()
destination=loans[(loans['fromCountry']!=loans['toCountry'])&(loans['toCountry'].isin(countrylist))] \
    .rename(columns={'toCountry':'Country'}).groupby(['season','Country'])['id'].count().unstack() \
    .cumsum()


fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10,10),sharex=True,sharey=False)


origin.plot(ax=axes[0],colormap='Set1',title="cumulative total # of international loans, by source country",xticks=[1995,2000,2005,2010,2015])
destination.plot(ax=axes[1],colormap='Set1',title="cumulative total # of international loans, by destination country",xlim=(1995,2015))

plt.subplots_adjust(wspace=0,hspace=0.1)

fig.savefig('intl_loans_by_country.png')


#ax1.set(sub1)



In [25]:
countrylist=['Brazil','England','Spain','Germany','Italy','France','Netherlands','Portugal','Russia']

origin=nonloans[(nonloans['fromCountry']!=nonloans['toCountry'])&(nonloans['fromCountry'].isin(countrylist))&(nonloans['Age']<27)] \
    .rename(columns={'fromCountry':'Country'}).groupby(['season','Country'])['id'].count().unstack() \
    .cumsum()
destination=nonloans[(nonloans['fromCountry']!=nonloans['toCountry'])&(nonloans['toCountry'].isin(countrylist))&(nonloans['Age']<27)] \
    .rename(columns={'toCountry':'Country'}).groupby(['season','Country'])['id'].count().unstack() \
    .cumsum()


fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10,5),sharex=True,sharey=False)


origin.plot(ax=axes,colormap='Set1',title="cumulative total # of under-27 international transfers, by source country",xticks=[1995,2000,2005,2010,2015])
#destination.plot(ax=axes[1],colormap='Set1',title="cumulative total # of under-27 international transfers, by destination country",xlim=(1995,2015))

plt.subplots_adjust(wspace=0,hspace=0.1)

fig.savefig('intl_u27_transfers_by_country.png')


#ax1.set(sub1)



In [26]:
nonloans[(nonloans['season'].between(2000,2015))].groupby('toLeague')['feeValue'].sum().sort_values(ascending=False)[:100].plot.pie()


Out[26]:
<matplotlib.axes._subplots.AxesSubplot at 0x11d2b7150>

In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
from urllib2 import urlopen, Request
from bs4 import BeautifulSoup
from pandas import DataFrame
import pandas as pd
import numpy as np
import re
import os.path 
import datetime 
import math
import matplotlib.pyplot as plt

transfers = pd.read_csv('../data/merged/transfers.csv')
loans = pd.read_csv('../data/merged/loans.csv')
nonloans = pd.read_csv('../data/merged/nonloans.csv')
players=pd.read_csv('../data/merged/players.csv')

leagueCodeMapping = {'GB1':'Premier League (England)','FR1':'Ligue 1 (France)',
                                                         'ES1':'La Liga (Spain)','IT1':'Serie A (Italy)','L1':'Bundesliga (Germany)',
                                                         'PO1':'Primeira Liga (Portugal)','NL1':'Eredivisie (Netherlands)'}

teamIdMap = pd.concat([transfers[['toTeamId','toTeamName','season']].rename(columns={'toTeamId':'teamId','toTeamName':'teamName'}),
    transfers[['fromTeamId','fromTeamName','season']].rename(columns={'fromTeamId':'teamId','fromTeamName':'teamName'})],ignore_index=True) \
    .sort_values('season',ascending=False).drop_duplicates('teamId').set_index('teamId')['teamName'].to_dict()
    
agentIdMap = transfers[['playersAgentId','playersAgent']].sort_values('playersAgent',ascending=False).drop_duplicates('playersAgentId').set_index('playersAgentId')['playersAgent'].to_dict()


/Users/sunkevin/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2871: DtypeWarning: Columns (40,45,47,49,51,52) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
/Users/sunkevin/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2871: DtypeWarning: Columns (45,47) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
/Users/sunkevin/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2871: DtypeWarning: Columns (45,47,49,52) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
/Users/sunkevin/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2871: DtypeWarning: Columns (32,34) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

In [2]:
print len(players)
print len(transfers)
print transfers['id'].nunique()
print transfers[transfers.feeValue>0]['id'].nunique()
print len(nonloans)
print len(loans)


57678
346296
55775
12736
302872
43424
#before 27081 174779 24813 148185 26594
#after 41774 265726 40800 11342 230715 35011
#more after 50831 318818 49408 12501 277815 41003
#even more 57678 346296 55775 12736 302872 43424

In [43]:
ew = pd.ExcelWriter('../sample.xls', encoding='utf-8')
transfers[transfers.id.isin(transfers['id'].drop_duplicates()[:1000])].to_excel(ew)
ew.save()